Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig21
-rw-r--r--arch/alpha/Kconfig8
-rw-r--r--arch/alpha/include/asm/Kbuild1
-rw-r--r--arch/alpha/include/asm/io.h2
-rw-r--r--arch/alpha/include/asm/rwsem.h211
-rw-r--r--arch/alpha/include/asm/syscall.h2
-rw-r--r--arch/alpha/include/asm/tlb.h6
-rw-r--r--arch/alpha/include/uapi/asm/sockios.h4
-rw-r--r--arch/alpha/kernel/pci_iommu.c20
-rw-r--r--arch/arc/Kconfig3
-rw-r--r--arch/arc/boot/dts/hsdk.dts13
-rw-r--r--arch/arc/configs/haps_hs_defconfig1
-rw-r--r--arch/arc/configs/haps_hs_smp_defconfig1
-rw-r--r--arch/arc/configs/nsim_700_defconfig1
-rw-r--r--arch/arc/configs/nsim_hs_defconfig1
-rw-r--r--arch/arc/configs/nsim_hs_smp_defconfig1
-rw-r--r--arch/arc/configs/nsimosci_defconfig1
-rw-r--r--arch/arc/configs/nsimosci_hs_defconfig1
-rw-r--r--arch/arc/configs/nsimosci_hs_smp_defconfig1
-rw-r--r--arch/arc/include/asm/Kbuild1
-rw-r--r--arch/arc/include/asm/elf.h6
-rw-r--r--arch/arc/include/asm/syscall.h11
-rw-r--r--arch/arc/include/asm/tlb.h32
-rw-r--r--arch/arc/lib/memset-archs.S4
-rw-r--r--arch/arc/mm/cache.c31
-rw-r--r--arch/arm/Kconfig4
-rw-r--r--arch/arm/boot/dts/ls1021a.dtsi28
-rw-r--r--arch/arm/boot/dts/omap4-droid4-xt894.dts27
-rw-r--r--arch/arm/boot/dts/rk3288.dtsi7
-rw-r--r--arch/arm/configs/aspeed_g4_defconfig2
-rw-r--r--arch/arm/configs/aspeed_g5_defconfig2
-rw-r--r--arch/arm/configs/at91_dt_defconfig1
-rw-r--r--arch/arm/configs/clps711x_defconfig1
-rw-r--r--arch/arm/configs/efm32_defconfig1
-rw-r--r--arch/arm/configs/ezx_defconfig1
-rw-r--r--arch/arm/configs/h3600_defconfig1
-rw-r--r--arch/arm/configs/imote2_defconfig1
-rw-r--r--arch/arm/configs/moxart_defconfig1
-rw-r--r--arch/arm/configs/multi_v4t_defconfig1
-rw-r--r--arch/arm/configs/omap1_defconfig1
-rw-r--r--arch/arm/configs/stm32_defconfig1
-rw-r--r--arch/arm/configs/u300_defconfig1
-rw-r--r--arch/arm/configs/vexpress_defconfig1
-rw-r--r--arch/arm/crypto/aes-neonbs-glue.c2
-rw-r--r--arch/arm/crypto/chacha-neon-glue.c5
-rw-r--r--arch/arm/crypto/crc32-ce-glue.c5
-rw-r--r--arch/arm/crypto/crct10dif-ce-glue.c3
-rw-r--r--arch/arm/crypto/ghash-ce-glue.c10
-rw-r--r--arch/arm/crypto/nhpoly1305-neon-glue.c3
-rw-r--r--arch/arm/crypto/sha1-ce-glue.c5
-rw-r--r--arch/arm/crypto/sha1_neon_glue.c5
-rw-r--r--arch/arm/crypto/sha2-ce-glue.c5
-rw-r--r--arch/arm/crypto/sha256_neon_glue.c5
-rw-r--r--arch/arm/crypto/sha512-neon-glue.c5
-rw-r--r--arch/arm/include/asm/Kbuild2
-rw-r--r--arch/arm/include/asm/arch_timer.h18
-rw-r--r--arch/arm/include/asm/cp15.h2
-rw-r--r--arch/arm/include/asm/io.h2
-rw-r--r--arch/arm/include/asm/stage2_pgtable.h4
-rw-r--r--arch/arm/include/asm/syscall.h2
-rw-r--r--arch/arm/include/asm/tlb.h255
-rw-r--r--arch/arm/kernel/dma-isa.c8
-rw-r--r--arch/arm/kernel/signal.c3
-rw-r--r--arch/arm/kernel/stacktrace.c6
-rw-r--r--arch/arm/kvm/Kconfig1
-rw-r--r--arch/arm/mach-ep93xx/edb93xx.c13
-rw-r--r--arch/arm/mach-ep93xx/simone.c11
-rw-r--r--arch/arm/mach-ep93xx/ts72xx.c25
-rw-r--r--arch/arm/mach-ep93xx/vision_ep9307.c15
-rw-r--r--arch/arm/mach-imx/pm-imx6.c2
-rw-r--r--arch/arm/mach-mvebu/kirkwood.c3
-rw-r--r--arch/arm/mach-omap2/clock.c3
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c4
-rw-r--r--arch/arm/mach-rpc/dma.c8
-rw-r--r--arch/arm/mm/alignment.c2
-rw-r--r--arch/arm/nwfpe/fpmodule.c2
-rw-r--r--arch/arm/plat-omap/dma.c1
-rw-r--r--arch/arm/vdso/vgettimeofday.c5
-rw-r--r--arch/arm64/Kconfig137
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi25
-rw-r--r--arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h2
-rw-r--r--arch/arm64/crypto/aes-ce-ccm-glue.c9
-rw-r--r--arch/arm64/crypto/aes-ce-glue.c5
-rw-r--r--arch/arm64/crypto/aes-glue.c6
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c6
-rw-r--r--arch/arm64/crypto/chacha-neon-glue.c7
-rw-r--r--arch/arm64/crypto/crct10dif-ce-glue.c9
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c25
-rw-r--r--arch/arm64/crypto/nhpoly1305-neon-glue.c5
-rw-r--r--arch/arm64/crypto/sha1-ce-glue.c7
-rw-r--r--arch/arm64/crypto/sha2-ce-glue.c7
-rw-r--r--arch/arm64/crypto/sha256-glue.c9
-rw-r--r--arch/arm64/crypto/sha3-ce-glue.c5
-rw-r--r--arch/arm64/crypto/sha512-ce-glue.c7
-rw-r--r--arch/arm64/crypto/sm3-ce-glue.c7
-rw-r--r--arch/arm64/crypto/sm4-ce-glue.c5
-rw-r--r--arch/arm64/include/asm/Kbuild2
-rw-r--r--arch/arm64/include/asm/arch_timer.h119
-rw-r--r--arch/arm64/include/asm/assembler.h8
-rw-r--r--arch/arm64/include/asm/barrier.h24
-rw-r--r--arch/arm64/include/asm/brk-imm.h5
-rw-r--r--arch/arm64/include/asm/cpucaps.h3
-rw-r--r--arch/arm64/include/asm/cpufeature.h25
-rw-r--r--arch/arm64/include/asm/cputype.h2
-rw-r--r--arch/arm64/include/asm/debug-monitors.h25
-rw-r--r--arch/arm64/include/asm/elf.h6
-rw-r--r--arch/arm64/include/asm/esr.h7
-rw-r--r--arch/arm64/include/asm/futex.h59
-rw-r--r--arch/arm64/include/asm/hwcap.h60
-rw-r--r--arch/arm64/include/asm/insn.h8
-rw-r--r--arch/arm64/include/asm/io.h2
-rw-r--r--arch/arm64/include/asm/irqflags.h8
-rw-r--r--arch/arm64/include/asm/kprobes.h2
-rw-r--r--arch/arm64/include/asm/kvm_hyp.h4
-rw-r--r--arch/arm64/include/asm/memory.h2
-rw-r--r--arch/arm64/include/asm/pgalloc.h12
-rw-r--r--arch/arm64/include/asm/pgtable.h5
-rw-r--r--arch/arm64/include/asm/pointer_auth.h2
-rw-r--r--arch/arm64/include/asm/processor.h8
-rw-r--r--arch/arm64/include/asm/ptrace.h22
-rw-r--r--arch/arm64/include/asm/sdei.h2
-rw-r--r--arch/arm64/include/asm/signal32.h2
-rw-r--r--arch/arm64/include/asm/stage2_pgtable.h4
-rw-r--r--arch/arm64/include/asm/syscall.h4
-rw-r--r--arch/arm64/include/asm/sysreg.h59
-rw-r--r--arch/arm64/include/asm/system_misc.h1
-rw-r--r--arch/arm64/include/asm/tlb.h6
-rw-r--r--arch/arm64/include/asm/vdso_datapage.h1
-rw-r--r--arch/arm64/include/asm/vmap_stack.h2
-rw-r--r--arch/arm64/include/uapi/asm/hwcap.h13
-rw-r--r--arch/arm64/kernel/Makefile11
-rw-r--r--arch/arm64/kernel/acpi_numa.c2
-rw-r--r--arch/arm64/kernel/asm-offsets.c2
-rw-r--r--arch/arm64/kernel/cpu_errata.c256
-rw-r--r--arch/arm64/kernel/cpu_ops.c1
-rw-r--r--arch/arm64/kernel/cpufeature.c193
-rw-r--r--arch/arm64/kernel/cpuinfo.c9
-rw-r--r--arch/arm64/kernel/debug-monitors.c115
-rw-r--r--arch/arm64/kernel/entry.S19
-rw-r--r--arch/arm64/kernel/fpsimd.c4
-rw-r--r--arch/arm64/kernel/head.S12
-rw-r--r--arch/arm64/kernel/insn.c40
-rw-r--r--arch/arm64/kernel/kgdb.c30
-rw-r--r--arch/arm64/kernel/kuser32.S66
-rw-r--r--arch/arm64/kernel/perf_event.c4
-rw-r--r--arch/arm64/kernel/probes/kprobes.c22
-rw-r--r--arch/arm64/kernel/probes/uprobes.c19
-rw-r--r--arch/arm64/kernel/signal32.c3
-rw-r--r--arch/arm64/kernel/sigreturn32.S46
-rw-r--r--arch/arm64/kernel/smp.c4
-rw-r--r--arch/arm64/kernel/stacktrace.c4
-rw-r--r--arch/arm64/kernel/sys.c2
-rw-r--r--arch/arm64/kernel/traps.c33
-rw-r--r--arch/arm64/kernel/vdso.c139
-rw-r--r--arch/arm64/kernel/vdso/Makefile19
-rw-r--r--arch/arm64/kernel/vdso/gettimeofday.S22
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/lib/Makefile2
-rw-r--r--arch/arm64/mm/fault.c16
-rw-r--r--arch/arm64/mm/init.c5
-rw-r--r--arch/arm64/mm/mmu.c47
-rw-r--r--arch/arm64/mm/numa.c25
-rw-r--r--arch/arm64/mm/proc.S34
-rw-r--r--arch/arm64/net/bpf_jit.h8
-rw-r--r--arch/arm64/net/bpf_jit_comp.c29
-rw-r--r--arch/c6x/Kconfig4
-rw-r--r--arch/c6x/include/asm/Kbuild1
-rw-r--r--arch/c6x/include/asm/syscall.h7
-rw-r--r--arch/c6x/include/asm/tlb.h2
-rw-r--r--arch/csky/Kconfig11
-rw-r--r--arch/csky/Makefile2
-rw-r--r--arch/csky/abiv1/inc/abi/ckmmu.h24
-rw-r--r--arch/csky/abiv1/inc/abi/entry.h41
-rw-r--r--arch/csky/abiv1/inc/abi/regdef.h5
-rw-r--r--arch/csky/abiv2/cacheflush.c13
-rw-r--r--arch/csky/abiv2/inc/abi/ckmmu.h34
-rw-r--r--arch/csky/abiv2/inc/abi/entry.h87
-rw-r--r--arch/csky/abiv2/inc/abi/regdef.h5
-rw-r--r--arch/csky/abiv2/mcount.S39
-rw-r--r--arch/csky/abiv2/memmove.S6
-rw-r--r--arch/csky/include/asm/Kbuild2
-rw-r--r--arch/csky/include/asm/ftrace.h18
-rw-r--r--arch/csky/include/asm/mmu_context.h17
-rw-r--r--arch/csky/include/asm/page.h39
-rw-r--r--arch/csky/include/asm/perf_event.h8
-rw-r--r--arch/csky/include/asm/ptrace.h41
-rw-r--r--arch/csky/include/asm/syscall.h11
-rw-r--r--arch/csky/include/asm/thread_info.h27
-rw-r--r--arch/csky/include/asm/unistd.h2
-rw-r--r--arch/csky/include/uapi/asm/perf_regs.h51
-rw-r--r--arch/csky/include/uapi/asm/ptrace.h15
-rw-r--r--arch/csky/kernel/Makefile2
-rw-r--r--arch/csky/kernel/atomic.S26
-rw-r--r--arch/csky/kernel/entry.S77
-rw-r--r--arch/csky/kernel/ftrace.c148
-rw-r--r--arch/csky/kernel/head.S60
-rw-r--r--arch/csky/kernel/perf_callchain.c119
-rw-r--r--arch/csky/kernel/perf_regs.c40
-rw-r--r--arch/csky/kernel/ptrace.c51
-rw-r--r--arch/csky/kernel/setup.c12
-rw-r--r--arch/csky/kernel/signal.c348
-rw-r--r--arch/csky/mm/fault.c15
-rw-r--r--arch/h8300/Kconfig3
-rw-r--r--arch/h8300/include/asm/Kbuild1
-rw-r--r--arch/h8300/include/asm/syscall.h6
-rw-r--r--arch/h8300/include/asm/tlb.h2
-rw-r--r--arch/hexagon/Kconfig6
-rw-r--r--arch/hexagon/include/asm/Kbuild2
-rw-r--r--arch/hexagon/include/asm/elf.h6
-rw-r--r--arch/hexagon/include/asm/io.h2
-rw-r--r--arch/hexagon/include/asm/syscall.h8
-rw-r--r--arch/hexagon/include/asm/tlb.h12
-rw-r--r--arch/ia64/Kconfig4
-rw-r--r--arch/ia64/include/asm/io.h17
-rw-r--r--arch/ia64/include/asm/machvec.h13
-rw-r--r--arch/ia64/include/asm/machvec_sn2.h2
-rw-r--r--arch/ia64/include/asm/mmiowb.h25
-rw-r--r--arch/ia64/include/asm/rwsem.h172
-rw-r--r--arch/ia64/include/asm/spinlock.h2
-rw-r--r--arch/ia64/include/asm/syscall.h2
-rw-r--r--arch/ia64/include/asm/tlb.h259
-rw-r--r--arch/ia64/include/asm/tlbflush.h25
-rw-r--r--arch/ia64/include/uapi/asm/sockios.h21
-rw-r--r--arch/ia64/kernel/acpi.c14
-rw-r--r--arch/ia64/kernel/setup.c4
-rw-r--r--arch/ia64/mm/tlb.c23
-rw-r--r--arch/ia64/sn/kernel/sn2/sn2_smp.c7
-rw-r--r--arch/m68k/Kconfig9
-rw-r--r--arch/m68k/amiga/cia.c9
-rw-r--r--arch/m68k/amiga/config.c49
-rw-r--r--arch/m68k/apollo/config.c7
-rw-r--r--arch/m68k/atari/ataints.c4
-rw-r--r--arch/m68k/atari/config.c2
-rw-r--r--arch/m68k/atari/time.c70
-rw-r--r--arch/m68k/bvme6000/config.c77
-rw-r--r--arch/m68k/configs/amcore_defconfig1
-rw-r--r--arch/m68k/configs/amiga_defconfig14
-rw-r--r--arch/m68k/configs/apollo_defconfig14
-rw-r--r--arch/m68k/configs/atari_defconfig14
-rw-r--r--arch/m68k/configs/bvme6000_defconfig14
-rw-r--r--arch/m68k/configs/hp300_defconfig14
-rw-r--r--arch/m68k/configs/m5475evb_defconfig1
-rw-r--r--arch/m68k/configs/mac_defconfig14
-rw-r--r--arch/m68k/configs/multi_defconfig14
-rw-r--r--arch/m68k/configs/mvme147_defconfig14
-rw-r--r--arch/m68k/configs/mvme16x_defconfig14
-rw-r--r--arch/m68k/configs/q40_defconfig14
-rw-r--r--arch/m68k/configs/stmark2_defconfig1
-rw-r--r--arch/m68k/configs/sun3_defconfig14
-rw-r--r--arch/m68k/configs/sun3x_defconfig14
-rw-r--r--arch/m68k/hp300/config.c1
-rw-r--r--arch/m68k/hp300/time.c73
-rw-r--r--arch/m68k/hp300/time.h1
-rw-r--r--arch/m68k/include/asm/Kbuild1
-rw-r--r--arch/m68k/include/asm/io_mm.h2
-rw-r--r--arch/m68k/include/asm/mvme147hw.h2
-rw-r--r--arch/m68k/include/asm/syscall.h12
-rw-r--r--arch/m68k/include/asm/tlb.h14
-rw-r--r--arch/m68k/mac/config.c3
-rw-r--r--arch/m68k/mac/via.c146
-rw-r--r--arch/m68k/mvme147/config.c73
-rw-r--r--arch/m68k/mvme16x/config.c97
-rw-r--r--arch/m68k/q40/config.c9
-rw-r--r--arch/m68k/q40/q40ints.c19
-rw-r--r--arch/m68k/sun3/config.c2
-rw-r--r--arch/m68k/sun3/intersil.c7
-rw-r--r--arch/m68k/sun3/sun3ints.c3
-rw-r--r--arch/m68k/sun3x/config.c1
-rw-r--r--arch/m68k/sun3x/time.c21
-rw-r--r--arch/m68k/sun3x/time.h1
-rw-r--r--arch/microblaze/Kconfig7
-rw-r--r--arch/microblaze/include/asm/Kbuild1
-rw-r--r--arch/microblaze/include/asm/syscall.h2
-rw-r--r--arch/microblaze/include/asm/tlb.h9
-rw-r--r--arch/microblaze/mm/pgtable.c2
-rw-r--r--arch/mips/Kconfig71
-rw-r--r--arch/mips/alchemy/common/clock.c2
-rw-r--r--arch/mips/bcm47xx/Kconfig8
-rw-r--r--arch/mips/bcm63xx/boards/Kconfig2
-rw-r--r--arch/mips/configs/ar7_defconfig1
-rw-r--r--arch/mips/configs/decstation_defconfig1
-rw-r--r--arch/mips/configs/decstation_r4k_defconfig1
-rw-r--r--arch/mips/configs/generic_defconfig1
-rw-r--r--arch/mips/configs/loongson1b_defconfig1
-rw-r--r--arch/mips/configs/loongson1c_defconfig1
-rw-r--r--arch/mips/configs/rb532_defconfig1
-rw-r--r--arch/mips/configs/rbtx49xx_defconfig1
-rw-r--r--arch/mips/include/asm/bootinfo.h1
-rw-r--r--arch/mips/include/asm/io.h3
-rw-r--r--arch/mips/include/asm/jump_label.h15
-rw-r--r--arch/mips/include/asm/mmiowb.h11
-rw-r--r--arch/mips/include/asm/spinlock.h15
-rw-r--r--arch/mips/include/asm/syscall.h6
-rw-r--r--arch/mips/include/asm/tlb.h17
-rw-r--r--arch/mips/include/asm/uasm.h8
-rw-r--r--arch/mips/include/uapi/asm/inst.h6
-rw-r--r--arch/mips/include/uapi/asm/sockios.h4
-rw-r--r--arch/mips/kernel/entry.S5
-rw-r--r--arch/mips/kernel/jump_label.c30
-rw-r--r--arch/mips/kernel/prom.c18
-rw-r--r--arch/mips/kernel/ptrace.c2
-rw-r--r--arch/mips/kernel/setup.c129
-rw-r--r--arch/mips/kernel/traps.c63
-rw-r--r--arch/mips/kvm/Kconfig1
-rw-r--r--arch/mips/kvm/emulate.c4
-rw-r--r--arch/mips/mm/uasm-mips.c14
-rw-r--r--arch/mips/mm/uasm.c39
-rw-r--r--arch/mips/net/Makefile1
-rw-r--r--arch/mips/net/bpf_jit.c1270
-rw-r--r--arch/mips/net/bpf_jit_asm.S285
-rw-r--r--arch/mips/net/ebpf_jit.c196
-rw-r--r--arch/mips/pic32/Kconfig8
-rw-r--r--arch/mips/vdso/Makefile4
-rw-r--r--arch/nds32/Kconfig3
-rw-r--r--arch/nds32/include/asm/Kbuild1
-rw-r--r--arch/nds32/include/asm/elf.h3
-rw-r--r--arch/nds32/include/asm/io.h2
-rw-r--r--arch/nds32/include/asm/syscall.h9
-rw-r--r--arch/nds32/include/asm/tlb.h16
-rw-r--r--arch/nds32/include/asm/tlbflush.h1
-rw-r--r--arch/nios2/Kconfig4
-rw-r--r--arch/nios2/include/asm/Kbuild1
-rw-r--r--arch/nios2/include/asm/syscall.h6
-rw-r--r--arch/nios2/include/asm/tlb.h14
-rw-r--r--arch/openrisc/Kconfig7
-rw-r--r--arch/openrisc/include/asm/Kbuild1
-rw-r--r--arch/openrisc/include/asm/syscall.h2
-rw-r--r--arch/openrisc/include/asm/tlb.h8
-rw-r--r--arch/parisc/Kconfig23
-rw-r--r--arch/parisc/boot/compressed/head.S6
-rw-r--r--arch/parisc/boot/compressed/misc.c31
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig1
-rw-r--r--arch/parisc/include/asm/Kbuild2
-rw-r--r--arch/parisc/include/asm/assembly.h6
-rw-r--r--arch/parisc/include/asm/cache.h10
-rw-r--r--arch/parisc/include/asm/fixmap.h19
-rw-r--r--arch/parisc/include/asm/hardware.h2
-rw-r--r--arch/parisc/include/asm/io.h2
-rw-r--r--arch/parisc/include/asm/jump_label.h43
-rw-r--r--arch/parisc/include/asm/kgdb.h68
-rw-r--r--arch/parisc/include/asm/kprobes.h55
-rw-r--r--arch/parisc/include/asm/mmzone.h58
-rw-r--r--arch/parisc/include/asm/page.h4
-rw-r--r--arch/parisc/include/asm/patch.h11
-rw-r--r--arch/parisc/include/asm/pgalloc.h1
-rw-r--r--arch/parisc/include/asm/pgtable.h69
-rw-r--r--arch/parisc/include/asm/ptrace.h13
-rw-r--r--arch/parisc/include/asm/sparsemem.h14
-rw-r--r--arch/parisc/include/asm/spinlock.h4
-rw-r--r--arch/parisc/include/asm/syscall.h4
-rw-r--r--arch/parisc/include/asm/tlb.h18
-rw-r--r--arch/parisc/include/asm/tlbflush.h24
-rw-r--r--arch/parisc/include/uapi/asm/sockios.h14
-rw-r--r--arch/parisc/kernel/Makefile6
-rw-r--r--arch/parisc/kernel/cache.c15
-rw-r--r--arch/parisc/kernel/drivers.c25
-rw-r--r--arch/parisc/kernel/entry.S51
-rw-r--r--arch/parisc/kernel/head.S17
-rw-r--r--arch/parisc/kernel/inventory.c7
-rw-r--r--arch/parisc/kernel/jump_label.c55
-rw-r--r--arch/parisc/kernel/kgdb.c209
-rw-r--r--arch/parisc/kernel/kprobes.c291
-rw-r--r--arch/parisc/kernel/pacache.S43
-rw-r--r--arch/parisc/kernel/parisc_ksyms.c6
-rw-r--r--arch/parisc/kernel/patch.c77
-rw-r--r--arch/parisc/kernel/process.c1
-rw-r--r--arch/parisc/kernel/processor.c3
-rw-r--r--arch/parisc/kernel/ptrace.c35
-rw-r--r--arch/parisc/kernel/setup.c6
-rw-r--r--arch/parisc/kernel/stacktrace.c5
-rw-r--r--arch/parisc/kernel/sys_parisc.c3
-rw-r--r--arch/parisc/kernel/syscall.S18
-rw-r--r--arch/parisc/kernel/traps.c31
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S3
-rw-r--r--arch/parisc/mm/Makefile2
-rw-r--r--arch/parisc/mm/fixmap.c41
-rw-r--r--arch/parisc/mm/hugetlbpage.c19
-rw-r--r--arch/parisc/mm/init.c118
-rw-r--r--arch/powerpc/Kconfig24
-rw-r--r--arch/powerpc/Kconfig.debug32
-rw-r--r--arch/powerpc/Makefile15
-rw-r--r--arch/powerpc/boot/addnote.c6
-rw-r--r--arch/powerpc/boot/dts/fsl/b4qds.dtsi1
-rw-r--r--arch/powerpc/configs/pseries_defconfig1
-rw-r--r--arch/powerpc/configs/skiroot_defconfig2
-rw-r--r--arch/powerpc/crypto/crc-vpmsum_test.c10
-rw-r--r--arch/powerpc/crypto/crc32c-vpmsum_glue.c4
-rw-r--r--arch/powerpc/crypto/crct10dif-vpmsum_glue.c4
-rw-r--r--arch/powerpc/include/asm/Kbuild2
-rw-r--r--arch/powerpc/include/asm/book3s/32/kup.h145
-rw-r--r--arch/powerpc/include/asm/book3s/32/mmu-hash.h9
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgalloc.h41
-rw-r--r--arch/powerpc/include/asm/book3s/32/pgtable.h13
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-4k.h23
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash-64k.h21
-rw-r--r--arch/powerpc/include/asm/book3s/64/hash.h95
-rw-r--r--arch/powerpc/include/asm/book3s/64/hugetlb.h72
-rw-r--r--arch/powerpc/include/asm/book3s/64/kup-radix.h108
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu-hash.h70
-rw-r--r--arch/powerpc/include/asm/book3s/64/mmu.h104
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgalloc.h52
-rw-r--r--arch/powerpc/include/asm/book3s/64/pgtable.h12
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-4k.h9
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix-64k.h8
-rw-r--r--arch/powerpc/include/asm/book3s/64/radix.h40
-rw-r--r--arch/powerpc/include/asm/book3s/64/slice.h13
-rw-r--r--arch/powerpc/include/asm/cpuidle.h19
-rw-r--r--arch/powerpc/include/asm/drmem.h21
-rw-r--r--arch/powerpc/include/asm/exception-64s.h2
-rw-r--r--arch/powerpc/include/asm/fadump.h1
-rw-r--r--arch/powerpc/include/asm/feature-fixups.h3
-rw-r--r--arch/powerpc/include/asm/fixmap.h5
-rw-r--r--arch/powerpc/include/asm/futex.h4
-rw-r--r--arch/powerpc/include/asm/hugetlb.h87
-rw-r--r--arch/powerpc/include/asm/hw_breakpoint.h8
-rw-r--r--arch/powerpc/include/asm/imc-pmu.h39
-rw-r--r--arch/powerpc/include/asm/io.h33
-rw-r--r--arch/powerpc/include/asm/kasan.h40
-rw-r--r--arch/powerpc/include/asm/kup.h73
-rw-r--r--arch/powerpc/include/asm/mce.h97
-rw-r--r--arch/powerpc/include/asm/mmiowb.h18
-rw-r--r--arch/powerpc/include/asm/mmu.h28
-rw-r--r--arch/powerpc/include/asm/mmu_context.h7
-rw-r--r--arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h44
-rw-r--r--arch/powerpc/include/asm/nohash/32/kup-8xx.h58
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu-8xx.h102
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu.h25
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgalloc.h123
-rw-r--r--arch/powerpc/include/asm/nohash/32/pgtable.h13
-rw-r--r--arch/powerpc/include/asm/nohash/32/slice.h2
-rw-r--r--arch/powerpc/include/asm/nohash/64/mmu.h12
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgalloc.h117
-rw-r--r--arch/powerpc/include/asm/nohash/64/pgtable.h12
-rw-r--r--arch/powerpc/include/asm/nohash/64/slice.h12
-rw-r--r--arch/powerpc/include/asm/nohash/hugetlb-book3e.h45
-rw-r--r--arch/powerpc/include/asm/nohash/mmu-book3e.h2
-rw-r--r--arch/powerpc/include/asm/nohash/mmu.h16
-rw-r--r--arch/powerpc/include/asm/nohash/pgalloc.h56
-rw-r--r--arch/powerpc/include/asm/nohash/pte-book3e.h5
-rw-r--r--arch/powerpc/include/asm/opal-api.h18
-rw-r--r--arch/powerpc/include/asm/opal.h9
-rw-r--r--arch/powerpc/include/asm/paca.h46
-rw-r--r--arch/powerpc/include/asm/page.h23
-rw-r--r--arch/powerpc/include/asm/pgalloc.h51
-rw-r--r--arch/powerpc/include/asm/pgtable-be-types.h9
-rw-r--r--arch/powerpc/include/asm/pgtable-types.h9
-rw-r--r--arch/powerpc/include/asm/pgtable.h9
-rw-r--r--arch/powerpc/include/asm/processor.h12
-rw-r--r--arch/powerpc/include/asm/ptrace.h11
-rw-r--r--arch/powerpc/include/asm/reg.h8
-rw-r--r--arch/powerpc/include/asm/reg_booke.h2
-rw-r--r--arch/powerpc/include/asm/slice.h9
-rw-r--r--arch/powerpc/include/asm/sparsemem.h4
-rw-r--r--arch/powerpc/include/asm/spinlock.h17
-rw-r--r--arch/powerpc/include/asm/string.h32
-rw-r--r--arch/powerpc/include/asm/syscall.h10
-rw-r--r--arch/powerpc/include/asm/task_size_64.h2
-rw-r--r--arch/powerpc/include/asm/time.h2
-rw-r--r--arch/powerpc/include/asm/tlb.h18
-rw-r--r--arch/powerpc/include/asm/trace.h16
-rw-r--r--arch/powerpc/include/asm/uaccess.h38
-rw-r--r--arch/powerpc/include/asm/xive.h14
-rw-r--r--arch/powerpc/kernel/Makefile14
-rw-r--r--arch/powerpc/kernel/asm-offsets.c25
-rw-r--r--arch/powerpc/kernel/cacheinfo.c13
-rw-r--r--arch/powerpc/kernel/cputable.c13
-rw-r--r--arch/powerpc/kernel/dbell.c3
-rw-r--r--arch/powerpc/kernel/early_32.c36
-rw-r--r--arch/powerpc/kernel/entry_32.S186
-rw-r--r--arch/powerpc/kernel/entry_64.S35
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S26
-rw-r--r--arch/powerpc/kernel/fadump.c1
-rw-r--r--arch/powerpc/kernel/fpu.S1
-rw-r--r--arch/powerpc/kernel/head_32.S258
-rw-r--r--arch/powerpc/kernel/head_32.h203
-rw-r--r--arch/powerpc/kernel/head_40x.S155
-rw-r--r--arch/powerpc/kernel/head_44x.S12
-rw-r--r--arch/powerpc/kernel/head_64.S4
-rw-r--r--arch/powerpc/kernel/head_8xx.S136
-rw-r--r--arch/powerpc/kernel/head_booke.h131
-rw-r--r--arch/powerpc/kernel/head_fsl_booke.S32
-rw-r--r--arch/powerpc/kernel/hw_breakpoint.c62
-rw-r--r--arch/powerpc/kernel/idle_book3s.S1060
-rw-r--r--arch/powerpc/kernel/irq.c16
-rw-r--r--arch/powerpc/kernel/mce.c106
-rw-r--r--arch/powerpc/kernel/mce_power.c253
-rw-r--r--arch/powerpc/kernel/paca.c12
-rw-r--r--arch/powerpc/kernel/process.c35
-rw-r--r--arch/powerpc/kernel/prom_init.c248
-rw-r--r--arch/powerpc/kernel/prom_init_check.sh12
-rw-r--r--arch/powerpc/kernel/ptrace.c3
-rw-r--r--arch/powerpc/kernel/security.c14
-rw-r--r--arch/powerpc/kernel/setup-common.c116
-rw-r--r--arch/powerpc/kernel/setup_32.c28
-rw-r--r--arch/powerpc/kernel/setup_64.c12
-rw-r--r--arch/powerpc/kernel/signal_64.c27
-rw-r--r--arch/powerpc/kernel/time.c10
-rw-r--r--arch/powerpc/kernel/traps.c8
-rw-r--r--arch/powerpc/kernel/vdso32/Makefile5
-rw-r--r--arch/powerpc/kernel/vdso64/Makefile5
-rw-r--r--arch/powerpc/kernel/vector.S1
-rw-r--r--arch/powerpc/kernel/watchdog.c81
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c6
-rw-r--r--arch/powerpc/kvm/book3s_hv.c7
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c2
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S141
-rw-r--r--arch/powerpc/lib/Makefile19
-rw-r--r--arch/powerpc/lib/checksum_wrappers.c4
-rw-r--r--arch/powerpc/lib/code-patching.c5
-rw-r--r--arch/powerpc/lib/copy_32.S12
-rw-r--r--arch/powerpc/lib/mem_64.S9
-rw-r--r--arch/powerpc/lib/memcpy_64.S4
-rw-r--r--arch/powerpc/mm/Makefile47
-rw-r--r--arch/powerpc/mm/book3s32/Makefile9
-rw-r--r--arch/powerpc/mm/book3s32/hash_low.S (renamed from arch/powerpc/mm/hash_low_32.S)6
-rw-r--r--arch/powerpc/mm/book3s32/mmu.c (renamed from arch/powerpc/mm/ppc_mmu_32.c)94
-rw-r--r--arch/powerpc/mm/book3s32/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_hash32.c)0
-rw-r--r--arch/powerpc/mm/book3s32/tlb.c (renamed from arch/powerpc/mm/tlb_hash32.c)2
-rw-r--r--arch/powerpc/mm/book3s64/Makefile24
-rw-r--r--arch/powerpc/mm/book3s64/hash_4k.c (renamed from arch/powerpc/mm/hash64_4k.c)2
-rw-r--r--arch/powerpc/mm/book3s64/hash_64k.c (renamed from arch/powerpc/mm/hash64_64k.c)2
-rw-r--r--arch/powerpc/mm/book3s64/hash_hugepage.c (renamed from arch/powerpc/mm/hugepage-hash64.c)2
-rw-r--r--arch/powerpc/mm/book3s64/hash_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-hash64.c)31
-rw-r--r--arch/powerpc/mm/book3s64/hash_native.c (renamed from arch/powerpc/mm/hash_native_64.c)0
-rw-r--r--arch/powerpc/mm/book3s64/hash_pgtable.c (renamed from arch/powerpc/mm/pgtable-hash64.c)15
-rw-r--r--arch/powerpc/mm/book3s64/hash_tlb.c (renamed from arch/powerpc/mm/tlb_hash64.c)18
-rw-r--r--arch/powerpc/mm/book3s64/hash_utils.c (renamed from arch/powerpc/mm/hash_utils_64.c)145
-rw-r--r--arch/powerpc/mm/book3s64/iommu_api.c (renamed from arch/powerpc/mm/mmu_context_iommu.c)0
-rw-r--r--arch/powerpc/mm/book3s64/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_book3s64.c)29
-rw-r--r--arch/powerpc/mm/book3s64/pgtable.c (renamed from arch/powerpc/mm/pgtable-book3s64.c)2
-rw-r--r--arch/powerpc/mm/book3s64/pkeys.c (renamed from arch/powerpc/mm/pkeys.c)1
-rw-r--r--arch/powerpc/mm/book3s64/radix_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-radix.c)0
-rw-r--r--arch/powerpc/mm/book3s64/radix_pgtable.c (renamed from arch/powerpc/mm/pgtable-radix.c)117
-rw-r--r--arch/powerpc/mm/book3s64/radix_tlb.c (renamed from arch/powerpc/mm/tlb-radix.c)0
-rw-r--r--arch/powerpc/mm/book3s64/slb.c (renamed from arch/powerpc/mm/slb.c)31
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c (renamed from arch/powerpc/mm/subpage-prot.c)39
-rw-r--r--arch/powerpc/mm/book3s64/vphn.c (renamed from arch/powerpc/mm/vphn.c)6
-rw-r--r--arch/powerpc/mm/book3s64/vphn.h (renamed from arch/powerpc/mm/vphn.h)3
-rw-r--r--arch/powerpc/mm/copro_fault.c18
-rw-r--r--arch/powerpc/mm/dma-noncoherent.c2
-rw-r--r--arch/powerpc/mm/drmem.c6
-rw-r--r--arch/powerpc/mm/fault.c49
-rw-r--r--arch/powerpc/mm/highmem.c14
-rw-r--r--arch/powerpc/mm/hugetlbpage.c242
-rw-r--r--arch/powerpc/mm/init-common.c26
-rw-r--r--arch/powerpc/mm/init_32.c8
-rw-r--r--arch/powerpc/mm/init_64.c2
-rw-r--r--arch/powerpc/mm/kasan/Makefile5
-rw-r--r--arch/powerpc/mm/kasan/kasan_init_32.c183
-rw-r--r--arch/powerpc/mm/mem.c17
-rw-r--r--arch/powerpc/mm/mmu_context.c2
-rw-r--r--arch/powerpc/mm/mmu_decl.h9
-rw-r--r--arch/powerpc/mm/nohash/40x.c (renamed from arch/powerpc/mm/40x_mmu.c)2
-rw-r--r--arch/powerpc/mm/nohash/44x.c (renamed from arch/powerpc/mm/44x_mmu.c)2
-rw-r--r--arch/powerpc/mm/nohash/8xx.c (renamed from arch/powerpc/mm/8xx_mmu.c)26
-rw-r--r--arch/powerpc/mm/nohash/Makefile18
-rw-r--r--arch/powerpc/mm/nohash/book3e_hugetlbpage.c (renamed from arch/powerpc/mm/hugetlbpage-book3e.c)52
-rw-r--r--arch/powerpc/mm/nohash/book3e_pgtable.c (renamed from arch/powerpc/mm/pgtable-book3e.c)9
-rw-r--r--arch/powerpc/mm/nohash/fsl_booke.c (renamed from arch/powerpc/mm/fsl_booke_mmu.c)2
-rw-r--r--arch/powerpc/mm/nohash/mmu_context.c (renamed from arch/powerpc/mm/mmu_context_nohash.c)2
-rw-r--r--arch/powerpc/mm/nohash/tlb.c (renamed from arch/powerpc/mm/tlb_nohash.c)19
-rw-r--r--arch/powerpc/mm/nohash/tlb_low.S (renamed from arch/powerpc/mm/tlb_nohash_low.S)0
-rw-r--r--arch/powerpc/mm/nohash/tlb_low_64e.S (renamed from arch/powerpc/mm/tlb_low_64e.S)31
-rw-r--r--arch/powerpc/mm/numa.c35
-rw-r--r--arch/powerpc/mm/pgtable.c114
-rw-r--r--arch/powerpc/mm/pgtable_32.c47
-rw-r--r--arch/powerpc/mm/pgtable_64.c13
-rw-r--r--arch/powerpc/mm/ptdump/hashpagetable.c2
-rw-r--r--arch/powerpc/mm/ptdump/ptdump.c86
-rw-r--r--arch/powerpc/mm/slice.c109
-rw-r--r--arch/powerpc/perf/Makefile3
-rw-r--r--arch/powerpc/perf/core-book3s.c28
-rw-r--r--arch/powerpc/perf/generic-compat-pmu.c234
-rw-r--r--arch/powerpc/perf/imc-pmu.c347
-rw-r--r--arch/powerpc/perf/internal.h12
-rw-r--r--arch/powerpc/perf/power5+-pmu.c4
-rw-r--r--arch/powerpc/perf/power5-pmu.c4
-rw-r--r--arch/powerpc/perf/power6-pmu.c4
-rw-r--r--arch/powerpc/perf/power7-pmu.c4
-rw-r--r--arch/powerpc/perf/power8-pmu.c3
-rw-r--r--arch/powerpc/perf/power9-events-list.h2
-rw-r--r--arch/powerpc/perf/power9-pmu.c3
-rw-r--r--arch/powerpc/perf/ppc970-pmu.c4
-rw-r--r--arch/powerpc/platforms/512x/clock-commonclk.c9
-rw-r--r--arch/powerpc/platforms/52xx/mpc52xx_gpt.c2
-rw-r--r--arch/powerpc/platforms/83xx/usb.c4
-rw-r--r--arch/powerpc/platforms/8xx/pic.c3
-rw-r--r--arch/powerpc/platforms/Kconfig.cputype45
-rw-r--r--arch/powerpc/platforms/cell/spu_base.c4
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c10
-rw-r--r--arch/powerpc/platforms/embedded6xx/holly.c19
-rw-r--r--arch/powerpc/platforms/powermac/Makefile6
-rw-r--r--arch/powerpc/platforms/powernv/idle.c902
-rw-r--r--arch/powerpc/platforms/powernv/opal-call.c6
-rw-r--r--arch/powerpc/platforms/powernv/opal-imc.c5
-rw-r--r--arch/powerpc/platforms/powernv/opal.c23
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c35
-rw-r--r--arch/powerpc/platforms/powernv/pci.h2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c5
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c2
-rw-r--r--arch/powerpc/platforms/pseries/hotplug-memory.c17
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c13
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c3
-rw-r--r--arch/powerpc/platforms/pseries/pmem.c3
-rw-r--r--arch/powerpc/platforms/pseries/ras.c135
-rw-r--r--arch/powerpc/purgatory/Makefile3
-rw-r--r--arch/powerpc/sysdev/tsi108_dev.c2
-rw-r--r--arch/powerpc/sysdev/xive/native.c99
-rw-r--r--arch/powerpc/xmon/Makefile1
-rw-r--r--arch/powerpc/xmon/xmon.c71
-rw-r--r--arch/riscv/Kconfig4
-rw-r--r--arch/riscv/include/asm/io.h15
-rw-r--r--arch/riscv/include/asm/mmiowb.h14
-rw-r--r--arch/riscv/include/asm/syscall.h2
-rw-r--r--arch/riscv/include/asm/tlb.h1
-rw-r--r--arch/riscv/kernel/stacktrace.c2
-rw-r--r--arch/s390/Kconfig64
-rw-r--r--arch/s390/Makefile10
-rw-r--r--arch/s390/boot/Makefile31
-rw-r--r--arch/s390/boot/als.c2
-rw-r--r--arch/s390/boot/boot.h5
-rw-r--r--arch/s390/boot/compressed/decompressor.h5
-rw-r--r--arch/s390/boot/compressed/vmlinux.lds.S22
-rw-r--r--arch/s390/boot/head.S48
-rw-r--r--arch/s390/boot/ipl_parm.c54
-rw-r--r--arch/s390/boot/ipl_report.c165
-rw-r--r--arch/s390/boot/kaslr.c144
-rw-r--r--arch/s390/boot/machine_kexec_reloc.c2
-rw-r--r--arch/s390/boot/startup.c121
-rw-r--r--arch/s390/boot/text_dma.S184
-rw-r--r--arch/s390/boot/uv.c24
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/configs/performance_defconfig1
-rw-r--r--arch/s390/crypto/crc32be-vx.S1
-rw-r--r--arch/s390/crypto/crc32le-vx.S6
-rw-r--r--arch/s390/crypto/des_s390.c21
-rw-r--r--arch/s390/crypto/prng.c135
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/s390/hypfs/hypfs_diag0c.c18
-rw-r--r--arch/s390/include/asm/Kbuild2
-rw-r--r--arch/s390/include/asm/airq.h12
-rw-r--r--arch/s390/include/asm/bitops.h12
-rw-r--r--arch/s390/include/asm/boot_data.h11
-rw-r--r--arch/s390/include/asm/bug.h24
-rw-r--r--arch/s390/include/asm/diag.h13
-rw-r--r--arch/s390/include/asm/ebcdic.h2
-rw-r--r--arch/s390/include/asm/elf.h4
-rw-r--r--arch/s390/include/asm/extable.h5
-rw-r--r--arch/s390/include/asm/ftrace.h7
-rw-r--r--arch/s390/include/asm/io.h17
-rw-r--r--arch/s390/include/asm/ipl.h132
-rw-r--r--arch/s390/include/asm/irq.h9
-rw-r--r--arch/s390/include/asm/kexec.h26
-rw-r--r--arch/s390/include/asm/linkage.h7
-rw-r--r--arch/s390/include/asm/lowcore.h2
-rw-r--r--arch/s390/include/asm/nospec-insn.h10
-rw-r--r--arch/s390/include/asm/pci.h12
-rw-r--r--arch/s390/include/asm/pci_clp.h20
-rw-r--r--arch/s390/include/asm/pci_insn.h97
-rw-r--r--arch/s390/include/asm/pci_io.h49
-rw-r--r--arch/s390/include/asm/pgtable.h112
-rw-r--r--arch/s390/include/asm/processor.h82
-rw-r--r--arch/s390/include/asm/sclp.h3
-rw-r--r--arch/s390/include/asm/sections.h22
-rw-r--r--arch/s390/include/asm/setup.h21
-rw-r--r--arch/s390/include/asm/stacktrace.h114
-rw-r--r--arch/s390/include/asm/syscall.h13
-rw-r--r--arch/s390/include/asm/syscall_wrapper.h4
-rw-r--r--arch/s390/include/asm/tlb.h130
-rw-r--r--arch/s390/include/asm/uaccess.h2
-rw-r--r--arch/s390/include/asm/unwind.h101
-rw-r--r--arch/s390/include/asm/uv.h132
-rw-r--r--arch/s390/include/asm/vmlinux.lds.h13
-rw-r--r--arch/s390/include/uapi/asm/ipl.h154
-rw-r--r--arch/s390/kernel/Makefile7
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/base.S71
-rw-r--r--arch/s390/kernel/diag.c67
-rw-r--r--arch/s390/kernel/dumpstack.c167
-rw-r--r--arch/s390/kernel/early.c9
-rw-r--r--arch/s390/kernel/early_nobss.c2
-rw-r--r--arch/s390/kernel/entry.S42
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/ftrace.c9
-rw-r--r--arch/s390/kernel/head64.S26
-rw-r--r--arch/s390/kernel/ima_arch.c14
-rw-r--r--arch/s390/kernel/ipl.c370
-rw-r--r--arch/s390/kernel/ipl_vmparm.c8
-rw-r--r--arch/s390/kernel/irq.c49
-rw-r--r--arch/s390/kernel/kexec_elf.c63
-rw-r--r--arch/s390/kernel/kexec_image.c49
-rw-r--r--arch/s390/kernel/kprobes.c37
-rw-r--r--arch/s390/kernel/machine_kexec.c8
-rw-r--r--arch/s390/kernel/machine_kexec_file.c268
-rw-r--r--arch/s390/kernel/machine_kexec_reloc.c53
-rw-r--r--arch/s390/kernel/mcount.S12
-rw-r--r--arch/s390/kernel/nmi.c2
-rw-r--r--arch/s390/kernel/nospec-branch.c9
-rw-r--r--arch/s390/kernel/nospec-sysfs.c2
-rw-r--r--arch/s390/kernel/perf_cpum_cf.c15
-rw-r--r--arch/s390/kernel/perf_cpum_cf_diag.c9
-rw-r--r--arch/s390/kernel/perf_cpum_cf_events.c107
-rw-r--r--arch/s390/kernel/perf_event.c16
-rw-r--r--arch/s390/kernel/pgm_check.S2
-rw-r--r--arch/s390/kernel/process.c1
-rw-r--r--arch/s390/kernel/processor.c3
-rw-r--r--arch/s390/kernel/reipl.S1
-rw-r--r--arch/s390/kernel/relocate_kernel.S4
-rw-r--r--arch/s390/kernel/setup.c71
-rw-r--r--arch/s390/kernel/smp.c3
-rw-r--r--arch/s390/kernel/stacktrace.c81
-rw-r--r--arch/s390/kernel/swsusp.S17
-rw-r--r--arch/s390/kernel/traps.c3
-rw-r--r--arch/s390/kernel/unwind_bc.c155
-rw-r--r--arch/s390/kernel/vdso.c10
-rw-r--r--arch/s390/kernel/vdso32/Makefile2
-rw-r--r--arch/s390/kernel/vdso64/Makefile2
-rw-r--r--arch/s390/kernel/vmlinux.lds.S19
-rw-r--r--arch/s390/kvm/Kconfig1
-rw-r--r--arch/s390/kvm/interrupt.c2
-rw-r--r--arch/s390/lib/mem.S1
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/fault.c14
-rw-r--r--arch/s390/mm/gup.c300
-rw-r--r--arch/s390/mm/init.c3
-rw-r--r--arch/s390/mm/maccess.c1
-rw-r--r--arch/s390/mm/pgalloc.c63
-rw-r--r--arch/s390/mm/pgtable.c2
-rw-r--r--arch/s390/mm/vmem.c2
-rw-r--r--arch/s390/net/bpf_jit_comp.c6
-rw-r--r--arch/s390/oprofile/init.c22
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c366
-rw-r--r--arch/s390/pci/pci_clp.c25
-rw-r--r--arch/s390/pci/pci_insn.c169
-rw-r--r--arch/s390/pci/pci_irq.c486
-rw-r--r--arch/s390/purgatory/Makefile20
-rw-r--r--arch/s390/purgatory/kexec-purgatory.S14
-rw-r--r--arch/s390/purgatory/purgatory.lds.S54
-rw-r--r--arch/s390/scripts/Makefile.chkbss3
-rw-r--r--arch/s390/tools/opcodes.txt11
-rw-r--r--arch/sh/Kconfig6
-rw-r--r--arch/sh/boards/mach-ecovec24/setup.c12
-rw-r--r--arch/sh/configs/apsh4ad0a_defconfig1
-rw-r--r--arch/sh/configs/ecovec24-romimage_defconfig1
-rw-r--r--arch/sh/configs/rsk7264_defconfig1
-rw-r--r--arch/sh/configs/rsk7269_defconfig1
-rw-r--r--arch/sh/configs/sh7785lcr_32bit_defconfig1
-rw-r--r--arch/sh/include/asm/Kbuild1
-rw-r--r--arch/sh/include/asm/io.h3
-rw-r--r--arch/sh/include/asm/mmiowb.h12
-rw-r--r--arch/sh/include/asm/pgalloc.h9
-rw-r--r--arch/sh/include/asm/spinlock-llsc.h2
-rw-r--r--arch/sh/include/asm/syscall_32.h2
-rw-r--r--arch/sh/include/asm/syscall_64.h2
-rw-r--r--arch/sh/include/asm/tlb.h132
-rw-r--r--arch/sh/include/cpu-sh4/cpu/sh7786.h2
-rw-r--r--arch/sh/include/uapi/asm/sockios.h5
-rw-r--r--arch/sh/kernel/stacktrace.c4
-rw-r--r--arch/sparc/Kconfig9
-rw-r--r--arch/sparc/crypto/des_glue.c11
-rw-r--r--arch/sparc/include/asm/Kbuild2
-rw-r--r--arch/sparc/include/asm/io_64.h2
-rw-r--r--arch/sparc/include/asm/syscall.h5
-rw-r--r--arch/sparc/include/asm/tlb_32.h18
-rw-r--r--arch/sparc/include/uapi/asm/sockios.h15
-rw-r--r--arch/sparc/kernel/cpumap.c3
-rw-r--r--arch/sparc/kernel/ds.c2
-rw-r--r--arch/sparc/kernel/uprobes.c1
-rw-r--r--arch/sparc/mm/init_64.c42
-rw-r--r--arch/sparc/mm/iommu.c142
-rw-r--r--arch/sparc/vdso/Makefile2
-rw-r--r--arch/um/Kconfig58
-rw-r--r--arch/um/drivers/Kconfig352
-rw-r--r--arch/um/drivers/harddog_kern.c2
-rw-r--r--arch/um/drivers/ubd_kern.c4
-rw-r--r--arch/um/drivers/vector_kern.c2
-rw-r--r--arch/um/include/asm/Kbuild1
-rw-r--r--arch/um/include/asm/pgtable.h7
-rw-r--r--arch/um/include/asm/tlb.h158
-rw-r--r--arch/um/kernel/irq.c2
-rw-r--r--arch/um/kernel/skas/uaccess.c1
-rw-r--r--arch/um/kernel/stacktrace.c2
-rw-r--r--arch/um/kernel/sysrq.c2
-rw-r--r--arch/um/kernel/time.c2
-rw-r--r--arch/um/os-Linux/signal.c28
-rw-r--r--arch/um/os-Linux/umid.c36
-rw-r--r--arch/unicore32/Kconfig7
-rw-r--r--arch/unicore32/include/asm/Kbuild1
-rw-r--r--arch/unicore32/include/asm/elf.h3
-rw-r--r--arch/unicore32/include/asm/syscall.h12
-rw-r--r--arch/unicore32/include/asm/tlb.h7
-rw-r--r--arch/unicore32/kernel/stacktrace.c2
-rw-r--r--arch/x86/Kconfig45
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/boot/compressed/acpi.c2
-rw-r--r--arch/x86/configs/i386_defconfig12
-rw-r--r--arch/x86/configs/x86_64_defconfig12
-rw-r--r--arch/x86/crypto/aegis128-aesni-glue.c157
-rw-r--r--arch/x86/crypto/aegis128l-aesni-glue.c157
-rw-r--r--arch/x86/crypto/aegis256-aesni-glue.c157
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c212
-rw-r--r--arch/x86/crypto/chacha_glue.c6
-rw-r--r--arch/x86/crypto/crc32-pclmul_glue.c5
-rw-r--r--arch/x86/crypto/crc32c-intel_glue.c7
-rw-r--r--arch/x86/crypto/crct10dif-pclmul_glue.c20
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_glue.c11
-rw-r--r--arch/x86/crypto/morus1280-avx2-glue.c12
-rw-r--r--arch/x86/crypto/morus1280-sse2-glue.c12
-rw-r--r--arch/x86/crypto/morus1280_glue.c85
-rw-r--r--arch/x86/crypto/morus640-sse2-glue.c12
-rw-r--r--arch/x86/crypto/morus640_glue.c85
-rw-r--r--arch/x86/crypto/nhpoly1305-avx2-glue.c5
-rw-r--r--arch/x86/crypto/nhpoly1305-sse2-glue.c5
-rw-r--r--arch/x86/crypto/poly1305_glue.c4
-rw-r--r--arch/x86/crypto/sha1_ssse3_glue.c7
-rw-r--r--arch/x86/crypto/sha256_ssse3_glue.c7
-rw-r--r--arch/x86/crypto/sha512_ssse3_glue.c10
-rw-r--r--arch/x86/entry/common.c10
-rw-r--r--arch/x86/entry/entry_32.S5
-rw-r--r--arch/x86/entry/entry_64.S19
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl7
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl6
-rw-r--r--arch/x86/entry/vdso/Makefile2
-rw-r--r--arch/x86/entry/vdso/vclock_gettime.c4
-rw-r--r--arch/x86/entry/vdso/vdso2c.h13
-rw-r--r--arch/x86/events/amd/core.c111
-rw-r--r--arch/x86/events/core.c95
-rw-r--r--arch/x86/events/intel/core.c306
-rw-r--r--arch/x86/events/intel/cstate.c2
-rw-r--r--arch/x86/events/intel/ds.c505
-rw-r--r--arch/x86/events/intel/lbr.c35
-rw-r--r--arch/x86/events/intel/pt.c3
-rw-r--r--arch/x86/events/intel/rapl.c2
-rw-r--r--arch/x86/events/intel/uncore.c6
-rw-r--r--arch/x86/events/intel/uncore.h1
-rw-r--r--arch/x86/events/intel/uncore_snb.c91
-rw-r--r--arch/x86/events/msr.c1
-rw-r--r--arch/x86/events/perf_event.h98
-rw-r--r--arch/x86/hyperv/hv_apic.c5
-rw-r--r--arch/x86/hyperv/hv_spinlock.c2
-rw-r--r--arch/x86/ia32/ia32_signal.c46
-rw-r--r--arch/x86/include/asm/Kbuild1
-rw-r--r--arch/x86/include/asm/alternative-asm.h11
-rw-r--r--arch/x86/include/asm/alternative.h10
-rw-r--r--arch/x86/include/asm/asm.h24
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h69
-rw-r--r--arch/x86/include/asm/cpufeature.h11
-rw-r--r--arch/x86/include/asm/debugreg.h2
-rw-r--r--arch/x86/include/asm/dma-mapping.h10
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/fpu/api.h31
-rw-r--r--arch/x86/include/asm/fpu/internal.h140
-rw-r--r--arch/x86/include/asm/fpu/signal.h2
-rw-r--r--arch/x86/include/asm/fpu/types.h9
-rw-r--r--arch/x86/include/asm/fpu/xstate.h8
-rw-r--r--arch/x86/include/asm/intel_ds.h2
-rw-r--r--arch/x86/include/asm/io.h2
-rw-r--r--arch/x86/include/asm/irq.h6
-rw-r--r--arch/x86/include/asm/irq_vectors.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h1
-rw-r--r--arch/x86/include/asm/mce.h25
-rw-r--r--arch/x86/include/asm/mmu_context.h56
-rw-r--r--arch/x86/include/asm/msr-index.h1
-rw-r--r--arch/x86/include/asm/nospec-branch.h28
-rw-r--r--arch/x86/include/asm/page_32_types.h8
-rw-r--r--arch/x86/include/asm/page_64_types.h16
-rw-r--r--arch/x86/include/asm/perf_event.h57
-rw-r--r--arch/x86/include/asm/pgtable.h34
-rw-r--r--arch/x86/include/asm/processor.h43
-rw-r--r--arch/x86/include/asm/rwsem.h237
-rw-r--r--arch/x86/include/asm/set_memory.h3
-rw-r--r--arch/x86/include/asm/smap.h37
-rw-r--r--arch/x86/include/asm/smp.h2
-rw-r--r--arch/x86/include/asm/special_insns.h19
-rw-r--r--arch/x86/include/asm/stackprotector.h6
-rw-r--r--arch/x86/include/asm/stacktrace.h15
-rw-r--r--arch/x86/include/asm/switch_to.h1
-rw-r--r--arch/x86/include/asm/sync_bitops.h31
-rw-r--r--arch/x86/include/asm/syscall.h8
-rw-r--r--arch/x86/include/asm/text-patching.h7
-rw-r--r--arch/x86/include/asm/thread_info.h2
-rw-r--r--arch/x86/include/asm/tlb.h1
-rw-r--r--arch/x86/include/asm/tlbflush.h4
-rw-r--r--arch/x86/include/asm/trace/exceptions.h2
-rw-r--r--arch/x86/include/asm/trace/fpu.h13
-rw-r--r--arch/x86/include/asm/uaccess.h15
-rw-r--r--arch/x86/include/asm/uaccess_64.h3
-rw-r--r--arch/x86/include/asm/xen/hypercall.h24
-rw-r--r--arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--arch/x86/include/uapi/asm/perf_regs.h23
-rw-r--r--arch/x86/include/uapi/asm/sockios.h1
-rw-r--r--arch/x86/kernel/acpi/boot.c36
-rw-r--r--arch/x86/kernel/acpi/cstate.c12
-rw-r--r--arch/x86/kernel/alternative.c201
-rw-r--r--arch/x86/kernel/amd_gart_64.c6
-rw-r--r--arch/x86/kernel/apic/apic.c57
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c2
-rw-r--r--arch/x86/kernel/asm-offsets_64.c4
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/cpu/aperfmperf.c6
-rw-r--r--arch/x86/kernel/cpu/bugs.c11
-rw-r--r--arch/x86/kernel/cpu/common.c84
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/hygon.c5
-rw-r--r--arch/x86/kernel/cpu/intel.c34
-rw-r--r--arch/x86/kernel/cpu/intel_epb.c216
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c52
-rw-r--r--arch/x86/kernel/cpu/mce/core.c102
-rw-r--r--arch/x86/kernel/cpu/mce/genpool.c3
-rw-r--r--arch/x86/kernel/cpu/mce/inject.c16
-rw-r--r--arch/x86/kernel/cpu/mce/internal.h9
-rw-r--r--arch/x86/kernel/cpu/microcode/core.c5
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c71
-rw-r--r--arch/x86/kernel/cpu/proc.c10
-rw-r--r--arch/x86/kernel/cpu/resctrl/ctrlmondata.c4
-rw-r--r--arch/x86/kernel/cpu/resctrl/rdtgroup.c173
-rw-r--r--arch/x86/kernel/crash.c3
-rw-r--r--arch/x86/kernel/dumpstack_32.c8
-rw-r--r--arch/x86/kernel/dumpstack_64.c99
-rw-r--r--arch/x86/kernel/early-quirks.c4
-rw-r--r--arch/x86/kernel/fpu/core.c195
-rw-r--r--arch/x86/kernel/fpu/init.c2
-rw-r--r--arch/x86/kernel/fpu/regset.c24
-rw-r--r--arch/x86/kernel/fpu/signal.c202
-rw-r--r--arch/x86/kernel/fpu/xstate.c42
-rw-r--r--arch/x86/kernel/ftrace.c22
-rw-r--r--arch/x86/kernel/head_64.S2
-rw-r--r--arch/x86/kernel/idt.c19
-rw-r--r--arch/x86/kernel/ima_arch.c10
-rw-r--r--arch/x86/kernel/irq_32.c41
-rw-r--r--arch/x86/kernel/irq_64.c89
-rw-r--r--arch/x86/kernel/irqinit.c4
-rw-r--r--arch/x86/kernel/jump_label.c21
-rw-r--r--arch/x86/kernel/kgdb.c25
-rw-r--r--arch/x86/kernel/kprobes/core.c20
-rw-r--r--arch/x86/kernel/kvm.c2
-rw-r--r--arch/x86/kernel/ldt.c14
-rw-r--r--arch/x86/kernel/module.c2
-rw-r--r--arch/x86/kernel/nmi.c20
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-dma.c20
-rw-r--r--arch/x86/kernel/perf_regs.c27
-rw-r--r--arch/x86/kernel/process.c6
-rw-r--r--arch/x86/kernel/process_32.c18
-rw-r--r--arch/x86/kernel/process_64.c12
-rw-r--r--arch/x86/kernel/reboot.c2
-rw-r--r--arch/x86/kernel/setup.c38
-rw-r--r--arch/x86/kernel/setup_percpu.c5
-rw-r--r--arch/x86/kernel/signal.c55
-rw-r--r--arch/x86/kernel/smpboot.c21
-rw-r--r--arch/x86/kernel/stacktrace.c128
-rw-r--r--arch/x86/kernel/topology.c2
-rw-r--r--arch/x86/kernel/traps.c2
-rw-r--r--arch/x86/kernel/tsc.c34
-rw-r--r--arch/x86/kernel/vm86_32.c2
-rw-r--r--arch/x86/kernel/vmlinux.lds.S13
-rw-r--r--arch/x86/kvm/Kconfig1
-rw-r--r--arch/x86/kvm/hyperv.c11
-rw-r--r--arch/x86/kvm/lapic.c73
-rw-r--r--arch/x86/kvm/lapic.h4
-rw-r--r--arch/x86/kvm/mmu.c1
-rw-r--r--arch/x86/kvm/vmx/nested.c4
-rw-r--r--arch/x86/kvm/vmx/vmenter.S12
-rw-r--r--arch/x86/kvm/vmx/vmx.c9
-rw-r--r--arch/x86/kvm/x86.c84
-rw-r--r--arch/x86/kvm/x86.h2
-rw-r--r--arch/x86/lib/Makefile13
-rw-r--r--arch/x86/lib/copy_user_64.S48
-rw-r--r--arch/x86/lib/delay.c2
-rw-r--r--arch/x86/lib/error-inject.c1
-rw-r--r--arch/x86/lib/memcpy_64.S3
-rw-r--r--arch/x86/lib/rwsem.S156
-rw-r--r--arch/x86/lib/usercopy_64.c20
-rw-r--r--arch/x86/math-emu/fpu_entry.c3
-rw-r--r--arch/x86/mm/cpu_entry_area.c64
-rw-r--r--arch/x86/mm/dump_pagetables.c4
-rw-r--r--arch/x86/mm/extable.c4
-rw-r--r--arch/x86/mm/fault.c58
-rw-r--r--arch/x86/mm/init.c37
-rw-r--r--arch/x86/mm/kaslr.c94
-rw-r--r--arch/x86/mm/mpx.c6
-rw-r--r--arch/x86/mm/pageattr.c16
-rw-r--r--arch/x86/mm/pgtable.c14
-rw-r--r--arch/x86/mm/pkeys.c21
-rw-r--r--arch/x86/mm/pti.c6
-rw-r--r--arch/x86/mm/tlb.c116
-rw-r--r--arch/x86/net/bpf_jit_comp32.c236
-rw-r--r--arch/x86/platform/uv/tlb_uv.c7
-rw-r--r--arch/x86/power/hibernate.c1
-rw-r--r--arch/x86/tools/relocs.c76
-rw-r--r--arch/x86/um/Kconfig6
-rw-r--r--arch/x86/um/Makefile4
-rw-r--r--arch/x86/um/asm/syscall.h2
-rw-r--r--arch/x86/um/vdso/Makefile2
-rw-r--r--arch/x86/xen/mmu_pv.c2
-rw-r--r--arch/x86/xen/multicalls.c2
-rw-r--r--arch/x86/xen/smp_pv.c4
-rw-r--r--arch/x86/xen/xen-head.S10
-rw-r--r--arch/xtensa/Kconfig29
-rw-r--r--arch/xtensa/boot/boot-redboot/bootstrap.S2
-rw-r--r--arch/xtensa/include/asm/Kbuild2
-rw-r--r--arch/xtensa/include/asm/asmmacro.h2
-rw-r--r--arch/xtensa/include/asm/atomic.h66
-rw-r--r--arch/xtensa/include/asm/barrier.h4
-rw-r--r--arch/xtensa/include/asm/bitops.h125
-rw-r--r--arch/xtensa/include/asm/cache.h2
-rw-r--r--arch/xtensa/include/asm/checksum.h2
-rw-r--r--arch/xtensa/include/asm/cmpxchg.h36
-rw-r--r--arch/xtensa/include/asm/coprocessor.h2
-rw-r--r--arch/xtensa/include/asm/core.h21
-rw-r--r--arch/xtensa/include/asm/futex.h122
-rw-r--r--arch/xtensa/include/asm/initialize_mmu.h38
-rw-r--r--arch/xtensa/include/asm/io.h3
-rw-r--r--arch/xtensa/include/asm/irq.h2
-rw-r--r--arch/xtensa/include/asm/pci-bridge.h3
-rw-r--r--arch/xtensa/include/asm/pci.h4
-rw-r--r--arch/xtensa/include/asm/pgalloc.h3
-rw-r--r--arch/xtensa/include/asm/processor.h2
-rw-r--r--arch/xtensa/include/asm/ptrace.h2
-rw-r--r--arch/xtensa/include/asm/syscall.h2
-rw-r--r--arch/xtensa/include/asm/tlb.h26
-rw-r--r--arch/xtensa/include/asm/vectors.h2
-rw-r--r--arch/xtensa/include/uapi/asm/sockios.h4
-rw-r--r--arch/xtensa/kernel/hw_breakpoint.c2
-rw-r--r--arch/xtensa/kernel/setup.c3
-rw-r--r--arch/xtensa/kernel/vmlinux.lds.S2
-rw-r--r--arch/xtensa/lib/checksum.S2
-rw-r--r--arch/xtensa/lib/memcopy.S2
-rw-r--r--arch/xtensa/lib/memset.S2
-rw-r--r--arch/xtensa/lib/strncpy_user.S2
-rw-r--r--arch/xtensa/lib/strnlen_user.S2
-rw-r--r--arch/xtensa/lib/usercopy.S2
-rw-r--r--arch/xtensa/platforms/iss/simdisk.c3
-rw-r--r--arch/xtensa/platforms/xt2000/include/platform/hardware.h2
-rw-r--r--arch/xtensa/platforms/xt2000/include/platform/serial.h2
1039 files changed, 19730 insertions, 14700 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 33687dddd86a..5e43fcbad4ca 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -249,6 +249,10 @@ config ARCH_HAS_FORTIFY_SOURCE
config ARCH_HAS_SET_MEMORY
bool
+# Select if arch has all set_direct_map_invalid/default() functions
+config ARCH_HAS_SET_DIRECT_MAP
+ bool
+
# Select if arch init_task must go in the __init_task_data section
config ARCH_TASK_STRUCT_ON_STACK
bool
@@ -383,7 +387,13 @@ config HAVE_ARCH_JUMP_LABEL_RELATIVE
config HAVE_RCU_TABLE_FREE
bool
-config HAVE_RCU_TABLE_INVALIDATE
+config HAVE_RCU_TABLE_NO_INVALIDATE
+ bool
+
+config HAVE_MMU_GATHER_PAGE_SIZE
+ bool
+
+config HAVE_MMU_GATHER_NO_GATHER
bool
config ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -901,6 +911,15 @@ config HAVE_ARCH_PREL32_RELOCATIONS
config ARCH_USE_MEMREMAP_PROT
bool
+config LOCK_EVENT_COUNTS
+ bool "Locking event counts collection"
+ depends on DEBUG_FS
+ ---help---
+ Enable light-weight counting of various locking related events
+ in the system with minimal performance impact. This reduces
+ the chance of application behavior change because of timing
+ differences. The counts are reported via debugfs.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 584a6e114853..f7b19b813a70 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -36,6 +36,7 @@ config ALPHA
select ODD_RT_SIGACTION
select OLD_SIGSUSPEND
select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67
+ select MMU_GATHER_NO_RANGE
help
The Alpha is a 64-bit general-purpose processor designed and
marketed by the Digital Equipment Corporation of blessed memory,
@@ -49,13 +50,6 @@ config MMU
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config ARCH_HAS_ILOG2_U32
bool
default n
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index 70b783333965..89e87bbc987f 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += irq_work.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += preempt.h
generic-y += sections.h
generic-y += trace_clock.h
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index 4c533fc94d62..ccf9d65166bb 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -513,8 +513,6 @@ extern inline void writeq(u64 b, volatile void __iomem *addr)
#define writel_relaxed(b, addr) __raw_writel(b, addr)
#define writeq_relaxed(b, addr) __raw_writeq(b, addr)
-#define mmiowb()
-
/*
* String version of IO memory access ops:
*/
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
deleted file mode 100644
index cf8fc8f9a2ed..000000000000
--- a/arch/alpha/include/asm/rwsem.h
+++ /dev/null
@@ -1,211 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ALPHA_RWSEM_H
-#define _ALPHA_RWSEM_H
-
-/*
- * Written by Ivan Kokshaysky <ink@jurassic.park.msu.ru>, 2001.
- * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h
- */
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-
-#include <linux/compiler.h>
-
-#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
-#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
-#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
-#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-static inline int ___down_read(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
-#endif
- return (oldcount < 0);
-}
-
-static inline void __down_read(struct rw_semaphore *sem)
-{
- if (unlikely(___down_read(sem)))
- rwsem_down_read_failed(sem);
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (unlikely(___down_read(sem)))
- if (IS_ERR(rwsem_down_read_failed_killable(sem)))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int __down_read_trylock(struct rw_semaphore *sem)
-{
- long old, new, res;
-
- res = atomic_long_read(&sem->count);
- do {
- new = res + RWSEM_ACTIVE_READ_BIAS;
- if (new <= 0)
- break;
- old = res;
- res = atomic_long_cmpxchg(&sem->count, old, new);
- } while (res != old);
- return res >= 0 ? 1 : 0;
-}
-
-static inline long ___down_write(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
-#endif
- return oldcount;
-}
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
- if (unlikely(___down_write(sem)))
- rwsem_down_write_failed(sem);
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- if (unlikely(___down_write(sem))) {
- if (IS_ERR(rwsem_down_write_failed_killable(sem)))
- return -EINTR;
- }
-
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int __down_write_trylock(struct rw_semaphore *sem)
-{
- long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
- RWSEM_ACTIVE_WRITE_BIAS);
- if (ret == RWSEM_UNLOCKED_VALUE)
- return 1;
- return 0;
-}
-
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- " mb\n"
- "1: ldq_l %0,%1\n"
- " subq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(oldcount < 0))
- if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0)
- rwsem_wake(sem);
-}
-
-static inline void __up_write(struct rw_semaphore *sem)
-{
- long count;
-#ifndef CONFIG_SMP
- sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
- count = sem->count.counter;
-#else
- long temp;
- __asm__ __volatile__(
- " mb\n"
- "1: ldq_l %0,%1\n"
- " subq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " subq %0,%3,%0\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (count), "=m" (sem->count), "=&r" (temp)
- :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(count))
- if ((int)count == 0)
- rwsem_wake(sem);
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- long oldcount;
-#ifndef CONFIG_SMP
- oldcount = sem->count.counter;
- sem->count.counter -= RWSEM_WAITING_BIAS;
-#else
- long temp;
- __asm__ __volatile__(
- "1: ldq_l %0,%1\n"
- " addq %0,%3,%2\n"
- " stq_c %2,%1\n"
- " beq %2,2f\n"
- " mb\n"
- ".subsection 2\n"
- "2: br 1b\n"
- ".previous"
- :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
- :"Ir" (-RWSEM_WAITING_BIAS), "m" (sem->count) : "memory");
-#endif
- if (unlikely(oldcount < 0))
- rwsem_downgrade_wake(sem);
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ALPHA_RWSEM_H */
diff --git a/arch/alpha/include/asm/syscall.h b/arch/alpha/include/asm/syscall.h
index d73a6fcb519c..11c688c1d7ec 100644
--- a/arch/alpha/include/asm/syscall.h
+++ b/arch/alpha/include/asm/syscall.h
@@ -4,7 +4,7 @@
#include <uapi/linux/audit.h>
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_ALPHA;
}
diff --git a/arch/alpha/include/asm/tlb.h b/arch/alpha/include/asm/tlb.h
index 8f5042b61875..4f79e331af5e 100644
--- a/arch/alpha/include/asm/tlb.h
+++ b/arch/alpha/include/asm/tlb.h
@@ -2,12 +2,6 @@
#ifndef _ALPHA_TLB_H
#define _ALPHA_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)
diff --git a/arch/alpha/include/uapi/asm/sockios.h b/arch/alpha/include/uapi/asm/sockios.h
index ba287e4b01bf..af92bc27c3be 100644
--- a/arch/alpha/include/uapi/asm/sockios.h
+++ b/arch/alpha/include/uapi/asm/sockios.h
@@ -11,7 +11,7 @@
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */
#endif /* _ASM_ALPHA_SOCKIOS_H */
diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c
index 3034d6d936d2..242108439f42 100644
--- a/arch/alpha/kernel/pci_iommu.c
+++ b/arch/alpha/kernel/pci_iommu.c
@@ -249,7 +249,7 @@ static int pci_dac_dma_supported(struct pci_dev *dev, u64 mask)
ok = 0;
/* If both conditions above are met, we are fine. */
- DBGA("pci_dac_dma_supported %s from %pf\n",
+ DBGA("pci_dac_dma_supported %s from %ps\n",
ok ? "yes" : "no", __builtin_return_address(0));
return ok;
@@ -281,7 +281,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
&& paddr + size <= __direct_map_size) {
ret = paddr + __direct_map_base;
- DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %pf\n",
+ DBGA2("pci_map_single: [%p,%zx] -> direct %llx from %ps\n",
cpu_addr, size, ret, __builtin_return_address(0));
return ret;
@@ -292,7 +292,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
if (dac_allowed) {
ret = paddr + alpha_mv.pci_dac_offset;
- DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %pf\n",
+ DBGA2("pci_map_single: [%p,%zx] -> DAC %llx from %ps\n",
cpu_addr, size, ret, __builtin_return_address(0));
return ret;
@@ -329,7 +329,7 @@ pci_map_single_1(struct pci_dev *pdev, void *cpu_addr, size_t size,
ret = arena->dma_base + dma_ofs * PAGE_SIZE;
ret += (unsigned long)cpu_addr & ~PAGE_MASK;
- DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %pf\n",
+ DBGA2("pci_map_single: [%p,%zx] np %ld -> sg %llx from %ps\n",
cpu_addr, size, npages, ret, __builtin_return_address(0));
return ret;
@@ -396,14 +396,14 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr,
&& dma_addr < __direct_map_base + __direct_map_size) {
/* Nothing to do. */
- DBGA2("pci_unmap_single: direct [%llx,%zx] from %pf\n",
+ DBGA2("pci_unmap_single: direct [%llx,%zx] from %ps\n",
dma_addr, size, __builtin_return_address(0));
return;
}
if (dma_addr > 0xffffffff) {
- DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %pf\n",
+ DBGA2("pci64_unmap_single: DAC [%llx,%zx] from %ps\n",
dma_addr, size, __builtin_return_address(0));
return;
}
@@ -435,7 +435,7 @@ static void alpha_pci_unmap_page(struct device *dev, dma_addr_t dma_addr,
spin_unlock_irqrestore(&arena->lock, flags);
- DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %pf\n",
+ DBGA2("pci_unmap_single: sg [%llx,%zx] np %ld from %ps\n",
dma_addr, size, npages, __builtin_return_address(0));
}
@@ -458,7 +458,7 @@ try_again:
cpu_addr = (void *)__get_free_pages(gfp | __GFP_ZERO, order);
if (! cpu_addr) {
printk(KERN_INFO "pci_alloc_consistent: "
- "get_free_pages failed from %pf\n",
+ "get_free_pages failed from %ps\n",
__builtin_return_address(0));
/* ??? Really atomic allocation? Otherwise we could play
with vmalloc and sg if we can't find contiguous memory. */
@@ -477,7 +477,7 @@ try_again:
goto try_again;
}
- DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %pf\n",
+ DBGA2("pci_alloc_consistent: %zx -> [%p,%llx] from %ps\n",
size, cpu_addr, *dma_addrp, __builtin_return_address(0));
return cpu_addr;
@@ -497,7 +497,7 @@ static void alpha_pci_free_coherent(struct device *dev, size_t size,
pci_unmap_single(pdev, dma_addr, size, PCI_DMA_BIDIRECTIONAL);
free_pages((unsigned long)cpu_addr, get_order(size));
- DBGA2("pci_free_consistent: [%llx,%zx] from %pf\n",
+ DBGA2("pci_free_consistent: [%llx,%zx] from %ps\n",
dma_addr, size, __builtin_return_address(0));
}
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c781e45d1d99..23e063df5d2c 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER
config GENERIC_CSUM
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config ARCH_DISCONTIGMEM_ENABLE
def_bool n
diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts
index 69bc1c9e8e50..7425bb0f2d1b 100644
--- a/arch/arc/boot/dts/hsdk.dts
+++ b/arch/arc/boot/dts/hsdk.dts
@@ -18,8 +18,8 @@
model = "snps,hsdk";
compatible = "snps,hsdk";
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
chosen {
bootargs = "earlycon=uart8250,mmio32,0xf0005000,115200n8 console=ttyS0,115200n8 debug print-fatal-signals=1";
@@ -105,7 +105,7 @@
#size-cells = <1>;
interrupt-parent = <&idu_intc>;
- ranges = <0x00000000 0xf0000000 0x10000000>;
+ ranges = <0x00000000 0x0 0xf0000000 0x10000000>;
cgu_rst: reset-controller@8a0 {
compatible = "snps,hsdk-reset";
@@ -269,9 +269,10 @@
};
memory@80000000 {
- #address-cells = <1>;
- #size-cells = <1>;
+ #address-cells = <2>;
+ #size-cells = <2>;
device_type = "memory";
- reg = <0x80000000 0x40000000>; /* 1 GiB */
+ reg = <0x0 0x80000000 0x0 0x40000000>; /* 1 GB lowmem */
+ /* 0x1 0x00000000 0x0 0x40000000>; 1 GB highmem */
};
};
diff --git a/arch/arc/configs/haps_hs_defconfig b/arch/arc/configs/haps_hs_defconfig
index f56cc2070c11..b117e6c16d41 100644
--- a/arch/arc/configs/haps_hs_defconfig
+++ b/arch/arc/configs/haps_hs_defconfig
@@ -15,7 +15,6 @@ CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_SLAB=y
CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/haps_hs_smp_defconfig b/arch/arc/configs/haps_hs_smp_defconfig
index b6f2482c7e74..33a787c375e2 100644
--- a/arch/arc/configs/haps_hs_smp_defconfig
+++ b/arch/arc/configs/haps_hs_smp_defconfig
@@ -17,7 +17,6 @@ CONFIG_PERF_EVENTS=y
CONFIG_SLAB=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsim_700_defconfig b/arch/arc/configs/nsim_700_defconfig
index 318e4cd29629..de398c7b10b3 100644
--- a/arch/arc/configs/nsim_700_defconfig
+++ b/arch/arc/configs/nsim_700_defconfig
@@ -18,7 +18,6 @@ CONFIG_PERF_EVENTS=y
CONFIG_ISA_ARCOMPACT=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsim_hs_defconfig b/arch/arc/configs/nsim_hs_defconfig
index c15807b0e0c1..2dbd34a9ff07 100644
--- a/arch/arc/configs/nsim_hs_defconfig
+++ b/arch/arc/configs/nsim_hs_defconfig
@@ -20,7 +20,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsim_hs_smp_defconfig b/arch/arc/configs/nsim_hs_smp_defconfig
index 65e983fd942b..c7135f1e2583 100644
--- a/arch/arc/configs/nsim_hs_smp_defconfig
+++ b/arch/arc/configs/nsim_hs_smp_defconfig
@@ -18,7 +18,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsimosci_defconfig b/arch/arc/configs/nsimosci_defconfig
index 08c5b99ac341..385a71d3c478 100644
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@@ -18,7 +18,6 @@ CONFIG_PERF_EVENTS=y
CONFIG_ISA_ARCOMPACT=y
CONFIG_KPROBES=y
CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsimosci_hs_defconfig b/arch/arc/configs/nsimosci_hs_defconfig
index 5b5e26d67955..248a2c3bdc12 100644
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@@ -17,7 +17,6 @@ CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_KPROBES=y
CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/configs/nsimosci_hs_smp_defconfig b/arch/arc/configs/nsimosci_hs_smp_defconfig
index 26af9b2f7fcb..1a4bc7b660fb 100644
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@@ -12,7 +12,6 @@ CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
CONFIG_KPROBES=y
CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index decc306a3b52..393d4f5e1450 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += msi.h
generic-y += parport.h
generic-y += percpu.h
diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h
index aa2d6da9d187..2b80c184c9c8 100644
--- a/arch/arc/include/asm/elf.h
+++ b/arch/arc/include/asm/elf.h
@@ -10,13 +10,9 @@
#define __ASM_ARC_ELF_H
#include <linux/types.h>
+#include <linux/elf-em.h>
#include <uapi/asm/elf.h>
-/* These ELF defines belong to uapi but libc elf.h already defines them */
-#define EM_ARCOMPACT 93
-
-#define EM_ARCV2 195 /* ARCv2 Cores */
-
#define EM_ARC_INUSE (IS_ENABLED(CONFIG_ISA_ARCOMPACT) ? \
EM_ARCOMPACT : EM_ARCV2)
diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index c7a4201ed62b..9cac959ca4e8 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -9,6 +9,7 @@
#ifndef _ASM_ARC_SYSCALL_H
#define _ASM_ARC_SYSCALL_H 1
+#include <uapi/linux/audit.h>
#include <linux/err.h>
#include <linux/sched.h>
#include <asm/unistd.h>
@@ -67,4 +68,14 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
}
}
+static inline int
+syscall_get_arch(struct task_struct *task)
+{
+ return IS_ENABLED(CONFIG_ISA_ARCOMPACT)
+ ? (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+ ? AUDIT_ARCH_ARCOMPACTBE : AUDIT_ARCH_ARCOMPACT)
+ : (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+ ? AUDIT_ARCH_ARCV2BE : AUDIT_ARCH_ARCV2);
+}
+
#endif
diff --git a/arch/arc/include/asm/tlb.h b/arch/arc/include/asm/tlb.h
index a9db5f62aaf3..90cac97643a4 100644
--- a/arch/arc/include/asm/tlb.h
+++ b/arch/arc/include/asm/tlb.h
@@ -9,38 +9,6 @@
#ifndef _ASM_ARC_TLB_H
#define _ASM_ARC_TLB_H
-#define tlb_flush(tlb) \
-do { \
- if (tlb->fullmm) \
- flush_tlb_mm((tlb)->mm); \
-} while (0)
-
-/*
- * This pair is called at time of munmap/exit to flush cache and TLB entries
- * for mappings being torn down.
- * 1) cache-flush part -implemented via tlb_start_vma( ) for VIPT aliasing D$
- * 2) tlb-flush part - implemted via tlb_end_vma( ) flushes the TLB range
- *
- * Note, read http://lkml.org/lkml/2004/1/15/6
- */
-#ifndef CONFIG_ARC_CACHE_VIPT_ALIASING
-#define tlb_start_vma(tlb, vma)
-#else
-#define tlb_start_vma(tlb, vma) \
-do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while(0)
-#endif
-
-#define tlb_end_vma(tlb, vma) \
-do { \
- if (!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, ptep, address)
-
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/arc/lib/memset-archs.S b/arch/arc/lib/memset-archs.S
index f230bb7092fd..b3373f5c88e0 100644
--- a/arch/arc/lib/memset-archs.S
+++ b/arch/arc/lib/memset-archs.S
@@ -30,10 +30,10 @@
#else
-.macro PREALLOC_INSTR
+.macro PREALLOC_INSTR reg, off
.endm
-.macro PREFETCHW_INSTR
+.macro PREFETCHW_INSTR reg, off
.endm
#endif
diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
index 4135abec3fb0..63e6e6504699 100644
--- a/arch/arc/mm/cache.c
+++ b/arch/arc/mm/cache.c
@@ -113,10 +113,24 @@ static void read_decode_cache_bcr_arcv2(int cpu)
}
READ_BCR(ARC_REG_CLUSTER_BCR, cbcr);
- if (cbcr.c)
+ if (cbcr.c) {
ioc_exists = 1;
- else
+
+ /*
+ * As for today we don't support both IOC and ZONE_HIGHMEM enabled
+ * simultaneously. This happens because as of today IOC aperture covers
+ * only ZONE_NORMAL (low mem) and any dma transactions outside this
+ * region won't be HW coherent.
+ * If we want to use both IOC and ZONE_HIGHMEM we can use
+ * bounce_buffer to handle dma transactions to HIGHMEM.
+ * Also it is possible to modify dma_direct cache ops or increase IOC
+ * aperture size if we are planning to use HIGHMEM without PAE.
+ */
+ if (IS_ENABLED(CONFIG_HIGHMEM) || is_pae40_enabled())
+ ioc_enable = 0;
+ } else {
ioc_enable = 0;
+ }
/* HS 2.0 didn't have AUX_VOL */
if (cpuinfo_arc700[cpu].core.family > 0x51) {
@@ -1158,19 +1172,6 @@ noinline void __init arc_ioc_setup(void)
if (!ioc_enable)
return;
- /*
- * As for today we don't support both IOC and ZONE_HIGHMEM enabled
- * simultaneously. This happens because as of today IOC aperture covers
- * only ZONE_NORMAL (low mem) and any dma transactions outside this
- * region won't be HW coherent.
- * If we want to use both IOC and ZONE_HIGHMEM we can use
- * bounce_buffer to handle dma transactions to HIGHMEM.
- * Also it is possible to modify dma_direct cache ops or increase IOC
- * aperture size if we are planning to use HIGHMEM without PAE.
- */
- if (IS_ENABLED(CONFIG_HIGHMEM))
- panic("IOC and HIGHMEM can't be used simultaneously");
-
/* Flush + invalidate + disable L1 dcache */
__dc_disable();
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9aed25a6019b..dc9855c4a3b4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -178,10 +178,6 @@ config TRACE_IRQFLAGS_SUPPORT
bool
default !CPU_V7M
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/arm/boot/dts/ls1021a.dtsi b/arch/arm/boot/dts/ls1021a.dtsi
index b4f2723ecd86..b10ff5877b4c 100644
--- a/arch/arm/boot/dts/ls1021a.dtsi
+++ b/arch/arm/boot/dts/ls1021a.dtsi
@@ -446,6 +446,34 @@
status = "disabled";
};
+ counter0: counter@29d0000 {
+ compatible = "fsl,ftm-quaddec";
+ reg = <0x0 0x29d0000 0x0 0x10000>;
+ big-endian;
+ status = "disabled";
+ };
+
+ counter1: counter@29e0000 {
+ compatible = "fsl,ftm-quaddec";
+ reg = <0x0 0x29e0000 0x0 0x10000>;
+ big-endian;
+ status = "disabled";
+ };
+
+ counter2: counter@29f0000 {
+ compatible = "fsl,ftm-quaddec";
+ reg = <0x0 0x29f0000 0x0 0x10000>;
+ big-endian;
+ status = "disabled";
+ };
+
+ counter3: counter@2a00000 {
+ compatible = "fsl,ftm-quaddec";
+ reg = <0x0 0x2a00000 0x0 0x10000>;
+ big-endian;
+ status = "disabled";
+ };
+
gpio0: gpio@2300000 {
compatible = "fsl,ls1021a-gpio", "fsl,qoriq-gpio";
reg = <0x0 0x2300000 0x0 0x10000>;
diff --git a/arch/arm/boot/dts/omap4-droid4-xt894.dts b/arch/arm/boot/dts/omap4-droid4-xt894.dts
index e21ec929f096..714863f8f261 100644
--- a/arch/arm/boot/dts/omap4-droid4-xt894.dts
+++ b/arch/arm/boot/dts/omap4-droid4-xt894.dts
@@ -214,7 +214,6 @@
width-mm = <50>;
height-mm = <89>;
- backlight = <&lcd_backlight>;
panel-timing {
clock-frequency = <0>; /* Calculated by dsi */
@@ -383,20 +382,30 @@
};
&i2c1 {
- lm3532@38 {
+ led-controller@38 {
compatible = "ti,lm3532";
+ #address-cells = <1>;
+ #size-cells = <0>;
reg = <0x38>;
enable-gpios = <&gpio6 12 GPIO_ACTIVE_HIGH>;
- lcd_backlight: backlight {
- compatible = "ti,lm3532-backlight";
+ ramp-up-us = <1024>;
+ ramp-down-us = <8193>;
- lcd {
- led-sources = <0 1 2>;
- ramp-up-msec = <1>;
- ramp-down-msec = <0>;
- };
+ led@0 {
+ reg = <0>;
+ led-sources = <2>;
+ ti,led-mode = <0>;
+ label = ":backlight";
+ linux,default-trigger = "backlight";
+ };
+
+ led@1 {
+ reg = <1>;
+ led-sources = <1>;
+ ti,led-mode = <0>;
+ label = ":kbd_backlight";
};
};
};
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index a024d1e7e74c..8ce3dd2264b1 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -616,6 +616,7 @@
dr_mode = "host";
phys = <&usbphy2>;
phy-names = "usb2-phy";
+ snps,reset-phy-on-wake;
status = "disabled";
};
@@ -904,6 +905,8 @@
clocks = <&cru SCLK_OTGPHY0>;
clock-names = "phyclk";
#clock-cells = <0>;
+ resets = <&cru SRST_USBOTG_PHY>;
+ reset-names = "phy-reset";
};
usbphy1: usb-phy@334 {
@@ -912,6 +915,8 @@
clocks = <&cru SCLK_OTGPHY1>;
clock-names = "phyclk";
#clock-cells = <0>;
+ resets = <&cru SRST_USBHOST0_PHY>;
+ reset-names = "phy-reset";
};
usbphy2: usb-phy@348 {
@@ -920,6 +925,8 @@
clocks = <&cru SCLK_OTGPHY2>;
clock-names = "phyclk";
#clock-cells = <0>;
+ resets = <&cru SRST_USBHOST1_PHY>;
+ reset-names = "phy-reset";
};
};
};
diff --git a/arch/arm/configs/aspeed_g4_defconfig b/arch/arm/configs/aspeed_g4_defconfig
index 1446262921b4..190d6e9d3296 100644
--- a/arch/arm/configs/aspeed_g4_defconfig
+++ b/arch/arm/configs/aspeed_g4_defconfig
@@ -23,7 +23,6 @@ CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_JUMP_LABEL=y
CONFIG_STRICT_KERNEL_RWX=y
CONFIG_GCC_PLUGINS=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_BLK_DEBUG_FS is not set
# CONFIG_IOSCHED_DEADLINE is not set
@@ -248,7 +247,6 @@ CONFIG_PANIC_TIMEOUT=-1
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_FUNCTION_TRACER=y
-# CONFIG_TRACING_EVENTS_GPIO is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_DEBUG_WX=y
CONFIG_DEBUG_USER=y
diff --git a/arch/arm/configs/aspeed_g5_defconfig b/arch/arm/configs/aspeed_g5_defconfig
index 02fa3a41add5..407ffb7655a8 100644
--- a/arch/arm/configs/aspeed_g5_defconfig
+++ b/arch/arm/configs/aspeed_g5_defconfig
@@ -23,7 +23,6 @@ CONFIG_SLAB_FREELIST_RANDOM=y
CONFIG_JUMP_LABEL=y
CONFIG_STRICT_KERNEL_RWX=y
CONFIG_GCC_PLUGINS=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_BLK_DEBUG_FS is not set
# CONFIG_IOSCHED_DEADLINE is not set
@@ -248,7 +247,6 @@ CONFIG_PANIC_TIMEOUT=-1
# CONFIG_SCHED_DEBUG is not set
CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_FUNCTION_TRACER=y
-# CONFIG_TRACING_EVENTS_GPIO is not set
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_DEBUG_WX=y
CONFIG_DEBUG_USER=y
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index 50ef3eb0ab64..a88e31449880 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -9,7 +9,6 @@ CONFIG_EMBEDDED=y
CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/clps711x_defconfig b/arch/arm/configs/clps711x_defconfig
index 7968d20673b9..c255dab36bde 100644
--- a/arch/arm/configs/clps711x_defconfig
+++ b/arch/arm/configs/clps711x_defconfig
@@ -6,7 +6,6 @@ CONFIG_RD_LZMA=y
CONFIG_EMBEDDED=y
CONFIG_SLOB=y
CONFIG_JUMP_LABEL=y
-# CONFIG_LBDAF is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_CLPS711X=y
diff --git a/arch/arm/configs/efm32_defconfig b/arch/arm/configs/efm32_defconfig
index ee42158f41ec..10ea92513a69 100644
--- a/arch/arm/configs/efm32_defconfig
+++ b/arch/arm/configs/efm32_defconfig
@@ -11,7 +11,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index 484e51fbd4a6..e3afca5bd9d6 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig
@@ -13,7 +13,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
diff --git a/arch/arm/configs/h3600_defconfig b/arch/arm/configs/h3600_defconfig
index ebeca11faa48..175881b7da7c 100644
--- a/arch/arm/configs/h3600_defconfig
+++ b/arch/arm/configs/h3600_defconfig
@@ -4,7 +4,6 @@ CONFIG_HIGH_RES_TIMERS=y
CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index f204017c26b9..9b779e13e05d 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig
@@ -12,7 +12,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_PXA=y
diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig
index 078228a19339..6a11669fa536 100644
--- a/arch/arm/configs/moxart_defconfig
+++ b/arch/arm/configs/moxart_defconfig
@@ -15,7 +15,6 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
CONFIG_ARCH_MULTI_V4=y
diff --git a/arch/arm/configs/multi_v4t_defconfig b/arch/arm/configs/multi_v4t_defconfig
index 14f3a4a65d01..0b42bddfbc82 100644
--- a/arch/arm/configs/multi_v4t_defconfig
+++ b/arch/arm/configs/multi_v4t_defconfig
@@ -5,7 +5,6 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_EMBEDDED=y
CONFIG_SLOB=y
CONFIG_JUMP_LABEL=y
-# CONFIG_LBDAF is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
CONFIG_ARCH_MULTI_V4T=y
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 40fdf890a292..82af77c093f1 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -17,7 +17,6 @@ CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/stm32_defconfig b/arch/arm/configs/stm32_defconfig
index 0258ba891376..152321d2893e 100644
--- a/arch/arm/configs/stm32_defconfig
+++ b/arch/arm/configs/stm32_defconfig
@@ -13,7 +13,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/u300_defconfig b/arch/arm/configs/u300_defconfig
index cfd3622e2c8a..bedf397c75de 100644
--- a/arch/arm/configs/u300_defconfig
+++ b/arch/arm/configs/u300_defconfig
@@ -9,7 +9,6 @@ CONFIG_EXPERT=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig
index 392ed3b3613c..484d77a7f589 100644
--- a/arch/arm/configs/vexpress_defconfig
+++ b/arch/arm/configs/vexpress_defconfig
@@ -14,7 +14,6 @@ CONFIG_PROFILING=y
CONFIG_OPROFILE=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index 07e31941dc67..617c2c99ebfb 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -278,6 +278,8 @@ static int __xts_crypt(struct skcipher_request *req,
int err;
err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
index 9d6fda81986d..48a89537b828 100644
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ b/arch/arm/crypto/chacha-neon-glue.c
@@ -21,6 +21,7 @@
#include <crypto/algapi.h>
#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -93,7 +94,7 @@ static int chacha_neon(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
return crypto_chacha_crypt(req);
return chacha_neon_stream_xor(req, ctx, req->iv);
@@ -107,7 +108,7 @@ static int xchacha_neon(struct skcipher_request *req)
u32 state[16];
u8 real_iv[16];
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
return crypto_xchacha_crypt(req);
crypto_chacha_init(state, ctx, req->iv);
diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glue.c
index cd9e93b46c2d..e712c2a7d387 100644
--- a/arch/arm/crypto/crc32-ce-glue.c
+++ b/arch/arm/crypto/crc32-ce-glue.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <asm/hwcap.h>
#include <asm/neon.h>
@@ -113,7 +114,7 @@ static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
u32 *crc = shash_desc_ctx(desc);
unsigned int l;
- if (may_use_simd()) {
+ if (crypto_simd_usable()) {
if ((u32)data % SCALE_F) {
l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
@@ -147,7 +148,7 @@ static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
u32 *crc = shash_desc_ctx(desc);
unsigned int l;
- if (may_use_simd()) {
+ if (crypto_simd_usable()) {
if ((u32)data % SCALE_F) {
l = min_t(u32, length, SCALE_F - ((u32)data % SCALE_F));
diff --git a/arch/arm/crypto/crct10dif-ce-glue.c b/arch/arm/crypto/crct10dif-ce-glue.c
index 3d6b800b8396..3b24f2872592 100644
--- a/arch/arm/crypto/crct10dif-ce-glue.c
+++ b/arch/arm/crypto/crct10dif-ce-glue.c
@@ -15,6 +15,7 @@
#include <linux/string.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <asm/neon.h>
#include <asm/simd.h>
@@ -36,7 +37,7 @@ static int crct10dif_update(struct shash_desc *desc, const u8 *data,
{
u16 *crc = shash_desc_ctx(desc);
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
kernel_neon_begin();
*crc = crc_t10dif_pmull(*crc, data, length);
kernel_neon_end();
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index b7d30b6cf49c..39d1ccec1aab 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -14,6 +14,7 @@
#include <asm/unaligned.h>
#include <crypto/cryptd.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/gf128mul.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
@@ -185,7 +186,6 @@ static int ghash_async_init(struct ahash_request *req)
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
- desc->flags = req->base.flags;
return crypto_shash_init(desc);
}
@@ -196,7 +196,7 @@ static int ghash_async_update(struct ahash_request *req)
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!may_use_simd() ||
+ if (!crypto_simd_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -214,7 +214,7 @@ static int ghash_async_final(struct ahash_request *req)
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!may_use_simd() ||
+ if (!crypto_simd_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -232,7 +232,7 @@ static int ghash_async_digest(struct ahash_request *req)
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!may_use_simd() ||
+ if (!crypto_simd_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -242,7 +242,6 @@ static int ghash_async_digest(struct ahash_request *req)
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
- desc->flags = req->base.flags;
return shash_ahash_digest(req, desc);
}
}
@@ -255,7 +254,6 @@ static int ghash_async_import(struct ahash_request *req, const void *in)
struct shash_desc *desc = cryptd_shash_desc(cryptd_req);
desc->tfm = cryptd_ahash_child(ctx->cryptd_tfm);
- desc->flags = req->base.flags;
return crypto_shash_import(desc, in);
}
diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c
index 49aae87cb2bc..ae5aefc44a4d 100644
--- a/arch/arm/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm/crypto/nhpoly1305-neon-glue.c
@@ -9,6 +9,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
@@ -25,7 +26,7 @@ static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
static int nhpoly1305_neon_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
- if (srclen < 64 || !may_use_simd())
+ if (srclen < 64 || !crypto_simd_usable())
return crypto_nhpoly1305_update(desc, src, srclen);
do {
diff --git a/arch/arm/crypto/sha1-ce-glue.c b/arch/arm/crypto/sha1-ce-glue.c
index b732522e20f8..4c6c6900853c 100644
--- a/arch/arm/crypto/sha1-ce-glue.c
+++ b/arch/arm/crypto/sha1-ce-glue.c
@@ -9,6 +9,7 @@
*/
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
@@ -33,7 +34,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
{
struct sha1_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd() ||
+ if (!crypto_simd_usable() ||
(sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
return sha1_update_arm(desc, data, len);
@@ -47,7 +48,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return sha1_finup_arm(desc, data, len, out);
kernel_neon_begin();
diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c
index d15e0ea2c95e..d6c95c213d42 100644
--- a/arch/arm/crypto/sha1_neon_glue.c
+++ b/arch/arm/crypto/sha1_neon_glue.c
@@ -19,6 +19,7 @@
*/
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -39,7 +40,7 @@ static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
{
struct sha1_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd() ||
+ if (!crypto_simd_usable() ||
(sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
return sha1_update_arm(desc, data, len);
@@ -54,7 +55,7 @@ static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
static int sha1_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return sha1_finup_arm(desc, data, len, out);
kernel_neon_begin();
diff --git a/arch/arm/crypto/sha2-ce-glue.c b/arch/arm/crypto/sha2-ce-glue.c
index 1211a5c129fc..a47a9d4b663e 100644
--- a/arch/arm/crypto/sha2-ce-glue.c
+++ b/arch/arm/crypto/sha2-ce-glue.c
@@ -9,6 +9,7 @@
*/
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <linux/cpufeature.h>
@@ -34,7 +35,7 @@ static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
{
struct sha256_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd() ||
+ if (!crypto_simd_usable() ||
(sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
return crypto_sha256_arm_update(desc, data, len);
@@ -49,7 +50,7 @@ static int sha2_ce_update(struct shash_desc *desc, const u8 *data,
static int sha2_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sha256_arm_finup(desc, data, len, out);
kernel_neon_begin();
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
index 1d82c6cd31a4..f3f6b1624fc3 100644
--- a/arch/arm/crypto/sha256_neon_glue.c
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -15,6 +15,7 @@
*/
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -34,7 +35,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
{
struct sha256_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd() ||
+ if (!crypto_simd_usable() ||
(sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
return crypto_sha256_arm_update(desc, data, len);
@@ -49,7 +50,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sha256_arm_finup(desc, data, len, out);
kernel_neon_begin();
diff --git a/arch/arm/crypto/sha512-neon-glue.c b/arch/arm/crypto/sha512-neon-glue.c
index 8a5642b41fd6..d33ab59c26c0 100644
--- a/arch/arm/crypto/sha512-neon-glue.c
+++ b/arch/arm/crypto/sha512-neon-glue.c
@@ -9,6 +9,7 @@
*/
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
#include <linux/crypto.h>
@@ -30,7 +31,7 @@ static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
{
struct sha512_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd() ||
+ if (!crypto_simd_usable() ||
(sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
return sha512_arm_update(desc, data, len);
@@ -45,7 +46,7 @@ static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
static int sha512_neon_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return sha512_arm_finup(desc, data, len, out);
kernel_neon_begin();
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index a8a4eb7f6dae..41deac2451af 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -9,10 +9,10 @@ generic-y += kdebug.h
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += msi.h
generic-y += parport.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += seccomp.h
generic-y += segment.h
generic-y += serial.h
diff --git a/arch/arm/include/asm/arch_timer.h b/arch/arm/include/asm/arch_timer.h
index 0a8d7bba2cb0..4b66ecd6be99 100644
--- a/arch/arm/include/asm/arch_timer.h
+++ b/arch/arm/include/asm/arch_timer.h
@@ -11,6 +11,10 @@
#include <clocksource/arm_arch_timer.h>
#ifdef CONFIG_ARM_ARCH_TIMER
+/* 32bit ARM doesn't know anything about timer errata... */
+#define has_erratum_handler(h) (false)
+#define erratum_handler(h) (arch_timer_##h)
+
int arch_timer_arch_init(void);
/*
@@ -79,7 +83,7 @@ static inline u32 arch_timer_get_cntfrq(void)
return val;
}
-static inline u64 arch_counter_get_cntpct(void)
+static inline u64 __arch_counter_get_cntpct(void)
{
u64 cval;
@@ -88,7 +92,12 @@ static inline u64 arch_counter_get_cntpct(void)
return cval;
}
-static inline u64 arch_counter_get_cntvct(void)
+static inline u64 __arch_counter_get_cntpct_stable(void)
+{
+ return __arch_counter_get_cntpct();
+}
+
+static inline u64 __arch_counter_get_cntvct(void)
{
u64 cval;
@@ -97,6 +106,11 @@ static inline u64 arch_counter_get_cntvct(void)
return cval;
}
+static inline u64 __arch_counter_get_cntvct_stable(void)
+{
+ return __arch_counter_get_cntvct();
+}
+
static inline u32 arch_timer_get_cntkctl(void)
{
u32 cntkctl;
diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h
index 07e27f212dc7..d2453e2d3f1f 100644
--- a/arch/arm/include/asm/cp15.h
+++ b/arch/arm/include/asm/cp15.h
@@ -68,6 +68,8 @@
#define BPIALL __ACCESS_CP15(c7, 0, c5, 6)
#define ICIALLU __ACCESS_CP15(c7, 0, c5, 0)
+#define CNTVCT __ACCESS_CP15_64(1, c14)
+
extern unsigned long cr_alignment; /* defined in entry-armv.S */
static inline unsigned long get_cr(void)
diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h
index 6b51826ab3d1..7e22c81398c4 100644
--- a/arch/arm/include/asm/io.h
+++ b/arch/arm/include/asm/io.h
@@ -281,8 +281,6 @@ extern void _memcpy_fromio(void *, const volatile void __iomem *, size_t);
extern void _memcpy_toio(volatile void __iomem *, const void *, size_t);
extern void _memset_io(volatile void __iomem *, int, size_t);
-#define mmiowb()
-
/*
* Memory access primitives
* ------------------------
diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h
index 9e11dce55e06..9587517649bd 100644
--- a/arch/arm/include/asm/stage2_pgtable.h
+++ b/arch/arm/include/asm/stage2_pgtable.h
@@ -32,14 +32,14 @@
#define stage2_pgd_present(kvm, pgd) pgd_present(pgd)
#define stage2_pgd_populate(kvm, pgd, pud) pgd_populate(NULL, pgd, pud)
#define stage2_pud_offset(kvm, pgd, address) pud_offset(pgd, address)
-#define stage2_pud_free(kvm, pud) pud_free(NULL, pud)
+#define stage2_pud_free(kvm, pud) do { } while (0)
#define stage2_pud_none(kvm, pud) pud_none(pud)
#define stage2_pud_clear(kvm, pud) pud_clear(pud)
#define stage2_pud_present(kvm, pud) pud_present(pud)
#define stage2_pud_populate(kvm, pud, pmd) pud_populate(NULL, pud, pmd)
#define stage2_pmd_offset(kvm, pud, address) pmd_offset(pud, address)
-#define stage2_pmd_free(kvm, pmd) pmd_free(NULL, pmd)
+#define stage2_pmd_free(kvm, pmd) free_page((unsigned long)pmd)
#define stage2_pud_huge(kvm, pud) pud_huge(pud)
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 080ce70cab12..fd02761ba06c 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -73,7 +73,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->ARM_r0 + 1, args, 5 * sizeof(args[0]));
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
/* ARM tasks don't change audit architectures on the fly. */
return AUDIT_ARCH_ARM;
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index f854148c8d7c..bc6d04a09899 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -33,271 +33,42 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-#define MMU_GATHER_BUNDLE 8
-
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
static inline void __tlb_remove_table(void *_table)
{
free_page_and_swap_cache((struct page *)_table);
}
-struct mmu_table_batch {
- struct rcu_head rcu;
- unsigned int nr;
- void *tables[0];
-};
-
-#define MAX_TABLE_BATCH \
- ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
-
-extern void tlb_table_flush(struct mmu_gather *tlb);
-extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-
-#define tlb_remove_entry(tlb, entry) tlb_remove_table(tlb, entry)
-#else
-#define tlb_remove_entry(tlb, entry) tlb_remove_page(tlb, entry)
-#endif /* CONFIG_HAVE_RCU_TABLE_FREE */
-
-/*
- * TLB handling. This allows us to remove pages from the page
- * tables, and efficiently handle the TLB issues.
- */
-struct mmu_gather {
- struct mm_struct *mm;
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- struct mmu_table_batch *batch;
- unsigned int need_flush;
-#endif
- unsigned int fullmm;
- struct vm_area_struct *vma;
- unsigned long start, end;
- unsigned long range_start;
- unsigned long range_end;
- unsigned int nr;
- unsigned int max;
- struct page **pages;
- struct page *local[MMU_GATHER_BUNDLE];
-};
-
-DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
-
-/*
- * This is unnecessarily complex. There's three ways the TLB shootdown
- * code is used:
- * 1. Unmapping a range of vmas. See zap_page_range(), unmap_region().
- * tlb->fullmm = 0, and tlb_start_vma/tlb_end_vma will be called.
- * tlb->vma will be non-NULL.
- * 2. Unmapping all vmas. See exit_mmap().
- * tlb->fullmm = 1, and tlb_start_vma/tlb_end_vma will be called.
- * tlb->vma will be non-NULL. Additionally, page tables will be freed.
- * 3. Unmapping argument pages. See shift_arg_pages().
- * tlb->fullmm = 0, but tlb_start_vma/tlb_end_vma will not be called.
- * tlb->vma will be NULL.
- */
-static inline void tlb_flush(struct mmu_gather *tlb)
-{
- if (tlb->fullmm || !tlb->vma)
- flush_tlb_mm(tlb->mm);
- else if (tlb->range_end > 0) {
- flush_tlb_range(tlb->vma, tlb->range_start, tlb->range_end);
- tlb->range_start = TASK_SIZE;
- tlb->range_end = 0;
- }
-}
-
-static inline void tlb_add_flush(struct mmu_gather *tlb, unsigned long addr)
-{
- if (!tlb->fullmm) {
- if (addr < tlb->range_start)
- tlb->range_start = addr;
- if (addr + PAGE_SIZE > tlb->range_end)
- tlb->range_end = addr + PAGE_SIZE;
- }
-}
-
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
- unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
-
- if (addr) {
- tlb->pages = (void *)addr;
- tlb->max = PAGE_SIZE / sizeof(struct page *);
- }
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- tlb_flush(tlb);
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb_table_flush(tlb);
-#endif
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- free_pages_and_swap_cache(tlb->pages, tlb->nr);
- tlb->nr = 0;
- if (tlb->pages == tlb->local)
- __tlb_alloc_page(tlb);
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->fullmm = !(start | (end+1));
- tlb->start = start;
- tlb->end = end;
- tlb->vma = NULL;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->pages = tlb->local;
- tlb->nr = 0;
- __tlb_alloc_page(tlb);
+#include <asm-generic/tlb.h>
-#ifdef CONFIG_HAVE_RCU_TABLE_FREE
- tlb->batch = NULL;
+#ifndef CONFIG_HAVE_RCU_TABLE_FREE
+#define tlb_remove_table(tlb, entry) tlb_remove_page(tlb, entry)
#endif
-}
-
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->range_start = start;
- tlb->range_end = end;
- }
-
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
- if (tlb->pages != tlb->local)
- free_pages((unsigned long)tlb->pages, 0);
-}
-
-/*
- * Memorize the range for the TLB flush.
- */
-static inline void
-tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
-{
- tlb_add_flush(tlb, addr);
-}
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush. When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm) {
- flush_cache_range(vma, vma->vm_start, vma->vm_end);
- tlb->vma = vma;
- tlb->range_start = TASK_SIZE;
- tlb->range_end = 0;
- }
-}
static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm)
- tlb_flush(tlb);
-}
-
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->pages[tlb->nr++] = page;
- VM_WARN_ON(tlb->nr > tlb->max);
- if (tlb->nr == tlb->max)
- return true;
- return false;
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- if (__tlb_remove_page(tlb, page))
- tlb_flush_mmu(tlb);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
- unsigned long addr)
+__pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, unsigned long addr)
{
pgtable_page_dtor(pte);
-#ifdef CONFIG_ARM_LPAE
- tlb_add_flush(tlb, addr);
-#else
+#ifndef CONFIG_ARM_LPAE
/*
* With the classic ARM MMU, a pte page has two corresponding pmd
* entries, each covering 1MB.
*/
- addr &= PMD_MASK;
- tlb_add_flush(tlb, addr + SZ_1M - PAGE_SIZE);
- tlb_add_flush(tlb, addr + SZ_1M);
+ addr = (addr & PMD_MASK) + SZ_1M;
+ __tlb_adjust_range(tlb, addr - PAGE_SIZE, 2 * PAGE_SIZE);
#endif
- tlb_remove_entry(tlb, pte);
-}
-
-static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
- unsigned long addr)
-{
-#ifdef CONFIG_ARM_LPAE
- tlb_add_flush(tlb, addr);
- tlb_remove_entry(tlb, virt_to_page(pmdp));
-#endif
+ tlb_remove_table(tlb, pte);
}
static inline void
-tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
-{
- tlb_add_flush(tlb, addr);
-}
-
-#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
-#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
-#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
-
-#define tlb_migrate_finish(mm) do { } while (0)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
+__pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr)
{
-}
-
-static inline void tlb_flush_remove_tables(struct mm_struct *mm)
-{
-}
+#ifdef CONFIG_ARM_LPAE
+ struct page *page = virt_to_page(pmdp);
-static inline void tlb_flush_remove_tables_local(void *arg)
-{
+ tlb_remove_table(tlb, page);
+#endif
}
#endif /* CONFIG_MMU */
diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c
index 84363fe7bad2..10c45cc6b957 100644
--- a/arch/arm/kernel/dma-isa.c
+++ b/arch/arm/kernel/dma-isa.c
@@ -55,6 +55,12 @@ static int isa_get_dma_residue(unsigned int chan, dma_t *dma)
return chan < 4 ? count : (count << 1);
}
+static struct device isa_dma_dev = {
+ .init_name = "fallback device",
+ .coherent_dma_mask = ~(dma_addr_t)0,
+ .dma_mask = &isa_dma_dev.coherent_dma_mask,
+};
+
static void isa_enable_dma(unsigned int chan, dma_t *dma)
{
if (dma->invalid) {
@@ -89,7 +95,7 @@ static void isa_enable_dma(unsigned int chan, dma_t *dma)
dma->sg = &dma->buf;
dma->sgcount = 1;
dma->buf.length = dma->count;
- dma->buf.dma_address = dma_map_single(NULL,
+ dma->buf.dma_address = dma_map_single(&isa_dma_dev,
dma->addr, dma->count,
direction);
}
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 76bb8de6bf6b..be5edfdde558 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -549,8 +549,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
int ret;
/*
- * Increment event counter and perform fixup for the pre-signal
- * frame.
+ * Perform fixup for the pre-signal frame.
*/
rseq_signal_deliver(ksig, regs);
diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c
index a56e7c856ab5..86870f40f9a0 100644
--- a/arch/arm/kernel/stacktrace.c
+++ b/arch/arm/kernel/stacktrace.c
@@ -115,8 +115,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
* running on another CPU? For now, ignore it as we
* can't guarantee we won't explode.
*/
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
return;
#else
frame.fp = thread_saved_fp(tsk);
@@ -134,8 +132,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
}
walk_stackframe(&frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
@@ -153,8 +149,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
frame.pc = regs->ARM_pc;
walk_stackframe(&frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 3f5320f46de2..f591026347a5 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -22,7 +22,6 @@ config KVM
bool "Kernel-based Virtual Machine (KVM) support"
depends on MMU && OF
select PREEMPT_NOTIFIERS
- select ANON_INODES
select ARM_GIC
select ARM_GIC_V3
select ARM_GIC_V3_ITS
diff --git a/arch/arm/mach-ep93xx/edb93xx.c b/arch/arm/mach-ep93xx/edb93xx.c
index 8e89ec8b6f0f..34e18e9556d9 100644
--- a/arch/arm/mach-ep93xx/edb93xx.c
+++ b/arch/arm/mach-ep93xx/edb93xx.c
@@ -29,6 +29,7 @@
#include <linux/platform_device.h>
#include <linux/i2c.h>
#include <linux/spi/spi.h>
+#include <linux/gpio/machine.h>
#include <sound/cs4271.h>
@@ -105,13 +106,16 @@ static struct spi_board_info edb93xx_spi_board_info[] __initdata = {
},
};
-static int edb93xx_spi_chipselects[] __initdata = {
- EP93XX_GPIO_LINE_EGPIO6,
+static struct gpiod_lookup_table edb93xx_spi_cs_gpio_table = {
+ .dev_id = "ep93xx-spi.0",
+ .table = {
+ GPIO_LOOKUP("A", 6, "cs", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct ep93xx_spi_info edb93xx_spi_info __initdata = {
- .chipselect = edb93xx_spi_chipselects,
- .num_chipselect = ARRAY_SIZE(edb93xx_spi_chipselects),
+ /* Intentionally left blank */
};
static void __init edb93xx_register_spi(void)
@@ -123,6 +127,7 @@ static void __init edb93xx_register_spi(void)
else if (machine_is_edb9315a())
edb93xx_cs4271_data.gpio_nreset = EP93XX_GPIO_LINE_EGPIO14;
+ gpiod_add_lookup_table(&edb93xx_spi_cs_gpio_table);
ep93xx_register_spi(&edb93xx_spi_info, edb93xx_spi_board_info,
ARRAY_SIZE(edb93xx_spi_board_info));
}
diff --git a/arch/arm/mach-ep93xx/simone.c b/arch/arm/mach-ep93xx/simone.c
index 80ccb984d521..f0f38c0dba52 100644
--- a/arch/arm/mach-ep93xx/simone.c
+++ b/arch/arm/mach-ep93xx/simone.c
@@ -77,13 +77,15 @@ static struct spi_board_info simone_spi_devices[] __initdata = {
* low between multi-message command blocks. From v1.4, it uses a GPIO instead.
* v1.3 parts will still work, since the signal on SFRMOUT is automatic.
*/
-static int simone_spi_chipselects[] __initdata = {
- EP93XX_GPIO_LINE_EGPIO1,
+static struct gpiod_lookup_table simone_spi_cs_gpio_table = {
+ .dev_id = "ep93xx-spi.0",
+ .table = {
+ GPIO_LOOKUP("A", 1, "cs", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct ep93xx_spi_info simone_spi_info __initdata = {
- .chipselect = simone_spi_chipselects,
- .num_chipselect = ARRAY_SIZE(simone_spi_chipselects),
.use_dma = 1,
};
@@ -113,6 +115,7 @@ static void __init simone_init_machine(void)
ep93xx_register_i2c(simone_i2c_board_info,
ARRAY_SIZE(simone_i2c_board_info));
gpiod_add_lookup_table(&simone_mmc_spi_gpio_table);
+ gpiod_add_lookup_table(&simone_spi_cs_gpio_table);
ep93xx_register_spi(&simone_spi_info, simone_spi_devices,
ARRAY_SIZE(simone_spi_devices));
simone_register_audio();
diff --git a/arch/arm/mach-ep93xx/ts72xx.c b/arch/arm/mach-ep93xx/ts72xx.c
index 85b74ac943f0..a3a20c83c6b8 100644
--- a/arch/arm/mach-ep93xx/ts72xx.c
+++ b/arch/arm/mach-ep93xx/ts72xx.c
@@ -22,6 +22,7 @@
#include <linux/spi/mmc_spi.h>
#include <linux/mmc/host.h>
#include <linux/platform_data/spi-ep93xx.h>
+#include <linux/gpio/machine.h>
#include <mach/gpio-ep93xx.h>
#include <mach/hardware.h>
@@ -269,13 +270,15 @@ static struct spi_board_info bk3_spi_board_info[] __initdata = {
* The all work is performed automatically by !SPI_FRAME (SFRM1) and
* goes through CPLD
*/
-static int bk3_spi_chipselects[] __initdata = {
- EP93XX_GPIO_LINE_F(3),
+static struct gpiod_lookup_table bk3_spi_cs_gpio_table = {
+ .dev_id = "ep93xx-spi.0",
+ .table = {
+ GPIO_LOOKUP("F", 3, "cs", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct ep93xx_spi_info bk3_spi_master __initdata = {
- .chipselect = bk3_spi_chipselects,
- .num_chipselect = ARRAY_SIZE(bk3_spi_chipselects),
.use_dma = 1,
};
@@ -316,13 +319,17 @@ static struct spi_board_info ts72xx_spi_devices[] __initdata = {
},
};
-static int ts72xx_spi_chipselects[] __initdata = {
- EP93XX_GPIO_LINE_F(2), /* DIO_17 */
+static struct gpiod_lookup_table ts72xx_spi_cs_gpio_table = {
+ .dev_id = "ep93xx-spi.0",
+ .table = {
+ /* DIO_17 */
+ GPIO_LOOKUP("F", 2, "cs", GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct ep93xx_spi_info ts72xx_spi_info __initdata = {
- .chipselect = ts72xx_spi_chipselects,
- .num_chipselect = ARRAY_SIZE(ts72xx_spi_chipselects),
+ /* Intentionally left blank */
};
static void __init ts72xx_init_machine(void)
@@ -339,6 +346,7 @@ static void __init ts72xx_init_machine(void)
if (board_is_ts7300())
platform_device_register(&ts73xx_fpga_device);
#endif
+ gpiod_add_lookup_table(&ts72xx_spi_cs_gpio_table);
ep93xx_register_spi(&ts72xx_spi_info, ts72xx_spi_devices,
ARRAY_SIZE(ts72xx_spi_devices));
}
@@ -398,6 +406,7 @@ static void __init bk3_init_machine(void)
ep93xx_register_eth(&ts72xx_eth_data, 1);
+ gpiod_add_lookup_table(&bk3_spi_cs_gpio_table);
ep93xx_register_spi(&bk3_spi_master, bk3_spi_board_info,
ARRAY_SIZE(bk3_spi_board_info));
diff --git a/arch/arm/mach-ep93xx/vision_ep9307.c b/arch/arm/mach-ep93xx/vision_ep9307.c
index 767ee64628dc..f95a644769e4 100644
--- a/arch/arm/mach-ep93xx/vision_ep9307.c
+++ b/arch/arm/mach-ep93xx/vision_ep9307.c
@@ -245,15 +245,17 @@ static struct spi_board_info vision_spi_board_info[] __initdata = {
},
};
-static int vision_spi_chipselects[] __initdata = {
- EP93XX_GPIO_LINE_EGPIO6,
- EP93XX_GPIO_LINE_EGPIO7,
- EP93XX_GPIO_LINE_G(2),
+static struct gpiod_lookup_table vision_spi_cs_gpio_table = {
+ .dev_id = "ep93xx-spi.0",
+ .table = {
+ GPIO_LOOKUP_IDX("A", 6, "cs", 0, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("A", 7, "cs", 1, GPIO_ACTIVE_LOW),
+ GPIO_LOOKUP_IDX("G", 2, "cs", 2, GPIO_ACTIVE_LOW),
+ { },
+ },
};
static struct ep93xx_spi_info vision_spi_master __initdata = {
- .chipselect = vision_spi_chipselects,
- .num_chipselect = ARRAY_SIZE(vision_spi_chipselects),
.use_dma = 1,
};
@@ -295,6 +297,7 @@ static void __init vision_init_machine(void)
ep93xx_register_i2c(vision_i2c_info,
ARRAY_SIZE(vision_i2c_info));
gpiod_add_lookup_table(&vision_spi_mmc_gpio_table);
+ gpiod_add_lookup_table(&vision_spi_cs_gpio_table);
ep93xx_register_spi(&vision_spi_master, vision_spi_board_info,
ARRAY_SIZE(vision_spi_board_info));
vision_register_i2s();
diff --git a/arch/arm/mach-imx/pm-imx6.c b/arch/arm/mach-imx/pm-imx6.c
index 87f45b926c78..e67e0b2d4ce0 100644
--- a/arch/arm/mach-imx/pm-imx6.c
+++ b/arch/arm/mach-imx/pm-imx6.c
@@ -631,7 +631,7 @@ static void imx6_pm_stby_poweroff(void)
static int imx6_pm_stby_poweroff_probe(void)
{
if (pm_power_off) {
- pr_warn("%s: pm_power_off already claimed %p %pf!\n",
+ pr_warn("%s: pm_power_off already claimed %p %ps!\n",
__func__, pm_power_off, pm_power_off);
return -EBUSY;
}
diff --git a/arch/arm/mach-mvebu/kirkwood.c b/arch/arm/mach-mvebu/kirkwood.c
index 0aa88105d46e..9b5f4d665374 100644
--- a/arch/arm/mach-mvebu/kirkwood.c
+++ b/arch/arm/mach-mvebu/kirkwood.c
@@ -92,7 +92,8 @@ static void __init kirkwood_dt_eth_fixup(void)
continue;
/* skip disabled nodes or nodes with valid MAC address*/
- if (!of_device_is_available(pnp) || of_get_mac_address(np))
+ if (!of_device_is_available(pnp) ||
+ !IS_ERR(of_get_mac_address(np)))
goto eth_fixup_skip;
clk = of_clk_get(pnp, 0);
diff --git a/arch/arm/mach-omap2/clock.c b/arch/arm/mach-omap2/clock.c
index 42881f21cede..3e0f09cc0028 100644
--- a/arch/arm/mach-omap2/clock.c
+++ b/arch/arm/mach-omap2/clock.c
@@ -119,6 +119,9 @@ void __init ti_clk_init_features(void)
if (cpu_is_omap343x())
features.flags |= TI_CLK_DPLL_HAS_FREQSEL;
+ if (omap_type() == OMAP2_DEVICE_TYPE_GP)
+ features.flags |= TI_CLK_DEVICE_TYPE_GP;
+
/* Idlest value for interface clocks.
* 24xx uses 0 to indicate not ready, and 1 to indicate ready.
* 34xx reverses this, just to keep us on our toes
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 3a04c73ac03c..baadddf9aad4 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -648,10 +648,10 @@ static struct clockdomain *_get_clkdm(struct omap_hwmod *oh)
if (oh->clkdm) {
return oh->clkdm;
} else if (oh->_clk) {
- if (__clk_get_flags(oh->_clk) & CLK_IS_BASIC)
+ if (!omap2_clk_is_hw_omap(__clk_get_hw(oh->_clk)))
return NULL;
clk = to_clk_hw_omap(__clk_get_hw(oh->_clk));
- return clk->clkdm;
+ return clk->clkdm;
}
return NULL;
}
diff --git a/arch/arm/mach-rpc/dma.c b/arch/arm/mach-rpc/dma.c
index fb48f3141fb4..f2703ca17954 100644
--- a/arch/arm/mach-rpc/dma.c
+++ b/arch/arm/mach-rpc/dma.c
@@ -151,6 +151,12 @@ static void iomd_free_dma(unsigned int chan, dma_t *dma)
free_irq(idma->irq, idma);
}
+static struct device isa_dma_dev = {
+ .init_name = "fallback device",
+ .coherent_dma_mask = ~(dma_addr_t)0,
+ .dma_mask = &isa_dma_dev.coherent_dma_mask,
+};
+
static void iomd_enable_dma(unsigned int chan, dma_t *dma)
{
struct iomd_dma *idma = container_of(dma, struct iomd_dma, dma);
@@ -168,7 +174,7 @@ static void iomd_enable_dma(unsigned int chan, dma_t *dma)
idma->dma.sg = &idma->dma.buf;
idma->dma.sgcount = 1;
idma->dma.buf.length = idma->dma.count;
- idma->dma.buf.dma_address = dma_map_single(NULL,
+ idma->dma.buf.dma_address = dma_map_single(&isa_dma_dev,
idma->dma.addr, idma->dma.count,
idma->dma.dma_mode == DMA_MODE_READ ?
DMA_FROM_DEVICE : DMA_TO_DEVICE);
diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c
index b54f8f8def36..e376883ab35b 100644
--- a/arch/arm/mm/alignment.c
+++ b/arch/arm/mm/alignment.c
@@ -133,7 +133,7 @@ static const char *usermode_action[] = {
static int alignment_proc_show(struct seq_file *m, void *v)
{
seq_printf(m, "User:\t\t%lu\n", ai_user);
- seq_printf(m, "System:\t\t%lu (%pF)\n", ai_sys, ai_sys_last_pc);
+ seq_printf(m, "System:\t\t%lu (%pS)\n", ai_sys, ai_sys_last_pc);
seq_printf(m, "Skipped:\t%lu\n", ai_skipped);
seq_printf(m, "Half:\t\t%lu\n", ai_half);
seq_printf(m, "Word:\t\t%lu\n", ai_word);
diff --git a/arch/arm/nwfpe/fpmodule.c b/arch/arm/nwfpe/fpmodule.c
index 1365e8650843..ee34c76e6624 100644
--- a/arch/arm/nwfpe/fpmodule.c
+++ b/arch/arm/nwfpe/fpmodule.c
@@ -147,7 +147,7 @@ void float_raise(signed char flags)
#ifdef CONFIG_DEBUG_USER
if (flags & debug)
printk(KERN_DEBUG
- "NWFPE: %s[%d] takes exception %08x at %pf from %08lx\n",
+ "NWFPE: %s[%d] takes exception %08x at %ps from %08lx\n",
current->comm, current->pid, flags,
__builtin_return_address(0), GET_USERREG()->ARM_pc);
#endif
diff --git a/arch/arm/plat-omap/dma.c b/arch/arm/plat-omap/dma.c
index d4012d6c0dcb..5ca4c5fd627a 100644
--- a/arch/arm/plat-omap/dma.c
+++ b/arch/arm/plat-omap/dma.c
@@ -1449,7 +1449,6 @@ static void __exit omap_system_dma_exit(void)
MODULE_DESCRIPTION("OMAP SYSTEM DMA DRIVER");
MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" DRIVER_NAME);
MODULE_AUTHOR("Texas Instruments Inc");
/*
diff --git a/arch/arm/vdso/vgettimeofday.c b/arch/arm/vdso/vgettimeofday.c
index a9dd619c6c29..7bdbf5d5c47d 100644
--- a/arch/arm/vdso/vgettimeofday.c
+++ b/arch/arm/vdso/vgettimeofday.c
@@ -18,9 +18,9 @@
#include <linux/compiler.h>
#include <linux/hrtimer.h>
#include <linux/time.h>
-#include <asm/arch_timer.h>
#include <asm/barrier.h>
#include <asm/bug.h>
+#include <asm/cp15.h>
#include <asm/page.h>
#include <asm/unistd.h>
#include <asm/vdso_datapage.h>
@@ -123,7 +123,8 @@ static notrace u64 get_ns(struct vdso_data *vdata)
u64 cycle_now;
u64 nsec;
- cycle_now = arch_counter_get_cntvct();
+ isb();
+ cycle_now = read_sysreg(CNTVCT);
cycle_delta = (cycle_now - vdata->cs_cycle_last) & vdata->cs_mask;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7e34b9eba5de..3f957443f286 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -13,6 +13,7 @@ config ARM64
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_COHERENT_TO_PFN
select ARCH_HAS_DMA_MMAP_PGPROT
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
@@ -90,6 +91,7 @@ config ARM64
select GENERIC_CLOCKEVENTS
select GENERIC_CLOCKEVENTS_BROADCAST
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_CPU_VULNERABILITIES
select GENERIC_EARLY_IOREMAP
select GENERIC_IDLE_POLL_SETUP
select GENERIC_IRQ_MULTI_HANDLER
@@ -148,8 +150,8 @@ config ARM64
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API
+ select HAVE_FUNCTION_ARG_ACCESS_API
select HAVE_RCU_TABLE_FREE
- select HAVE_RCU_TABLE_INVALIDATE
select HAVE_RSEQ
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
@@ -237,9 +239,6 @@ config LOCKDEP_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_BUG
def_bool y
depends on BUG
@@ -297,7 +296,7 @@ menu "Kernel Features"
menu "ARM errata workarounds via the alternatives framework"
config ARM64_WORKAROUND_CLEAN_CACHE
- def_bool n
+ bool
config ARM64_ERRATUM_826319
bool "Cortex-A53: 826319: System might deadlock if a write cannot complete until read data is accepted"
@@ -464,26 +463,28 @@ config ARM64_ERRATUM_1024718
bool "Cortex-A55: 1024718: Update of DBM/AP bits without break before make might result in incorrect update"
default y
help
- This option adds work around for Arm Cortex-A55 Erratum 1024718.
+ This option adds a workaround for ARM Cortex-A55 Erratum 1024718.
Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect
update of the hardware dirty bit when the DBM/AP bits are updated
- without a break-before-make. The work around is to disable the usage
+ without a break-before-make. The workaround is to disable the usage
of hardware DBM locally on the affected cores. CPUs not affected by
- erratum will continue to use the feature.
+ this erratum will continue to use the feature.
If unsure, say Y.
config ARM64_ERRATUM_1188873
- bool "Cortex-A76: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result"
+ bool "Cortex-A76/Neoverse-N1: MRC read following MRRC read of specific Generic Timer in AArch32 might give incorrect result"
default y
+ depends on COMPAT
select ARM_ARCH_TIMER_OOL_WORKAROUND
help
- This option adds work arounds for ARM Cortex-A76 erratum 1188873
+ This option adds a workaround for ARM Cortex-A76/Neoverse-N1
+ erratum 1188873.
- Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could cause
- register corruption when accessing the timer registers from
- AArch32 userspace.
+ Affected Cortex-A76/Neoverse-N1 cores (r0p0, r1p0, r2p0) could
+ cause register corruption when accessing the timer registers
+ from AArch32 userspace.
If unsure, say Y.
@@ -491,7 +492,7 @@ config ARM64_ERRATUM_1165522
bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation"
default y
help
- This option adds work arounds for ARM Cortex-A76 erratum 1165522
+ This option adds a workaround for ARM Cortex-A76 erratum 1165522.
Affected Cortex-A76 cores (r0p0, r1p0, r2p0) could end-up with
corrupted TLBs by speculating an AT instruction during a guest
@@ -504,7 +505,7 @@ config ARM64_ERRATUM_1286807
default y
select ARM64_WORKAROUND_REPEAT_TLBI
help
- This option adds workaround for ARM Cortex-A76 erratum 1286807
+ This option adds a workaround for ARM Cortex-A76 erratum 1286807.
On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual
address for a cacheable mapping of a location is being
@@ -521,10 +522,10 @@ config CAVIUM_ERRATUM_22375
bool "Cavium erratum 22375, 24313"
default y
help
- Enable workaround for erratum 22375, 24313.
+ Enable workaround for errata 22375 and 24313.
This implements two gicv3-its errata workarounds for ThunderX. Both
- with small impact affecting only ITS table allocation.
+ with a small impact affecting only ITS table allocation.
erratum 22375: only alloc 8MB table size
erratum 24313: ignore memory access type
@@ -588,9 +589,6 @@ config QCOM_FALKOR_ERRATUM_1003
config ARM64_WORKAROUND_REPEAT_TLBI
bool
- help
- Enable the repeat TLBI workaround for Falkor erratum 1009 and
- Cortex-A76 erratum 1286807.
config QCOM_FALKOR_ERRATUM_1009
bool "Falkor E1009: Prematurely complete a DSB after a TLBI"
@@ -626,7 +624,7 @@ config HISILICON_ERRATUM_161600802
bool "Hip07 161600802: Erroneous redistributor VLPI base"
default y
help
- The HiSilicon Hip07 SoC usees the wrong redistributor base
+ The HiSilicon Hip07 SoC uses the wrong redistributor base
	  when ITS commands such as VMOVP and VMAPP are issued, and requires
	  a 128kB offset to be applied to the target address in these commands.
@@ -646,7 +644,7 @@ config FUJITSU_ERRATUM_010001
bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly"
default y
help
- This option adds workaround for Fujitsu-A64FX erratum E#010001.
+ This option adds a workaround for Fujitsu-A64FX erratum E#010001.
On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory
accesses may cause undefined fault (Data abort, DFSC=0b111111).
This fault occurs under a specific hardware condition when a
@@ -657,7 +655,7 @@ config FUJITSU_ERRATUM_010001
case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1.
The workaround is to ensure these bits are clear in TCR_ELx.
- The workaround only affect the Fujitsu-A64FX.
+ The workaround only affects the Fujitsu-A64FX.
If unsure, say Y.
@@ -889,6 +887,9 @@ config ARCH_WANT_HUGE_PMD_SHARE
config ARCH_HAS_CACHE_LINE_SIZE
def_bool y
+config ARCH_ENABLE_SPLIT_PMD_PTLOCK
+ def_bool y if PGTABLE_LEVELS > 2
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
@@ -1078,9 +1079,65 @@ config RODATA_FULL_DEFAULT_ENABLED
This requires the linear region to be mapped down to pages,
which may adversely affect performance in some cases.
+config ARM64_SW_TTBR0_PAN
+ bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
+ help
+ Enabling this option prevents the kernel from accessing
+ user-space memory directly by pointing TTBR0_EL1 to a reserved
+ zeroed area and reserved ASID. The user access routines
+ restore the valid TTBR0_EL1 temporarily.
+
+menuconfig COMPAT
+ bool "Kernel support for 32-bit EL0"
+ depends on ARM64_4K_PAGES || EXPERT
+ select COMPAT_BINFMT_ELF if BINFMT_ELF
+ select HAVE_UID16
+ select OLD_SIGSUSPEND3
+ select COMPAT_OLD_SIGACTION
+ help
+ This option enables support for a 32-bit EL0 running under a 64-bit
+ kernel at EL1. AArch32-specific components such as system calls,
+ the user helper functions, VFP support and the ptrace interface are
+ handled appropriately by the kernel.
+
+	  If you use a page size other than 4KB (i.e., 16KB or 64KB), please be aware
+ that you will only be able to execute AArch32 binaries that were compiled
+ with page size aligned segments.
+
+ If you want to execute 32-bit userspace applications, say Y.
+
+if COMPAT
+
+config KUSER_HELPERS
+ bool "Enable kuser helpers page for 32 bit applications"
+ default y
+ help
+ Warning: disabling this option may break 32-bit user programs.
+
+ Provide kuser helpers to compat tasks. The kernel provides
+ helper code to userspace in read only form at a fixed location
+ to allow userspace to be independent of the CPU type fitted to
+ the system. This permits binaries to be run on ARMv4 through
+ to ARMv8 without modification.
+
+ See Documentation/arm/kernel_user_helpers.txt for details.
+
+ However, the fixed address nature of these helpers can be used
+ by ROP (return orientated programming) authors when creating
+ exploits.
+
+ If all of the binaries and libraries which run on your platform
+ are built specifically for your platform, and make no use of
+ these helpers, then you can turn this option off to hinder
+ such exploits. However, in that case, if a binary or library
+ relying on those helpers is run, it will not function correctly.
+
+ Say N here only if you are absolutely certain that you do not
+ need these helpers; otherwise, the safe option is to say Y.
+
+
menuconfig ARMV8_DEPRECATED
bool "Emulate deprecated/obsolete ARMv8 instructions"
- depends on COMPAT
depends on SYSCTL
help
Legacy software support may require certain instructions
@@ -1146,13 +1203,7 @@ config SETEND_EMULATION
If unsure, say Y
endif
-config ARM64_SW_TTBR0_PAN
- bool "Emulate Privileged Access Never using TTBR0_EL1 switching"
- help
- Enabling this option prevents the kernel from accessing
- user-space memory directly by pointing TTBR0_EL1 to a reserved
- zeroed area and reserved ASID. The user access routines
- restore the valid TTBR0_EL1 temporarily.
+endif
menu "ARMv8.1 architectural features"
@@ -1318,6 +1369,9 @@ config ARM64_SVE
To enable use of this extension on CPUs that implement it, say Y.
+ On CPUs that support the SVE2 extensions, this option will enable
+ those too.
+
Note that for architectural reasons, firmware _must_ implement SVE
support when running on SVE capable hardware. The required support
is present in:
@@ -1351,7 +1405,7 @@ config ARM64_PSEUDO_NMI
help
Adds support for mimicking Non-Maskable Interrupts through the use of
GIC interrupt priority. This support requires version 3 or later of
- Arm GIC.
+ ARM GIC.
This high priority configuration for interrupts needs to be
explicitly enabled by setting the kernel parameter
@@ -1475,25 +1529,6 @@ config DMI
endmenu
-config COMPAT
- bool "Kernel support for 32-bit EL0"
- depends on ARM64_4K_PAGES || EXPERT
- select COMPAT_BINFMT_ELF if BINFMT_ELF
- select HAVE_UID16
- select OLD_SIGSUSPEND3
- select COMPAT_OLD_SIGACTION
- help
- This option enables support for a 32-bit EL0 running under a 64-bit
- kernel at EL1. AArch32-specific components such as system calls,
- the user helper functions, VFP support and the ptrace interface are
- handled appropriately by the kernel.
-
- If you use a page size other than 4KB (i.e, 16KB or 64KB), please be aware
- that you will only be able to execute AArch32 binaries that were compiled
- with page size aligned segments.
-
- If you want to execute 32-bit userspace applications, say Y.
-
config SYSVIPC_COMPAT
def_bool y
depends on COMPAT && SYSVIPC
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index cd7c76e58b09..a2cec6218211 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -534,11 +534,12 @@
};
eccmgr {
- compatible = "altr,socfpga-a10-ecc-manager";
+ compatible = "altr,socfpga-s10-ecc-manager",
+ "altr,socfpga-a10-ecc-manager";
altr,sysmgr-syscon = <&sysmgr>;
#address-cells = <1>;
#size-cells = <1>;
- interrupts = <0 15 4>, <0 95 4>;
+ interrupts = <0 15 4>;
interrupt-controller;
#interrupt-cells = <2>;
ranges;
@@ -546,31 +547,31 @@
sdramedac {
compatible = "altr,sdram-edac-s10";
altr,sdr-syscon = <&sdr>;
- interrupts = <16 4>, <48 4>;
+ interrupts = <16 4>;
};
usb0-ecc@ff8c4000 {
- compatible = "altr,socfpga-usb-ecc";
+ compatible = "altr,socfpga-s10-usb-ecc",
+ "altr,socfpga-usb-ecc";
reg = <0xff8c4000 0x100>;
altr,ecc-parent = <&usb0>;
- interrupts = <2 4>,
- <34 4>;
+ interrupts = <2 4>;
};
emac0-rx-ecc@ff8c0000 {
- compatible = "altr,socfpga-eth-mac-ecc";
+ compatible = "altr,socfpga-s10-eth-mac-ecc",
+ "altr,socfpga-eth-mac-ecc";
reg = <0xff8c0000 0x100>;
altr,ecc-parent = <&gmac0>;
- interrupts = <4 4>,
- <36 4>;
+ interrupts = <4 4>;
};
emac0-tx-ecc@ff8c0400 {
- compatible = "altr,socfpga-eth-mac-ecc";
+ compatible = "altr,socfpga-s10-eth-mac-ecc",
+ "altr,socfpga-eth-mac-ecc";
reg = <0xff8c0400 0x100>;
altr,ecc-parent = <&gmac0>;
- interrupts = <5 4>,
- <37 4>;
+ interrupts = <5 4>;
};
};
diff --git a/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h b/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h
index 1b4cb0c55744..385c455a7c98 100644
--- a/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h
+++ b/arch/arm64/boot/dts/mediatek/mt2712-pinfunc.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2018 MediaTek Inc.
* Author: Zhiyong Tao <zhiyong.tao@mediatek.com>
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 5fc6f51908fd..cb89c80800b5 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -14,6 +14,7 @@
#include <crypto/aes.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/module.h>
@@ -109,7 +110,7 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
u32 abytes, u32 *macp)
{
- if (may_use_simd()) {
+ if (crypto_simd_usable()) {
kernel_neon_begin();
ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
num_rounds(key));
@@ -255,7 +256,7 @@ static int ccm_encrypt(struct aead_request *req)
err = skcipher_walk_aead_encrypt(&walk, req, false);
- if (may_use_simd()) {
+ if (crypto_simd_usable()) {
while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
@@ -313,7 +314,7 @@ static int ccm_decrypt(struct aead_request *req)
err = skcipher_walk_aead_decrypt(&walk, req, false);
- if (may_use_simd()) {
+ if (crypto_simd_usable()) {
while (walk.nbytes) {
u32 tail = walk.nbytes % AES_BLOCK_SIZE;
@@ -372,7 +373,7 @@ static struct aead_alg ccm_aes_alg = {
static int __init aes_mod_init(void)
{
- if (!(elf_hwcap & HWCAP_AES))
+ if (!cpu_have_named_feature(AES))
return -ENODEV;
return crypto_register_aead(&ccm_aes_alg);
}
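
The may_use_simd() -> crypto_simd_usable() conversions in this and the following crypto glue files all follow the same dispatch shape: run the NEON routine inside a kernel_neon_begin()/kernel_neon_end() section when SIMD is usable in the current context, otherwise fall back to a scalar C implementation. A condensed sketch of that shape (the transform names are placeholders, not kernel symbols):

	#include <asm/neon.h>
	#include <asm/simd.h>
	#include <crypto/internal/simd.h>

	static void do_transform(u8 dst[], u8 const src[], int rounds)
	{
		if (!crypto_simd_usable()) {
			scalar_transform(dst, src, rounds);	/* placeholder C fallback */
			return;
		}

		kernel_neon_begin();
		neon_transform(dst, src, rounds);		/* placeholder NEON routine */
		kernel_neon_end();
	}
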
diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c
index e6b3227bbf57..3213843fcb46 100644
--- a/arch/arm64/crypto/aes-ce-glue.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -12,6 +12,7 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/aes.h>
+#include <crypto/internal/simd.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
@@ -52,7 +53,7 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
return;
}
@@ -66,7 +67,7 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
{
struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
return;
}
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 1e676625ef33..f0ceb545bd1e 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -405,7 +405,7 @@ static int ctr_encrypt_sync(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return aes_ctr_encrypt_fallback(ctx, req);
return ctr_encrypt(req);
@@ -642,7 +642,7 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
{
int rounds = 6 + ctx->key_length / 4;
- if (may_use_simd()) {
+ if (crypto_simd_usable()) {
kernel_neon_begin();
aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
enc_after);
@@ -707,7 +707,7 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out)
struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
struct mac_desc_ctx *ctx = shash_desc_ctx(desc);
- mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0);
+ mac_do_update(&tctx->key, NULL, 0, ctx->dg, (ctx->len != 0), 0);
memcpy(out, ctx->dg, AES_BLOCK_SIZE);
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index e7a95a566462..02b65d9eb947 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -288,7 +288,7 @@ static int ctr_encrypt_sync(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return aes_ctr_encrypt_fallback(&ctx->fallback, req);
return ctr_encrypt(req);
@@ -304,6 +304,8 @@ static int __xts_crypt(struct skcipher_request *req,
int err;
err = skcipher_walk_virt(&walk, req, false);
+ if (err)
+ return err;
kernel_neon_begin();
neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1);
@@ -440,7 +442,7 @@ static int __init aes_init(void)
int err;
int i;
- if (!(elf_hwcap & HWCAP_ASIMD))
+ if (!cpu_have_named_feature(ASIMD))
return -ENODEV;
err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index bece1d85bd81..82029cda2e77 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -21,6 +21,7 @@
#include <crypto/algapi.h>
#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -90,7 +91,7 @@ static int chacha_neon(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
return crypto_chacha_crypt(req);
return chacha_neon_stream_xor(req, ctx, req->iv);
@@ -104,7 +105,7 @@ static int xchacha_neon(struct skcipher_request *req)
u32 state[16];
u8 real_iv[16];
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
return crypto_xchacha_crypt(req);
crypto_chacha_init(state, ctx, req->iv);
@@ -173,7 +174,7 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
- if (!(elf_hwcap & HWCAP_ASIMD))
+ if (!cpu_have_named_feature(ASIMD))
return -ENODEV;
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
diff --git a/arch/arm64/crypto/crct10dif-ce-glue.c b/arch/arm64/crypto/crct10dif-ce-glue.c
index dd325829ee44..2e0a7d2eee24 100644
--- a/arch/arm64/crypto/crct10dif-ce-glue.c
+++ b/arch/arm64/crypto/crct10dif-ce-glue.c
@@ -16,6 +16,7 @@
#include <linux/string.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <asm/neon.h>
#include <asm/simd.h>
@@ -38,7 +39,7 @@ static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data,
{
u16 *crc = shash_desc_ctx(desc);
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
kernel_neon_begin();
*crc = crc_t10dif_pmull_p8(*crc, data, length);
kernel_neon_end();
@@ -54,7 +55,7 @@ static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data,
{
u16 *crc = shash_desc_ctx(desc);
- if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
+ if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && crypto_simd_usable()) {
kernel_neon_begin();
*crc = crc_t10dif_pmull_p64(*crc, data, length);
kernel_neon_end();
@@ -101,7 +102,7 @@ static struct shash_alg crc_t10dif_alg[] = {{
static int __init crc_t10dif_mod_init(void)
{
- if (elf_hwcap & HWCAP_PMULL)
+ if (cpu_have_named_feature(PMULL))
return crypto_register_shashes(crc_t10dif_alg,
ARRAY_SIZE(crc_t10dif_alg));
else
@@ -111,7 +112,7 @@ static int __init crc_t10dif_mod_init(void)
static void __exit crc_t10dif_mod_exit(void)
{
- if (elf_hwcap & HWCAP_PMULL)
+ if (cpu_have_named_feature(PMULL))
crypto_unregister_shashes(crc_t10dif_alg,
ARRAY_SIZE(crc_t10dif_alg));
else
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 791ad422c427..b39ed99b06fb 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -17,6 +17,7 @@
#include <crypto/gf128mul.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
@@ -89,7 +90,7 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
struct ghash_key const *k,
const char *head))
{
- if (likely(may_use_simd())) {
+ if (likely(crypto_simd_usable())) {
kernel_neon_begin();
simd_update(blocks, dg, src, key, head);
kernel_neon_end();
@@ -441,7 +442,7 @@ static int gcm_encrypt(struct aead_request *req)
err = skcipher_walk_aead_encrypt(&walk, req, false);
- if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) {
+ if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) {
u32 const *rk = NULL;
kernel_neon_begin();
@@ -473,9 +474,11 @@ static int gcm_encrypt(struct aead_request *req)
put_unaligned_be32(2, iv + GCM_IV_SIZE);
while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
- int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ const int blocks =
+ walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
u8 *dst = walk.dst.virt.addr;
u8 *src = walk.src.virt.addr;
+ int remaining = blocks;
do {
__aes_arm64_encrypt(ctx->aes_key.key_enc,
@@ -485,9 +488,9 @@ static int gcm_encrypt(struct aead_request *req)
dst += AES_BLOCK_SIZE;
src += AES_BLOCK_SIZE;
- } while (--blocks > 0);
+ } while (--remaining > 0);
- ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
+ ghash_do_update(blocks, dg,
walk.dst.virt.addr, &ctx->ghash_key,
NULL, pmull_ghash_update_p64);
@@ -563,7 +566,7 @@ static int gcm_decrypt(struct aead_request *req)
err = skcipher_walk_aead_decrypt(&walk, req, false);
- if (likely(may_use_simd() && walk.total >= 2 * AES_BLOCK_SIZE)) {
+ if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) {
u32 const *rk = NULL;
kernel_neon_begin();
@@ -609,7 +612,7 @@ static int gcm_decrypt(struct aead_request *req)
put_unaligned_be32(2, iv + GCM_IV_SIZE);
while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
- int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
u8 *dst = walk.dst.virt.addr;
u8 *src = walk.src.virt.addr;
@@ -704,10 +707,10 @@ static int __init ghash_ce_mod_init(void)
{
int ret;
- if (!(elf_hwcap & HWCAP_ASIMD))
+ if (!cpu_have_named_feature(ASIMD))
return -ENODEV;
- if (elf_hwcap & HWCAP_PMULL)
+ if (cpu_have_named_feature(PMULL))
ret = crypto_register_shashes(ghash_alg,
ARRAY_SIZE(ghash_alg));
else
@@ -717,7 +720,7 @@ static int __init ghash_ce_mod_init(void)
if (ret)
return ret;
- if (elf_hwcap & HWCAP_PMULL) {
+ if (cpu_have_named_feature(PMULL)) {
ret = crypto_register_aead(&gcm_aes_alg);
if (ret)
crypto_unregister_shashes(ghash_alg,
@@ -728,7 +731,7 @@ static int __init ghash_ce_mod_init(void)
static void __exit ghash_ce_mod_exit(void)
{
- if (elf_hwcap & HWCAP_PMULL)
+ if (cpu_have_named_feature(PMULL))
crypto_unregister_shashes(ghash_alg, ARRAY_SIZE(ghash_alg));
else
crypto_unregister_shash(ghash_alg);
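
In the non-SIMD gcm_encrypt()/gcm_decrypt() loops above, the walk is consumed in whole 2 * AES_BLOCK_SIZE chunks, so the per-iteration block count has to be rounded down to an even number; dividing by AES_BLOCK_SIZE alone can consume an odd block and fall out of step with the walk bookkeeping. A worked illustration of the rounding (values assume AES_BLOCK_SIZE == 16):

	int blocks;

	blocks = 96 / (2 * AES_BLOCK_SIZE) * 2;	/* 96 bytes -> 6 blocks */
	blocks = 80 / (2 * AES_BLOCK_SIZE) * 2;	/* 80 bytes -> 4 blocks, one block left for later */
	blocks = 80 / AES_BLOCK_SIZE;		/* old code: 5 blocks, one too many */
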
diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c
index 22cc32ac9448..895d3727c1fb 100644
--- a/arch/arm64/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c
@@ -9,6 +9,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
@@ -25,7 +26,7 @@ static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
static int nhpoly1305_neon_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
- if (srclen < 64 || !may_use_simd())
+ if (srclen < 64 || !crypto_simd_usable())
return crypto_nhpoly1305_update(desc, src, srclen);
do {
@@ -56,7 +57,7 @@ static struct shash_alg nhpoly1305_alg = {
static int __init nhpoly1305_mod_init(void)
{
- if (!(elf_hwcap & HWCAP_ASIMD))
+ if (!cpu_have_named_feature(ASIMD))
return -ENODEV;
return crypto_register_shash(&nhpoly1305_alg);
diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index 17fac2889f56..eaa7a8258f1c 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -12,6 +12,7 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
@@ -38,7 +39,7 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
{
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sha1_update(desc, data, len);
sctx->finalize = 0;
@@ -56,7 +57,7 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sha1_finup(desc, data, len, out);
/*
@@ -78,7 +79,7 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
{
struct sha1_ce_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sha1_finup(desc, NULL, 0, out);
sctx->finalize = 0;
diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 261f5195cab7..a725997e55f2 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -12,6 +12,7 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <linux/cpufeature.h>
@@ -42,7 +43,7 @@ static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
{
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order);
@@ -61,7 +62,7 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
if (len)
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order);
@@ -90,7 +91,7 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
{
struct sha256_ce_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
sha256_base_do_finalize(desc,
(sha256_block_fn *)sha256_block_data_order);
return sha256_base_finish(desc, out);
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index 4aedeaefd61f..e62298740e31 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -14,6 +14,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <linux/cryptohash.h>
@@ -89,7 +90,7 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
{
struct sha256_state *sctx = shash_desc_ctx(desc);
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order);
@@ -119,7 +120,7 @@ static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
if (len)
sha256_base_do_update(desc, data, len,
(sha256_block_fn *)sha256_block_data_order);
@@ -173,7 +174,7 @@ static int __init sha256_mod_init(void)
if (ret)
return ret;
- if (elf_hwcap & HWCAP_ASIMD) {
+ if (cpu_have_named_feature(ASIMD)) {
ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs));
if (ret)
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
@@ -183,7 +184,7 @@ static int __init sha256_mod_init(void)
static void __exit sha256_mod_fini(void)
{
- if (elf_hwcap & HWCAP_ASIMD)
+ if (cpu_have_named_feature(ASIMD))
crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs));
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c
index a336feac0f59..9a4bbfc45f40 100644
--- a/arch/arm64/crypto/sha3-ce-glue.c
+++ b/arch/arm64/crypto/sha3-ce-glue.c
@@ -14,6 +14,7 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sha3.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
@@ -32,7 +33,7 @@ static int sha3_update(struct shash_desc *desc, const u8 *data,
struct sha3_state *sctx = shash_desc_ctx(desc);
unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sha3_update(desc, data, len);
if ((sctx->partial + len) >= sctx->rsiz) {
@@ -76,7 +77,7 @@ static int sha3_final(struct shash_desc *desc, u8 *out)
__le64 *digest = (__le64 *)out;
int i;
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sha3_final(desc, out);
sctx->buf[sctx->partial++] = 0x06;
diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c
index f2c5f28c622a..2369540040aa 100644
--- a/arch/arm64/crypto/sha512-ce-glue.c
+++ b/arch/arm64/crypto/sha512-ce-glue.c
@@ -13,6 +13,7 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
#include <linux/cpufeature.h>
@@ -31,7 +32,7 @@ asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks);
static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return sha512_base_do_update(desc, data, len,
(sha512_block_fn *)sha512_block_data_order);
@@ -46,7 +47,7 @@ static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
if (len)
sha512_base_do_update(desc, data, len,
(sha512_block_fn *)sha512_block_data_order);
@@ -65,7 +66,7 @@ static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
static int sha512_ce_final(struct shash_desc *desc, u8 *out)
{
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
sha512_base_do_finalize(desc,
(sha512_block_fn *)sha512_block_data_order);
return sha512_base_finish(desc, out);
diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index 88938a20d9b2..5d15533799a2 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -12,6 +12,7 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
#include <linux/cpufeature.h>
@@ -28,7 +29,7 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sm3_update(desc, data, len);
kernel_neon_begin();
@@ -40,7 +41,7 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
static int sm3_ce_final(struct shash_desc *desc, u8 *out)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sm3_finup(desc, NULL, 0, out);
kernel_neon_begin();
@@ -53,7 +54,7 @@ static int sm3_ce_final(struct shash_desc *desc, u8 *out)
static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
- if (!may_use_simd())
+ if (!crypto_simd_usable())
return crypto_sm3_finup(desc, data, len, out);
kernel_neon_begin();
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 0c4fc223f225..2754c875d39c 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -3,6 +3,7 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/sm4.h>
+#include <crypto/internal/simd.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
@@ -20,7 +21,7 @@ static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
crypto_sm4_encrypt(tfm, out, in);
} else {
kernel_neon_begin();
@@ -33,7 +34,7 @@ static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);
- if (!may_use_simd()) {
+ if (!crypto_simd_usable()) {
crypto_sm4_decrypt(tfm, out, in);
} else {
kernel_neon_begin();
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 1e17ea5c372b..eb0df239a759 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -13,10 +13,10 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += msi.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += rwsem.h
generic-y += segment.h
generic-y += serial.h
generic-y += set_memory.h
diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h
index f2a234d6516c..b7bca1ae09e6 100644
--- a/arch/arm64/include/asm/arch_timer.h
+++ b/arch/arm64/include/asm/arch_timer.h
@@ -31,11 +31,23 @@
#include <clocksource/arm_arch_timer.h>
#if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND)
-extern struct static_key_false arch_timer_read_ool_enabled;
-#define needs_unstable_timer_counter_workaround() \
- static_branch_unlikely(&arch_timer_read_ool_enabled)
+#define has_erratum_handler(h) \
+ ({ \
+ const struct arch_timer_erratum_workaround *__wa; \
+ __wa = __this_cpu_read(timer_unstable_counter_workaround); \
+ (__wa && __wa->h); \
+ })
+
+#define erratum_handler(h) \
+ ({ \
+ const struct arch_timer_erratum_workaround *__wa; \
+ __wa = __this_cpu_read(timer_unstable_counter_workaround); \
+ (__wa && __wa->h) ? __wa->h : arch_timer_##h; \
+ })
+
#else
-#define needs_unstable_timer_counter_workaround() false
+#define has_erratum_handler(h) false
+#define erratum_handler(h) (arch_timer_##h)
#endif
enum arch_timer_erratum_match_type {
@@ -61,23 +73,37 @@ struct arch_timer_erratum_workaround {
DECLARE_PER_CPU(const struct arch_timer_erratum_workaround *,
timer_unstable_counter_workaround);
+/* inline sysreg accessors that make erratum_handler() work */
+static inline notrace u32 arch_timer_read_cntp_tval_el0(void)
+{
+ return read_sysreg(cntp_tval_el0);
+}
+
+static inline notrace u32 arch_timer_read_cntv_tval_el0(void)
+{
+ return read_sysreg(cntv_tval_el0);
+}
+
+static inline notrace u64 arch_timer_read_cntpct_el0(void)
+{
+ return read_sysreg(cntpct_el0);
+}
+
+static inline notrace u64 arch_timer_read_cntvct_el0(void)
+{
+ return read_sysreg(cntvct_el0);
+}
+
#define arch_timer_reg_read_stable(reg) \
-({ \
- u64 _val; \
- if (needs_unstable_timer_counter_workaround()) { \
- const struct arch_timer_erratum_workaround *wa; \
+ ({ \
+ u64 _val; \
+ \
preempt_disable_notrace(); \
- wa = __this_cpu_read(timer_unstable_counter_workaround); \
- if (wa && wa->read_##reg) \
- _val = wa->read_##reg(); \
- else \
- _val = read_sysreg(reg); \
+ _val = erratum_handler(read_ ## reg)(); \
preempt_enable_notrace(); \
- } else { \
- _val = read_sysreg(reg); \
- } \
- _val; \
-})
+ \
+ _val; \
+ })
/*
* These register accessors are marked inline so the compiler can
@@ -148,18 +174,67 @@ static inline void arch_timer_set_cntkctl(u32 cntkctl)
isb();
}
-static inline u64 arch_counter_get_cntpct(void)
+/*
+ * Ensure that reads of the counter are treated the same as memory reads
+ * for the purposes of ordering by subsequent memory barriers.
+ *
+ * This insanity brought to you by speculative system register reads,
+ * out-of-order memory accesses, sequence locks and Thomas Gleixner.
+ *
+ * http://lists.infradead.org/pipermail/linux-arm-kernel/2019-February/631195.html
+ */
+#define arch_counter_enforce_ordering(val) do { \
+ u64 tmp, _val = (val); \
+ \
+ asm volatile( \
+ " eor %0, %1, %1\n" \
+ " add %0, sp, %0\n" \
+ " ldr xzr, [%0]" \
+ : "=r" (tmp) : "r" (_val)); \
+} while (0)
+
+static inline u64 __arch_counter_get_cntpct_stable(void)
+{
+ u64 cnt;
+
+ isb();
+ cnt = arch_timer_reg_read_stable(cntpct_el0);
+ arch_counter_enforce_ordering(cnt);
+ return cnt;
+}
+
+static inline u64 __arch_counter_get_cntpct(void)
{
+ u64 cnt;
+
isb();
- return arch_timer_reg_read_stable(cntpct_el0);
+ cnt = read_sysreg(cntpct_el0);
+ arch_counter_enforce_ordering(cnt);
+ return cnt;
}
-static inline u64 arch_counter_get_cntvct(void)
+static inline u64 __arch_counter_get_cntvct_stable(void)
{
+ u64 cnt;
+
isb();
- return arch_timer_reg_read_stable(cntvct_el0);
+ cnt = arch_timer_reg_read_stable(cntvct_el0);
+ arch_counter_enforce_ordering(cnt);
+ return cnt;
}
+static inline u64 __arch_counter_get_cntvct(void)
+{
+ u64 cnt;
+
+ isb();
+ cnt = read_sysreg(cntvct_el0);
+ arch_counter_enforce_ordering(cnt);
+ return cnt;
+}
+
+#undef arch_counter_enforce_ordering
+
static inline int arch_timer_arch_init(void)
{
return 0;
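
The erratum_handler()/has_erratum_handler() macros above reduce the old branch-heavy arch_timer_reg_read_stable() to a single dispatch: read the per-CPU workaround pointer, call its handler if one is installed, otherwise call the plain arch_timer_read_*() accessor. Roughly what this expands to for the virtual counter, written out by hand (illustrative, not literal kernel code):

	static notrace u64 read_cntvct_stable(void)
	{
		const struct arch_timer_erratum_workaround *wa;
		u64 val;

		preempt_disable_notrace();
		wa = __this_cpu_read(timer_unstable_counter_workaround);
		val = (wa && wa->read_cntvct_el0) ? wa->read_cntvct_el0()
						  : arch_timer_read_cntvct_el0();
		preempt_enable_notrace();

		return val;
	}
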
diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h
index c5308d01e228..039fbd822ec6 100644
--- a/arch/arm64/include/asm/assembler.h
+++ b/arch/arm64/include/asm/assembler.h
@@ -407,10 +407,14 @@ alternative_endif
.ifc \op, cvap
sys 3, c7, c12, 1, \kaddr // dc cvap
.else
+ .ifc \op, cvadp
+ sys 3, c7, c13, 1, \kaddr // dc cvadp
+ .else
dc \op, \kaddr
.endif
.endif
.endif
+ .endif
add \kaddr, \kaddr, \tmp1
cmp \kaddr, \size
b.lo 9998b
@@ -442,8 +446,8 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU
* reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
*/
.macro reset_pmuserenr_el0, tmpreg
- mrs \tmpreg, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
- sbfx \tmpreg, \tmpreg, #8, #4
+ mrs \tmpreg, id_aa64dfr0_el1
+ sbfx \tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
cmp \tmpreg, #1 // Skip if no PMU present
b.lt 9000f
msr pmuserenr_el0, xzr // Disable PMU access from EL0
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index f66bb04fdf2d..85b6bedbcc68 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -20,6 +20,8 @@
#ifndef __ASSEMBLY__
+#include <linux/kasan-checks.h>
+
#define __nops(n) ".rept " #n "\nnop\n.endr\n"
#define nops(n) asm volatile(__nops(n))
@@ -72,31 +74,33 @@ static inline unsigned long array_index_mask_nospec(unsigned long idx,
#define __smp_store_release(p, v) \
do { \
+ typeof(p) __p = (p); \
union { typeof(*p) __val; char __c[1]; } __u = \
- { .__val = (__force typeof(*p)) (v) }; \
+ { .__val = (__force typeof(*p)) (v) }; \
compiletime_assert_atomic_type(*p); \
+ kasan_check_write(__p, sizeof(*p)); \
switch (sizeof(*p)) { \
case 1: \
asm volatile ("stlrb %w1, %0" \
- : "=Q" (*p) \
+ : "=Q" (*__p) \
: "r" (*(__u8 *)__u.__c) \
: "memory"); \
break; \
case 2: \
asm volatile ("stlrh %w1, %0" \
- : "=Q" (*p) \
+ : "=Q" (*__p) \
: "r" (*(__u16 *)__u.__c) \
: "memory"); \
break; \
case 4: \
asm volatile ("stlr %w1, %0" \
- : "=Q" (*p) \
+ : "=Q" (*__p) \
: "r" (*(__u32 *)__u.__c) \
: "memory"); \
break; \
case 8: \
asm volatile ("stlr %1, %0" \
- : "=Q" (*p) \
+ : "=Q" (*__p) \
: "r" (*(__u64 *)__u.__c) \
: "memory"); \
break; \
@@ -106,27 +110,29 @@ do { \
#define __smp_load_acquire(p) \
({ \
union { typeof(*p) __val; char __c[1]; } __u; \
+ typeof(p) __p = (p); \
compiletime_assert_atomic_type(*p); \
+ kasan_check_read(__p, sizeof(*p)); \
switch (sizeof(*p)) { \
case 1: \
asm volatile ("ldarb %w0, %1" \
: "=r" (*(__u8 *)__u.__c) \
- : "Q" (*p) : "memory"); \
+ : "Q" (*__p) : "memory"); \
break; \
case 2: \
asm volatile ("ldarh %w0, %1" \
: "=r" (*(__u16 *)__u.__c) \
- : "Q" (*p) : "memory"); \
+ : "Q" (*__p) : "memory"); \
break; \
case 4: \
asm volatile ("ldar %w0, %1" \
: "=r" (*(__u32 *)__u.__c) \
- : "Q" (*p) : "memory"); \
+ : "Q" (*__p) : "memory"); \
break; \
case 8: \
asm volatile ("ldar %0, %1" \
: "=r" (*(__u64 *)__u.__c) \
- : "Q" (*p) : "memory"); \
+ : "Q" (*__p) : "memory"); \
break; \
} \
__u.__val; \
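
Functionally, the barrier.h hunks above only change how the macro argument is evaluated (once, into __p, so the new kasan_check_read()/kasan_check_write() calls and the asm operands see the same pointer); the ordering semantics are unchanged. For reference, the usual pairing these primitives provide (illustrative, not from this patch):

	static int data;
	static int ready;

	static void producer(void)
	{
		data = 42;
		smp_store_release(&ready, 1);	/* data store ordered before the flag store */
	}

	static int consumer(void)
	{
		while (!smp_load_acquire(&ready))	/* flag load ordered before the data load */
			cpu_relax();
		return data;				/* observes 42 */
	}
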
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
index 2945fe6cd863..d84294064e6a 100644
--- a/arch/arm64/include/asm/brk-imm.h
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -11,6 +11,8 @@
/*
* #imm16 values used for BRK instruction generation
+ * 0x004: for installing kprobes
+ * 0x005: for installing uprobes
* Allowed values for kgdb are 0x400 - 0x7ff
* 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
@@ -18,10 +20,13 @@
* 0x800: kernel-mode BUG() and WARN() traps
* 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff)
*/
+#define KPROBES_BRK_IMM 0x004
+#define UPROBES_BRK_IMM 0x005
#define FAULT_BRK_IMM 0x100
#define KGDB_DYN_DBG_BRK_IMM 0x400
#define KGDB_COMPILED_DBG_BRK_IMM 0x401
#define BUG_BRK_IMM 0x800
#define KASAN_BRK_IMM 0x900
+#define KASAN_BRK_MASK 0x0ff
#endif
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h
index f6a76e43f39e..defdc67d9ab4 100644
--- a/arch/arm64/include/asm/cpucaps.h
+++ b/arch/arm64/include/asm/cpucaps.h
@@ -61,7 +61,8 @@
#define ARM64_HAS_GENERIC_AUTH_ARCH 40
#define ARM64_HAS_GENERIC_AUTH_IMP_DEF 41
#define ARM64_HAS_IRQ_PRIO_MASKING 42
+#define ARM64_HAS_DCPODP 43
-#define ARM64_NCAPS 43
+#define ARM64_NCAPS 44
#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h
index e505e1fbd2b9..f210bcf096f7 100644
--- a/arch/arm64/include/asm/cpufeature.h
+++ b/arch/arm64/include/asm/cpufeature.h
@@ -14,15 +14,8 @@
#include <asm/hwcap.h>
#include <asm/sysreg.h>
-/*
- * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
- * in the kernel and for user space to keep track of which optional features
- * are supported by the current system. So let's map feature 'x' to HWCAP_x.
- * Note that HWCAP_x constants are bit fields so we need to take the log.
- */
-
-#define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap))
-#define cpu_feature(x) ilog2(HWCAP_ ## x)
+#define MAX_CPU_FEATURES 64
+#define cpu_feature(x) KERNEL_HWCAP_ ## x
#ifndef __ASSEMBLY__
@@ -399,11 +392,13 @@ extern DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE);
for_each_set_bit(cap, cpu_hwcaps, ARM64_NCAPS)
bool this_cpu_has_cap(unsigned int cap);
+void cpu_set_feature(unsigned int num);
+bool cpu_have_feature(unsigned int num);
+unsigned long cpu_get_elf_hwcap(void);
+unsigned long cpu_get_elf_hwcap2(void);
-static inline bool cpu_have_feature(unsigned int num)
-{
- return elf_hwcap & (1UL << num);
-}
+#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))
+#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))
/* System capability check for constant caps */
static inline bool __cpus_have_const_cap(int num)
@@ -638,11 +633,7 @@ static inline int arm64_get_ssbd_state(void)
#endif
}
-#ifdef CONFIG_ARM64_SSBD
void arm64_set_ssbd_mitigation(bool state);
-#else
-static inline void arm64_set_ssbd_mitigation(bool state) {}
-#endif
extern int do_emulate_mrs(struct pt_regs *regs, u32 sys_reg, u32 rt);
diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h
index 5f1437099b99..2602bae334fb 100644
--- a/arch/arm64/include/asm/cputype.h
+++ b/arch/arm64/include/asm/cputype.h
@@ -89,6 +89,7 @@
#define ARM_CPU_PART_CORTEX_A35 0xD04
#define ARM_CPU_PART_CORTEX_A55 0xD05
#define ARM_CPU_PART_CORTEX_A76 0xD0B
+#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define APM_CPU_PART_POTENZA 0x000
@@ -118,6 +119,7 @@
#define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35)
#define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55)
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
+#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index a44cf5225429..0679f781696d 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -65,12 +65,9 @@
#define CACHE_FLUSH_IS_SAFE 1
/* kprobes BRK opcodes with ESR encoding */
-#define BRK64_ESR_MASK 0xFFFF
-#define BRK64_ESR_KPROBES 0x0004
-#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (BRK64_ESR_KPROBES << 5))
+#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5))
/* uprobes BRK opcodes with ESR encoding */
-#define BRK64_ESR_UPROBES 0x0005
-#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (BRK64_ESR_UPROBES << 5))
+#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5))
/* AArch32 */
#define DBG_ESR_EVT_BKPT 0x4
@@ -94,18 +91,24 @@ struct step_hook {
int (*fn)(struct pt_regs *regs, unsigned int esr);
};
-void register_step_hook(struct step_hook *hook);
-void unregister_step_hook(struct step_hook *hook);
+void register_user_step_hook(struct step_hook *hook);
+void unregister_user_step_hook(struct step_hook *hook);
+
+void register_kernel_step_hook(struct step_hook *hook);
+void unregister_kernel_step_hook(struct step_hook *hook);
struct break_hook {
struct list_head node;
- u32 esr_val;
- u32 esr_mask;
int (*fn)(struct pt_regs *regs, unsigned int esr);
+ u16 imm;
+ u16 mask; /* These bits are ignored when comparing with imm */
};
-void register_break_hook(struct break_hook *hook);
-void unregister_break_hook(struct break_hook *hook);
+void register_user_break_hook(struct break_hook *hook);
+void unregister_user_break_hook(struct break_hook *hook);
+
+void register_kernel_break_hook(struct break_hook *hook);
+void unregister_kernel_break_hook(struct break_hook *hook);
u8 debug_monitors_arch(void);
diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h
index 6adc1a90e7e6..355d120b78cb 100644
--- a/arch/arm64/include/asm/elf.h
+++ b/arch/arm64/include/asm/elf.h
@@ -214,10 +214,10 @@ typedef compat_elf_greg_t compat_elf_gregset_t[COMPAT_ELF_NGREG];
set_thread_flag(TIF_32BIT); \
})
#define COMPAT_ARCH_DLINFO
-extern int aarch32_setup_vectors_page(struct linux_binprm *bprm,
- int uses_interp);
+extern int aarch32_setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp);
#define compat_arch_setup_additional_pages \
- aarch32_setup_vectors_page
+ aarch32_setup_additional_pages
#endif /* CONFIG_COMPAT */
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 52233f00d53d..0e27fe91d5ea 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -156,9 +156,7 @@
ESR_ELx_WFx_ISS_WFI)
/* BRK instruction trap from AArch64 state */
-#define ESR_ELx_VAL_BRK64(imm) \
- ((ESR_ELx_EC_BRK64 << ESR_ELx_EC_SHIFT) | ESR_ELx_IL | \
- ((imm) & 0xffff))
+#define ESR_ELx_BRK64_ISS_COMMENT_MASK 0xffff
/* ISS field definitions for System instruction traps */
#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22
@@ -198,9 +196,10 @@
/*
* User space cache operations have the following sysreg encoding
* in System instructions.
- * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 14 }, WRITE (L=0)
+ * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 13, 14 }, WRITE (L=0)
*/
#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14
+#define ESR_ELx_SYS64_ISS_CRM_DC_CVADP 13
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11
#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index c7e1a7837706..a56efb5626fa 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -23,26 +23,34 @@
#include <asm/errno.h>
+#define FUTEX_MAX_LOOPS 128 /* What's the largest number you can think of? */
+
#define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \
do { \
+ unsigned int loops = FUTEX_MAX_LOOPS; \
+ \
uaccess_enable(); \
asm volatile( \
" prfm pstl1strm, %2\n" \
"1: ldxr %w1, %2\n" \
insn "\n" \
"2: stlxr %w0, %w3, %2\n" \
-" cbnz %w0, 1b\n" \
-" dmb ish\n" \
+" cbz %w0, 3f\n" \
+" sub %w4, %w4, %w0\n" \
+" cbnz %w4, 1b\n" \
+" mov %w0, %w7\n" \
"3:\n" \
+" dmb ish\n" \
" .pushsection .fixup,\"ax\"\n" \
" .align 2\n" \
-"4: mov %w0, %w5\n" \
+"4: mov %w0, %w6\n" \
" b 3b\n" \
" .popsection\n" \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
- : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \
- : "r" (oparg), "Ir" (-EFAULT) \
+ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp), \
+ "+r" (loops) \
+ : "r" (oparg), "Ir" (-EFAULT), "Ir" (-EAGAIN) \
: "memory"); \
uaccess_disable(); \
} while (0)
@@ -57,23 +65,23 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *_uaddr)
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("mov %w3, %w4",
+ __futex_atomic_op("mov %w3, %w5",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("add %w3, %w1, %w4",
+ __futex_atomic_op("add %w3, %w1, %w5",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("orr %w3, %w1, %w4",
+ __futex_atomic_op("orr %w3, %w1, %w5",
ret, oldval, uaddr, tmp, oparg);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("and %w3, %w1, %w4",
+ __futex_atomic_op("and %w3, %w1, %w5",
ret, oldval, uaddr, tmp, ~oparg);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("eor %w3, %w1, %w4",
+ __futex_atomic_op("eor %w3, %w1, %w5",
ret, oldval, uaddr, tmp, oparg);
break;
default:
@@ -93,6 +101,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
u32 oldval, u32 newval)
{
int ret = 0;
+ unsigned int loops = FUTEX_MAX_LOOPS;
u32 val, tmp;
u32 __user *uaddr;
@@ -104,24 +113,30 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *_uaddr,
asm volatile("// futex_atomic_cmpxchg_inatomic\n"
" prfm pstl1strm, %2\n"
"1: ldxr %w1, %2\n"
-" sub %w3, %w1, %w4\n"
-" cbnz %w3, 3f\n"
-"2: stlxr %w3, %w5, %2\n"
-" cbnz %w3, 1b\n"
-" dmb ish\n"
+" sub %w3, %w1, %w5\n"
+" cbnz %w3, 4f\n"
+"2: stlxr %w3, %w6, %2\n"
+" cbz %w3, 3f\n"
+" sub %w4, %w4, %w3\n"
+" cbnz %w4, 1b\n"
+" mov %w0, %w8\n"
"3:\n"
+" dmb ish\n"
+"4:\n"
" .pushsection .fixup,\"ax\"\n"
-"4: mov %w0, %w6\n"
-" b 3b\n"
+"5: mov %w0, %w7\n"
+" b 4b\n"
" .popsection\n"
- _ASM_EXTABLE(1b, 4b)
- _ASM_EXTABLE(2b, 4b)
- : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp)
- : "r" (oldval), "r" (newval), "Ir" (-EFAULT)
+ _ASM_EXTABLE(1b, 5b)
+ _ASM_EXTABLE(2b, 5b)
+ : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
+ : "r" (oldval), "r" (newval), "Ir" (-EFAULT), "Ir" (-EAGAIN)
: "memory");
uaccess_disable();
- *uval = val;
+ if (!ret)
+ *uval = val;
+
return ret;
}
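
The futex change above bounds the LDXR/STLXR retry loop at FUTEX_MAX_LOOPS and returns -EAGAIN once that budget is exhausted, instead of spinning indefinitely waiting for the exclusive store to succeed. The same idea in plain C with compiler builtins, as a sketch only (the real code stays in inline assembly so it can use the exclusive-monitor instructions directly):

	#include <errno.h>

	#define FUTEX_MAX_LOOPS	128

	static int bounded_fetch_add(unsigned int *uaddr, unsigned int oparg,
				     unsigned int *oldval)
	{
		unsigned int loops = FUTEX_MAX_LOOPS;
		unsigned int old = __atomic_load_n(uaddr, __ATOMIC_RELAXED);

		do {
			/* on failure, 'old' is refreshed with the current value */
			if (__atomic_compare_exchange_n(uaddr, &old, old + oparg,
							false, __ATOMIC_RELEASE,
							__ATOMIC_RELAXED)) {
				*oldval = old;
				return 0;	/* update succeeded */
			}
		} while (--loops);

		return -EAGAIN;		/* budget exhausted; caller may retry */
	}
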
diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h
index 400b80b49595..b4bfb6672168 100644
--- a/arch/arm64/include/asm/hwcap.h
+++ b/arch/arm64/include/asm/hwcap.h
@@ -17,6 +17,7 @@
#define __ASM_HWCAP_H
#include <uapi/asm/hwcap.h>
+#include <asm/cpufeature.h>
#define COMPAT_HWCAP_HALF (1 << 1)
#define COMPAT_HWCAP_THUMB (1 << 2)
@@ -40,11 +41,67 @@
#define COMPAT_HWCAP2_CRC32 (1 << 4)
#ifndef __ASSEMBLY__
+#include <linux/log2.h>
+
+/*
+ * For userspace we represent hwcaps as a collection of HWCAP{,2}_x bitfields
+ * as described in uapi/asm/hwcap.h. For the kernel we represent hwcaps as
+ * natural numbers (in a single range of size MAX_CPU_FEATURES) defined here
+ * with prefix KERNEL_HWCAP_ mapped to their HWCAP{,2}_x counterpart.
+ *
+ * Hwcaps should be set and tested within the kernel via the
+ * cpu_{set,have}_named_feature(feature) where feature is the unique suffix
+ * of KERNEL_HWCAP_{feature}.
+ */
+#define __khwcap_feature(x) const_ilog2(HWCAP_ ## x)
+#define KERNEL_HWCAP_FP __khwcap_feature(FP)
+#define KERNEL_HWCAP_ASIMD __khwcap_feature(ASIMD)
+#define KERNEL_HWCAP_EVTSTRM __khwcap_feature(EVTSTRM)
+#define KERNEL_HWCAP_AES __khwcap_feature(AES)
+#define KERNEL_HWCAP_PMULL __khwcap_feature(PMULL)
+#define KERNEL_HWCAP_SHA1 __khwcap_feature(SHA1)
+#define KERNEL_HWCAP_SHA2 __khwcap_feature(SHA2)
+#define KERNEL_HWCAP_CRC32 __khwcap_feature(CRC32)
+#define KERNEL_HWCAP_ATOMICS __khwcap_feature(ATOMICS)
+#define KERNEL_HWCAP_FPHP __khwcap_feature(FPHP)
+#define KERNEL_HWCAP_ASIMDHP __khwcap_feature(ASIMDHP)
+#define KERNEL_HWCAP_CPUID __khwcap_feature(CPUID)
+#define KERNEL_HWCAP_ASIMDRDM __khwcap_feature(ASIMDRDM)
+#define KERNEL_HWCAP_JSCVT __khwcap_feature(JSCVT)
+#define KERNEL_HWCAP_FCMA __khwcap_feature(FCMA)
+#define KERNEL_HWCAP_LRCPC __khwcap_feature(LRCPC)
+#define KERNEL_HWCAP_DCPOP __khwcap_feature(DCPOP)
+#define KERNEL_HWCAP_SHA3 __khwcap_feature(SHA3)
+#define KERNEL_HWCAP_SM3 __khwcap_feature(SM3)
+#define KERNEL_HWCAP_SM4 __khwcap_feature(SM4)
+#define KERNEL_HWCAP_ASIMDDP __khwcap_feature(ASIMDDP)
+#define KERNEL_HWCAP_SHA512 __khwcap_feature(SHA512)
+#define KERNEL_HWCAP_SVE __khwcap_feature(SVE)
+#define KERNEL_HWCAP_ASIMDFHM __khwcap_feature(ASIMDFHM)
+#define KERNEL_HWCAP_DIT __khwcap_feature(DIT)
+#define KERNEL_HWCAP_USCAT __khwcap_feature(USCAT)
+#define KERNEL_HWCAP_ILRCPC __khwcap_feature(ILRCPC)
+#define KERNEL_HWCAP_FLAGM __khwcap_feature(FLAGM)
+#define KERNEL_HWCAP_SSBS __khwcap_feature(SSBS)
+#define KERNEL_HWCAP_SB __khwcap_feature(SB)
+#define KERNEL_HWCAP_PACA __khwcap_feature(PACA)
+#define KERNEL_HWCAP_PACG __khwcap_feature(PACG)
+
+#define __khwcap2_feature(x) (const_ilog2(HWCAP2_ ## x) + 32)
+#define KERNEL_HWCAP_DCPODP __khwcap2_feature(DCPODP)
+#define KERNEL_HWCAP_SVE2 __khwcap2_feature(SVE2)
+#define KERNEL_HWCAP_SVEAES __khwcap2_feature(SVEAES)
+#define KERNEL_HWCAP_SVEPMULL __khwcap2_feature(SVEPMULL)
+#define KERNEL_HWCAP_SVEBITPERM __khwcap2_feature(SVEBITPERM)
+#define KERNEL_HWCAP_SVESHA3 __khwcap2_feature(SVESHA3)
+#define KERNEL_HWCAP_SVESM4 __khwcap2_feature(SVESM4)
+
/*
* This yields a mask that user programs can use to figure out what
* instruction set this cpu supports.
*/
-#define ELF_HWCAP (elf_hwcap)
+#define ELF_HWCAP cpu_get_elf_hwcap()
+#define ELF_HWCAP2 cpu_get_elf_hwcap2()
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP (compat_elf_hwcap)
@@ -60,6 +117,5 @@ enum {
#endif
};
-extern unsigned long elf_hwcap;
#endif
#endif
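
A short worked example of the numbering scheme introduced above, assuming the uapi bit assignments HWCAP_ASIMD == (1 << 1) and HWCAP2_SVE2 == (1 << 1):

	/*
	 * KERNEL_HWCAP_ASIMD = const_ilog2(HWCAP_ASIMD)      = 1
	 * KERNEL_HWCAP_SVE2  = const_ilog2(HWCAP2_SVE2) + 32 = 33
	 *
	 * Features 0..31 are reported to userspace via cpu_get_elf_hwcap()
	 * and features 32..63 via cpu_get_elf_hwcap2(), so each kernel-side
	 * feature number maps back to exactly one HWCAP{,2}_x bit.
	 */
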
diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index 9c01f04db64d..ec894de0ed4e 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -277,6 +277,7 @@ __AARCH64_INSN_FUNCS(adrp, 0x9F000000, 0x90000000)
__AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000)
__AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000)
__AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800)
+__AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0xB8200000)
__AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800)
__AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000)
__AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000)
@@ -394,6 +395,13 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
enum aarch64_insn_register state,
enum aarch64_insn_size_type size,
enum aarch64_insn_ldst_type type);
+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size);
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size);
u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst,
enum aarch64_insn_register src,
int imm, enum aarch64_insn_variant variant,
diff --git a/arch/arm64/include/asm/io.h b/arch/arm64/include/asm/io.h
index 8bb7210ac286..b807cb9b517d 100644
--- a/arch/arm64/include/asm/io.h
+++ b/arch/arm64/include/asm/io.h
@@ -124,8 +124,6 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
#define __io_par(v) __iormb(v)
#define __iowmb() wmb()
-#define mmiowb() do { } while (0)
-
/*
* Relaxed I/O memory access primitives. These follow the Device memory
* ordering rules but do not guarantee any ordering relative to Normal memory
diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h
index 43d8366c1e87..629963189085 100644
--- a/arch/arm64/include/asm/irqflags.h
+++ b/arch/arm64/include/asm/irqflags.h
@@ -43,7 +43,7 @@ static inline void arch_local_irq_enable(void)
asm volatile(ALTERNATIVE(
"msr daifclr, #2 // arch_local_irq_enable\n"
"nop",
- "msr_s " __stringify(SYS_ICC_PMR_EL1) ",%0\n"
+ __msr_s(SYS_ICC_PMR_EL1, "%0")
"dsb sy",
ARM64_HAS_IRQ_PRIO_MASKING)
:
@@ -55,7 +55,7 @@ static inline void arch_local_irq_disable(void)
{
asm volatile(ALTERNATIVE(
"msr daifset, #2 // arch_local_irq_disable",
- "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0",
+ __msr_s(SYS_ICC_PMR_EL1, "%0"),
ARM64_HAS_IRQ_PRIO_MASKING)
:
: "r" ((unsigned long) GIC_PRIO_IRQOFF)
@@ -86,7 +86,7 @@ static inline unsigned long arch_local_save_flags(void)
"mov %0, %1\n"
"nop\n"
"nop",
- "mrs_s %0, " __stringify(SYS_ICC_PMR_EL1) "\n"
+ __mrs_s("%0", SYS_ICC_PMR_EL1)
"ands %1, %1, " __stringify(PSR_I_BIT) "\n"
"csel %0, %0, %2, eq",
ARM64_HAS_IRQ_PRIO_MASKING)
@@ -116,7 +116,7 @@ static inline void arch_local_irq_restore(unsigned long flags)
asm volatile(ALTERNATIVE(
"msr daif, %0\n"
"nop",
- "msr_s " __stringify(SYS_ICC_PMR_EL1) ", %0\n"
+ __msr_s(SYS_ICC_PMR_EL1, "%0")
"dsb sy",
ARM64_HAS_IRQ_PRIO_MASKING)
: "+r" (flags)
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index d5a44cf859e9..21721fbf44e7 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -54,8 +54,6 @@ void arch_remove_kprobe(struct kprobe *);
int kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr);
int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
-int kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr);
-int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr);
void kretprobe_trampoline(void);
void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 4da765f2cca5..c3060833b7a5 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -30,7 +30,7 @@
({ \
u64 reg; \
asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
- "mrs_s %0, " __stringify(r##vh),\
+ __mrs_s("%0", r##vh), \
ARM64_HAS_VIRT_HOST_EXTN) \
: "=r" (reg)); \
reg; \
@@ -40,7 +40,7 @@
do { \
u64 __val = (u64)(v); \
asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
- "msr_s " __stringify(r##vh) ", %x0",\
+ __msr_s(r##vh, "%x0"), \
ARM64_HAS_VIRT_HOST_EXTN) \
: : "rZ" (__val)); \
} while (0)
diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h
index 290195168bb3..2cb8248fa2c8 100644
--- a/arch/arm64/include/asm/memory.h
+++ b/arch/arm64/include/asm/memory.h
@@ -302,7 +302,7 @@ static inline void *phys_to_virt(phys_addr_t x)
*/
#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET)
-#ifndef CONFIG_SPARSEMEM_VMEMMAP
+#if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL)
#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT)
#else
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 52fa47c73bf0..dabba4b2c61f 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -33,12 +33,22 @@
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
- return (pmd_t *)__get_free_page(PGALLOC_GFP);
+ struct page *page;
+
+ page = alloc_page(PGALLOC_GFP);
+ if (!page)
+ return NULL;
+ if (!pgtable_pmd_page_ctor(page)) {
+ __free_page(page);
+ return NULL;
+ }
+ return page_address(page);
}
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmdp)
{
BUG_ON((unsigned long)pmdp & (PAGE_SIZE-1));
+ pgtable_pmd_page_dtor(virt_to_page(pmdp));
free_page((unsigned long)pmdp);
}
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index de70c1eabf33..2c41b04708fe 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -478,6 +478,8 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
return __pmd_to_phys(pmd);
}
+static inline void pte_unmap(pte_t *pte) { }
+
/* Find an entry in the third-level page table. */
#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
@@ -485,9 +487,6 @@ static inline phys_addr_t pmd_page_paddr(pmd_t pmd)
#define pte_offset_kernel(dir,addr) ((pte_t *)__va(pte_offset_phys((dir), (addr))))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
diff --git a/arch/arm64/include/asm/pointer_auth.h b/arch/arm64/include/asm/pointer_auth.h
index 15d49515efdd..d328540cb85e 100644
--- a/arch/arm64/include/asm/pointer_auth.h
+++ b/arch/arm64/include/asm/pointer_auth.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_POINTER_AUTH_H
#define __ASM_POINTER_AUTH_H
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 5d9ce62bdebd..fcd0e691b1ea 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -57,7 +57,15 @@
#define TASK_SIZE_64 (UL(1) << vabits_user)
#ifdef CONFIG_COMPAT
+#if defined(CONFIG_ARM64_64K_PAGES) && defined(CONFIG_KUSER_HELPERS)
+/*
+ * With CONFIG_ARM64_64K_PAGES enabled, the last page is occupied
+ * by the compat vectors page.
+ */
#define TASK_SIZE_32 UL(0x100000000)
+#else
+#define TASK_SIZE_32 (UL(0x100000000) - PAGE_SIZE)
+#endif /* CONFIG_ARM64_64K_PAGES */
#define TASK_SIZE (test_thread_flag(TIF_32BIT) ? \
TASK_SIZE_32 : TASK_SIZE_64)
#define TASK_SIZE_OF(tsk) (test_tsk_thread_flag(tsk, TIF_32BIT) ? \
diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h
index ec60174c8c18..b2de32939ada 100644
--- a/arch/arm64/include/asm/ptrace.h
+++ b/arch/arm64/include/asm/ptrace.h
@@ -305,6 +305,28 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->regs[0];
}
+/**
+ * regs_get_kernel_argument() - get Nth function argument in kernel
+ * @regs: pt_regs of that context
+ * @n: function argument number (starting from 0)
+ *
+ * regs_get_kernel_argument() returns the @n-th argument of the function call.
+ *
+ * Note that this chooses the most likely register mapping. In very rare
+ * cases this may not return correct data, for example, if one of the
+ * function parameters is 16 bytes or bigger. In such cases, we cannot
+ * access the parameter correctly and the register assignment of
+ * subsequent parameters will be shifted.
+ */
+static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
+ unsigned int n)
+{
+#define NR_REG_ARGUMENTS 8
+ if (n < NR_REG_ARGUMENTS)
+ return pt_regs_read_reg(regs, n);
+ return 0;
+}
+
/* We must avoid circular header include via sched.h */
struct task_struct;
int valid_user_regs(struct user_pt_regs *regs, struct task_struct *task);
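
regs_get_kernel_argument() is primarily meant for instrumentation such as kprobes and ftrace-based tracers. A minimal sketch of a kprobe pre-handler using it follows; the probed symbol and the message are illustrative only and not part of this patch.

	#include <linux/kprobes.h>

	static int example_pre(struct kprobe *p, struct pt_regs *regs)
	{
		/* First (index 0) argument of the probed function, i.e. x0 on arm64. */
		unsigned long arg0 = regs_get_kernel_argument(regs, 0);

		pr_info("%s: first argument = 0x%lx\n", p->symbol_name, arg0);
		return 0;
	}

	static struct kprobe example_kp = {
		.symbol_name	= "do_sys_open",	/* arbitrary example symbol */
		.pre_handler	= example_pre,
	};

	/* register_kprobe(&example_kp) from module init, unregister_kprobe() on exit. */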
diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h
index ffe47d766c25..63e0b92a5fbb 100644
--- a/arch/arm64/include/asm/sdei.h
+++ b/arch/arm64/include/asm/sdei.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2017 Arm Ltd.
#ifndef __ASM_SDEI_H
#define __ASM_SDEI_H
diff --git a/arch/arm64/include/asm/signal32.h b/arch/arm64/include/asm/signal32.h
index 81abea0b7650..58e288aaf0ba 100644
--- a/arch/arm64/include/asm/signal32.h
+++ b/arch/arm64/include/asm/signal32.h
@@ -20,8 +20,6 @@
#ifdef CONFIG_COMPAT
#include <linux/compat.h>
-#define AARCH32_KERN_SIGRET_CODE_OFFSET 0x500
-
int compat_setup_frame(int usig, struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs);
int compat_setup_rt_frame(int usig, struct ksignal *ksig, sigset_t *set,
diff --git a/arch/arm64/include/asm/stage2_pgtable.h b/arch/arm64/include/asm/stage2_pgtable.h
index 5412fa40825e..915809e4ac32 100644
--- a/arch/arm64/include/asm/stage2_pgtable.h
+++ b/arch/arm64/include/asm/stage2_pgtable.h
@@ -119,7 +119,7 @@ static inline pud_t *stage2_pud_offset(struct kvm *kvm,
static inline void stage2_pud_free(struct kvm *kvm, pud_t *pud)
{
if (kvm_stage2_has_pud(kvm))
- pud_free(NULL, pud);
+ free_page((unsigned long)pud);
}
static inline bool stage2_pud_table_empty(struct kvm *kvm, pud_t *pudp)
@@ -192,7 +192,7 @@ static inline pmd_t *stage2_pmd_offset(struct kvm *kvm,
static inline void stage2_pmd_free(struct kvm *kvm, pmd_t *pmd)
{
if (kvm_stage2_has_pmd(kvm))
- pmd_free(NULL, pmd);
+ free_page((unsigned long)pmd);
}
static inline bool stage2_pud_huge(struct kvm *kvm, pud_t pud)
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index a179df3674a1..a65167f5cded 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -87,9 +87,9 @@ static inline void syscall_set_arguments(struct task_struct *task,
* We don't care about endianness (__AUDIT_ARCH_LE bit) here because
* AArch64 has the same system calls both on little- and big- endian.
*/
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
- if (is_compat_task())
+ if (is_compat_thread(task_thread_info(task)))
return AUDIT_ARCH_ARM;
return AUDIT_ARCH_AARCH64;
diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 5b267dec6194..3f7b917e8f3a 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -606,6 +606,20 @@
#define ID_AA64PFR1_SSBS_PSTATE_ONLY 1
#define ID_AA64PFR1_SSBS_PSTATE_INSNS 2
+/* id_aa64zfr0 */
+#define ID_AA64ZFR0_SM4_SHIFT 40
+#define ID_AA64ZFR0_SHA3_SHIFT 32
+#define ID_AA64ZFR0_BITPERM_SHIFT 16
+#define ID_AA64ZFR0_AES_SHIFT 4
+#define ID_AA64ZFR0_SVEVER_SHIFT 0
+
+#define ID_AA64ZFR0_SM4 0x1
+#define ID_AA64ZFR0_SHA3 0x1
+#define ID_AA64ZFR0_BITPERM 0x1
+#define ID_AA64ZFR0_AES 0x1
+#define ID_AA64ZFR0_AES_PMULL 0x2
+#define ID_AA64ZFR0_SVEVER_SVE2 0x1
+
/* id_aa64mmfr0 */
#define ID_AA64MMFR0_TGRAN4_SHIFT 28
#define ID_AA64MMFR0_TGRAN64_SHIFT 24
@@ -746,20 +760,39 @@
#include <linux/build_bug.h>
#include <linux/types.h>
-asm(
-" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n"
-" .equ .L__reg_num_x\\num, \\num\n"
-" .endr\n"
+#define __DEFINE_MRS_MSR_S_REGNUM \
+" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
+" .equ .L__reg_num_x\\num, \\num\n" \
+" .endr\n" \
" .equ .L__reg_num_xzr, 31\n"
-"\n"
-" .macro mrs_s, rt, sreg\n"
- __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt))
+
+#define DEFINE_MRS_S \
+ __DEFINE_MRS_MSR_S_REGNUM \
+" .macro mrs_s, rt, sreg\n" \
+ __emit_inst(0xd5200000|(\\sreg)|(.L__reg_num_\\rt)) \
" .endm\n"
-"\n"
-" .macro msr_s, sreg, rt\n"
- __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt))
+
+#define DEFINE_MSR_S \
+ __DEFINE_MRS_MSR_S_REGNUM \
+" .macro msr_s, sreg, rt\n" \
+ __emit_inst(0xd5000000|(\\sreg)|(.L__reg_num_\\rt)) \
" .endm\n"
-);
+
+#define UNDEFINE_MRS_S \
+" .purgem mrs_s\n"
+
+#define UNDEFINE_MSR_S \
+" .purgem msr_s\n"
+
+#define __mrs_s(v, r) \
+ DEFINE_MRS_S \
+" mrs_s " v ", " __stringify(r) "\n" \
+ UNDEFINE_MRS_S
+
+#define __msr_s(r, v) \
+ DEFINE_MSR_S \
+" msr_s " __stringify(r) ", " v "\n" \
+ UNDEFINE_MSR_S
/*
* Unlike read_cpuid, calls to read_sysreg are never expected to be
@@ -787,13 +820,13 @@ asm(
*/
#define read_sysreg_s(r) ({ \
u64 __val; \
- asm volatile("mrs_s %0, " __stringify(r) : "=r" (__val)); \
+ asm volatile(__mrs_s("%0", r) : "=r" (__val)); \
__val; \
})
#define write_sysreg_s(v, r) do { \
u64 __val = (u64)(v); \
- asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \
+ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \
} while (0)
/*
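
Call sites keep using the existing wrappers; only their expansion changes. For illustration (mirroring the irqflags.h hunk above), reading and writing ICC_PMR_EL1 still looks like:

	u64 pmr = read_sysreg_s(SYS_ICC_PMR_EL1);		/* now expands through __mrs_s() */

	write_sysreg_s(GIC_PRIO_IRQOFF, SYS_ICC_PMR_EL1);	/* now expands through __msr_s() */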
diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h
index 32693f34f431..fca95424e873 100644
--- a/arch/arm64/include/asm/system_misc.h
+++ b/arch/arm64/include/asm/system_misc.h
@@ -41,7 +41,6 @@ void hook_debug_fault_code(int nr, int (*fn)(unsigned long, unsigned int,
int sig, int code, const char *name);
struct mm_struct;
-extern void show_pte(unsigned long addr);
extern void __show_regs(struct pt_regs *);
extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd);
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index 106fdc951b6e..a287189ca8b4 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -27,6 +27,7 @@ static inline void __tlb_remove_table(void *_table)
free_page_and_swap_cache((struct page *)_table);
}
+#define tlb_flush tlb_flush
static void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
@@ -62,7 +63,10 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
unsigned long addr)
{
- tlb_remove_table(tlb, virt_to_page(pmdp));
+ struct page *page = virt_to_page(pmdp);
+
+ pgtable_pmd_page_dtor(page);
+ tlb_remove_table(tlb, page);
}
#endif
diff --git a/arch/arm64/include/asm/vdso_datapage.h b/arch/arm64/include/asm/vdso_datapage.h
index 2b9a63771eda..f89263c8e11a 100644
--- a/arch/arm64/include/asm/vdso_datapage.h
+++ b/arch/arm64/include/asm/vdso_datapage.h
@@ -38,6 +38,7 @@ struct vdso_data {
__u32 tz_minuteswest; /* Whacky timezone stuff */
__u32 tz_dsttime;
__u32 use_syscall;
+ __u32 hrtimer_res;
};
#endif /* !__ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h
index 0b5ec6e08c10..0a12115d9638 100644
--- a/arch/arm64/include/asm/vmap_stack.h
+++ b/arch/arm64/include/asm/vmap_stack.h
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+/* SPDX-License-Identifier: GPL-2.0 */
// Copyright (C) 2017 Arm Ltd.
#ifndef __ASM_VMAP_STACK_H
#define __ASM_VMAP_STACK_H
diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h
index 5f0750c2199c..1a772b162191 100644
--- a/arch/arm64/include/uapi/asm/hwcap.h
+++ b/arch/arm64/include/uapi/asm/hwcap.h
@@ -18,7 +18,7 @@
#define _UAPI__ASM_HWCAP_H
/*
- * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
+ * HWCAP flags - for AT_HWCAP
*/
#define HWCAP_FP (1 << 0)
#define HWCAP_ASIMD (1 << 1)
@@ -53,4 +53,15 @@
#define HWCAP_PACA (1 << 30)
#define HWCAP_PACG (1UL << 31)
+/*
+ * HWCAP2 flags - for AT_HWCAP2
+ */
+#define HWCAP2_DCPODP (1 << 0)
+#define HWCAP2_SVE2 (1 << 1)
+#define HWCAP2_SVEAES (1 << 2)
+#define HWCAP2_SVEPMULL (1 << 3)
+#define HWCAP2_SVEBITPERM (1 << 4)
+#define HWCAP2_SVESHA3 (1 << 5)
+#define HWCAP2_SVESM4 (1 << 6)
+
#endif /* _UAPI__ASM_HWCAP_H */
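
Userspace can probe the new bits through the auxiliary vector. A minimal sketch, assuming a libc that provides getauxval() and AT_HWCAP2 (the fallback define mirrors the uapi value above):

	#include <stdio.h>
	#include <sys/auxv.h>			/* getauxval(), AT_HWCAP2 */

	#ifndef HWCAP2_SVE2
	#define HWCAP2_SVE2	(1 << 1)	/* as defined in the uapi header above */
	#endif

	int main(void)
	{
		unsigned long hwcap2 = getauxval(AT_HWCAP2);

		printf("SVE2 %s\n", (hwcap2 & HWCAP2_SVE2) ? "supported" : "not reported");
		return 0;
	}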
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index cd434d0719c1..9e7dcb2c31c7 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -7,9 +7,9 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
CFLAGS_armv8_deprecated.o := -I$(src)
-CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_insn.o = -pg
-CFLAGS_REMOVE_return_address.o = -pg
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_insn.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
# Object file lists.
obj-y := debug-monitors.o entry.o irq.o fpsimd.o \
@@ -27,8 +27,9 @@ OBJCOPYFLAGS := --prefix-symbols=__efistub_
$(obj)/%.stub.o: $(obj)/%.o FORCE
$(call if_changed,objcopy)
-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \
- sys_compat.o
+obj-$(CONFIG_COMPAT) += sys32.o signal32.o \
+ sigreturn32.o sys_compat.o
+obj-$(CONFIG_KUSER_HELPERS) += kuser32.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index eac1d0cc595c..7ff800045434 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -45,7 +45,7 @@ static inline int get_cpu_for_acpi_id(u32 uid)
return -EINVAL;
}
-static int __init acpi_parse_gicc_pxm(struct acpi_subtable_header *header,
+static int __init acpi_parse_gicc_pxm(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_srat_gicc_affinity *pa;
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 7f40dcbdd51d..e10e2a5d9ddc 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -94,7 +94,7 @@ int main(void)
DEFINE(CLOCK_REALTIME, CLOCK_REALTIME);
DEFINE(CLOCK_MONOTONIC, CLOCK_MONOTONIC);
DEFINE(CLOCK_MONOTONIC_RAW, CLOCK_MONOTONIC_RAW);
- DEFINE(CLOCK_REALTIME_RES, MONOTONIC_RES_NSEC);
+ DEFINE(CLOCK_REALTIME_RES, offsetof(struct vdso_data, hrtimer_res));
DEFINE(CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
DEFINE(CLOCK_MONOTONIC_COARSE,CLOCK_MONOTONIC_COARSE);
DEFINE(CLOCK_COARSE_RES, LOW_RES_NSEC);
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 9950bb0cbd52..e88d4e7bdfc7 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -19,6 +19,7 @@
#include <linux/arm-smccc.h>
#include <linux/psci.h>
#include <linux/types.h>
+#include <linux/cpu.h>
#include <asm/cpu.h>
#include <asm/cputype.h>
#include <asm/cpufeature.h>
@@ -109,7 +110,6 @@ cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused)
atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1);
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
@@ -131,9 +131,9 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start,
__flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K);
}
-static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
- const char *hyp_vecs_start,
- const char *hyp_vecs_end)
+static void install_bp_hardening_cb(bp_hardening_cb_t fn,
+ const char *hyp_vecs_start,
+ const char *hyp_vecs_end)
{
static DEFINE_RAW_SPINLOCK(bp_lock);
int cpu, slot = -1;
@@ -169,7 +169,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
#define __smccc_workaround_1_smc_start NULL
#define __smccc_workaround_1_smc_end NULL
-static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
+static void install_bp_hardening_cb(bp_hardening_cb_t fn,
const char *hyp_vecs_start,
const char *hyp_vecs_end)
{
@@ -177,23 +177,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn,
}
#endif /* CONFIG_KVM_INDIRECT_VECTORS */
-static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry,
- bp_hardening_cb_t fn,
- const char *hyp_vecs_start,
- const char *hyp_vecs_end)
-{
- u64 pfr0;
-
- if (!entry->matches(entry, SCOPE_LOCAL_CPU))
- return;
-
- pfr0 = read_cpuid(ID_AA64PFR0_EL1);
- if (cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_CSV2_SHIFT))
- return;
-
- __install_bp_hardening_cb(fn, hyp_vecs_start, hyp_vecs_end);
-}
-
#include <uapi/linux/psci.h>
#include <linux/arm-smccc.h>
#include <linux/psci.h>
@@ -220,60 +203,83 @@ static void qcom_link_stack_sanitization(void)
: "=&r" (tmp));
}
-static void
-enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry)
+static bool __nospectre_v2;
+static int __init parse_nospectre_v2(char *str)
+{
+ __nospectre_v2 = true;
+ return 0;
+}
+early_param("nospectre_v2", parse_nospectre_v2);
+
+/*
+ * -1: No workaround available in firmware
+ *  0: No workaround required on this CPU
+ *  1: Workaround installed
+ */
+static int detect_harden_bp_fw(void)
{
bp_hardening_cb_t cb;
void *smccc_start, *smccc_end;
struct arm_smccc_res res;
u32 midr = read_cpuid_id();
- if (!entry->matches(entry, SCOPE_LOCAL_CPU))
- return;
-
if (psci_ops.smccc_version == SMCCC_VERSION_1_0)
- return;
+ return -1;
switch (psci_ops.conduit) {
case PSCI_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if ((int)res.a0 < 0)
- return;
- cb = call_hvc_arch_workaround_1;
- /* This is a guest, no need to patch KVM vectors */
- smccc_start = NULL;
- smccc_end = NULL;
+ switch ((int)res.a0) {
+ case 1:
+ /* Firmware says we're just fine */
+ return 0;
+ case 0:
+ cb = call_hvc_arch_workaround_1;
+ /* This is a guest, no need to patch KVM vectors */
+ smccc_start = NULL;
+ smccc_end = NULL;
+ break;
+ default:
+ return -1;
+ }
break;
case PSCI_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if ((int)res.a0 < 0)
- return;
- cb = call_smc_arch_workaround_1;
- smccc_start = __smccc_workaround_1_smc_start;
- smccc_end = __smccc_workaround_1_smc_end;
+ switch ((int)res.a0) {
+ case 1:
+ /* Firmware says we're just fine */
+ return 0;
+ case 0:
+ cb = call_smc_arch_workaround_1;
+ smccc_start = __smccc_workaround_1_smc_start;
+ smccc_end = __smccc_workaround_1_smc_end;
+ break;
+ default:
+ return -1;
+ }
break;
default:
- return;
+ return -1;
}
if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) ||
((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1))
cb = qcom_link_stack_sanitization;
- install_bp_hardening_cb(entry, cb, smccc_start, smccc_end);
+ if (IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR))
+ install_bp_hardening_cb(cb, smccc_start, smccc_end);
- return;
+ return 1;
}
-#endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */
-#ifdef CONFIG_ARM64_SSBD
DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required);
int ssbd_state __read_mostly = ARM64_SSBD_KERNEL;
+static bool __ssb_safe = true;
static const struct ssbd_options {
const char *str;
@@ -343,6 +349,11 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt,
void arm64_set_ssbd_mitigation(bool state)
{
+ if (!IS_ENABLED(CONFIG_ARM64_SSBD)) {
+ pr_info_once("SSBD disabled by kernel configuration\n");
+ return;
+ }
+
if (this_cpu_has_cap(ARM64_SSBS)) {
if (state)
asm volatile(SET_PSTATE_SSBS(0));
@@ -372,16 +383,28 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
struct arm_smccc_res res;
bool required = true;
s32 val;
+ bool this_cpu_safe = false;
WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+ if (cpu_mitigations_off())
+ ssbd_state = ARM64_SSBD_FORCE_DISABLE;
+
+ /* delay setting __ssb_safe until we get a firmware response */
+ if (is_midr_in_range_list(read_cpuid_id(), entry->midr_range_list))
+ this_cpu_safe = true;
+
if (this_cpu_has_cap(ARM64_SSBS)) {
+ if (!this_cpu_safe)
+ __ssb_safe = false;
required = false;
goto out_printmsg;
}
if (psci_ops.smccc_version == SMCCC_VERSION_1_0) {
ssbd_state = ARM64_SSBD_UNKNOWN;
+ if (!this_cpu_safe)
+ __ssb_safe = false;
return false;
}
@@ -398,6 +421,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
default:
ssbd_state = ARM64_SSBD_UNKNOWN;
+ if (!this_cpu_safe)
+ __ssb_safe = false;
return false;
}
@@ -406,14 +431,18 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
switch (val) {
case SMCCC_RET_NOT_SUPPORTED:
ssbd_state = ARM64_SSBD_UNKNOWN;
+ if (!this_cpu_safe)
+ __ssb_safe = false;
return false;
+ /* machines with mixed mitigation requirements must not return this */
case SMCCC_RET_NOT_REQUIRED:
pr_info_once("%s mitigation not required\n", entry->desc);
ssbd_state = ARM64_SSBD_MITIGATED;
return false;
case SMCCC_RET_SUCCESS:
+ __ssb_safe = false;
required = true;
break;
@@ -423,6 +452,8 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry,
default:
WARN_ON(1);
+ if (!this_cpu_safe)
+ __ssb_safe = false;
return false;
}
@@ -462,7 +493,14 @@ out_printmsg:
return required;
}
-#endif /* CONFIG_ARM64_SSBD */
+
+/* known invulnerable cores */
+static const struct midr_range arm64_ssb_cpus[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ {},
+};
static void __maybe_unused
cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
@@ -507,26 +545,67 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, \
CAP_MIDR_RANGE_LIST(midr_list)
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
+/* Track overall mitigation state. We are only mitigated if all cores are ok */
+static bool __hardenbp_enab = true;
+static bool __spectrev2_safe = true;
/*
- * List of CPUs where we need to issue a psci call to
- * harden the branch predictor.
+ * List of CPUs that do not need any Spectre-v2 mitigation at all.
*/
-static const struct midr_range arm64_bp_harden_smccc_cpus[] = {
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A57),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A72),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
- MIDR_ALL_VERSIONS(MIDR_CORTEX_A75),
- MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN),
- MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2),
- MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1),
- MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR),
- MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER),
- {},
+static const struct midr_range spectre_v2_safe_list[] = {
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A35),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A53),
+ MIDR_ALL_VERSIONS(MIDR_CORTEX_A55),
+ { /* sentinel */ }
};
-#endif
+/*
+ * Track overall bp hardening for all heterogeneous cores in the machine.
+ * We are only considered "safe" if all booted cores are known safe.
+ */
+static bool __maybe_unused
+check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope)
+{
+ int need_wa;
+
+ WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible());
+
+ /* If the CPU has CSV2 set, we're safe */
+ if (cpuid_feature_extract_unsigned_field(read_cpuid(ID_AA64PFR0_EL1),
+ ID_AA64PFR0_CSV2_SHIFT))
+ return false;
+
+ /* Alternatively, we have a list of unaffected CPUs */
+ if (is_midr_in_range_list(read_cpuid_id(), spectre_v2_safe_list))
+ return false;
+
+ /* Fallback to firmware detection */
+ need_wa = detect_harden_bp_fw();
+ if (!need_wa)
+ return false;
+
+ __spectrev2_safe = false;
+
+ if (!IS_ENABLED(CONFIG_HARDEN_BRANCH_PREDICTOR)) {
+ pr_warn_once("spectrev2 mitigation disabled by kernel configuration\n");
+ __hardenbp_enab = false;
+ return false;
+ }
+
+ /* forced off */
+ if (__nospectre_v2 || cpu_mitigations_off()) {
+ pr_info_once("spectrev2 mitigation disabled by command line option\n");
+ __hardenbp_enab = false;
+ return false;
+ }
+
+ if (need_wa < 0) {
+ pr_warn_once("ARM_SMCCC_ARCH_WORKAROUND_1 missing from firmware\n");
+ __hardenbp_enab = false;
+ }
+
+ return (need_wa > 0);
+}
#ifdef CONFIG_HARDEN_EL2_VECTORS
@@ -603,6 +682,16 @@ static const struct midr_range workaround_clean_cache[] = {
};
#endif
+#ifdef CONFIG_ARM64_ERRATUM_1188873
+static const struct midr_range erratum_1188873_list[] = {
+ /* Cortex-A76 r0p0 to r2p0 */
+ MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+ /* Neoverse-N1 r0p0 to r2p0 */
+ MIDR_RANGE(MIDR_NEOVERSE_N1, 0, 0, 2, 0),
+ {},
+};
+#endif
+
const struct arm64_cpu_capabilities arm64_errata[] = {
#ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE
{
@@ -701,13 +790,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A73),
},
#endif
-#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
{
.capability = ARM64_HARDEN_BRANCH_PREDICTOR,
- .cpu_enable = enable_smccc_arch_workaround_1,
- ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus),
+ .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+ .matches = check_branch_predictor,
},
-#endif
#ifdef CONFIG_HARDEN_EL2_VECTORS
{
.desc = "EL2 vector hardening",
@@ -715,20 +802,18 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors),
},
#endif
-#ifdef CONFIG_ARM64_SSBD
{
.desc = "Speculative Store Bypass Disable",
.capability = ARM64_SSBD,
.type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
.matches = has_ssbd_mitigation,
+ .midr_range_list = arm64_ssb_cpus,
},
-#endif
#ifdef CONFIG_ARM64_ERRATUM_1188873
{
- /* Cortex-A76 r0p0 to r2p0 */
.desc = "ARM erratum 1188873",
.capability = ARM64_WORKAROUND_1188873,
- ERRATA_MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0),
+ ERRATA_MIDR_RANGE_LIST(erratum_1188873_list),
},
#endif
#ifdef CONFIG_ARM64_ERRATUM_1165522
@@ -742,3 +827,38 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
{
}
};
+
+ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ if (__spectrev2_safe)
+ return sprintf(buf, "Not affected\n");
+
+ if (__hardenbp_enab)
+ return sprintf(buf, "Mitigation: Branch predictor hardening\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
+
+ssize_t cpu_show_spec_store_bypass(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (__ssb_safe)
+ return sprintf(buf, "Not affected\n");
+
+ switch (ssbd_state) {
+ case ARM64_SSBD_KERNEL:
+ case ARM64_SSBD_FORCE_ENABLE:
+ if (IS_ENABLED(CONFIG_ARM64_SSBD))
+ return sprintf(buf,
+ "Mitigation: Speculative Store Bypass disabled via prctl\n");
+ }
+
+ return sprintf(buf, "Vulnerable\n");
+}
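
These cpu_show_*() helpers feed the generic sysfs vulnerability reporting, so the resulting state can be checked from userspace by reading the corresponding files. A small sketch (the sysfs path is the one used by the common CPU driver):

	#include <stdio.h>

	int main(void)
	{
		char line[128];
		FILE *f = fopen("/sys/devices/system/cpu/vulnerabilities/spectre_v2", "r");

		if (f && fgets(line, sizeof(line), f))
			fputs(line, stdout);	/* e.g. "Mitigation: Branch predictor hardening" */
		if (f)
			fclose(f);
		return 0;
	}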
diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c
index ea001241bdd4..00f8b8612b69 100644
--- a/arch/arm64/kernel/cpu_ops.c
+++ b/arch/arm64/kernel/cpu_ops.c
@@ -85,6 +85,7 @@ static const char *__init cpu_read_enable_method(int cpu)
pr_err("%pOF: missing enable-method property\n",
dn);
}
+ of_node_put(dn);
} else {
enable_method = acpi_get_enable_method(cpu);
if (!enable_method) {
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 4061de10cea6..2b807f129e60 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -25,6 +25,7 @@
#include <linux/stop_machine.h>
#include <linux/types.h>
#include <linux/mm.h>
+#include <linux/cpu.h>
#include <asm/cpu.h>
#include <asm/cpufeature.h>
#include <asm/cpu_ops.h>
@@ -35,8 +36,8 @@
#include <asm/traps.h>
#include <asm/virt.h>
-unsigned long elf_hwcap __read_mostly;
-EXPORT_SYMBOL_GPL(elf_hwcap);
+/* Kernel representation of AT_HWCAP and AT_HWCAP2 */
+static unsigned long elf_hwcap __read_mostly;
#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP_DEFAULT \
@@ -184,6 +185,15 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = {
ARM64_FTR_END,
};
+static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = {
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SM4_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SHA3_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_BITPERM_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_AES_SHIFT, 4, 0),
+ ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ZFR0_SVEVER_SHIFT, 4, 0),
+ ARM64_FTR_END,
+};
+
static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = {
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN4_SHIFT, 4, ID_AA64MMFR0_TGRAN4_NI),
S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR0_TGRAN64_SHIFT, 4, ID_AA64MMFR0_TGRAN64_NI),
@@ -392,7 +402,7 @@ static const struct __ftr_reg_entry {
/* Op1 = 0, CRn = 0, CRm = 4 */
ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0),
ARM64_FTR_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1),
- ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_raz),
+ ARM64_FTR_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0),
/* Op1 = 0, CRn = 0, CRm = 5 */
ARM64_FTR_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0),
@@ -947,7 +957,7 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
return has_cpuid_feature(entry, scope);
}
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
+static bool __meltdown_safe = true;
static int __kpti_forced; /* 0: not forced, >0: forced on, <0: forced off */
static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
@@ -966,7 +976,17 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
MIDR_ALL_VERSIONS(MIDR_HISI_TSV110),
{ /* sentinel */ }
};
- char const *str = "command line option";
+ char const *str = "kpti command line option";
+ bool meltdown_safe;
+
+ meltdown_safe = is_midr_in_range_list(read_cpuid_id(), kpti_safe_list);
+
+ /* Defer to CPU feature registers */
+ if (has_cpuid_feature(entry, scope))
+ meltdown_safe = true;
+
+ if (!meltdown_safe)
+ __meltdown_safe = false;
/*
* For reasons that aren't entirely clear, enabling KPTI on Cavium
@@ -978,6 +998,24 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
__kpti_forced = -1;
}
+ /* Useful for KASLR robustness */
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0) {
+ if (!__kpti_forced) {
+ str = "KASLR";
+ __kpti_forced = 1;
+ }
+ }
+
+ if (cpu_mitigations_off() && !__kpti_forced) {
+ str = "mitigations=off";
+ __kpti_forced = -1;
+ }
+
+ if (!IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
+ pr_info_once("kernel page table isolation disabled by kernel configuration\n");
+ return false;
+ }
+
/* Forced? */
if (__kpti_forced) {
pr_info_once("kernel page table isolation forced %s by %s\n",
@@ -985,18 +1023,10 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry,
return __kpti_forced > 0;
}
- /* Useful for KASLR robustness */
- if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
- return kaslr_offset() > 0;
-
- /* Don't force KPTI for CPUs that are not vulnerable */
- if (is_midr_in_range_list(read_cpuid_id(), kpti_safe_list))
- return false;
-
- /* Defer to CPU feature registers */
- return !has_cpuid_feature(entry, scope);
+ return !meltdown_safe;
}
+#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
static void
kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
{
@@ -1026,6 +1056,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
return;
}
+#else
+static void
+kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused)
+{
+}
+#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
static int __init parse_kpti(char *str)
{
@@ -1039,7 +1075,6 @@ static int __init parse_kpti(char *str)
return 0;
}
early_param("kpti", parse_kpti);
-#endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
#ifdef CONFIG_ARM64_HW_AFDBM
static inline void __cpu_enable_hw_dbm(void)
@@ -1306,7 +1341,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64PFR0_EL0_SHIFT,
.min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
},
-#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
{
.desc = "Kernel page table isolation (KPTI)",
.capability = ARM64_UNMAP_KERNEL_AT_EL0,
@@ -1322,7 +1356,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.matches = unmap_kernel_at_el0,
.cpu_enable = kpti_install_ng_mappings,
},
-#endif
{
/* FP/SIMD is not implemented */
.capability = ARM64_HAS_NO_FPSIMD,
@@ -1340,6 +1373,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
.field_pos = ID_AA64ISAR1_DPB_SHIFT,
.min_field_value = 1,
},
+ {
+ .desc = "Data cache clean to Point of Deep Persistence",
+ .capability = ARM64_HAS_DCPODP,
+ .type = ARM64_CPUCAP_SYSTEM_FEATURE,
+ .matches = has_cpuid_feature,
+ .sys_reg = SYS_ID_AA64ISAR1_EL1,
+ .sign = FTR_UNSIGNED,
+ .field_pos = ID_AA64ISAR1_DPB_SHIFT,
+ .min_field_value = 2,
+ },
#endif
#ifdef CONFIG_ARM64_SVE
{
@@ -1571,39 +1614,46 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = {
#endif
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM),
- HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC),
- HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SB),
- HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
+ HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
+ HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB),
+ HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
#ifdef CONFIG_ARM64_SVE
- HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, HWCAP_SVE),
+ HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3),
+ HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4),
#endif
- HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS),
+ HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
#ifdef CONFIG_ARM64_PTR_AUTH
- HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, HWCAP_PACA),
- HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, HWCAP_PACG),
+ HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA),
+ HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG),
#endif
{},
};
@@ -1623,7 +1673,7 @@ static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
{
switch (cap->hwcap_type) {
case CAP_HWCAP:
- elf_hwcap |= cap->hwcap;
+ cpu_set_feature(cap->hwcap);
break;
#ifdef CONFIG_COMPAT
case CAP_COMPAT_HWCAP:
@@ -1646,7 +1696,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
switch (cap->hwcap_type) {
case CAP_HWCAP:
- rc = (elf_hwcap & cap->hwcap) != 0;
+ rc = cpu_have_feature(cap->hwcap);
break;
#ifdef CONFIG_COMPAT
case CAP_COMPAT_HWCAP:
@@ -1667,7 +1717,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
{
/* We support emulation of accesses to CPU ID feature registers */
- elf_hwcap |= HWCAP_CPUID;
+ cpu_set_named_feature(CPUID);
for (; hwcaps->matches; hwcaps++)
if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps)))
cap_set_elf_hwcap(hwcaps);
@@ -1947,6 +1997,35 @@ bool this_cpu_has_cap(unsigned int n)
return false;
}
+void cpu_set_feature(unsigned int num)
+{
+ WARN_ON(num >= MAX_CPU_FEATURES);
+ elf_hwcap |= BIT(num);
+}
+EXPORT_SYMBOL_GPL(cpu_set_feature);
+
+bool cpu_have_feature(unsigned int num)
+{
+ WARN_ON(num >= MAX_CPU_FEATURES);
+ return elf_hwcap & BIT(num);
+}
+EXPORT_SYMBOL_GPL(cpu_have_feature);
+
+unsigned long cpu_get_elf_hwcap(void)
+{
+ /*
+ * We currently only populate the first 32 bits of AT_HWCAP. Please
+ * note that for userspace compatibility we guarantee that bits 62
+ * and 63 will always be returned as 0.
+ */
+ return lower_32_bits(elf_hwcap);
+}
+
+unsigned long cpu_get_elf_hwcap2(void)
+{
+ return upper_32_bits(elf_hwcap);
+}
+
static void __init setup_system_capabilities(void)
{
/*
@@ -2101,3 +2180,15 @@ static int __init enable_mrs_emulation(void)
}
core_initcall(enable_mrs_emulation);
+
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ if (__meltdown_safe)
+ return sprintf(buf, "Not affected\n");
+
+ if (arm64_kernel_unmapped_at_el0())
+ return sprintf(buf, "Mitigation: PTI\n");
+
+ return sprintf(buf, "Vulnerable\n");
+}
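
Taken together, these accessors keep elf_hwcap private to cpufeature.c: features are recorded by kernel-internal bit number and split back into the two ELF hwcap words on read. A minimal sketch of the resulting behaviour (for illustration only, not part of the patch):

	/* Record SVE2 support: sets bit 33 (KERNEL_HWCAP_SVE2) in elf_hwcap. */
	cpu_set_feature(KERNEL_HWCAP_SVE2);

	/* The bit comes back to userspace as bit 1 of AT_HWCAP2. */
	WARN_ON(!(cpu_get_elf_hwcap2() & HWCAP2_SVE2));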
diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index ca0685f33900..f6f7936be6e7 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -85,6 +85,13 @@ static const char *const hwcap_str[] = {
"sb",
"paca",
"pacg",
+ "dcpodp",
+ "sve2",
+ "sveaes",
+ "svepmull",
+ "svebitperm",
+ "svesha3",
+ "svesm4",
NULL
};
@@ -167,7 +174,7 @@ static int c_show(struct seq_file *m, void *v)
#endif /* CONFIG_COMPAT */
} else {
for (j = 0; hwcap_str[j]; j++)
- if (elf_hwcap & (1 << j))
+ if (cpu_have_feature(j))
seq_printf(m, " %s", hwcap_str[j]);
}
seq_puts(m, "\n");
diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c
index d7bb6aefae0a..555b6bd2f3d6 100644
--- a/arch/arm64/kernel/debug-monitors.c
+++ b/arch/arm64/kernel/debug-monitors.c
@@ -135,6 +135,7 @@ NOKPROBE_SYMBOL(disable_debug_monitors);
*/
static int clear_os_lock(unsigned int cpu)
{
+ write_sysreg(0, osdlr_el1);
write_sysreg(0, oslar_el1);
isb();
return 0;
@@ -163,25 +164,46 @@ static void clear_regs_spsr_ss(struct pt_regs *regs)
}
NOKPROBE_SYMBOL(clear_regs_spsr_ss);
-/* EL1 Single Step Handler hooks */
-static LIST_HEAD(step_hook);
-static DEFINE_SPINLOCK(step_hook_lock);
+static DEFINE_SPINLOCK(debug_hook_lock);
+static LIST_HEAD(user_step_hook);
+static LIST_HEAD(kernel_step_hook);
-void register_step_hook(struct step_hook *hook)
+static void register_debug_hook(struct list_head *node, struct list_head *list)
{
- spin_lock(&step_hook_lock);
- list_add_rcu(&hook->node, &step_hook);
- spin_unlock(&step_hook_lock);
+ spin_lock(&debug_hook_lock);
+ list_add_rcu(node, list);
+ spin_unlock(&debug_hook_lock);
+
}
-void unregister_step_hook(struct step_hook *hook)
+static void unregister_debug_hook(struct list_head *node)
{
- spin_lock(&step_hook_lock);
- list_del_rcu(&hook->node);
- spin_unlock(&step_hook_lock);
+ spin_lock(&debug_hook_lock);
+ list_del_rcu(node);
+ spin_unlock(&debug_hook_lock);
synchronize_rcu();
}
+void register_user_step_hook(struct step_hook *hook)
+{
+ register_debug_hook(&hook->node, &user_step_hook);
+}
+
+void unregister_user_step_hook(struct step_hook *hook)
+{
+ unregister_debug_hook(&hook->node);
+}
+
+void register_kernel_step_hook(struct step_hook *hook)
+{
+ register_debug_hook(&hook->node, &kernel_step_hook);
+}
+
+void unregister_kernel_step_hook(struct step_hook *hook)
+{
+ unregister_debug_hook(&hook->node);
+}
+
/*
* Call registered single step handlers
* There is no Syndrome info to check for determining the handler.
@@ -191,11 +213,14 @@ void unregister_step_hook(struct step_hook *hook)
static int call_step_hook(struct pt_regs *regs, unsigned int esr)
{
struct step_hook *hook;
+ struct list_head *list;
int retval = DBG_HOOK_ERROR;
+ list = user_mode(regs) ? &user_step_hook : &kernel_step_hook;
+
rcu_read_lock();
- list_for_each_entry_rcu(hook, &step_hook, node) {
+ list_for_each_entry_rcu(hook, list, node) {
retval = hook->fn(regs, esr);
if (retval == DBG_HOOK_HANDLED)
break;
@@ -222,7 +247,7 @@ static void send_user_sigtrap(int si_code)
"User debug trap");
}
-static int single_step_handler(unsigned long addr, unsigned int esr,
+static int single_step_handler(unsigned long unused, unsigned int esr,
struct pt_regs *regs)
{
bool handler_found = false;
@@ -234,10 +259,6 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
if (!reinstall_suspended_bps(regs))
return 0;
-#ifdef CONFIG_KPROBES
- if (kprobe_single_step_handler(regs, esr) == DBG_HOOK_HANDLED)
- handler_found = true;
-#endif
if (!handler_found && call_step_hook(regs, esr) == DBG_HOOK_HANDLED)
handler_found = true;
@@ -264,61 +285,59 @@ static int single_step_handler(unsigned long addr, unsigned int esr,
}
NOKPROBE_SYMBOL(single_step_handler);
-/*
- * Breakpoint handler is re-entrant as another breakpoint can
- * hit within breakpoint handler, especically in kprobes.
- * Use reader/writer locks instead of plain spinlock.
- */
-static LIST_HEAD(break_hook);
-static DEFINE_SPINLOCK(break_hook_lock);
+static LIST_HEAD(user_break_hook);
+static LIST_HEAD(kernel_break_hook);
-void register_break_hook(struct break_hook *hook)
+void register_user_break_hook(struct break_hook *hook)
{
- spin_lock(&break_hook_lock);
- list_add_rcu(&hook->node, &break_hook);
- spin_unlock(&break_hook_lock);
+ register_debug_hook(&hook->node, &user_break_hook);
}
-void unregister_break_hook(struct break_hook *hook)
+void unregister_user_break_hook(struct break_hook *hook)
{
- spin_lock(&break_hook_lock);
- list_del_rcu(&hook->node);
- spin_unlock(&break_hook_lock);
- synchronize_rcu();
+ unregister_debug_hook(&hook->node);
+}
+
+void register_kernel_break_hook(struct break_hook *hook)
+{
+ register_debug_hook(&hook->node, &kernel_break_hook);
+}
+
+void unregister_kernel_break_hook(struct break_hook *hook)
+{
+ unregister_debug_hook(&hook->node);
}
static int call_break_hook(struct pt_regs *regs, unsigned int esr)
{
struct break_hook *hook;
+ struct list_head *list;
int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL;
+ list = user_mode(regs) ? &user_break_hook : &kernel_break_hook;
+
rcu_read_lock();
- list_for_each_entry_rcu(hook, &break_hook, node)
- if ((esr & hook->esr_mask) == hook->esr_val)
+ list_for_each_entry_rcu(hook, list, node) {
+ unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+
+ if ((comment & ~hook->mask) == hook->imm)
fn = hook->fn;
+ }
rcu_read_unlock();
return fn ? fn(regs, esr) : DBG_HOOK_ERROR;
}
NOKPROBE_SYMBOL(call_break_hook);
-static int brk_handler(unsigned long addr, unsigned int esr,
+static int brk_handler(unsigned long unused, unsigned int esr,
struct pt_regs *regs)
{
- bool handler_found = false;
-
-#ifdef CONFIG_KPROBES
- if ((esr & BRK64_ESR_MASK) == BRK64_ESR_KPROBES) {
- if (kprobe_breakpoint_handler(regs, esr) == DBG_HOOK_HANDLED)
- handler_found = true;
- }
-#endif
- if (!handler_found && call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
- handler_found = true;
+ if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED)
+ return 0;
- if (!handler_found && user_mode(regs)) {
+ if (user_mode(regs)) {
send_user_sigtrap(TRAP_BRKPT);
- } else if (!handler_found) {
+ } else {
pr_warn("Unexpected kernel BRK exception at EL1\n");
return -EFAULT;
}
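
With the reworked hooks, a client declares which BRK immediate it owns and registers explicitly on the user or kernel list; the kgdb conversion below follows the same pattern. A minimal sketch for a hypothetical kernel-side hook (the immediate value and handler are illustrative only):

	static int example_brk_handler(struct pt_regs *regs, unsigned int esr)
	{
		/* Claim the exception so no "Unexpected kernel BRK" warning follows. */
		return DBG_HOOK_HANDLED;
	}

	static struct break_hook example_break_hook = {
		.fn	= example_brk_handler,
		.imm	= 0x123,	/* matched against the BRK #imm comment field; .mask is left 0 */
	};

	static int __init example_init(void)
	{
		register_kernel_break_hook(&example_break_hook);
		return 0;
	}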
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c50a7a75f2e0..1a7811b7e3c4 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -336,6 +336,21 @@ alternative_if ARM64_WORKAROUND_845719
alternative_else_nop_endif
#endif
3:
+#ifdef CONFIG_ARM64_ERRATUM_1188873
+alternative_if_not ARM64_WORKAROUND_1188873
+ b 4f
+alternative_else_nop_endif
+ /*
+ * if (x22.mode32 == cntkctl_el1.el0vcten)
+ * cntkctl_el1.el0vcten = ~cntkctl_el1.el0vcten
+ */
+ mrs x1, cntkctl_el1
+ eon x0, x1, x22, lsr #3
+ tbz x0, #1, 4f
+ eor x1, x1, #2 // ARCH_TIMER_USR_VCT_ACCESS_EN
+ msr cntkctl_el1, x1
+4:
+#endif
apply_ssbd 0, x0, x1
.endif
@@ -362,11 +377,11 @@ alternative_else_nop_endif
.if \el == 0
alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
- bne 4f
+ bne 5f
msr far_el1, x30
tramp_alias x30, tramp_exit_native
br x30
-4:
+5:
tramp_alias x30, tramp_exit_compat
br x30
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 5ebe73b69961..735cf1f8b109 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1258,14 +1258,14 @@ static inline void fpsimd_hotplug_init(void) { }
*/
static int __init fpsimd_init(void)
{
- if (elf_hwcap & HWCAP_FP) {
+ if (cpu_have_named_feature(FP)) {
fpsimd_pm_init();
fpsimd_hotplug_init();
} else {
pr_notice("Floating-point is not implemented\n");
}
- if (!(elf_hwcap & HWCAP_ASIMD))
+ if (!cpu_have_named_feature(ASIMD))
pr_notice("Advanced SIMD is not implemented\n");
return sve_sysctl_init();
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index eecf7927dab0..fcae3f85c6cd 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -505,7 +505,7 @@ ENTRY(el2_setup)
* kernel is intended to run at EL2.
*/
mrs x2, id_aa64mmfr1_el1
- ubfx x2, x2, #8, #4
+ ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4
#else
mov x2, xzr
#endif
@@ -538,7 +538,7 @@ set_hcr:
#ifdef CONFIG_ARM_GIC_V3
/* GICv3 system register access */
mrs x0, id_aa64pfr0_el1
- ubfx x0, x0, #24, #4
+ ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4
cbz x0, 3f
mrs_s x0, SYS_ICC_SRE_EL2
@@ -564,8 +564,8 @@ set_hcr:
#endif
/* EL2 debug */
- mrs x1, id_aa64dfr0_el1 // Check ID_AA64DFR0_EL1 PMUVer
- sbfx x0, x1, #8, #4
+ mrs x1, id_aa64dfr0_el1
+ sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4
cmp x0, #1
b.lt 4f // Skip if no PMU present
mrs x0, pmcr_el0 // Disable debug access traps
@@ -574,7 +574,7 @@ set_hcr:
csel x3, xzr, x0, lt // all PMU counters from EL1
/* Statistical profiling */
- ubfx x0, x1, #32, #4 // Check ID_AA64DFR0_EL1 PMSVer
+ ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4
cbz x0, 7f // Skip if SPE not present
cbnz x2, 6f // VHE?
mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2,
@@ -684,7 +684,7 @@ ENTRY(__boot_cpu_mode)
* with MMU turned off.
*/
ENTRY(__early_cpu_boot_status)
- .long 0
+ .quad 0
.popsection
diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c
index 7820a4a688fa..9e2b5882cdeb 100644
--- a/arch/arm64/kernel/insn.c
+++ b/arch/arm64/kernel/insn.c
@@ -734,6 +734,46 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg,
state);
}
+u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result,
+ enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size)
+{
+ u32 insn = aarch64_insn_get_ldadd_value();
+
+ switch (size) {
+ case AARCH64_INSN_SIZE_32:
+ case AARCH64_INSN_SIZE_64:
+ break;
+ default:
+ pr_err("%s: unimplemented size encoding %d\n", __func__, size);
+ return AARCH64_BREAK_FAULT;
+ }
+
+ insn = aarch64_insn_encode_ldst_size(size, insn);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn,
+ result);
+
+ insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn,
+ address);
+
+ return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn,
+ value);
+}
+
+u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address,
+ enum aarch64_insn_register value,
+ enum aarch64_insn_size_type size)
+{
+ /*
+ * STADD is simply encoded as an alias for LDADD with XZR as
+ * the destination register.
+ */
+ return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address,
+ value, size);
+}
+
static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type,
enum aarch64_insn_prfm_target target,
enum aarch64_insn_prfm_policy policy,
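
A generator (for example a JIT) can now emit LSE atomics directly. For illustration, with hypothetical register choices, a 64-bit STADD that adds x1 into the memory location addressed by x0 could be produced as:

	u32 insn = aarch64_insn_gen_stadd(AARCH64_INSN_REG_0,	/* address (Rn): x0 */
					  AARCH64_INSN_REG_1,	/* value (Rs):   x1 */
					  AARCH64_INSN_SIZE_64);

	if (insn == AARCH64_BREAK_FAULT)
		return -EINVAL;		/* unsupported size encoding */

	/* The encoded word can then be emitted or patched in by the caller. */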
diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c
index 691854b77c7f..30853d5b7859 100644
--- a/arch/arm64/kernel/kgdb.c
+++ b/arch/arm64/kernel/kgdb.c
@@ -244,9 +244,6 @@ int kgdb_arch_handle_exception(int exception_vector, int signo,
static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr)
{
- if (user_mode(regs))
- return DBG_HOOK_ERROR;
-
kgdb_handle_exception(1, SIGTRAP, 0, regs);
return DBG_HOOK_HANDLED;
}
@@ -254,9 +251,6 @@ NOKPROBE_SYMBOL(kgdb_brk_fn)
static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr)
{
- if (user_mode(regs))
- return DBG_HOOK_ERROR;
-
compiled_break = 1;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
@@ -266,7 +260,7 @@ NOKPROBE_SYMBOL(kgdb_compiled_brk_fn);
static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
{
- if (user_mode(regs) || !kgdb_single_step)
+ if (!kgdb_single_step)
return DBG_HOOK_ERROR;
kgdb_handle_exception(1, SIGTRAP, 0, regs);
@@ -275,15 +269,13 @@ static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr)
NOKPROBE_SYMBOL(kgdb_step_brk_fn);
static struct break_hook kgdb_brkpt_hook = {
- .esr_mask = 0xffffffff,
- .esr_val = (u32)ESR_ELx_VAL_BRK64(KGDB_DYN_DBG_BRK_IMM),
- .fn = kgdb_brk_fn
+ .fn = kgdb_brk_fn,
+ .imm = KGDB_DYN_DBG_BRK_IMM,
};
static struct break_hook kgdb_compiled_brkpt_hook = {
- .esr_mask = 0xffffffff,
- .esr_val = (u32)ESR_ELx_VAL_BRK64(KGDB_COMPILED_DBG_BRK_IMM),
- .fn = kgdb_compiled_brk_fn
+ .fn = kgdb_compiled_brk_fn,
+ .imm = KGDB_COMPILED_DBG_BRK_IMM,
};
static struct step_hook kgdb_step_hook = {
@@ -332,9 +324,9 @@ int kgdb_arch_init(void)
if (ret != 0)
return ret;
- register_break_hook(&kgdb_brkpt_hook);
- register_break_hook(&kgdb_compiled_brkpt_hook);
- register_step_hook(&kgdb_step_hook);
+ register_kernel_break_hook(&kgdb_brkpt_hook);
+ register_kernel_break_hook(&kgdb_compiled_brkpt_hook);
+ register_kernel_step_hook(&kgdb_step_hook);
return 0;
}
@@ -345,9 +337,9 @@ int kgdb_arch_init(void)
*/
void kgdb_arch_exit(void)
{
- unregister_break_hook(&kgdb_brkpt_hook);
- unregister_break_hook(&kgdb_compiled_brkpt_hook);
- unregister_step_hook(&kgdb_step_hook);
+ unregister_kernel_break_hook(&kgdb_brkpt_hook);
+ unregister_kernel_break_hook(&kgdb_compiled_brkpt_hook);
+ unregister_kernel_step_hook(&kgdb_step_hook);
unregister_die_notifier(&kgdb_notifier);
}
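
For reference, a hedged sketch of the hook API this conversion targets: hooks now carry only the BRK immediate, and kernel- and user-mode hooks are registered separately. MY_BRK_IMM and my_brk_handler below are hypothetical names used for illustration only:

	static int my_brk_handler(struct pt_regs *regs, unsigned int esr)
	{
		/* Handle the BRK, then tell the debug code it is consumed. */
		return DBG_HOOK_HANDLED;
	}

	static struct break_hook my_break_hook = {
		.fn  = my_brk_handler,
		.imm = MY_BRK_IMM,	/* matched against the BRK immediate */
	};

	static int __init my_debug_init(void)
	{
		/* Kernel-mode only; user-mode hooks use register_user_break_hook(). */
		register_kernel_break_hook(&my_break_hook);
		return 0;
	}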
diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S
index 997e6b27ff6a..49825e9e421e 100644
--- a/arch/arm64/kernel/kuser32.S
+++ b/arch/arm64/kernel/kuser32.S
@@ -1,29 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Low-level user helpers placed in the vectors page for AArch32.
+ * AArch32 user helpers.
* Based on the kuser helpers in arch/arm/kernel/entry-armv.S.
*
* Copyright (C) 2005-2011 Nicolas Pitre <nico@fluxnic.net>
- * Copyright (C) 2012 ARM Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * Copyright (C) 2012-2018 ARM Ltd.
*
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- *
- * AArch32 user helpers.
- *
- * Each segment is 32-byte aligned and will be moved to the top of the high
- * vector page. New segments (if ever needed) must be added in front of
- * existing ones. This mechanism should be used only for things that are
- * really small and justified, and not be abused freely.
+ * The kuser helpers below are mapped at a fixed address by
+ * aarch32_setup_additional_pages() and are provided for compatibility
+ * reasons with 32 bit (aarch32) applications that need them.
*
* See Documentation/arm/kernel_user_helpers.txt for formal definitions.
*/
@@ -77,42 +62,3 @@ __kuser_helper_version: // 0xffff0ffc
.word ((__kuser_helper_end - __kuser_helper_start) >> 5)
.globl __kuser_helper_end
__kuser_helper_end:
-
-/*
- * AArch32 sigreturn code
- *
- * For ARM syscalls, the syscall number has to be loaded into r7.
- * We do not support an OABI userspace.
- *
- * For Thumb syscalls, we also pass the syscall number via r7. We therefore
- * need two 16-bit instructions.
- */
- .globl __aarch32_sigret_code_start
-__aarch32_sigret_code_start:
-
- /*
- * ARM Code
- */
- .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn
-
- /*
- * Thumb code
- */
- .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn
- .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn
-
- /*
- * ARM code
- */
- .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn
-
- /*
- * Thumb code
- */
- .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn
- .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn
-
- .globl __aarch32_sigret_code_end
-__aarch32_sigret_code_end:
diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
index 4addb38bc250..6164d389eed6 100644
--- a/arch/arm64/kernel/perf_event.c
+++ b/arch/arm64/kernel/perf_event.c
@@ -431,7 +431,7 @@ static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
return val;
}
-static inline u64 armv8pmu_read_counter(struct perf_event *event)
+static u64 armv8pmu_read_counter(struct perf_event *event)
{
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
@@ -468,7 +468,7 @@ static inline void armv8pmu_write_hw_counter(struct perf_event *event,
}
}
-static inline void armv8pmu_write_counter(struct perf_event *event, u64 value)
+static void armv8pmu_write_counter(struct perf_event *event, u64 value)
{
struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
struct hw_perf_event *hwc = &event->hw;
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index 7a679caf4585..2509fcb6d404 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -439,15 +439,12 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)
return DBG_HOOK_ERROR;
}
-int __kprobes
+static int __kprobes
kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
int retval;
- if (user_mode(regs))
- return DBG_HOOK_ERROR;
-
/* return error if this is not our step */
retval = kprobe_ss_hit(kcb, instruction_pointer(regs));
@@ -461,16 +458,22 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
return retval;
}
-int __kprobes
+static struct step_hook kprobes_step_hook = {
+ .fn = kprobe_single_step_handler,
+};
+
+static int __kprobes
kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr)
{
- if (user_mode(regs))
- return DBG_HOOK_ERROR;
-
kprobe_handler(regs);
return DBG_HOOK_HANDLED;
}
+static struct break_hook kprobes_break_hook = {
+ .imm = KPROBES_BRK_IMM,
+ .fn = kprobe_breakpoint_handler,
+};
+
/*
* Provide a blacklist of symbols identifying ranges which cannot be kprobed.
* This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
@@ -599,5 +602,8 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
int __init arch_init_kprobes(void)
{
+ register_kernel_break_hook(&kprobes_break_hook);
+ register_kernel_step_hook(&kprobes_step_hook);
+
return 0;
}
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index 636ca0119c0e..605945eac1f8 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -171,7 +171,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self,
static int uprobe_breakpoint_handler(struct pt_regs *regs,
unsigned int esr)
{
- if (user_mode(regs) && uprobe_pre_sstep_notifier(regs))
+ if (uprobe_pre_sstep_notifier(regs))
return DBG_HOOK_HANDLED;
return DBG_HOOK_ERROR;
@@ -182,21 +182,16 @@ static int uprobe_single_step_handler(struct pt_regs *regs,
{
struct uprobe_task *utask = current->utask;
- if (user_mode(regs)) {
- WARN_ON(utask &&
- (instruction_pointer(regs) != utask->xol_vaddr + 4));
-
- if (uprobe_post_sstep_notifier(regs))
- return DBG_HOOK_HANDLED;
- }
+ WARN_ON(utask && (instruction_pointer(regs) != utask->xol_vaddr + 4));
+ if (uprobe_post_sstep_notifier(regs))
+ return DBG_HOOK_HANDLED;
return DBG_HOOK_ERROR;
}
/* uprobe breakpoint handler hook */
static struct break_hook uprobes_break_hook = {
- .esr_mask = BRK64_ESR_MASK,
- .esr_val = BRK64_ESR_UPROBES,
+ .imm = UPROBES_BRK_IMM,
.fn = uprobe_breakpoint_handler,
};
@@ -207,8 +202,8 @@ static struct step_hook uprobes_step_hook = {
static int __init arch_init_uprobes(void)
{
- register_break_hook(&uprobes_break_hook);
- register_step_hook(&uprobes_step_hook);
+ register_user_break_hook(&uprobes_break_hook);
+ register_user_step_hook(&uprobes_step_hook);
return 0;
}
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index cb7800acd19f..caea6e25db2a 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -403,8 +403,7 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka,
if (ka->sa.sa_flags & SA_SIGINFO)
idx += 3;
- retcode = AARCH32_VECTORS_BASE +
- AARCH32_KERN_SIGRET_CODE_OFFSET +
+ retcode = (unsigned long)current->mm->context.vdso +
(idx << 2) + thumb;
}
diff --git a/arch/arm64/kernel/sigreturn32.S b/arch/arm64/kernel/sigreturn32.S
new file mode 100644
index 000000000000..475d30d471ac
--- /dev/null
+++ b/arch/arm64/kernel/sigreturn32.S
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AArch32 sigreturn code.
+ * Based on the kuser helpers in arch/arm/kernel/entry-armv.S.
+ *
+ * Copyright (C) 2005-2011 Nicolas Pitre <nico@fluxnic.net>
+ * Copyright (C) 2012-2018 ARM Ltd.
+ *
+ * For ARM syscalls, the syscall number has to be loaded into r7.
+ * We do not support an OABI userspace.
+ *
+ * For Thumb syscalls, we also pass the syscall number via r7. We therefore
+ * need two 16-bit instructions.
+ */
+
+#include <asm/unistd.h>
+
+ .globl __aarch32_sigret_code_start
+__aarch32_sigret_code_start:
+
+ /*
+ * ARM Code
+ */
+ .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn
+ .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn
+
+ /*
+ * Thumb code
+ */
+ .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn
+ .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn
+
+ /*
+ * ARM code
+ */
+ .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn
+ .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn
+
+ /*
+ * Thumb code
+ */
+ .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn
+ .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn
+
+ .globl __aarch32_sigret_code_end
+__aarch32_sigret_code_end:
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 824de7038967..bb4b3f07761a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -586,7 +586,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
}
static int __init
-acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
+acpi_parse_gic_cpu_interface(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_generic_interrupt *processor;
@@ -595,7 +595,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header,
if (BAD_MADT_GICC_ENTRY(processor, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
acpi_map_gic_cpu_interface(processor);
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index d908b5e9e949..b00ec7d483d1 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -140,8 +140,6 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
#endif
walk_stackframe(current, &frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
@@ -172,8 +170,6 @@ static noinline void __save_stack_trace(struct task_struct *tsk,
#endif
walk_stackframe(tsk, &frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
put_task_stack(tsk);
}
diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c
index b44065fb1616..6f91e8116514 100644
--- a/arch/arm64/kernel/sys.c
+++ b/arch/arm64/kernel/sys.c
@@ -31,7 +31,7 @@
SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len,
unsigned long, prot, unsigned long, flags,
- unsigned long, fd, off_t, off)
+ unsigned long, fd, unsigned long, off)
{
if (offset_in_page(off) != 0)
return -EINVAL;
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 29755989f616..ade32046f3fe 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -462,6 +462,9 @@ static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs)
case ESR_ELx_SYS64_ISS_CRM_DC_CVAC: /* DC CVAC, gets promoted */
__user_cache_maint("dc civac", address, ret);
break;
+ case ESR_ELx_SYS64_ISS_CRM_DC_CVADP: /* DC CVADP */
+ __user_cache_maint("sys 3, c7, c13, 1", address, ret);
+ break;
case ESR_ELx_SYS64_ISS_CRM_DC_CVAP: /* DC CVAP */
__user_cache_maint("sys 3, c7, c12, 1", address, ret);
break;
@@ -496,7 +499,7 @@ static void cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
{
int rt = ESR_ELx_SYS64_ISS_RT(esr);
- pt_regs_write_reg(regs, rt, arch_counter_get_cntvct());
+ pt_regs_write_reg(regs, rt, arch_timer_read_counter());
arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE);
}
@@ -668,7 +671,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs)
{
int rt = (esr & ESR_ELx_CP15_64_ISS_RT_MASK) >> ESR_ELx_CP15_64_ISS_RT_SHIFT;
int rt2 = (esr & ESR_ELx_CP15_64_ISS_RT2_MASK) >> ESR_ELx_CP15_64_ISS_RT2_SHIFT;
- u64 val = arch_counter_get_cntvct();
+ u64 val = arch_timer_read_counter();
pt_regs_write_reg(regs, rt, lower_32_bits(val));
pt_regs_write_reg(regs, rt2, upper_32_bits(val));
@@ -950,9 +953,6 @@ int is_valid_bugaddr(unsigned long addr)
static int bug_handler(struct pt_regs *regs, unsigned int esr)
{
- if (user_mode(regs))
- return DBG_HOOK_ERROR;
-
switch (report_bug(regs->pc, regs)) {
case BUG_TRAP_TYPE_BUG:
die("Oops - BUG", regs, 0);
@@ -972,9 +972,8 @@ static int bug_handler(struct pt_regs *regs, unsigned int esr)
}
static struct break_hook bug_break_hook = {
- .esr_val = 0xf2000000 | BUG_BRK_IMM,
- .esr_mask = 0xffffffff,
.fn = bug_handler,
+ .imm = BUG_BRK_IMM,
};
#ifdef CONFIG_KASAN_SW_TAGS
@@ -992,9 +991,6 @@ static int kasan_handler(struct pt_regs *regs, unsigned int esr)
u64 addr = regs->regs[0];
u64 pc = regs->pc;
- if (user_mode(regs))
- return DBG_HOOK_ERROR;
-
kasan_report(addr, size, write, pc);
/*
@@ -1019,13 +1015,10 @@ static int kasan_handler(struct pt_regs *regs, unsigned int esr)
return DBG_HOOK_HANDLED;
}
-#define KASAN_ESR_VAL (0xf2000000 | KASAN_BRK_IMM)
-#define KASAN_ESR_MASK 0xffffff00
-
static struct break_hook kasan_break_hook = {
- .esr_val = KASAN_ESR_VAL,
- .esr_mask = KASAN_ESR_MASK,
- .fn = kasan_handler,
+ .fn = kasan_handler,
+ .imm = KASAN_BRK_IMM,
+ .mask = KASAN_BRK_MASK,
};
#endif
@@ -1037,7 +1030,9 @@ int __init early_brk64(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
#ifdef CONFIG_KASAN_SW_TAGS
- if ((esr & KASAN_ESR_MASK) == KASAN_ESR_VAL)
+ unsigned int comment = esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
+
+ if ((comment & ~KASAN_BRK_MASK) == KASAN_BRK_IMM)
return kasan_handler(regs, esr) != DBG_HOOK_HANDLED;
#endif
return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
@@ -1046,8 +1041,8 @@ int __init early_brk64(unsigned long addr, unsigned int esr,
/* This registration must happen early, before debug_traps_init(). */
void __init trap_init(void)
{
- register_break_hook(&bug_break_hook);
+ register_kernel_break_hook(&bug_break_hook);
#ifdef CONFIG_KASAN_SW_TAGS
- register_break_hook(&kasan_break_hook);
+ register_kernel_break_hook(&kasan_break_hook);
#endif
}
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 2d419006ad43..8074cbd3a3a8 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -1,5 +1,5 @@
/*
- * VDSO implementation for AArch64 and vector page setup for AArch32.
+ * VDSO implementations.
*
* Copyright (C) 2012 ARM Limited
*
@@ -53,61 +53,129 @@ struct vdso_data *vdso_data = &vdso_data_store.data;
/*
* Create and map the vectors page for AArch32 tasks.
*/
-static struct page *vectors_page[1] __ro_after_init;
+#define C_VECTORS 0
+#define C_SIGPAGE 1
+#define C_PAGES (C_SIGPAGE + 1)
+static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init;
+static const struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = {
+ {
+ .name = "[vectors]", /* ABI */
+ .pages = &aarch32_vdso_pages[C_VECTORS],
+ },
+ {
+ .name = "[sigpage]", /* ABI */
+ .pages = &aarch32_vdso_pages[C_SIGPAGE],
+ },
+};
-static int __init alloc_vectors_page(void)
+static int aarch32_alloc_kuser_vdso_page(void)
{
extern char __kuser_helper_start[], __kuser_helper_end[];
- extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
-
int kuser_sz = __kuser_helper_end - __kuser_helper_start;
- int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
- unsigned long vpage;
+ unsigned long vdso_page;
- vpage = get_zeroed_page(GFP_ATOMIC);
+ if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
+ return 0;
- if (!vpage)
+ vdso_page = get_zeroed_page(GFP_ATOMIC);
+ if (!vdso_page)
return -ENOMEM;
- /* kuser helpers */
- memcpy((void *)vpage + 0x1000 - kuser_sz, __kuser_helper_start,
- kuser_sz);
+ memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start,
+ kuser_sz);
+ aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_page);
+ flush_dcache_page(aarch32_vdso_pages[C_VECTORS]);
+ return 0;
+}
- /* sigreturn code */
- memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET,
- __aarch32_sigret_code_start, sigret_sz);
+static int __init aarch32_alloc_vdso_pages(void)
+{
+ extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[];
+ int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start;
+ unsigned long sigpage;
+ int ret;
- flush_icache_range(vpage, vpage + PAGE_SIZE);
- vectors_page[0] = virt_to_page(vpage);
+ sigpage = get_zeroed_page(GFP_ATOMIC);
+ if (!sigpage)
+ return -ENOMEM;
- return 0;
+ memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz);
+ aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(sigpage);
+ flush_dcache_page(aarch32_vdso_pages[C_SIGPAGE]);
+
+ ret = aarch32_alloc_kuser_vdso_page();
+ if (ret)
+ free_page(sigpage);
+
+ return ret;
}
-arch_initcall(alloc_vectors_page);
+arch_initcall(aarch32_alloc_vdso_pages);
-int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
+static int aarch32_kuser_helpers_setup(struct mm_struct *mm)
{
- struct mm_struct *mm = current->mm;
- unsigned long addr = AARCH32_VECTORS_BASE;
- static const struct vm_special_mapping spec = {
- .name = "[vectors]",
- .pages = vectors_page,
+ void *ret;
+
+ if (!IS_ENABLED(CONFIG_KUSER_HELPERS))
+ return 0;
+
+ /*
+ * Avoid VM_MAYWRITE for compatibility with arch/arm/, where it's
+ * not safe to CoW the page containing the CPU exception vectors.
+ */
+ ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE,
+ VM_READ | VM_EXEC |
+ VM_MAYREAD | VM_MAYEXEC,
+ &aarch32_vdso_spec[C_VECTORS]);
- };
+ return PTR_ERR_OR_ZERO(ret);
+}
+
+static int aarch32_sigreturn_setup(struct mm_struct *mm)
+{
+ unsigned long addr;
void *ret;
- if (down_write_killable(&mm->mmap_sem))
- return -EINTR;
- current->mm->context.vdso = (void *)addr;
+ addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
+ if (IS_ERR_VALUE(addr)) {
+ ret = ERR_PTR(addr);
+ goto out;
+ }
- /* Map vectors page at the high address. */
+ /*
+	 * VM_MAYWRITE is required so that gdb can copy-on-write the page
+	 * when setting breakpoints.
+ */
ret = _install_special_mapping(mm, addr, PAGE_SIZE,
- VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC,
- &spec);
+ VM_READ | VM_EXEC | VM_MAYREAD |
+ VM_MAYWRITE | VM_MAYEXEC,
+ &aarch32_vdso_spec[C_SIGPAGE]);
+ if (IS_ERR(ret))
+ goto out;
- up_write(&mm->mmap_sem);
+ mm->context.vdso = (void *)addr;
+out:
return PTR_ERR_OR_ZERO(ret);
}
+
+int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+{
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ if (down_write_killable(&mm->mmap_sem))
+ return -EINTR;
+
+ ret = aarch32_kuser_helpers_setup(mm);
+ if (ret)
+ goto out;
+
+ ret = aarch32_sigreturn_setup(mm);
+
+out:
+ up_write(&mm->mmap_sem);
+ return ret;
+}
#endif /* CONFIG_COMPAT */
static int vdso_mremap(const struct vm_special_mapping *sm,
@@ -146,8 +214,6 @@ static int __init vdso_init(void)
}
vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
- pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n",
- vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data);
/* Allocate the vDSO pagelist, plus a page for the data. */
vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *),
@@ -232,6 +298,9 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec;
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
+ /* Read without the seqlock held by clock_getres() */
+ WRITE_ONCE(vdso_data->hrtimer_res, hrtimer_resolution);
+
if (!use_syscall) {
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
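
A rough C equivalent of what the vDSO __kernel_clock_getres fast path (patched later in this series) does with the value published here; getres_fast() is an illustrative name, not a function added by the patch:

	static notrace int getres_fast(const struct vdso_data *vd,
				       struct timespec *res)
	{
		/* Read the resolution published by update_vsyscall() above;
		 * no seqcount is taken, hence the WRITE_ONCE() on the writer side. */
		u32 ns = READ_ONCE(vd->hrtimer_res);

		if (res) {
			res->tv_sec  = 0;
			res->tv_nsec = ns;
		}
		return 0;
	}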
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index b215c712d897..744b9dbaba03 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -12,17 +12,12 @@ obj-vdso := gettimeofday.o note.o sigreturn.o
targets := $(obj-vdso) vdso.so vdso.so.dbg
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 \
+ $(call ld-option, --hash-style=sysv) -n -T
# Disable gcov profiling for VDSO code
GCOV_PROFILE := n
-# Workaround for bare-metal (ELF) toolchains that neglect to pass -shared
-# down to collect2, resulting in silent corruption of the vDSO image.
-ccflags-y += -Wl,-shared
-
obj-y += vdso.o
extra-y += vdso.lds
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
@@ -31,8 +26,8 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
$(obj)/vdso.o : $(obj)/vdso.so
# Link rule for the .so file, .lds has to be first
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso)
- $(call if_changed,vdsold)
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
+ $(call if_changed,ld)
# Strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -42,9 +37,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE
# Generate VDSO offsets using helper script
gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh
quiet_cmd_vdsosym = VDSOSYM $@
-define cmd_vdsosym
- $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
-endef
+ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@
include/generated/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE
$(call if_changed,vdsosym)
@@ -54,8 +47,6 @@ $(obj-vdso): %.o: %.S FORCE
$(call if_changed_dep,vdsoas)
# Actual build commands
-quiet_cmd_vdsold = VDSOL $@
- cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@
quiet_cmd_vdsoas = VDSOA $@
cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $<
diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S
index c39872a7b03c..856fee6d3512 100644
--- a/arch/arm64/kernel/vdso/gettimeofday.S
+++ b/arch/arm64/kernel/vdso/gettimeofday.S
@@ -73,6 +73,13 @@ x_tmp .req x8
movn x_tmp, #0xff00, lsl #48
and \res, x_tmp, \res
mul \res, \res, \mult
+ /*
+ * Fake address dependency from the value computed from the counter
+ * register to subsequent data page accesses so that the sequence
+ * locking also orders the read of the counter.
+ */
+ and x_tmp, \res, xzr
+ add vdso_data, vdso_data, x_tmp
.endm
/*
@@ -147,12 +154,12 @@ ENTRY(__kernel_gettimeofday)
/* w11 = cs_mono_mult, w12 = cs_shift */
ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- seqcnt_check fail=1b
get_nsec_per_sec res=x9
lsl x9, x9, x12
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ seqcnt_check fail=1b
get_ts_realtime res_sec=x10, res_nsec=x11, \
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
@@ -211,13 +218,13 @@ realtime:
/* w11 = cs_mono_mult, w12 = cs_shift */
ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
- seqcnt_check fail=realtime
/* All computations are done with left-shifted nsecs. */
get_nsec_per_sec res=x9
lsl x9, x9, x12
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ seqcnt_check fail=realtime
get_ts_realtime res_sec=x10, res_nsec=x11, \
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
clock_gettime_return, shift=1
@@ -231,7 +238,6 @@ monotonic:
ldp w11, w12, [vdso_data, #VDSO_CS_MONO_MULT]
ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
- seqcnt_check fail=monotonic
/* All computations are done with left-shifted nsecs. */
lsl x4, x4, x12
@@ -239,6 +245,7 @@ monotonic:
lsl x9, x9, x12
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ seqcnt_check fail=monotonic
get_ts_realtime res_sec=x10, res_nsec=x11, \
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
@@ -253,13 +260,13 @@ monotonic_raw:
/* w11 = cs_raw_mult, w12 = cs_shift */
ldp w12, w11, [vdso_data, #VDSO_CS_SHIFT]
ldp x13, x14, [vdso_data, #VDSO_RAW_TIME_SEC]
- seqcnt_check fail=monotonic_raw
/* All computations are done with left-shifted nsecs. */
get_nsec_per_sec res=x9
lsl x9, x9, x12
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
+ seqcnt_check fail=monotonic_raw
get_ts_clock_raw res_sec=x10, res_nsec=x11, \
clock_nsec=x15, nsec_to_sec=x9
@@ -301,13 +308,14 @@ ENTRY(__kernel_clock_getres)
ccmp w0, #CLOCK_MONOTONIC_RAW, #0x4, ne
b.ne 1f
- ldr x2, 5f
+ adr vdso_data, _vdso_data
+ ldr w2, [vdso_data, #CLOCK_REALTIME_RES]
b 2f
1:
cmp w0, #CLOCK_REALTIME_COARSE
ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
b.ne 4f
- ldr x2, 6f
+ ldr x2, 5f
2:
cbz x1, 3f
stp xzr, x2, [x1]
@@ -321,8 +329,6 @@ ENTRY(__kernel_clock_getres)
svc #0
ret
5:
- .quad CLOCK_REALTIME_RES
-6:
.quad CLOCK_COARSE_RES
.cfi_endproc
ENDPROC(__kernel_clock_getres)
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index a3f85624313e..a67121d419a2 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -23,7 +23,6 @@ config KVM
depends on OF
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile
index 5540a1638baf..33c2a4abda04 100644
--- a/arch/arm64/lib/Makefile
+++ b/arch/arm64/lib/Makefile
@@ -24,7 +24,7 @@ CFLAGS_atomic_ll_sc.o := -ffixed-x1 -ffixed-x2 \
-fcall-saved-x10 -fcall-saved-x11 -fcall-saved-x12 \
-fcall-saved-x13 -fcall-saved-x14 -fcall-saved-x15 \
-fcall-saved-x18 -fomit-frame-pointer
-CFLAGS_REMOVE_atomic_ll_sc.o := -pg
+CFLAGS_REMOVE_atomic_ll_sc.o := $(CC_FLAGS_FTRACE)
GCOV_PROFILE_atomic_ll_sc.o := n
KASAN_SANITIZE_atomic_ll_sc.o := n
KCOV_INSTRUMENT_atomic_ll_sc.o := n
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 1a7e92ab69eb..0cb0e09995e1 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -148,7 +148,7 @@ static inline bool is_ttbr1_addr(unsigned long addr)
/*
* Dump out the page tables associated with 'addr' in the currently active mm.
*/
-void show_pte(unsigned long addr)
+static void show_pte(unsigned long addr)
{
struct mm_struct *mm;
pgd_t *pgdp;
@@ -810,13 +810,12 @@ void __init hook_debug_fault_code(int nr,
debug_fault_info[nr].name = name;
}
-asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint,
- unsigned int esr,
- struct pt_regs *regs)
+asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint,
+ unsigned int esr,
+ struct pt_regs *regs)
{
const struct fault_info *inf = esr_to_debug_fault_info(esr);
unsigned long pc = instruction_pointer(regs);
- int rv;
/*
* Tell lockdep we disabled irqs in entry.S. Do nothing if they were
@@ -828,17 +827,12 @@ asmlinkage int __exception do_debug_exception(unsigned long addr_if_watchpoint,
if (user_mode(regs) && !is_ttbr0_addr(pc))
arm64_apply_bp_hardening();
- if (!inf->fn(addr_if_watchpoint, esr, regs)) {
- rv = 1;
- } else {
+ if (inf->fn(addr_if_watchpoint, esr, regs)) {
arm64_notify_die(inf->name, regs,
inf->sig, inf->code, (void __user *)pc, esr);
- rv = 0;
}
if (interrupts_enabled(regs))
trace_hardirqs_on();
-
- return rv;
}
NOKPROBE_SYMBOL(do_debug_exception);
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 7cae155e81a5..40e2d7e5efcb 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -377,7 +377,7 @@ void __init arm64_memblock_init(void)
base + size > memblock_start_of_DRAM() +
linear_region_size,
"initrd not fully accessible via the linear mapping -- please check your bootloader ...\n")) {
- initrd_start = 0;
+ phys_initrd_size = 0;
} else {
memblock_remove(base, size); /* clear MEMBLOCK_ flags */
memblock_add(base, size);
@@ -440,6 +440,7 @@ void __init bootmem_init(void)
early_memtest(min << PAGE_SHIFT, max << PAGE_SHIFT);
max_pfn = max_low_pfn = max;
+ min_low_pfn = min;
arm64_numa_init();
/*
@@ -535,7 +536,7 @@ void __init mem_init(void)
else
swiotlb_force = SWIOTLB_NO_FORCE;
- set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
+ set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
#ifndef CONFIG_SPARSEMEM_VMEMMAP
free_unused_memmap();
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index e97f018ff740..ef82312860ac 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -97,7 +97,7 @@ pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
}
EXPORT_SYMBOL(phys_mem_access_prot);
-static phys_addr_t __init early_pgtable_alloc(void)
+static phys_addr_t __init early_pgtable_alloc(int shift)
{
phys_addr_t phys;
void *ptr;
@@ -174,7 +174,7 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end,
static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void),
+ phys_addr_t (*pgtable_alloc)(int),
int flags)
{
unsigned long next;
@@ -184,7 +184,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
if (pmd_none(pmd)) {
phys_addr_t pte_phys;
BUG_ON(!pgtable_alloc);
- pte_phys = pgtable_alloc();
+ pte_phys = pgtable_alloc(PAGE_SHIFT);
__pmd_populate(pmdp, pte_phys, PMD_TYPE_TABLE);
pmd = READ_ONCE(*pmdp);
}
@@ -208,7 +208,7 @@ static void alloc_init_cont_pte(pmd_t *pmdp, unsigned long addr,
static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void), int flags)
+ phys_addr_t (*pgtable_alloc)(int), int flags)
{
unsigned long next;
pmd_t *pmdp;
@@ -246,7 +246,7 @@ static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
unsigned long end, phys_addr_t phys,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void), int flags)
+ phys_addr_t (*pgtable_alloc)(int), int flags)
{
unsigned long next;
pud_t pud = READ_ONCE(*pudp);
@@ -258,7 +258,7 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr,
if (pud_none(pud)) {
phys_addr_t pmd_phys;
BUG_ON(!pgtable_alloc);
- pmd_phys = pgtable_alloc();
+ pmd_phys = pgtable_alloc(PMD_SHIFT);
__pud_populate(pudp, pmd_phys, PUD_TYPE_TABLE);
pud = READ_ONCE(*pudp);
}
@@ -294,7 +294,7 @@ static inline bool use_1G_block(unsigned long addr, unsigned long next,
static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void),
+ phys_addr_t (*pgtable_alloc)(int),
int flags)
{
unsigned long next;
@@ -304,7 +304,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
if (pgd_none(pgd)) {
phys_addr_t pud_phys;
BUG_ON(!pgtable_alloc);
- pud_phys = pgtable_alloc();
+ pud_phys = pgtable_alloc(PUD_SHIFT);
__pgd_populate(pgdp, pud_phys, PUD_TYPE_TABLE);
pgd = READ_ONCE(*pgdp);
}
@@ -345,7 +345,7 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end,
static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
unsigned long virt, phys_addr_t size,
pgprot_t prot,
- phys_addr_t (*pgtable_alloc)(void),
+ phys_addr_t (*pgtable_alloc)(int),
int flags)
{
unsigned long addr, length, end, next;
@@ -371,17 +371,36 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys,
} while (pgdp++, addr = next, addr != end);
}
-static phys_addr_t pgd_pgtable_alloc(void)
+static phys_addr_t __pgd_pgtable_alloc(int shift)
{
void *ptr = (void *)__get_free_page(PGALLOC_GFP);
- if (!ptr || !pgtable_page_ctor(virt_to_page(ptr)))
- BUG();
+ BUG_ON(!ptr);
/* Ensure the zeroed page is visible to the page table walker */
dsb(ishst);
return __pa(ptr);
}
+static phys_addr_t pgd_pgtable_alloc(int shift)
+{
+ phys_addr_t pa = __pgd_pgtable_alloc(shift);
+
+ /*
+	 * Call the proper page table ctor in case we later need to call
+	 * core mm functions like apply_to_page_range() on this
+	 * pre-allocated page table.
+	 *
+	 * We don't select ARCH_ENABLE_SPLIT_PMD_PTLOCK if the pmd is
+	 * folded, in which case pgtable_pmd_page_ctor() becomes a nop.
+ */
+ if (shift == PAGE_SHIFT)
+ BUG_ON(!pgtable_page_ctor(phys_to_page(pa)));
+ else if (shift == PMD_SHIFT)
+ BUG_ON(!pgtable_pmd_page_ctor(phys_to_page(pa)));
+
+ return pa;
+}
+
/*
* This function can only be used to modify existing table entries,
* without allocating new levels of table. Note that this permits the
@@ -583,7 +602,7 @@ static int __init map_entry_trampoline(void)
/* Map only the text into the trampoline page table */
memset(tramp_pg_dir, 0, PGD_SIZE);
__create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE,
- prot, pgd_pgtable_alloc, 0);
+ prot, __pgd_pgtable_alloc, 0);
/* Map both the text and data into the kernel page table */
__set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot);
@@ -1055,7 +1074,7 @@ int arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
- size, PAGE_KERNEL, pgd_pgtable_alloc, flags);
+ size, PAGE_KERNEL, __pgd_pgtable_alloc, flags);
return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
altmap, want_memblock);
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 06a6f264f2dd..5202f63c29c9 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -124,7 +124,7 @@ static void __init setup_node_to_cpumask_map(void)
}
/*
- * Set the cpu to node and mem mapping
+ * Set the cpu to node and mem mapping
*/
void numa_store_cpu_info(unsigned int cpu)
{
@@ -200,7 +200,7 @@ void __init setup_per_cpu_areas(void)
#endif
/**
- * numa_add_memblk - Set node id to memblk
+ * numa_add_memblk() - Set node id to memblk
* @nid: NUMA node ID of the new memblk
* @start: Start address of the new memblk
* @end: End address of the new memblk
@@ -223,7 +223,7 @@ int __init numa_add_memblk(int nid, u64 start, u64 end)
return ret;
}
-/**
+/*
* Initialize NODE_DATA for a node on the local memory
*/
static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
@@ -257,7 +257,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn;
}
-/**
+/*
* numa_free_distance
*
* The current table is freed.
@@ -277,10 +277,8 @@ void __init numa_free_distance(void)
numa_distance = NULL;
}
-/**
- *
+/*
* Create a new NUMA distance table.
- *
*/
static int __init numa_alloc_distance(void)
{
@@ -311,7 +309,7 @@ static int __init numa_alloc_distance(void)
}
/**
- * numa_set_distance - Set inter node NUMA distance from node to node.
+ * numa_set_distance() - Set inter node NUMA distance from node to node.
* @from: the 'from' node to set distance
* @to: the 'to' node to set distance
* @distance: NUMA distance
@@ -321,7 +319,6 @@ static int __init numa_alloc_distance(void)
*
* If @from or @to is higher than the highest known node or lower than zero
* or @distance doesn't make sense, the call is ignored.
- *
*/
void __init numa_set_distance(int from, int to, int distance)
{
@@ -347,7 +344,7 @@ void __init numa_set_distance(int from, int to, int distance)
numa_distance[from * numa_distance_cnt + to] = distance;
}
-/**
+/*
* Return NUMA distance @from to @to
*/
int __node_distance(int from, int to)
@@ -422,13 +419,15 @@ out_free_distance:
}
/**
- * dummy_numa_init - Fallback dummy NUMA init
+ * dummy_numa_init() - Fallback dummy NUMA init
*
* Used if there's no underlying NUMA architecture, NUMA initialization
* fails, or NUMA is disabled on the command line.
*
* Must online at least one node (node 0) and add memory blocks that cover all
* allowed memory. It is unlikely that this function fails.
+ *
+ * Return: 0 on success, -errno on failure.
*/
static int __init dummy_numa_init(void)
{
@@ -454,9 +453,9 @@ static int __init dummy_numa_init(void)
}
/**
- * arm64_numa_init - Initialize NUMA
+ * arm64_numa_init() - Initialize NUMA
*
- * Try each configured NUMA initialization method until one succeeds. The
+ * Try each configured NUMA initialization method until one succeeds. The
 * last fallback is a dummy single-node config encompassing the whole memory.
*/
void __init arm64_numa_init(void)
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index aa0817c9c4c3..fdd626d34274 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -65,24 +65,25 @@ ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
mrs x3, tpidrro_el0
mrs x4, contextidr_el1
- mrs x5, cpacr_el1
- mrs x6, tcr_el1
- mrs x7, vbar_el1
- mrs x8, mdscr_el1
- mrs x9, oslsr_el1
- mrs x10, sctlr_el1
+ mrs x5, osdlr_el1
+ mrs x6, cpacr_el1
+ mrs x7, tcr_el1
+ mrs x8, vbar_el1
+ mrs x9, mdscr_el1
+ mrs x10, oslsr_el1
+ mrs x11, sctlr_el1
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
- mrs x11, tpidr_el1
+ mrs x12, tpidr_el1
alternative_else
- mrs x11, tpidr_el2
+ mrs x12, tpidr_el2
alternative_endif
- mrs x12, sp_el0
+ mrs x13, sp_el0
stp x2, x3, [x0]
- stp x4, xzr, [x0, #16]
- stp x5, x6, [x0, #32]
- stp x7, x8, [x0, #48]
- stp x9, x10, [x0, #64]
- stp x11, x12, [x0, #80]
+ stp x4, x5, [x0, #16]
+ stp x6, x7, [x0, #32]
+ stp x8, x9, [x0, #48]
+ stp x10, x11, [x0, #64]
+ stp x12, x13, [x0, #80]
ret
ENDPROC(cpu_do_suspend)
@@ -105,8 +106,8 @@ ENTRY(cpu_do_resume)
msr cpacr_el1, x6
/* Don't change t0sz here, mask those bits when restoring */
- mrs x5, tcr_el1
- bfi x8, x5, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
+ mrs x7, tcr_el1
+ bfi x8, x7, TCR_T0SZ_OFFSET, TCR_TxSZ_WIDTH
msr tcr_el1, x8
msr vbar_el1, x9
@@ -130,6 +131,7 @@ alternative_endif
/*
* Restore oslsr_el1 by writing oslar_el1
*/
+ msr osdlr_el1, x5
ubfx x11, x11, #1, #1
msr oslar_el1, x11
reset_pmuserenr_el0 x0 // Disable PMU access from EL0
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 783de51a6c4e..76606e87233f 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -100,11 +100,9 @@
#define A64_STXR(sf, Rt, Rn, Rs) \
A64_LSX(sf, Rt, Rn, Rs, STORE_EX)
-/* Prefetch */
-#define A64_PRFM(Rn, type, target, policy) \
- aarch64_insn_gen_prefetch(Rn, AARCH64_INSN_PRFM_TYPE_##type, \
- AARCH64_INSN_PRFM_TARGET_##target, \
- AARCH64_INSN_PRFM_POLICY_##policy)
+/* LSE atomics */
+#define A64_STADD(sf, Rn, Rs) \
+ aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf))
/* Add/subtract (immediate) */
#define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index aaddc0217e73..df845cee438e 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -365,7 +365,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
BPF_CLASS(code) == BPF_JMP;
const bool isdw = BPF_SIZE(code) == BPF_DW;
- u8 jmp_cond;
+ u8 jmp_cond, reg;
s32 jmp_offset;
#define check_imm(bits, imm) do { \
@@ -756,19 +756,28 @@ emit_cond_jmp:
break;
}
break;
+
/* STX XADD: lock *(u32 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_W:
/* STX XADD: lock *(u64 *)(dst + off) += src */
case BPF_STX | BPF_XADD | BPF_DW:
- emit_a64_mov_i(1, tmp, off, ctx);
- emit(A64_ADD(1, tmp, tmp, dst), ctx);
- emit(A64_PRFM(tmp, PST, L1, STRM), ctx);
- emit(A64_LDXR(isdw, tmp2, tmp), ctx);
- emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
- emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
- jmp_offset = -3;
- check_imm19(jmp_offset);
- emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+ if (!off) {
+ reg = dst;
+ } else {
+ emit_a64_mov_i(1, tmp, off, ctx);
+ emit(A64_ADD(1, tmp, tmp, dst), ctx);
+ reg = tmp;
+ }
+ if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS)) {
+ emit(A64_STADD(isdw, reg, src), ctx);
+ } else {
+ emit(A64_LDXR(isdw, tmp2, reg), ctx);
+ emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
+ emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
+ jmp_offset = -3;
+ check_imm19(jmp_offset);
+ emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
+ }
break;
default:
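
Roughly, the two code sequences the JIT now chooses between for BPF_XADD look like this (register names illustrative):

	/*
	 * lock *(u64 *)(dst + off) += src
	 *
	 * With ARM64_HAS_LSE_ATOMICS:		Without (LL/SC fallback):
	 *	stadd	x_src, [x_reg]		1:	ldxr	x_tmp2, [x_reg]
	 *						add	x_tmp2, x_tmp2, x_src
	 *						stxr	w_tmp3, x_tmp2, [x_reg]
	 *						cbnz	w_tmp3, 1b
	 */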
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index e5cd3c5f8399..eeb0471268a0 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -20,6 +20,7 @@ config C6X
select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA
select ARCH_NO_COHERENT_DMA_MMAP
+ select MMU_GATHER_NO_RANGE if MMU
config MMU
def_bool n
@@ -27,9 +28,6 @@ config MMU
config FPU
def_bool n
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_CALIBRATE_DELAY
def_bool y
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 249c9f6f26dc..6b168d32fbff 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += kvm_para.h
generic-y += local.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += mmu.h
generic-y += mmu_context.h
generic-y += pci.h
diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h
index 15ba8599858e..5bcdcb651b19 100644
--- a/arch/c6x/include/asm/syscall.h
+++ b/arch/c6x/include/asm/syscall.h
@@ -11,6 +11,7 @@
#ifndef __ASM_C6X_SYSCALL_H
#define __ASM_C6X_SYSCALL_H
+#include <uapi/linux/audit.h>
#include <linux/err.h>
#include <linux/sched.h>
@@ -69,4 +70,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->a9 = *args;
}
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+ ? AUDIT_ARCH_C6XBE : AUDIT_ARCH_C6X;
+}
+
#endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/c6x/include/asm/tlb.h b/arch/c6x/include/asm/tlb.h
index 34525dea1356..240ba0febb57 100644
--- a/arch/c6x/include/asm/tlb.h
+++ b/arch/c6x/include/asm/tlb.h
@@ -2,8 +2,6 @@
#ifndef _ASM_C6X_TLB_H
#define _ASM_C6X_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* _ASM_C6X_TLB_H */
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 725a115759c9..ce0799077f3b 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -1,6 +1,7 @@
config CSKY
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_USE_BUILTIN_BSWAP
@@ -29,15 +30,20 @@ config CSKY
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
select HAVE_KERNEL_LZMA
select HAVE_PERF_EVENTS
- select HAVE_C_RECORDMCOUNT
+ select HAVE_PERF_REGS
+ select HAVE_PERF_USER_STACK_DUMP
select HAVE_DMA_API_DEBUG
select HAVE_DMA_CONTIGUOUS
+ select HAVE_SYSCALL_TRACEPOINTS
select MAY_HAVE_SPARSE_IRQ
select MODULES_USE_ELF_RELA if MODULES
select OF
@@ -92,9 +98,6 @@ config GENERIC_HWEIGHT
config MMU
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config STACKTRACE_SUPPORT
def_bool y
diff --git a/arch/csky/Makefile b/arch/csky/Makefile
index 3607a6e8f66c..6b87f6c22ad6 100644
--- a/arch/csky/Makefile
+++ b/arch/csky/Makefile
@@ -36,7 +36,7 @@ endif
ifneq ($(CSKYABI),)
MCPU_STR = $(CPUTYPE)$(FPUEXT)$(VDSPEXT)$(TEEEXT)
-KBUILD_CFLAGS += -mcpu=$(MCPU_STR)
+KBUILD_CFLAGS += -mcpu=$(CPUTYPE) -Wa,-mcpu=$(MCPU_STR)
KBUILD_CFLAGS += -DCSKYCPU_DEF_NAME=\"$(MCPU_STR)\"
KBUILD_CFLAGS += -msoft-float -mdiv
KBUILD_CFLAGS += -fno-tree-vectorize
diff --git a/arch/csky/abiv1/inc/abi/ckmmu.h b/arch/csky/abiv1/inc/abi/ckmmu.h
index 3a002017bebe..81f37715c0d2 100644
--- a/arch/csky/abiv1/inc/abi/ckmmu.h
+++ b/arch/csky/abiv1/inc/abi/ckmmu.h
@@ -40,6 +40,26 @@ static inline void write_mmu_entryhi(int value)
cpwcr("cpcr4", value);
}
+static inline unsigned long read_mmu_msa0(void)
+{
+ return cprcr("cpcr30");
+}
+
+static inline void write_mmu_msa0(unsigned long value)
+{
+ cpwcr("cpcr30", value);
+}
+
+static inline unsigned long read_mmu_msa1(void)
+{
+ return cprcr("cpcr31");
+}
+
+static inline void write_mmu_msa1(unsigned long value)
+{
+ cpwcr("cpcr31", value);
+}
+
/*
* TLB operations.
*/
@@ -65,11 +85,11 @@ static inline void tlb_invalid_indexed(void)
static inline void setup_pgd(unsigned long pgd, bool kernel)
{
- cpwcr("cpcr29", pgd);
+ cpwcr("cpcr29", pgd | BIT(0));
}
static inline unsigned long get_pgd(void)
{
- return cprcr("cpcr29");
+ return cprcr("cpcr29") & ~BIT(0);
}
#endif /* __ASM_CSKY_CKMMUV1_H */
diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h
index 3f3faab3d747..7ab78bd0f3b1 100644
--- a/arch/csky/abiv1/inc/abi/entry.h
+++ b/arch/csky/abiv1/inc/abi/entry.h
@@ -16,9 +16,6 @@
#define LSAVE_A4 40
#define LSAVE_A5 44
-#define EPC_INCREASE 2
-#define EPC_KEEP 0
-
.macro USPTOKSP
mtcr sp, ss1
mfcr sp, ss0
@@ -29,10 +26,6 @@
mfcr sp, ss1
.endm
-.macro INCTRAP rx
- addi \rx, EPC_INCREASE
-.endm
-
.macro SAVE_ALL epc_inc
mtcr r13, ss2
mfcr r13, epsr
@@ -150,11 +143,35 @@
cpwcr \rx, cpcr8
.endm
-.macro SETUP_MMU rx
- lrw \rx, PHYS_OFFSET | 0xe
- cpwcr \rx, cpcr30
- lrw \rx, (PHYS_OFFSET + 0x20000000) | 0xe
- cpwcr \rx, cpcr31
+.macro SETUP_MMU
+ /* Init psr and enable ee */
+ lrw r6, DEFAULT_PSR_VALUE
+ mtcr r6, psr
+ psrset ee
+
+ /* Select MMU as co-processor */
+ cpseti cp15
+
+ /*
+ * cpcr30 format:
+ * 31 - 29 | 28 - 4 | 3 | 2 | 1 | 0
+ * BA Reserved C D V
+ */
+ cprcr r6, cpcr30
+ lsri r6, 28
+ lsli r6, 28
+ addi r6, 0xe
+ cpwcr r6, cpcr30
+
+ lsri r6, 28
+ addi r6, 2
+ lsli r6, 28
+ addi r6, 0xe
+ cpwcr r6, cpcr31
.endm
+.macro ANDI_R3 rx, imm
+ lsri \rx, 3
+ andi \rx, (\imm >> 3)
+.endm
#endif /* __ASM_CSKY_ENTRY_H */
diff --git a/arch/csky/abiv1/inc/abi/regdef.h b/arch/csky/abiv1/inc/abi/regdef.h
index 876689291b71..104707fbdcc1 100644
--- a/arch/csky/abiv1/inc/abi/regdef.h
+++ b/arch/csky/abiv1/inc/abi/regdef.h
@@ -5,9 +5,8 @@
#define __ASM_CSKY_REGDEF_H
#define syscallid r1
-#define r11_sig r11
-
#define regs_syscallid(regs) regs->regs[9]
+#define regs_fp(regs) regs->regs[2]
/*
* PSR format:
@@ -23,4 +22,6 @@
#define SYSTRACE_SAVENUM 2
+#define TRAP0_SIZE 2
+
#endif /* __ASM_CSKY_REGDEF_H */
diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c
index d22c95ffc74d..5bb887b275e1 100644
--- a/arch/csky/abiv2/cacheflush.c
+++ b/arch/csky/abiv2/cacheflush.c
@@ -34,10 +34,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
{
unsigned long addr, pfn;
struct page *page;
- void *va;
-
- if (!(vma->vm_flags & VM_EXEC))
- return;
pfn = pte_pfn(*pte);
if (unlikely(!pfn_valid(pfn)))
@@ -47,14 +43,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
if (page == ZERO_PAGE(0))
return;
- va = page_address(page);
- addr = (unsigned long) va;
-
- if (va == NULL && PageHighMem(page))
- addr = (unsigned long) kmap_atomic(page);
+ addr = (unsigned long) kmap_atomic(page);
cache_wbinv_range(addr, addr + PAGE_SIZE);
- if (va == NULL && PageHighMem(page))
- kunmap_atomic((void *) addr);
+ kunmap_atomic((void *) addr);
}
diff --git a/arch/csky/abiv2/inc/abi/ckmmu.h b/arch/csky/abiv2/inc/abi/ckmmu.h
index 97230ad9427c..e4480e6bc3b3 100644
--- a/arch/csky/abiv2/inc/abi/ckmmu.h
+++ b/arch/csky/abiv2/inc/abi/ckmmu.h
@@ -42,6 +42,26 @@ static inline void write_mmu_entryhi(int value)
mtcr("cr<4, 15>", value);
}
+static inline unsigned long read_mmu_msa0(void)
+{
+ return mfcr("cr<30, 15>");
+}
+
+static inline void write_mmu_msa0(unsigned long value)
+{
+ mtcr("cr<30, 15>", value);
+}
+
+static inline unsigned long read_mmu_msa1(void)
+{
+ return mfcr("cr<31, 15>");
+}
+
+static inline void write_mmu_msa1(unsigned long value)
+{
+ mtcr("cr<31, 15>", value);
+}
+
/*
* TLB operations.
*/
@@ -70,18 +90,16 @@ static inline void tlb_invalid_indexed(void)
mtcr("cr<8, 15>", 0x02000000);
}
-/* setup hardrefil pgd */
-static inline unsigned long get_pgd(void)
-{
- return mfcr("cr<29, 15>");
-}
-
static inline void setup_pgd(unsigned long pgd, bool kernel)
{
if (kernel)
- mtcr("cr<28, 15>", pgd);
+ mtcr("cr<28, 15>", pgd | BIT(0));
else
- mtcr("cr<29, 15>", pgd);
+ mtcr("cr<29, 15>", pgd | BIT(0));
}
+static inline unsigned long get_pgd(void)
+{
+ return mfcr("cr<29, 15>") & ~BIT(0);
+}
#endif /* __ASM_CSKY_CKMMUV2_H */
diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h
index edc5cc04c4de..9897a16b45e5 100644
--- a/arch/csky/abiv2/inc/abi/entry.h
+++ b/arch/csky/abiv2/inc/abi/entry.h
@@ -14,18 +14,11 @@
#define LSAVE_A2 32
#define LSAVE_A3 36
-#define EPC_INCREASE 4
-#define EPC_KEEP 0
-
#define KSPTOUSP
#define USPTOKSP
#define usp cr<14, 1>
-.macro INCTRAP rx
- addi \rx, EPC_INCREASE
-.endm
-
.macro SAVE_ALL epc_inc
subi sp, 152
stw tls, (sp, 0)
@@ -169,10 +162,80 @@
mtcr \rx, cr<8, 15>
.endm
-.macro SETUP_MMU rx
- lrw \rx, PHYS_OFFSET | 0xe
- mtcr \rx, cr<30, 15>
- lrw \rx, (PHYS_OFFSET + 0x20000000) | 0xe
- mtcr \rx, cr<31, 15>
+.macro SETUP_MMU
+ /* Init psr and enable ee */
+ lrw r6, DEFAULT_PSR_VALUE
+ mtcr r6, psr
+ psrset ee
+
+	/* Invalidate I/Dcache, BTB and BHT */
+ movi r6, 7
+ lsli r6, 16
+ addi r6, (1<<4) | 3
+ mtcr r6, cr17
+
+	/* Invalidate all TLB entries */
+ bgeni r6, 26
+ mtcr r6, cr<8, 15> /* Set MCIR */
+
+ /* Check MMU on/off */
+ mfcr r6, cr18
+ btsti r6, 0
+ bt 1f
+
+ /* MMU off: setup mapping tlb entry */
+ movi r6, 0
+ mtcr r6, cr<6, 15> /* Set MPR with 4K page size */
+
+ grs r6, 1f /* Get current pa by PC */
+ bmaski r7, (PAGE_SHIFT + 1) /* r7 = 0x1fff */
+ andn r6, r7
+ mtcr r6, cr<4, 15> /* Set MEH */
+
+ mov r8, r6
+ movi r7, 0x00000006
+ or r8, r7
+ mtcr r8, cr<2, 15> /* Set MEL0 */
+ movi r7, 0x00001006
+ or r8, r7
+ mtcr r8, cr<3, 15> /* Set MEL1 */
+
+ bgeni r8, 28
+ mtcr r8, cr<8, 15> /* Set MCIR to write TLB */
+
+ br 2f
+1:
+ /*
+	 * MMU on: use the original MSA value from the bootloader
+ *
+ * cr<30/31, 15> MSA register format:
+ * 31 - 29 | 28 - 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
+ * BA Reserved SH WA B SO SEC C D V
+ */
+ mfcr r6, cr<30, 15> /* Get MSA0 */
+2:
+ lsri r6, 28
+ lsli r6, 28
+ addi r6, 0x1ce
+ mtcr r6, cr<30, 15> /* Set MSA0 */
+
+ lsri r6, 28
+ addi r6, 2
+ lsli r6, 28
+ addi r6, 0x1ce
+ mtcr r6, cr<31, 15> /* Set MSA1 */
+
+ /* enable MMU */
+ mfcr r6, cr18
+ bseti r6, 0
+ mtcr r6, cr18
+
+ jmpi 3f /* jump to va */
+3:
+.endm
+
+.macro ANDI_R3 rx, imm
+ lsri \rx, 3
+ andi \rx, (\imm >> 3)
.endm
#endif /* __ASM_CSKY_ENTRY_H */
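
A worked example of the MSA0/MSA1 computation in SETUP_MMU above, assuming the bootloader left MSA0.BA pointing at physical 0x80000000 (values illustrative):

	unsigned long msa0 = 0x80000000UL;	/* BA left by the bootloader   */

	msa0 = (msa0 >> 28) << 28;		/* keep BA, clear attribute bits */
	msa0 += 0x1ce;				/* set cacheable/valid attributes */
	/* msa0 == 0x800001ce */

	unsigned long msa1 = (((msa0 >> 28) + 2) << 28) + 0x1ce;
	/* msa1 == 0xa00001ce: the next 512MB segment, same attributes */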
diff --git a/arch/csky/abiv2/inc/abi/regdef.h b/arch/csky/abiv2/inc/abi/regdef.h
index c72abb781bdc..d7328bbc1ce7 100644
--- a/arch/csky/abiv2/inc/abi/regdef.h
+++ b/arch/csky/abiv2/inc/abi/regdef.h
@@ -5,9 +5,8 @@
#define __ASM_CSKY_REGDEF_H
#define syscallid r7
-#define r11_sig r11
-
#define regs_syscallid(regs) regs->regs[3]
+#define regs_fp(regs) regs->regs[4]
/*
* PSR format:
@@ -23,4 +22,6 @@
#define SYSTRACE_SAVENUM 5
+#define TRAP0_SIZE 4
+
#endif /* __ASM_CSKY_REGDEF_H */
diff --git a/arch/csky/abiv2/mcount.S b/arch/csky/abiv2/mcount.S
index c633379956f5..326402e65f9e 100644
--- a/arch/csky/abiv2/mcount.S
+++ b/arch/csky/abiv2/mcount.S
@@ -61,10 +61,17 @@
addi sp, 16
.endm
+.macro nop32_stub
+ nop32
+ nop32
+ nop32
+.endm
+
ENTRY(ftrace_stub)
jmp lr
END(ftrace_stub)
+#ifndef CONFIG_DYNAMIC_FTRACE
ENTRY(_mcount)
mcount_enter
@@ -76,7 +83,7 @@ ENTRY(_mcount)
bf skip_ftrace
mov a0, lr
- subi a0, MCOUNT_INSN_SIZE
+ subi a0, 4
ldw a1, (sp, 24)
jsr r26
@@ -101,13 +108,41 @@ skip_ftrace:
mcount_exit
#endif
END(_mcount)
+#else /* CONFIG_DYNAMIC_FTRACE */
+ENTRY(_mcount)
+ mov t1, lr
+ ldw lr, (sp, 0)
+ addi sp, 4
+ jmp t1
+ENDPROC(_mcount)
+
+ENTRY(ftrace_caller)
+ mcount_enter
+
+ ldw a0, (sp, 16)
+ subi a0, 4
+ ldw a1, (sp, 24)
+
+ nop
+GLOBAL(ftrace_call)
+ nop32_stub
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ nop
+GLOBAL(ftrace_graph_call)
+ nop32_stub
+#endif
+
+ mcount_exit
+ENDPROC(ftrace_caller)
+#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
ENTRY(ftrace_graph_caller)
mov a0, sp
addi a0, 24
ldw a1, (sp, 16)
- subi a1, MCOUNT_INSN_SIZE
+ subi a1, 4
mov a2, r8
lrw r26, prepare_ftrace_return
jsr r26
diff --git a/arch/csky/abiv2/memmove.S b/arch/csky/abiv2/memmove.S
index b0c42ecf1889..5721e73ad3d8 100644
--- a/arch/csky/abiv2/memmove.S
+++ b/arch/csky/abiv2/memmove.S
@@ -35,11 +35,7 @@ ENTRY(memmove)
.L_len_larger_16bytes:
subi r1, 16
subi r0, 16
-#if defined(__CSKY_VDSPV2__)
- vldx.8 vr0, (r1), r19
- PRE_BNEZAD (r18)
- vstx.8 vr0, (r0), r19
-#elif defined(__CK860__)
+#if defined(__CK860__)
ldw r3, (r1, 12)
stw r3, (r0, 12)
ldw r3, (r1, 8)
diff --git a/arch/csky/include/asm/Kbuild b/arch/csky/include/asm/Kbuild
index 2a0abe8f2a35..a9b63efef416 100644
--- a/arch/csky/include/asm/Kbuild
+++ b/arch/csky/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += dma-mapping.h
generic-y += emergency-restart.h
generic-y += exec.h
generic-y += fb.h
-generic-y += ftrace.h
generic-y += futex.h
generic-y += gpio.h
generic-y += hardirq.h
@@ -28,6 +27,7 @@ generic-y += linkage.h
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += module.h
generic-y += mutex.h
generic-y += pci.h
diff --git a/arch/csky/include/asm/ftrace.h b/arch/csky/include/asm/ftrace.h
index 7547c45312a8..ba35d93ecda2 100644
--- a/arch/csky/include/asm/ftrace.h
+++ b/arch/csky/include/asm/ftrace.h
@@ -4,10 +4,26 @@
#ifndef __ASM_CSKY_FTRACE_H
#define __ASM_CSKY_FTRACE_H
-#define MCOUNT_INSN_SIZE 4
+#define MCOUNT_INSN_SIZE 14
#define HAVE_FUNCTION_GRAPH_FP_TEST
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+#define MCOUNT_ADDR ((unsigned long)_mcount)
+
+#ifndef __ASSEMBLY__
+
+extern void _mcount(unsigned long);
+
+extern void ftrace_graph_call(void);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+ return addr;
+}
+
+struct dyn_arch_ftrace {
+};
+#endif /* !__ASSEMBLY__ */
#endif /* __ASM_CSKY_FTRACE_H */
diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h
index b2905c0485a7..734db3a122e1 100644
--- a/arch/csky/include/asm/mmu_context.h
+++ b/arch/csky/include/asm/mmu_context.h
@@ -14,23 +14,10 @@
#include <linux/sched.h>
#include <abi/ckmmu.h>
-static inline void tlbmiss_handler_setup_pgd(unsigned long pgd, bool kernel)
-{
- pgd -= PAGE_OFFSET;
- pgd += PHYS_OFFSET;
- pgd |= 1;
- setup_pgd(pgd, kernel);
-}
-
#define TLBMISS_HANDLER_SETUP_PGD(pgd) \
- tlbmiss_handler_setup_pgd((unsigned long)pgd, 0)
+ setup_pgd(__pa(pgd), false)
#define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \
- tlbmiss_handler_setup_pgd((unsigned long)pgd, 1)
-
-static inline unsigned long tlb_get_pgd(void)
-{
- return ((get_pgd() - PHYS_OFFSET) & ~1) + PAGE_OFFSET;
-}
+ setup_pgd(__pa(pgd), true)
#define cpu_context(cpu, mm) ((mm)->context.asid[cpu])
#define cpu_asid(cpu, mm) (cpu_context((cpu), (mm)) & ASID_MASK)
diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h
index 73cf2bd66a13..9738eacefdc7 100644
--- a/arch/csky/include/asm/page.h
+++ b/arch/csky/include/asm/page.h
@@ -8,7 +8,7 @@
#include <linux/const.h>
/*
- * PAGE_SHIFT determines the page size
+ * PAGE_SHIFT determines the page size: 4KB
*/
#define PAGE_SHIFT 12
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
@@ -17,12 +17,18 @@
#define THREAD_MASK (~(THREAD_SIZE - 1))
#define THREAD_SHIFT (PAGE_SHIFT + 1)
+
/*
- * NOTE: virtual isn't really correct, actually it should be the offset into the
- * memory node, but we have no highmem, so that works for now.
- * TODO: implement (fast) pfn<->pgdat_idx conversion functions, this makes lots
- * of the shifts unnecessary.
+ * For C-SKY the "user-space : kernel-space" split is fixed at 2GB : 2GB by
+ * hardware, and two segment registers (MSA0 + MSA1) each map a 512MB
+ * physical address region. We use them to map the kernel's 1GB direct-map
+ * address area; memory beyond 1GB is reached through highmem.
*/
+#define PAGE_OFFSET 0x80000000
+#define SSEG_SIZE 0x20000000
+#define LOWMEM_LIMIT (SSEG_SIZE * 2)
+
+#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (SSEG_SIZE - 1))
#ifndef __ASSEMBLY__
@@ -50,9 +56,6 @@ struct page;
struct vm_area_struct;
-/*
- * These are used to make use of C type-checking..
- */
typedef struct { unsigned long pte_low; } pte_t;
#define pte_val(x) ((x).pte_low)
@@ -69,18 +72,13 @@ typedef struct page *pgtable_t;
#define __pgd(x) ((pgd_t) { (x) })
#define __pgprot(x) ((pgprot_t) { (x) })
-#endif /* !__ASSEMBLY__ */
+extern unsigned long va_pa_offset;
-#define PHYS_OFFSET (CONFIG_RAM_BASE & ~(LOWMEM_LIMIT - 1))
-#define PHYS_OFFSET_OFFSET (CONFIG_RAM_BASE & (LOWMEM_LIMIT - 1))
-#define ARCH_PFN_OFFSET PFN_DOWN(CONFIG_RAM_BASE)
+#define ARCH_PFN_OFFSET PFN_DOWN(va_pa_offset + PHYS_OFFSET_OFFSET)
-#define PAGE_OFFSET 0x80000000
-#define LOWMEM_LIMIT 0x40000000
+#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + va_pa_offset)
+#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - va_pa_offset))
-#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET + PHYS_OFFSET)
-#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET - \
- PHYS_OFFSET))
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#define MAP_NR(x) PFN_DOWN((unsigned long)(x) - PAGE_OFFSET - \
@@ -90,15 +88,10 @@ typedef struct page *pgtable_t;
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | VM_EXEC | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-/*
- * main RAM and kernel working space are coincident at 0x80000000, but to make
- * life more interesting, there's also an uncached virtual shadow at 0xb0000000
- * - these mappings are fixed in the MMU
- */
-
#define pfn_to_kaddr(x) __va(PFN_PHYS(x))
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
+#endif /* !__ASSEMBLY__ */
#endif /* __ASM_CSKY_PAGE_H */
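The net effect of the page.h rework is that physical/virtual translation now hinges on the run-time va_pa_offset (filled in from MSA0 during setup later in this series) rather than a compile-time PHYS_OFFSET. A minimal sketch of the resulting arithmetic, using an illustrative offset of 0x20000000 (the real value comes from read_mmu_msa0() at boot):

    /* Illustrative only: va_pa_offset is set from read_mmu_msa0() at boot. */
    unsigned long va_pa_offset = 0x20000000;
    #define PAGE_OFFSET 0x80000000UL

    unsigned long demo_pa(unsigned long va)          /* mirrors __pa() */
    {
        return va - PAGE_OFFSET + va_pa_offset;
    }

    void *demo_va(unsigned long pa)                  /* mirrors __va() */
    {
        return (void *)(pa + PAGE_OFFSET - va_pa_offset);
    }

    /* demo_pa(0x80001000) == 0x20001000, demo_va(0x20001000) == (void *)0x80001000 */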
diff --git a/arch/csky/include/asm/perf_event.h b/arch/csky/include/asm/perf_event.h
index ea8193122294..572093e11001 100644
--- a/arch/csky/include/asm/perf_event.h
+++ b/arch/csky/include/asm/perf_event.h
@@ -4,4 +4,12 @@
#ifndef __ASM_CSKY_PERF_EVENT_H
#define __ASM_CSKY_PERF_EVENT_H
+#include <abi/regdef.h>
+
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->pc = (__ip); \
+ regs_fp(regs) = (unsigned long) __builtin_frame_address(0); \
+ asm volatile("mov %0, sp\n":"=r"((regs)->usp)); \
+}
+
#endif /* __ASM_PERF_EVENT_ELF_H */
diff --git a/arch/csky/include/asm/ptrace.h b/arch/csky/include/asm/ptrace.h
new file mode 100644
index 000000000000..d0aba7b32417
--- /dev/null
+++ b/arch/csky/include/asm/ptrace.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef __ASM_CSKY_PTRACE_H
+#define __ASM_CSKY_PTRACE_H
+
+#include <uapi/asm/ptrace.h>
+#include <asm/traps.h>
+#include <linux/types.h>
+
+#ifndef __ASSEMBLY__
+
+#define PS_S 0x80000000 /* Supervisor Mode */
+
+#define arch_has_single_step() (1)
+#define current_pt_regs() \
+({ (struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1; })
+
+#define user_stack_pointer(regs) ((regs)->usp)
+
+#define user_mode(regs) (!((regs)->sr & PS_S))
+#define instruction_pointer(regs) ((regs)->pc)
+#define profile_pc(regs) instruction_pointer(regs)
+
+static inline bool in_syscall(struct pt_regs const *regs)
+{
+ return ((regs->sr >> 16) & 0xff) == VEC_TRAP0;
+}
+
+static inline void forget_syscall(struct pt_regs *regs)
+{
+ regs->sr &= ~(0xff << 16);
+}
+
+static inline unsigned long regs_return_value(struct pt_regs *regs)
+{
+ return regs->a0;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif /* __ASM_CSKY_PTRACE_H */
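in_syscall() and forget_syscall() read and clear the trap vector field in the saved PSR, which is what lets signal delivery decide whether to rewind a trap for a syscall restart. A condensed sketch of that usage, following the signal.c changes later in this patch (only the -ERESTARTNOINTR case shown):

    static void maybe_restart_syscall(struct pt_regs *regs)
    {
        if (!in_syscall(regs))
            return;

        forget_syscall(regs);               /* avoid restarting twice */

        if (regs->a0 == -ERESTARTNOINTR) {
            regs->a0 = regs->orig_a0;       /* restore the clobbered arg0 */
            regs->pc -= TRAP0_SIZE;         /* re-execute the trap insn */
        }
    }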
diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h
index bda0a446c63e..f624fa3bbc22 100644
--- a/arch/csky/include/asm/syscall.h
+++ b/arch/csky/include/asm/syscall.h
@@ -8,6 +8,8 @@
#include <abi/regdef.h>
#include <uapi/linux/audit.h>
+extern void *sys_call_table[];
+
static inline int
syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
{
@@ -15,6 +17,13 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
}
static inline void
+syscall_set_nr(struct task_struct *task, struct pt_regs *regs,
+ int sysno)
+{
+ regs_syscallid(regs) = sysno;
+}
+
+static inline void
syscall_rollback(struct task_struct *task, struct pt_regs *regs)
{
regs->a0 = regs->orig_a0;
@@ -60,7 +69,7 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
}
static inline int
-syscall_get_arch(void)
+syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_CSKY;
}
diff --git a/arch/csky/include/asm/thread_info.h b/arch/csky/include/asm/thread_info.h
index 0e9d035d712b..0b546a55a8bf 100644
--- a/arch/csky/include/asm/thread_info.h
+++ b/arch/csky/include/asm/thread_info.h
@@ -51,29 +51,26 @@ static inline struct thread_info *current_thread_info(void)
#endif /* !__ASSEMBLY__ */
-/* entry.S relies on these definitions!
- * bits 0-5 are tested at every exception exit
- */
#define TIF_SIGPENDING 0 /* signal pending */
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_SYSCALL_TRACE 5 /* syscall trace active */
-#define TIF_DELAYED_TRACE 14 /* single step a syscall */
+#define TIF_SYSCALL_TRACE 3 /* syscall trace active */
+#define TIF_SYSCALL_TRACEPOINT 4 /* syscall tracepoint instrumentation */
+#define TIF_SYSCALL_AUDIT 5 /* syscall auditing */
#define TIF_POLLING_NRFLAG 16 /* poll_idle() is TIF_NEED_RESCHED */
#define TIF_MEMDIE 18 /* is terminating due to OOM killer */
-#define TIF_FREEZE 19 /* thread is freezing for suspend */
#define TIF_RESTORE_SIGMASK 20 /* restore signal mask in do_signal() */
#define TIF_SECCOMP 21 /* secure computing */
-#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
-#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
-#define _TIF_DELAYED_TRACE (1 << TIF_DELAYED_TRACE)
-#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
+#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
+#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
+#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
#define _TIF_MEMDIE (1 << TIF_MEMDIE)
-#define _TIF_FREEZE (1 << TIF_FREEZE)
-#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
-#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_RESTORE_SIGMASK (1 << TIF_RESTORE_SIGMASK)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#endif /* _ASM_CSKY_THREAD_INFO_H */
diff --git a/arch/csky/include/asm/unistd.h b/arch/csky/include/asm/unistd.h
index 284487477a61..da7a18295615 100644
--- a/arch/csky/include/asm/unistd.h
+++ b/arch/csky/include/asm/unistd.h
@@ -2,3 +2,5 @@
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#include <uapi/asm/unistd.h>
+
+#define NR_syscalls (__NR_syscalls)
diff --git a/arch/csky/include/uapi/asm/perf_regs.h b/arch/csky/include/uapi/asm/perf_regs.h
new file mode 100644
index 000000000000..ee323d818592
--- /dev/null
+++ b/arch/csky/include/uapi/asm/perf_regs.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#ifndef _ASM_CSKY_PERF_REGS_H
+#define _ASM_CSKY_PERF_REGS_H
+
+/* Index of struct pt_regs */
+enum perf_event_csky_regs {
+ PERF_REG_CSKY_TLS,
+ PERF_REG_CSKY_LR,
+ PERF_REG_CSKY_PC,
+ PERF_REG_CSKY_SR,
+ PERF_REG_CSKY_SP,
+ PERF_REG_CSKY_ORIG_A0,
+ PERF_REG_CSKY_A0,
+ PERF_REG_CSKY_A1,
+ PERF_REG_CSKY_A2,
+ PERF_REG_CSKY_A3,
+ PERF_REG_CSKY_REGS0,
+ PERF_REG_CSKY_REGS1,
+ PERF_REG_CSKY_REGS2,
+ PERF_REG_CSKY_REGS3,
+ PERF_REG_CSKY_REGS4,
+ PERF_REG_CSKY_REGS5,
+ PERF_REG_CSKY_REGS6,
+ PERF_REG_CSKY_REGS7,
+ PERF_REG_CSKY_REGS8,
+ PERF_REG_CSKY_REGS9,
+#if defined(__CSKYABIV2__)
+ PERF_REG_CSKY_EXREGS0,
+ PERF_REG_CSKY_EXREGS1,
+ PERF_REG_CSKY_EXREGS2,
+ PERF_REG_CSKY_EXREGS3,
+ PERF_REG_CSKY_EXREGS4,
+ PERF_REG_CSKY_EXREGS5,
+ PERF_REG_CSKY_EXREGS6,
+ PERF_REG_CSKY_EXREGS7,
+ PERF_REG_CSKY_EXREGS8,
+ PERF_REG_CSKY_EXREGS9,
+ PERF_REG_CSKY_EXREGS10,
+ PERF_REG_CSKY_EXREGS11,
+ PERF_REG_CSKY_EXREGS12,
+ PERF_REG_CSKY_EXREGS13,
+ PERF_REG_CSKY_EXREGS14,
+ PERF_REG_CSKY_HI,
+ PERF_REG_CSKY_LO,
+ PERF_REG_CSKY_DCSR,
+#endif
+ PERF_REG_CSKY_MAX,
+};
+#endif /* _ASM_CSKY_PERF_REGS_H */
diff --git a/arch/csky/include/uapi/asm/ptrace.h b/arch/csky/include/uapi/asm/ptrace.h
index a4eaa8ddf0b1..4e248d5b86ef 100644
--- a/arch/csky/include/uapi/asm/ptrace.h
+++ b/arch/csky/include/uapi/asm/ptrace.h
@@ -48,20 +48,5 @@ struct user_fp {
unsigned long reserved;
};
-#ifdef __KERNEL__
-
-#define PS_S 0x80000000 /* Supervisor Mode */
-
-#define arch_has_single_step() (1)
-#define current_pt_regs() \
-({ (struct pt_regs *)((char *)current_thread_info() + THREAD_SIZE) - 1; })
-
-#define user_stack_pointer(regs) ((regs)->usp)
-
-#define user_mode(regs) (!((regs)->sr & PS_S))
-#define instruction_pointer(regs) ((regs)->pc)
-#define profile_pc(regs) instruction_pointer(regs)
-
-#endif /* __KERNEL__ */
#endif /* __ASSEMBLY__ */
#endif /* _CSKY_PTRACE_H */
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index 484e6d3a3647..1624b04bffb5 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -9,6 +9,8 @@ obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
obj-$(CONFIG_CSKY_PMU_V1) += perf_event.o
+obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
+obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S
index d2357c8f85bd..5b84f11485ae 100644
--- a/arch/csky/kernel/atomic.S
+++ b/arch/csky/kernel/atomic.S
@@ -12,11 +12,10 @@
 * If *ptr != oldval, return 1;
 * else set *ptr = newval and return 0.
*/
-#ifdef CONFIG_CPU_HAS_LDSTEX
ENTRY(csky_cmpxchg)
USPTOKSP
mfcr a3, epc
- INCTRAP a3
+ addi a3, TRAP0_SIZE
subi sp, 8
stw a3, (sp, 0)
@@ -24,6 +23,7 @@ ENTRY(csky_cmpxchg)
stw a3, (sp, 4)
psrset ee
+#ifdef CONFIG_CPU_HAS_LDSTEX
1:
ldex a3, (a2)
cmpne a0, a3
@@ -33,27 +33,7 @@ ENTRY(csky_cmpxchg)
bez a3, 1b
2:
sync.is
- mvc a0
- ldw a3, (sp, 0)
- mtcr a3, epc
- ldw a3, (sp, 4)
- mtcr a3, epsr
- addi sp, 8
- KSPTOUSP
- rte
-END(csky_cmpxchg)
#else
-ENTRY(csky_cmpxchg)
- USPTOKSP
- mfcr a3, epc
- INCTRAP a3
-
- subi sp, 8
- stw a3, (sp, 0)
- mfcr a3, epsr
- stw a3, (sp, 4)
-
- psrset ee
1:
ldw a3, (a2)
cmpne a0, a3
@@ -61,6 +41,7 @@ ENTRY(csky_cmpxchg)
2:
stw a1, (a2)
3:
+#endif
mvc a0
ldw a3, (sp, 0)
mtcr a3, epc
@@ -71,6 +52,7 @@ ENTRY(csky_cmpxchg)
rte
END(csky_cmpxchg)
+#ifndef CONFIG_CPU_HAS_LDSTEX
/*
* Called from tlbmodified exception
*/
diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S
index 5137ed9062bd..a7e84ccccbd8 100644
--- a/arch/csky/kernel/entry.S
+++ b/arch/csky/kernel/entry.S
@@ -40,7 +40,8 @@ ENTRY(csky_\name)
WR_MCIR a2
#endif
bclri r6, 0
- lrw a2, PHYS_OFFSET
+ lrw a2, va_pa_offset
+ ld.w a2, (a2, 0)
subu r6, a2
bseti r6, 31
@@ -50,7 +51,8 @@ ENTRY(csky_\name)
addu r6, a2
ldw r6, (r6)
- lrw a2, PHYS_OFFSET
+ lrw a2, va_pa_offset
+ ld.w a2, (a2, 0)
subu r6, a2
bseti r6, 31
@@ -91,7 +93,7 @@ ENTRY(csky_\name)
mfcr a3, ss2
mfcr r6, ss3
mfcr a2, ss4
- SAVE_ALL EPC_KEEP
+ SAVE_ALL 0
.endm
.macro tlbop_end is_write
RD_MEH a2
@@ -99,7 +101,6 @@ ENTRY(csky_\name)
mov a0, sp
movi a1, \is_write
jbsr do_page_fault
- movi r11_sig, 0 /* r11 = 0, Not a syscall. */
jmpi ret_from_exception
.endm
@@ -118,7 +119,7 @@ jbsr csky_cmpxchg_fixup
tlbop_end 1
ENTRY(csky_systemcall)
- SAVE_ALL EPC_INCREASE
+ SAVE_ALL TRAP0_SIZE
psrset ee, ie
@@ -136,8 +137,9 @@ ENTRY(csky_systemcall)
bmaski r10, THREAD_SHIFT
andn r9, r10
ldw r8, (r9, TINFO_FLAGS)
- btsti r8, TIF_SYSCALL_TRACE
- bt 1f
+ ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+ cmpnei r8, 0
+ bt csky_syscall_trace
#if defined(__CSKYABIV2__)
subi sp, 8
stw r5, (sp, 0x4)
@@ -150,10 +152,9 @@ ENTRY(csky_systemcall)
stw a0, (sp, LSAVE_A0) /* Save return value */
jmpi ret_from_exception
-1:
- movi a0, 0 /* enter system call */
- mov a1, sp /* sp = pt_regs pointer */
- jbsr syscall_trace
+csky_syscall_trace:
+ mov a0, sp /* sp = pt_regs pointer */
+ jbsr syscall_trace_enter
/* Prepare args before do system call */
ldw a0, (sp, LSAVE_A0)
ldw a1, (sp, LSAVE_A1)
@@ -173,9 +174,8 @@ ENTRY(csky_systemcall)
#endif
stw a0, (sp, LSAVE_A0) /* Save return value */
- movi a0, 1 /* leave system call */
- mov a1, sp /* right now, sp --> pt_regs */
- jbsr syscall_trace
+ mov a0, sp /* right now, sp --> pt_regs */
+ jbsr syscall_trace_exit
br ret_from_exception
ENTRY(ret_from_kernel_thread)
@@ -190,14 +190,11 @@ ENTRY(ret_from_fork)
bmaski r10, THREAD_SHIFT
andn r9, r10
ldw r8, (r9, TINFO_FLAGS)
- movi r11_sig, 1
- btsti r8, TIF_SYSCALL_TRACE
- bf 3f
- movi a0, 1
- mov a1, sp /* sp = pt_regs pointer */
- jbsr syscall_trace
-3:
- jbsr ret_from_exception
+ ANDI_R3 r8, (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+ cmpnei r8, 0
+ bf ret_from_exception
+ mov a0, sp /* sp = pt_regs pointer */
+ jbsr syscall_trace_exit
ret_from_exception:
ld syscallid, (sp, LSAVE_PSR)
@@ -212,41 +209,30 @@ ret_from_exception:
bmaski r10, THREAD_SHIFT
andn r9, r10
-resume_userspace:
ldw r8, (r9, TINFO_FLAGS)
andi r8, (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED)
cmpnei r8, 0
bt exit_work
-1: RESTORE_ALL
+1:
+ RESTORE_ALL
exit_work:
+ lrw syscallid, ret_from_exception
+ mov lr, syscallid
+
btsti r8, TIF_NEED_RESCHED
bt work_resched
- /* If thread_info->flag is empty, RESTORE_ALL */
- cmpnei r8, 0
- bf 1b
- mov a1, sp
- mov a0, r8
- mov a2, r11_sig /* syscall? */
- btsti r8, TIF_SIGPENDING /* delivering a signal? */
- /* prevent further restarts(set r11 = 0) */
- clrt r11_sig
- jbsr do_notify_resume /* do signals */
- br resume_userspace
+
+ mov a0, sp
+ mov a1, r8
+ jmpi do_notify_resume
work_resched:
- lrw syscallid, ret_from_exception
- mov r15, syscallid /* Return address in link */
jmpi schedule
-ENTRY(sys_rt_sigreturn)
- movi r11_sig, 0
- jmpi do_rt_sigreturn
-
ENTRY(csky_trap)
- SAVE_ALL EPC_KEEP
+ SAVE_ALL 0
psrset ee
- movi r11_sig, 0 /* r11 = 0, Not a syscall. */
mov a0, sp /* Push Stack pointer arg */
jbsr trap_c /* Call C-level trap handler */
jmpi ret_from_exception
@@ -261,7 +247,7 @@ ENTRY(csky_get_tls)
/* increase epc for continue */
mfcr a0, epc
- INCTRAP a0
+ addi a0, TRAP0_SIZE
mtcr a0, epc
/* get current task thread_info with kernel 8K stack */
@@ -278,9 +264,8 @@ ENTRY(csky_get_tls)
rte
ENTRY(csky_irq)
- SAVE_ALL EPC_KEEP
+ SAVE_ALL 0
psrset ee
- movi r11_sig, 0 /* r11 = 0, Not a syscall. */
#ifdef CONFIG_PREEMPT
mov r9, sp /* Get current stack pointer */
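The syscall entry path above now tests a combined flag mask rather than a single TIF bit before taking the slow path. In C terms the check amounts to roughly the following (the _TIF_SYSCALL_WORK name is purely illustrative; the entry code open-codes the mask):

    #define _TIF_SYSCALL_WORK \
        (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)

    if (current_thread_info()->flags & _TIF_SYSCALL_WORK)
        syscall_trace_enter(regs);  /* ptrace, tracepoints and audit */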
diff --git a/arch/csky/kernel/ftrace.c b/arch/csky/kernel/ftrace.c
index 274c431f1810..44f4880179b7 100644
--- a/arch/csky/kernel/ftrace.c
+++ b/arch/csky/kernel/ftrace.c
@@ -3,6 +3,137 @@
#include <linux/ftrace.h>
#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define NOP 0x4000
+#define NOP32_HI 0xc400
+#define NOP32_LO 0x4820
+#define PUSH_LR 0x14d0
+#define MOVIH_LINK 0xea3a
+#define ORI_LINK 0xef5a
+#define JSR_LINK 0xe8fa
+#define BSR_LINK 0xe000
+
+/*
+ * gcc for C-SKY with -pg inserts this stub in the function prologue:
+ * push lr
+ * jbsr _mcount
+ * nop32
+ * nop32
+ *
+ * If (callee - current_pc) is within +/-64MB, we'll use bsr:
+ * push lr
+ * bsr _mcount
+ * nop32
+ * nop32
+ * else we'll use (movih + ori + jsr):
+ * push lr
+ * movih r26, ...
+ * ori r26, ...
+ * jsr r26
+ *
+ * (r26 is our reserved link-reg)
+ *
+ */
+static inline void make_jbsr(unsigned long callee, unsigned long pc,
+ uint16_t *call, bool nolr)
+{
+ long offset;
+
+ call[0] = nolr ? NOP : PUSH_LR;
+
+ offset = (long) callee - (long) pc;
+
+ if (unlikely(offset < -67108864 || offset > 67108864)) {
+ call[1] = MOVIH_LINK;
+ call[2] = callee >> 16;
+ call[3] = ORI_LINK;
+ call[4] = callee & 0xffff;
+ call[5] = JSR_LINK;
+ call[6] = 0;
+ } else {
+ offset = offset >> 1;
+
+ call[1] = BSR_LINK |
+ ((uint16_t)((unsigned long) offset >> 16) & 0x3ff);
+ call[2] = (uint16_t)((unsigned long) offset & 0xffff);
+ call[3] = call[5] = NOP32_HI;
+ call[4] = call[6] = NOP32_LO;
+ }
+}
+
+static uint16_t nops[7] = {NOP, NOP32_HI, NOP32_LO, NOP32_HI, NOP32_LO,
+ NOP32_HI, NOP32_LO};
+static int ftrace_check_current_nop(unsigned long hook)
+{
+ uint16_t olds[7];
+ unsigned long hook_pos = hook - 2;
+
+ if (probe_kernel_read((void *)olds, (void *)hook_pos, sizeof(nops)))
+ return -EFAULT;
+
+ if (memcmp((void *)nops, (void *)olds, sizeof(nops))) {
+ pr_err("%p: nop but get (%04x %04x %04x %04x %04x %04x %04x)\n",
+ (void *)hook_pos,
+ olds[0], olds[1], olds[2], olds[3], olds[4], olds[5],
+ olds[6]);
+
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ftrace_modify_code(unsigned long hook, unsigned long target,
+ bool enable, bool nolr)
+{
+ uint16_t call[7];
+
+ unsigned long hook_pos = hook - 2;
+ int ret = 0;
+
+ make_jbsr(target, hook, call, nolr);
+
+ ret = probe_kernel_write((void *)hook_pos, enable ? call : nops,
+ sizeof(nops));
+ if (ret)
+ return -EPERM;
+
+ flush_icache_range(hook_pos, hook_pos + MCOUNT_INSN_SIZE);
+
+ return 0;
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ int ret = ftrace_check_current_nop(rec->ip);
+
+ if (ret)
+ return ret;
+
+ return ftrace_modify_code(rec->ip, addr, true, false);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+{
+ return ftrace_modify_code(rec->ip, addr, false, false);
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+ int ret = ftrace_modify_code((unsigned long)&ftrace_call,
+ (unsigned long)func, true, true);
+ return ret;
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+ return 0;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
@@ -43,8 +174,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
*(unsigned long *)frame_pointer = return_hooker;
}
}
-#endif
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_code((unsigned long)&ftrace_graph_call,
+ (unsigned long)&ftrace_graph_caller, true, true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_code((unsigned long)&ftrace_graph_call,
+ (unsigned long)&ftrace_graph_caller, false, true);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
/* _mcount is defined in abi's mcount.S */
-extern void _mcount(void);
EXPORT_SYMBOL(_mcount);
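make_jbsr() above encodes the bsr displacement as a signed 26-bit halfword offset (10 high bits in the first 16-bit word, 16 low bits in the second), which is where the +/-64MB (67108864-byte) cut-off comes from. A worked example with made-up numbers:

    /* callee - pc = 0x2000 bytes, i.e. well within the bsr reach */
    long offset = 0x2000 >> 1;                          /* 0x1000 halfwords */
    uint16_t hi = 0xe000 | ((offset >> 16) & 0x3ff);    /* BSR_LINK | high 10 bits = 0xe000 */
    uint16_t lo = offset & 0xffff;                      /* 0x1000 */
    /* Anything outside +/-64MB falls back to movih/ori/jsr through r26. */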
diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S
index 9c4ec473b76b..61989f9241c0 100644
--- a/arch/csky/kernel/head.S
+++ b/arch/csky/kernel/head.S
@@ -7,16 +7,11 @@
__HEAD
ENTRY(_start)
- /* set super user mode */
- lrw a3, DEFAULT_PSR_VALUE
- mtcr a3, psr
- psrset ee
-
- SETUP_MMU a3
+ SETUP_MMU
/* set stack point */
- lrw a3, init_thread_union + THREAD_SIZE
- mov sp, a3
+ lrw r6, init_thread_union + THREAD_SIZE
+ mov sp, r6
jmpi csky_start
END(_start)
@@ -24,53 +19,12 @@ END(_start)
#ifdef CONFIG_SMP
.align 10
ENTRY(_start_smp_secondary)
- /* Invalid I/Dcache BTB BHT */
- movi a3, 7
- lsli a3, 16
- addi a3, (1<<4) | 3
- mtcr a3, cr17
-
- tlbi.alls
-
- /* setup PAGEMASK */
- movi a3, 0
- mtcr a3, cr<6, 15>
-
- /* setup MEL0/MEL1 */
- grs a0, _start_smp_pc
-_start_smp_pc:
- bmaski a1, 13
- andn a0, a1
- movi a1, 0x00000006
- movi a2, 0x00001006
- or a1, a0
- or a2, a0
- mtcr a1, cr<2, 15>
- mtcr a2, cr<3, 15>
-
- /* setup MEH */
- mtcr a0, cr<4, 15>
-
- /* write TLB */
- bgeni a3, 28
- mtcr a3, cr<8, 15>
-
- SETUP_MMU a3
-
- /* enable MMU */
- movi a3, 1
- mtcr a3, cr18
-
- jmpi _goto_mmu_on
-_goto_mmu_on:
- lrw a3, DEFAULT_PSR_VALUE
- mtcr a3, psr
- psrset ee
+ SETUP_MMU
/* set stack point */
- lrw a3, secondary_stack
- ld.w a3, (a3, 0)
- mov sp, a3
+ lrw r6, secondary_stack
+ ld.w r6, (r6, 0)
+ mov sp, r6
jmpi csky_start_secondary
END(_start_smp_secondary)
diff --git a/arch/csky/kernel/perf_callchain.c b/arch/csky/kernel/perf_callchain.c
new file mode 100644
index 000000000000..e68ff375c8f8
--- /dev/null
+++ b/arch/csky/kernel/perf_callchain.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/perf_event.h>
+#include <linux/uaccess.h>
+
+/* Kernel callchain */
+struct stackframe {
+ unsigned long fp;
+ unsigned long lr;
+};
+
+static int unwind_frame_kernel(struct stackframe *frame)
+{
+ if (kstack_end((void *)frame->fp))
+ return -EPERM;
+ if (frame->fp & 0x3 || frame->fp < TASK_SIZE)
+ return -EPERM;
+
+ *frame = *(struct stackframe *)frame->fp;
+ if (__kernel_text_address(frame->lr)) {
+ int graph = 0;
+
+ frame->lr = ftrace_graph_ret_addr(NULL, &graph, frame->lr,
+ NULL);
+ }
+ return 0;
+}
+
+static void notrace walk_stackframe(struct stackframe *fr,
+ struct perf_callchain_entry_ctx *entry)
+{
+ do {
+ perf_callchain_store(entry, fr->lr);
+ } while (unwind_frame_kernel(fr) >= 0);
+}
+
+/*
+ * Get the return address for a single stackframe and return a pointer to the
+ * next frame tail.
+ */
+static unsigned long user_backtrace(struct perf_callchain_entry_ctx *entry,
+ unsigned long fp, unsigned long reg_lr)
+{
+ struct stackframe buftail;
+ unsigned long lr = 0;
+ unsigned long *user_frame_tail = (unsigned long *)fp;
+
+ /* Check accessibility of one struct stackframe beyond fp */
+ if (!access_ok(user_frame_tail, sizeof(buftail)))
+ return 0;
+ if (__copy_from_user_inatomic(&buftail, user_frame_tail,
+ sizeof(buftail)))
+ return 0;
+
+ if (reg_lr != 0)
+ lr = reg_lr;
+ else
+ lr = buftail.lr;
+
+ fp = buftail.fp;
+ perf_callchain_store(entry, lr);
+
+ return fp;
+}
+
+/*
+ * This is called when the sampled context is in user mode. It is only
+ * invoked when "PERF_SAMPLE_CALLCHAIN" is requested; see
+ * kernel/events/core.c:perf_prepare_sample().
+ *
+ * How to trigger perf_callchain_[user/kernel]:
+ * $ perf record -e cpu-clock --call-graph fp ./program
+ * $ perf report --call-graph
+ *
+ * On the C-SKY platform, the program being sampled and the C library
+ * need to be compiled with -mbacktrace, otherwise the user stack will
+ * not contain function frames.
+ */
+void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ unsigned long fp = 0;
+
+ /* C-SKY does not support virtualization. */
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+ return;
+
+ fp = regs->regs[4];
+ perf_callchain_store(entry, regs->pc);
+
+ /*
+ * When backtracing from a leaf function, lr is normally not
+ * saved in the frame on C-SKY, so take lr from pt_regs at the
+ * sample point. Note that this lr value can be wrong if lr was
+ * being used as a temporary register.
+ */
+ fp = user_backtrace(entry, fp, regs->lr);
+
+ while (fp && !(fp & 0x3) && entry->nr < entry->max_stack)
+ fp = user_backtrace(entry, fp, 0);
+}
+
+void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
+ struct pt_regs *regs)
+{
+ struct stackframe fr;
+
+ /* C-SKY does not support virtualization. */
+ if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ pr_warn("C-SKY does not support perf in guest mode!");
+ return;
+ }
+
+ fr.fp = regs->regs[4];
+ fr.lr = regs->lr;
+ walk_stackframe(&fr, entry);
+}
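user_backtrace() above assumes the frame layout that -mbacktrace-compiled code lays down: each frame begins with the caller's fp followed by the saved lr, matching struct stackframe. A sketch of that assumed layout (addresses illustrative only):

    /*
     *  higher addr |  ...           |
     *              |  saved lr      |  <- fp + 4
     *  fp -------> |  caller's fp   |  <- fp + 0
     *  lower addr  |  locals ...    |
     *
     * The walk ends when fp is 0, misaligned, or max_stack entries
     * have been recorded.
     */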
diff --git a/arch/csky/kernel/perf_regs.c b/arch/csky/kernel/perf_regs.c
new file mode 100644
index 000000000000..eb32838b8210
--- /dev/null
+++ b/arch/csky/kernel/perf_regs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+ if (WARN_ON_ONCE((u32)idx >= PERF_REG_CSKY_MAX))
+ return 0;
+
+ return (u64)*((u32 *)regs + idx);
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_CSKY_MAX) - 1))
+
+int perf_reg_validate(u64 mask)
+{
+ if (!mask || mask & REG_RESERVED)
+ return -EINVAL;
+
+ return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+ return PERF_SAMPLE_REGS_ABI_32;
+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+ struct pt_regs *regs,
+ struct pt_regs *regs_user_copy)
+{
+ regs_user->regs = task_pt_regs(current);
+ regs_user->abi = perf_reg_abi(current);
+}
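perf_reg_value() simply indexes pt_regs as an array of 32-bit words using the uapi enum, so a profiler selects registers by setting the corresponding bits in sample_regs_user. A hedged user-space sketch (attribute setup only; event open and mmap are omitted):

    #include <linux/perf_event.h>

    struct perf_event_attr attr = {
        .type             = PERF_TYPE_SOFTWARE,
        .config           = PERF_COUNT_SW_CPU_CLOCK,
        .sample_type      = PERF_SAMPLE_REGS_USER,
        .sample_regs_user = (1ULL << PERF_REG_CSKY_PC) |
                            (1ULL << PERF_REG_CSKY_SP),
    };
    /* perf_reg_validate() accepts this mask: non-zero, no reserved bits set. */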
diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c
index f2f12fff36f7..313623a19ecb 100644
--- a/arch/csky/kernel/ptrace.c
+++ b/arch/csky/kernel/ptrace.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+#include <linux/audit.h>
#include <linux/elf.h>
#include <linux/errno.h>
#include <linux/kernel.h>
@@ -11,6 +12,7 @@
#include <linux/sched/task_stack.h>
#include <linux/signal.h>
#include <linux/smp.h>
+#include <linux/tracehook.h>
#include <linux/uaccess.h>
#include <linux/user.h>
@@ -22,6 +24,9 @@
#include <abi/regdef.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/syscalls.h>
+
/* sets the trace bits. */
#define TRACE_MODE_SI (1 << 14)
#define TRACE_MODE_RUN 0
@@ -207,35 +212,27 @@ long arch_ptrace(struct task_struct *child, long request,
return ret;
}
-/*
- * If process's system calls is traces, do some corresponding handles in this
- * function before entering system call function and after exiting system call
- * function.
- */
-asmlinkage void syscall_trace(int why, struct pt_regs *regs)
+asmlinkage void syscall_trace_enter(struct pt_regs *regs)
{
- long saved_why;
- /*
- * Save saved_why, why is used to denote syscall entry/exit;
- * why = 0:entry, why = 1: exit
- */
- saved_why = regs->regs[SYSTRACE_SAVENUM];
- regs->regs[SYSTRACE_SAVENUM] = why;
-
- ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
- ? 0x80 : 0));
-
- /*
- * this isn't the same as continuing with a signal, but it will do
- * for normal use. strace only continues with a signal if the
- * stopping signal is not SIGTRAP. -brl
- */
- if (current->exit_code) {
- send_sig(current->exit_code, current, 1);
- current->exit_code = 0;
- }
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ if (tracehook_report_syscall_entry(regs))
+ syscall_set_nr(current, regs, -1);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_enter(regs, syscall_get_nr(current, regs));
+
+ audit_syscall_entry(regs_syscallid(regs), regs->a0, regs->a1, regs->a2, regs->a3);
+}
+
+asmlinkage void syscall_trace_exit(struct pt_regs *regs)
+{
+ audit_syscall_exit(regs);
+
+ if (test_thread_flag(TIF_SYSCALL_TRACE))
+ tracehook_report_syscall_exit(regs, 0);
- regs->regs[SYSTRACE_SAVENUM] = saved_why;
+ if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
+ trace_sys_exit(regs, syscall_get_return_value(current, regs));
}
extern void show_stack(struct task_struct *task, unsigned long *stack);
diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index dff8b89444ec..23ee604aafdb 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -142,18 +142,24 @@ void __init setup_arch(char **cmdline_p)
#endif
}
-asmlinkage __visible void __init csky_start(unsigned int unused, void *param)
+unsigned long va_pa_offset;
+EXPORT_SYMBOL(va_pa_offset);
+
+asmlinkage __visible void __init csky_start(unsigned int unused,
+ void *dtb_start)
{
/* Clean up bss section */
memset(__bss_start, 0, __bss_stop - __bss_start);
+ va_pa_offset = read_mmu_msa0() & ~(SSEG_SIZE - 1);
+
pre_trap_init();
pre_mmu_init();
- if (param == NULL)
+ if (dtb_start == NULL)
early_init_dt_scan(__dtb_start);
else
- early_init_dt_scan(param);
+ early_init_dt_scan(dtb_start);
start_kernel();
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index 207a891479d2..04a43cfd4e09 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -1,26 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
#include <linux/signal.h>
+#include <linux/uaccess.h>
#include <linux/syscalls.h>
-#include <linux/errno.h>
-#include <linux/wait.h>
-#include <linux/ptrace.h>
-#include <linux/unistd.h>
-#include <linux/stddef.h>
-#include <linux/highuid.h>
-#include <linux/personality.h>
-#include <linux/tty.h>
-#include <linux/binfmts.h>
#include <linux/tracehook.h>
-#include <linux/freezer.h>
-#include <linux/uaccess.h>
-#include <asm/setup.h>
-#include <asm/pgtable.h>
#include <asm/traps.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
@@ -29,110 +13,117 @@
#ifdef CONFIG_CPU_HAS_FPU
#include <abi/fpu.h>
-
-static int restore_fpu_state(struct sigcontext *sc)
+static int restore_fpu_state(struct sigcontext __user *sc)
{
int err = 0;
struct user_fp user_fp;
- err = copy_from_user(&user_fp, &sc->sc_user_fp, sizeof(user_fp));
+ err = __copy_from_user(&user_fp, &sc->sc_user_fp, sizeof(user_fp));
restore_from_user_fp(&user_fp);
return err;
}
-static int save_fpu_state(struct sigcontext *sc)
+static int save_fpu_state(struct sigcontext __user *sc)
{
struct user_fp user_fp;
save_to_user_fp(&user_fp);
- return copy_to_user(&sc->sc_user_fp, &user_fp, sizeof(user_fp));
+ return __copy_to_user(&sc->sc_user_fp, &user_fp, sizeof(user_fp));
}
#else
-static inline int restore_fpu_state(struct sigcontext *sc) { return 0; }
-static inline int save_fpu_state(struct sigcontext *sc) { return 0; }
+#define restore_fpu_state(sigcontext) (0)
+#define save_fpu_state(sigcontext) (0)
#endif
struct rt_sigframe {
- int sig;
- struct siginfo *pinfo;
- void *puc;
struct siginfo info;
struct ucontext uc;
};
-static int
-restore_sigframe(struct pt_regs *regs,
- struct sigcontext *sc, int *pr2)
+static long restore_sigcontext(struct pt_regs *regs,
+ struct sigcontext __user *sc)
{
int err = 0;
- /* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->task->restart_block.fn = do_no_restart_syscall;
-
- err |= copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs));
+ /* sc_pt_regs is structured the same as the start of pt_regs */
+ err |= __copy_from_user(regs, &sc->sc_pt_regs, sizeof(struct pt_regs));
+ /* Restore the floating-point state. */
err |= restore_fpu_state(sc);
- *pr2 = regs->a0;
return err;
}
-asmlinkage int
-do_rt_sigreturn(void)
+SYSCALL_DEFINE0(rt_sigreturn)
{
- sigset_t set;
- int a0;
struct pt_regs *regs = current_pt_regs();
- struct rt_sigframe *frame = (struct rt_sigframe *)(regs->usp);
+ struct rt_sigframe __user *frame;
+ struct task_struct *task;
+ sigset_t set;
+
+ /* Always make any pending restarted system calls return -EINTR */
+ current->restart_block.fn = do_no_restart_syscall;
+
+ frame = (struct rt_sigframe __user *)regs->usp;
if (!access_ok(frame, sizeof(*frame)))
goto badframe;
+
if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
goto badframe;
- sigdelsetmask(&set, (sigmask(SIGKILL) | sigmask(SIGSTOP)));
- spin_lock_irq(&current->sighand->siglock);
- current->blocked = set;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ set_current_blocked(&set);
- if (restore_sigframe(regs, &frame->uc.uc_mcontext, &a0))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
- return a0;
+ if (restore_altstack(&frame->uc.uc_stack))
+ goto badframe;
+
+ return regs->a0;
badframe:
- force_sig(SIGSEGV, current);
+ task = current;
+ force_sig(SIGSEGV, task);
return 0;
}
-static int setup_sigframe(struct sigcontext *sc, struct pt_regs *regs)
+static int setup_sigcontext(struct rt_sigframe __user *frame,
+ struct pt_regs *regs)
{
+ struct sigcontext __user *sc = &frame->uc.uc_mcontext;
int err = 0;
- err |= copy_to_user(&sc->sc_pt_regs, regs, sizeof(struct pt_regs));
+ err |= __copy_to_user(&sc->sc_pt_regs, regs, sizeof(struct pt_regs));
err |= save_fpu_state(sc);
return err;
}
-static inline void *
-get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
+static inline void __user *get_sigframe(struct ksignal *ksig,
+ struct pt_regs *regs, size_t framesize)
{
- unsigned long usp;
+ unsigned long sp;
+ /* Default to using normal stack */
+ sp = regs->usp;
+
+ /*
+ * If we are on the alternate signal stack and would overflow it, don't.
+ * Return an always-bogus address instead so we will die with SIGSEGV.
+ */
+ if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
+ return (void __user __force *)(-1UL);
- /* Default to using normal stack. */
- usp = regs->usp;
+ /* This is the X/Open sanctioned signal stack switching. */
+ sp = sigsp(sp, ksig) - framesize;
- /* This is the X/Open sanctioned signal stack switching. */
- if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(usp)) {
- if (!on_sig_stack(usp))
- usp = current->sas_ss_sp + current->sas_ss_size;
- }
- return (void *)((usp - frame_size) & -8UL);
+ /* Align the stack frame. */
+ sp &= -8UL;
+
+ return (void __user *)sp;
}
static int
@@ -140,205 +131,128 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe *frame;
int err = 0;
-
struct csky_vdso *vdso = current->mm->context.vdso;
- frame = get_sigframe(&ksig->ka, regs, sizeof(*frame));
- if (!frame)
- return 1;
+ frame = get_sigframe(ksig, regs, sizeof(*frame));
+ if (!access_ok(frame, sizeof(*frame)))
+ return -EFAULT;
- err |= __put_user(ksig->sig, &frame->sig);
- err |= __put_user(&frame->info, &frame->pinfo);
- err |= __put_user(&frame->uc, &frame->puc);
err |= copy_siginfo_to_user(&frame->info, &ksig->info);
- /* Create the ucontext. */
+ /* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags);
- err |= __put_user(0, &frame->uc.uc_link);
- err |= __put_user((void *)current->sas_ss_sp,
- &frame->uc.uc_stack.ss_sp);
- err |= __put_user(sas_ss_flags(regs->usp),
- &frame->uc.uc_stack.ss_flags);
- err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
- err |= setup_sigframe(&frame->uc.uc_mcontext, regs);
- err |= copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
-
+ err |= __put_user(NULL, &frame->uc.uc_link);
+ err |= __save_altstack(&frame->uc.uc_stack, regs->usp);
+ err |= setup_sigcontext(frame, regs);
+ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err)
- goto give_sigsegv;
+ return -EFAULT;
- /* Set up registers for signal handler */
- regs->usp = (unsigned long)frame;
- regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
- regs->lr = (unsigned long)vdso->rt_signal_retcode;
+ /* Set up to return from userspace. */
+ regs->lr = (unsigned long)(vdso->rt_signal_retcode);
-adjust_stack:
- regs->a0 = ksig->sig; /* first arg is signo */
- regs->a1 = (unsigned long)(&(frame->info));
- regs->a2 = (unsigned long)(&(frame->uc));
- return err;
+ /*
+ * Set up registers for signal handler.
+ * Registers that we don't modify keep the value they had from
+ * user-space at the time we took the signal.
+ * We always pass siginfo and mcontext, regardless of SA_SIGINFO,
+ * since some things rely on this (e.g. glibc's debug/segfault.c).
+ */
+ regs->pc = (unsigned long)ksig->ka.sa.sa_handler;
+ regs->usp = (unsigned long)frame;
+ regs->a0 = ksig->sig; /* a0: signal number */
+ regs->a1 = (unsigned long)(&(frame->info)); /* a1: siginfo pointer */
+ regs->a2 = (unsigned long)(&(frame->uc)); /* a2: ucontext pointer */
-give_sigsegv:
- if (ksig->sig == SIGSEGV)
- ksig->ka.sa.sa_handler = SIG_DFL;
- force_sig(SIGSEGV, current);
- goto adjust_stack;
+ return 0;
}
-/*
- * OK, we're invoking a handler
- */
-static int
-handle_signal(struct ksignal *ksig, struct pt_regs *regs)
+static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
- int ret;
sigset_t *oldset = sigmask_to_save();
+ int ret;
- /*
- * set up the stack frame, regardless of SA_SIGINFO,
- * and pass info anyway.
- */
- ret = setup_rt_frame(ksig, oldset, regs);
+ /* Are we from a system call? */
+ if (in_syscall(regs)) {
+ /* Avoid additional syscall restarting via ret_from_exception */
+ forget_syscall(regs);
+
+ /* If so, check system call restarting.. */
+ switch (regs->a0) {
+ case -ERESTART_RESTARTBLOCK:
+ case -ERESTARTNOHAND:
+ regs->a0 = -EINTR;
+ break;
- if (ret != 0) {
- force_sigsegv(ksig->sig, current);
- return ret;
+ case -ERESTARTSYS:
+ if (!(ksig->ka.sa.sa_flags & SA_RESTART)) {
+ regs->a0 = -EINTR;
+ break;
+ }
+ /* fallthrough */
+ case -ERESTARTNOINTR:
+ regs->a0 = regs->orig_a0;
+ regs->pc -= TRAP0_SIZE;
+ break;
+ }
}
- /* Block the signal if we were successful. */
- spin_lock_irq(&current->sighand->siglock);
- sigorsets(&current->blocked, &current->blocked, &ksig->ka.sa.sa_mask);
- if (!(ksig->ka.sa.sa_flags & SA_NODEFER))
- sigaddset(&current->blocked, ksig->sig);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ /* Set up the stack frame */
+ ret = setup_rt_frame(ksig, oldset, regs);
- return 0;
+ signal_setup_done(ret, ksig, 0);
}
-/*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- *
- * Note that we go through the signals twice: once to check the signals
- * that the kernel can handle, and then we build all the user-level signal
- * handling stack-frames in one go after that.
- */
-static void do_signal(struct pt_regs *regs, int syscall)
+static void do_signal(struct pt_regs *regs)
{
- unsigned int retval = 0, continue_addr = 0, restart_addr = 0;
struct ksignal ksig;
- /*
- * We want the common case to go fast, which
- * is why we may in certain cases get here from
- * kernel mode. Just return without doing anything
- * if so.
- */
- if (!user_mode(regs))
+ if (get_signal(&ksig)) {
+ /* Actually deliver the signal */
+ handle_signal(&ksig, regs);
return;
+ }
- /*
- * If we were from a system call, check for system call restarting...
- */
- if (syscall) {
- continue_addr = regs->pc;
-#if defined(__CSKYABIV2__)
- restart_addr = continue_addr - 4;
-#else
- restart_addr = continue_addr - 2;
-#endif
- retval = regs->a0;
+ /* Did we come from a system call? */
+ if (in_syscall(regs)) {
+ /* Avoid additional syscall restarting via ret_from_exception */
+ forget_syscall(regs);
- /*
- * Prepare for system call restart. We do this here so that a
- * debugger will see the already changed.
- */
- switch (retval) {
+ /* Restart the system call - no handlers present */
+ switch (regs->a0) {
case -ERESTARTNOHAND:
case -ERESTARTSYS:
case -ERESTARTNOINTR:
regs->a0 = regs->orig_a0;
- regs->pc = restart_addr;
+ regs->pc -= TRAP0_SIZE;
break;
case -ERESTART_RESTARTBLOCK:
- regs->a0 = -EINTR;
+ regs->a0 = regs->orig_a0;
+ regs_syscallid(regs) = __NR_restart_syscall;
+ regs->pc -= TRAP0_SIZE;
break;
}
}
- if (try_to_freeze())
- goto no_signal;
-
/*
- * Get the signal to deliver. When running under ptrace, at this
- * point the debugger may change all our registers ...
+ * If there is no signal to deliver, we just put the saved
+ * sigmask back.
*/
- if (get_signal(&ksig)) {
- /*
- * Depending on the signal settings we may need to revert the
- * decision to restart the system call. But skip this if a
- * debugger has chosen to restart at a different PC.
- */
- if (regs->pc == restart_addr) {
- if (retval == -ERESTARTNOHAND ||
- (retval == -ERESTARTSYS &&
- !(ksig.ka.sa.sa_flags & SA_RESTART))) {
- regs->a0 = -EINTR;
- regs->pc = continue_addr;
- }
- }
-
- /* Whee! Actually deliver the signal. */
- if (handle_signal(&ksig, regs) == 0) {
- /*
- * A signal was successfully delivered; the saved
- * sigmask will have been stored in the signal frame,
- * and will be restored by sigreturn, so we can simply
- * clear the TIF_RESTORE_SIGMASK flag.
- */
- if (test_thread_flag(TIF_RESTORE_SIGMASK))
- clear_thread_flag(TIF_RESTORE_SIGMASK);
- }
- return;
- }
-
-no_signal:
- if (syscall) {
- /*
- * Handle restarting a different system call. As above,
- * if a debugger has chosen to restart at a different PC,
- * ignore the restart.
- */
- if (retval == -ERESTART_RESTARTBLOCK
- && regs->pc == continue_addr) {
-#if defined(__CSKYABIV2__)
- regs->regs[3] = __NR_restart_syscall;
- regs->pc -= 4;
-#else
- regs->regs[9] = __NR_restart_syscall;
- regs->pc -= 2;
-#endif
- }
-
- /*
- * If there's no signal to deliver, we just put the saved
- * sigmask back.
- */
- if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
- clear_thread_flag(TIF_RESTORE_SIGMASK);
- sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
- }
- }
+ restore_saved_sigmask();
}
-asmlinkage void
-do_notify_resume(unsigned int thread_flags, struct pt_regs *regs, int syscall)
+/*
+ * notification of userspace execution resumption
+ * - triggered by the _TIF_WORK_MASK flags
+ */
+asmlinkage void do_notify_resume(struct pt_regs *regs,
+ unsigned long thread_info_flags)
{
- if (thread_flags & _TIF_SIGPENDING)
- do_signal(regs, syscall);
+ /* Handle pending signal delivery */
+ if (thread_info_flags & _TIF_SIGPENDING)
+ do_signal(regs);
- if (thread_flags & _TIF_NOTIFY_RESUME) {
+ if (thread_info_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
tracehook_notify_resume(regs);
}
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index d6f4b66b93e2..18041f46ded1 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -15,9 +15,9 @@
#include <linux/smp.h>
#include <linux/version.h>
#include <linux/vt_kern.h>
-#include <linux/kernel.h>
#include <linux/extable.h>
#include <linux/uaccess.h>
+#include <linux/perf_event.h>
#include <asm/hardirq.h>
#include <asm/mmu_context.h>
@@ -82,7 +82,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
unsigned long pgd_base;
- pgd_base = tlb_get_pgd();
+ pgd_base = (unsigned long)__va(get_pgd());
pgd = (pgd_t *)pgd_base + offset;
pgd_k = init_mm.pgd + offset;
@@ -107,6 +107,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
return;
}
#endif
+
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
/*
* If we're in an interrupt or have no user
* context, we must not take the fault..
@@ -154,10 +156,15 @@ good_area:
goto bad_area;
BUG();
}
- if (fault & VM_FAULT_MAJOR)
+ if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- else
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, regs,
+ address);
+ } else {
tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, regs,
+ address);
+ }
up_read(&mm->mmap_sem);
return;
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index c071da34e081..61c01db6c292 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -27,9 +27,6 @@ config H8300
config CPU_BIG_ENDIAN
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index e3dead402e5f..123d8f54be4a 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -29,6 +29,7 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += mmu.h
generic-y += mmu_context.h
generic-y += module.h
diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h
index ddd483c6ca95..01666b8bb263 100644
--- a/arch/h8300/include/asm/syscall.h
+++ b/arch/h8300/include/asm/syscall.h
@@ -8,6 +8,7 @@
#include <linux/linkage.h>
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <uapi/linux/audit.h>
static inline int
syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
@@ -27,6 +28,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
*args = regs->er6;
}
+static inline int
+syscall_get_arch(struct task_struct *task)
+{
+ return AUDIT_ARCH_H8300;
+}
/* Misc syscall related bits */
diff --git a/arch/h8300/include/asm/tlb.h b/arch/h8300/include/asm/tlb.h
index 98f344279904..d8201ca31206 100644
--- a/arch/h8300/include/asm/tlb.h
+++ b/arch/h8300/include/asm/tlb.h
@@ -2,8 +2,6 @@
#ifndef __H8300_TLB_H__
#define __H8300_TLB_H__
-#define tlb_flush(tlb) do { } while (0)
-
#include <asm-generic/tlb.h>
#endif
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index ac441680dcc0..3e54a53208d5 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -65,12 +65,6 @@ config GENERIC_CSUM
config GENERIC_IRQ_PROBE
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool n
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index d046e8ccdf78..6234a303d2a3 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -24,10 +24,10 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += pci.h
generic-y += percpu.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += sections.h
generic-y += segment.h
generic-y += serial.h
diff --git a/arch/hexagon/include/asm/elf.h b/arch/hexagon/include/asm/elf.h
index 80311e7b8ca6..d10fbd54ae51 100644
--- a/arch/hexagon/include/asm/elf.h
+++ b/arch/hexagon/include/asm/elf.h
@@ -23,11 +23,7 @@
#include <asm/ptrace.h>
#include <asm/user.h>
-
-/*
- * This should really be in linux/elf-em.h.
- */
-#define EM_HEXAGON 164 /* QUALCOMM Hexagon */
+#include <linux/elf-em.h>
struct elf32_hdr;
diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index e17262ad125e..3d0ae09c2b8e 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h
@@ -184,8 +184,6 @@ static inline void writel(u32 data, volatile void __iomem *addr)
#define writew_relaxed __raw_writew
#define writel_relaxed __raw_writel
-#define mmiowb()
-
/*
* Need an mtype somewhere in here, for cache type deals?
* This is probably too long for an inline.
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index ae3a1e24fabd..dab26a71f577 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -21,6 +21,8 @@
#ifndef _ASM_HEXAGON_SYSCALL_H
#define _ASM_HEXAGON_SYSCALL_H
+#include <uapi/linux/audit.h>
+
typedef long (*syscall_fn)(unsigned long, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
@@ -41,4 +43,10 @@ static inline void syscall_get_arguments(struct task_struct *task,
{
memcpy(args, &(&regs->r00)[0], 6 * sizeof(args[0]));
}
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return AUDIT_ARCH_HEXAGON;
+}
+
#endif
diff --git a/arch/hexagon/include/asm/tlb.h b/arch/hexagon/include/asm/tlb.h
index 2f00772cc08a..f71c4ba83614 100644
--- a/arch/hexagon/include/asm/tlb.h
+++ b/arch/hexagon/include/asm/tlb.h
@@ -22,18 +22,6 @@
#include <linux/pagemap.h>
#include <asm/tlbflush.h>
-/*
- * We don't need any special per-pte or per-vma handling...
- */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it fills up
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8d7396bd1790..73a26f04644e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -83,10 +83,6 @@ config STACKTRACE_SUPPORT
config GENERIC_LOCKBREAK
def_bool n
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config HUGETLB_PAGE_SIZE_VARIABLE
bool
depends on HUGETLB_PAGE
diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 1e6fef69bb01..a511d62d447a 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -113,20 +113,6 @@ extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count);
*/
#define __ia64_mf_a() ia64_mfa()
-/**
- * ___ia64_mmiowb - I/O write barrier
- *
- * Ensure ordering of I/O space writes. This will make sure that writes
- * following the barrier will arrive after all previous writes. For most
- * ia64 platforms, this is a simple 'mf.a' instruction.
- *
- * See Documentation/driver-api/device-io.rst for more information.
- */
-static inline void ___ia64_mmiowb(void)
-{
- ia64_mfa();
-}
-
static inline void*
__ia64_mk_io_addr (unsigned long port)
{
@@ -161,7 +147,6 @@ __ia64_mk_io_addr (unsigned long port)
#define __ia64_writew ___ia64_writew
#define __ia64_writel ___ia64_writel
#define __ia64_writeq ___ia64_writeq
-#define __ia64_mmiowb ___ia64_mmiowb
/*
* For the in/out routines, we need to do "mf.a" _after_ doing the I/O access to ensure
@@ -296,7 +281,6 @@ __outsl (unsigned long port, const void *src, unsigned long count)
#define __outb platform_outb
#define __outw platform_outw
#define __outl platform_outl
-#define __mmiowb platform_mmiowb
#define inb(p) __inb(p)
#define inw(p) __inw(p)
@@ -310,7 +294,6 @@ __outsl (unsigned long port, const void *src, unsigned long count)
#define outsb(p,s,c) __outsb(p,s,c)
#define outsw(p,s,c) __outsw(p,s,c)
#define outsl(p,s,c) __outsl(p,s,c)
-#define mmiowb() __mmiowb()
/*
* The address passed to these functions are ioremap()ped already.
diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h
index 5133739966bc..beae261fbcb4 100644
--- a/arch/ia64/include/asm/machvec.h
+++ b/arch/ia64/include/asm/machvec.h
@@ -30,7 +30,6 @@ typedef void ia64_mv_irq_init_t (void);
typedef void ia64_mv_send_ipi_t (int, int, int, int);
typedef void ia64_mv_timer_interrupt_t (int, void *);
typedef void ia64_mv_global_tlb_purge_t (struct mm_struct *, unsigned long, unsigned long, unsigned long);
-typedef void ia64_mv_tlb_migrate_finish_t (struct mm_struct *);
typedef u8 ia64_mv_irq_to_vector (int);
typedef unsigned int ia64_mv_local_vector_to_irq (u8);
typedef char *ia64_mv_pci_get_legacy_mem_t (struct pci_bus *);
@@ -80,11 +79,6 @@ machvec_noop (void)
}
static inline void
-machvec_noop_mm (struct mm_struct *mm)
-{
-}
-
-static inline void
machvec_noop_task (struct task_struct *task)
{
}
@@ -96,7 +90,6 @@ machvec_noop_bus (struct pci_bus *bus)
extern void machvec_setup (char **);
extern void machvec_timer_interrupt (int, void *);
-extern void machvec_tlb_migrate_finish (struct mm_struct *);
# if defined (CONFIG_IA64_HP_SIM)
# include <asm/machvec_hpsim.h>
@@ -124,7 +117,6 @@ extern void machvec_tlb_migrate_finish (struct mm_struct *);
# define platform_send_ipi ia64_mv.send_ipi
# define platform_timer_interrupt ia64_mv.timer_interrupt
# define platform_global_tlb_purge ia64_mv.global_tlb_purge
-# define platform_tlb_migrate_finish ia64_mv.tlb_migrate_finish
# define platform_dma_init ia64_mv.dma_init
# define platform_dma_get_ops ia64_mv.dma_get_ops
# define platform_irq_to_vector ia64_mv.irq_to_vector
@@ -167,7 +159,6 @@ struct ia64_machine_vector {
ia64_mv_send_ipi_t *send_ipi;
ia64_mv_timer_interrupt_t *timer_interrupt;
ia64_mv_global_tlb_purge_t *global_tlb_purge;
- ia64_mv_tlb_migrate_finish_t *tlb_migrate_finish;
ia64_mv_dma_init *dma_init;
ia64_mv_dma_get_ops *dma_get_ops;
ia64_mv_irq_to_vector *irq_to_vector;
@@ -206,7 +197,6 @@ struct ia64_machine_vector {
platform_send_ipi, \
platform_timer_interrupt, \
platform_global_tlb_purge, \
- platform_tlb_migrate_finish, \
platform_dma_init, \
platform_dma_get_ops, \
platform_irq_to_vector, \
@@ -270,9 +260,6 @@ extern const struct dma_map_ops *dma_get_ops(struct device *);
#ifndef platform_global_tlb_purge
# define platform_global_tlb_purge ia64_global_tlb_purge /* default to architected version */
#endif
-#ifndef platform_tlb_migrate_finish
-# define platform_tlb_migrate_finish machvec_noop_mm
-#endif
#ifndef platform_kernel_launch_event
# define platform_kernel_launch_event machvec_noop
#endif
diff --git a/arch/ia64/include/asm/machvec_sn2.h b/arch/ia64/include/asm/machvec_sn2.h
index b5153d300289..a243e4fb4877 100644
--- a/arch/ia64/include/asm/machvec_sn2.h
+++ b/arch/ia64/include/asm/machvec_sn2.h
@@ -34,7 +34,6 @@ extern ia64_mv_irq_init_t sn_irq_init;
extern ia64_mv_send_ipi_t sn2_send_IPI;
extern ia64_mv_timer_interrupt_t sn_timer_interrupt;
extern ia64_mv_global_tlb_purge_t sn2_global_tlb_purge;
-extern ia64_mv_tlb_migrate_finish_t sn_tlb_migrate_finish;
extern ia64_mv_irq_to_vector sn_irq_to_vector;
extern ia64_mv_local_vector_to_irq sn_local_vector_to_irq;
extern ia64_mv_pci_get_legacy_mem_t sn_pci_get_legacy_mem;
@@ -77,7 +76,6 @@ extern ia64_mv_pci_fixup_bus_t sn_pci_fixup_bus;
#define platform_send_ipi sn2_send_IPI
#define platform_timer_interrupt sn_timer_interrupt
#define platform_global_tlb_purge sn2_global_tlb_purge
-#define platform_tlb_migrate_finish sn_tlb_migrate_finish
#define platform_pci_fixup sn_pci_fixup
#define platform_inb __sn_inb
#define platform_inw __sn_inw
diff --git a/arch/ia64/include/asm/mmiowb.h b/arch/ia64/include/asm/mmiowb.h
new file mode 100644
index 000000000000..297b85ac84a0
--- /dev/null
+++ b/arch/ia64/include/asm/mmiowb.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_IA64_MMIOWB_H
+#define _ASM_IA64_MMIOWB_H
+
+#include <asm/machvec.h>
+
+/**
+ * ___ia64_mmiowb - I/O write barrier
+ *
+ * Ensure ordering of I/O space writes. This will make sure that writes
+ * following the barrier will arrive after all previous writes. For most
+ * ia64 platforms, this is a simple 'mf.a' instruction.
+ */
+static inline void ___ia64_mmiowb(void)
+{
+ ia64_mfa();
+}
+
+#define __ia64_mmiowb ___ia64_mmiowb
+#define mmiowb() platform_mmiowb()
+
+#include <asm-generic/mmiowb.h>
+
+#endif /* _ASM_IA64_MMIOWB_H */
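With mmiowb() now routed through asm-generic/mmiowb.h, the barrier that drivers used to place by hand between MMIO writes and spin_unlock() is issued by the unlock itself (see the spinlock.h hunk below). The legacy pattern it replaces looked roughly like this (dev_lock, dev_regs and REG_DATA are placeholder names):

    spin_lock(&dev_lock);
    writel(val, dev_regs + REG_DATA);
    mmiowb();                   /* keep the write ordered w.r.t. the unlock */
    spin_unlock(&dev_lock);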
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
deleted file mode 100644
index 917910607e0e..000000000000
--- a/arch/ia64/include/asm/rwsem.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * R/W semaphores for ia64
- *
- * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
- * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
- * Copyright (C) 2005 Christoph Lameter <cl@linux.com>
- *
- * Based on asm-i386/rwsem.h and other architecture implementation.
- *
- * The MSW of the count is the negated number of active writers and
- * waiting lockers, and the LSW is the total number of active locks.
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for
- * the case of an uncontended lock. Readers increment by 1 and see a positive
- * value when uncontended, negative if there are writers (and maybe) readers
- * waiting (in which case it goes to sleep).
- */
-
-#ifndef _ASM_IA64_RWSEM_H
-#define _ASM_IA64_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
-#endif
-
-#include <asm/intrinsics.h>
-
-#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
-#define RWSEM_ACTIVE_BIAS (1L)
-#define RWSEM_ACTIVE_MASK (0xffffffffL)
-#define RWSEM_WAITING_BIAS (-0x100000000L)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-static inline int
-___down_read (struct rw_semaphore *sem)
-{
- long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
-
- return (result < 0);
-}
-
-static inline void
-__down_read (struct rw_semaphore *sem)
-{
- if (___down_read(sem))
- rwsem_down_read_failed(sem);
-}
-
-static inline int
-__down_read_killable (struct rw_semaphore *sem)
-{
- if (___down_read(sem))
- if (IS_ERR(rwsem_down_read_failed_killable(sem)))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * lock for writing
- */
-static inline long
-___down_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old + RWSEM_ACTIVE_WRITE_BIAS;
- } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
-
- return old;
-}
-
-static inline void
-__down_write (struct rw_semaphore *sem)
-{
- if (___down_write(sem))
- rwsem_down_write_failed(sem);
-}
-
-static inline int
-__down_write_killable (struct rw_semaphore *sem)
-{
- if (___down_write(sem)) {
- if (IS_ERR(rwsem_down_write_failed_killable(sem)))
- return -EINTR;
- }
-
- return 0;
-}
-
-/*
- * unlock after reading
- */
-static inline void
-__up_read (struct rw_semaphore *sem)
-{
- long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
-
- if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
- rwsem_wake(sem);
-}
-
-/*
- * unlock after writing
- */
-static inline void
-__up_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old - RWSEM_ACTIVE_WRITE_BIAS;
- } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
-
- if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
- rwsem_wake(sem);
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline int
-__down_read_trylock (struct rw_semaphore *sem)
-{
- long tmp;
- while ((tmp = atomic_long_read(&sem->count)) >= 0) {
- if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
- return 1;
- }
- }
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline int
-__down_write_trylock (struct rw_semaphore *sem)
-{
- long tmp = atomic_long_cmpxchg_acquire(&sem->count,
- RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
- return tmp == RWSEM_UNLOCKED_VALUE;
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void
-__downgrade_write (struct rw_semaphore *sem)
-{
- long old, new;
-
- do {
- old = atomic_long_read(&sem->count);
- new = old - RWSEM_WAITING_BIAS;
- } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
-
- if (old < 0)
- rwsem_downgrade_wake(sem);
-}
-
-#endif /* _ASM_IA64_RWSEM_H */
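For reference, the count encoding described in the removed header works out as follows (a worked example of the bias arithmetic, not code that remains anywhere in the tree):

    /* RWSEM_ACTIVE_WRITE_BIAS = RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS
     *                         = -0x100000000 + 1
     * uncontended writer: 0 + RWSEM_ACTIVE_WRITE_BIAS = 0xffffffff00000001
     *   (MSW = -1: one writer/waiter, LSW = 1: one active lock)
     * uncontended reader: 0 + RWSEM_ACTIVE_BIAS       = 0x0000000000000001
     *   (positive, so the reader proceeds without sleeping)
     */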
diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h
index afd0b3121b4c..5f620e66384e 100644
--- a/arch/ia64/include/asm/spinlock.h
+++ b/arch/ia64/include/asm/spinlock.h
@@ -73,6 +73,8 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
unsigned short *p = (unsigned short *)&lock->lock + 1, tmp;
+ /* This could be optimised with ARCH_HAS_MMIOWB */
+ mmiowb();
asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p));
WRITE_ONCE(*p, (tmp + 2) & ~1);
}
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index 0d9e7fab4a79..da108cd45174 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -74,7 +74,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
ia64_syscall_get_set_arguments(task, regs, args, 1);
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_IA64;
}
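This follows the tree-wide change that passes the inspected task to syscall_get_arch(); ia64 has no compat mode, so the new argument is simply ignored here and the constant is returned as before. Generic callers now look roughly like this (illustrative only, not the exact audit/seccomp source):

        /* report the audit arch of a (possibly foreign) traced task */
        audit_arch = syscall_get_arch(task);    /* previously: syscall_get_arch() */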
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index 516355a774bf..86ec034ba499 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -47,263 +47,6 @@
#include <asm/tlbflush.h>
#include <asm/machvec.h>
-/*
- * If we can't allocate a page to make a big batch of page pointers
- * to work on, then just handle a few from the on-stack structure.
- */
-#define IA64_GATHER_BUNDLE 8
-
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int nr;
- unsigned int max;
- unsigned char fullmm; /* non-zero means full mm flush */
- unsigned char need_flush; /* really unmapped some PTEs? */
- unsigned long start, end;
- unsigned long start_addr;
- unsigned long end_addr;
- struct page **pages;
- struct page *local[IA64_GATHER_BUNDLE];
-};
-
-struct ia64_tr_entry {
- u64 ifa;
- u64 itir;
- u64 pte;
- u64 rr;
-}; /*Record for tr entry!*/
-
-extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
-extern void ia64_ptr_entry(u64 target_mask, int slot);
-
-extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
-
-/*
- region register macros
-*/
-#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001)
-#define RR_VE(val) (((val) & 0x0000000000000001) << 0)
-#define RR_VE_MASK 0x0000000000000001L
-#define RR_VE_SHIFT 0
-#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f)
-#define RR_PS(val) (((val) & 0x000000000000003f) << 2)
-#define RR_PS_MASK 0x00000000000000fcL
-#define RR_PS_SHIFT 2
-#define RR_RID_MASK 0x00000000ffffff00L
-#define RR_TO_RID(val) ((val >> 8) & 0xffffff)
-
-static inline void
-ia64_tlb_flush_mmu_tlbonly(struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
- tlb->need_flush = 0;
-
- if (tlb->fullmm) {
- /*
- * Tearing down the entire address space. This happens both as a result
- * of exit() and execve(). The latter case necessitates the call to
- * flush_tlb_mm() here.
- */
- flush_tlb_mm(tlb->mm);
- } else if (unlikely (end - start >= 1024*1024*1024*1024UL
- || REGION_NUMBER(start) != REGION_NUMBER(end - 1)))
- {
- /*
- * If we flush more than a tera-byte or across regions, we're probably
- * better off just flushing the entire TLB(s). This should be very rare
- * and is not worth optimizing for.
- */
- flush_tlb_all();
- } else {
- /*
- * flush_tlb_range() takes a vma instead of a mm pointer because
- * some architectures want the vm_flags for ITLB/DTLB flush.
- */
- struct vm_area_struct vma = TLB_FLUSH_VMA(tlb->mm, 0);
-
- /* flush the address range from the tlb: */
- flush_tlb_range(&vma, start, end);
- /* now flush the virt. page-table area mapping the address range: */
- flush_tlb_range(&vma, ia64_thash(start), ia64_thash(end));
- }
-
-}
-
-static inline void
-ia64_tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- unsigned long i;
- unsigned int nr;
-
- /* lastly, release the freed pages */
- nr = tlb->nr;
-
- tlb->nr = 0;
- tlb->start_addr = ~0UL;
- for (i = 0; i < nr; ++i)
- free_page_and_swap_cache(tlb->pages[i]);
-}
-
-/*
- * Flush the TLB for address range START to END and, if not in fast mode, release the
- * freed pages that where gathered up to this point.
- */
-static inline void
-ia64_tlb_flush_mmu (struct mmu_gather *tlb, unsigned long start, unsigned long end)
-{
- if (!tlb->need_flush)
- return;
- ia64_tlb_flush_mmu_tlbonly(tlb, start, end);
- ia64_tlb_flush_mmu_free(tlb);
-}
-
-static inline void __tlb_alloc_page(struct mmu_gather *tlb)
-{
- unsigned long addr = __get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
-
- if (addr) {
- tlb->pages = (void *)addr;
- tlb->max = PAGE_SIZE / sizeof(void *);
- }
-}
-
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->max = ARRAY_SIZE(tlb->local);
- tlb->pages = tlb->local;
- tlb->nr = 0;
- tlb->fullmm = !(start | (end+1));
- tlb->start = start;
- tlb->end = end;
- tlb->start_addr = ~0UL;
-}
-
-/*
- * Called at the end of the shootdown operation to free up any resources that were
- * collected.
- */
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force)
- tlb->need_flush = 1;
- /*
- * Note: tlb->nr may be 0 at this point, so we can't rely on tlb->start_addr and
- * tlb->end_addr.
- */
- ia64_tlb_flush_mmu(tlb, start, end);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-
- if (tlb->pages != tlb->local)
- free_pages((unsigned long)tlb->pages, 0);
-}
-
-/*
- * Logically, this routine frees PAGE. On MP machines, the actual freeing of the page
- * must be delayed until after the TLB has been flushed (see comments at the beginning of
- * this file).
- */
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->need_flush = 1;
-
- if (!tlb->nr && tlb->pages == tlb->local)
- __tlb_alloc_page(tlb);
-
- tlb->pages[tlb->nr++] = page;
- VM_WARN_ON(tlb->nr > tlb->max);
- if (tlb->nr == tlb->max)
- return true;
- return false;
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu_tlbonly(tlb, tlb->start_addr, tlb->end_addr);
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu_free(tlb);
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- ia64_tlb_flush_mmu(tlb, tlb->start_addr, tlb->end_addr);
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- if (__tlb_remove_page(tlb, page))
- tlb_flush_mmu(tlb);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-/*
- * Remove TLB entry for PTE mapped at virtual address ADDRESS. This is called for any
- * PTE, not just those pointing to (normal) physical memory.
- */
-static inline void
-__tlb_remove_tlb_entry (struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
-{
- if (tlb->start_addr == ~0UL)
- tlb->start_addr = address;
- tlb->end_addr = address + PAGE_SIZE;
-}
-
-#define tlb_migrate_finish(mm) platform_tlb_migrate_finish(mm)
-
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
-#define tlb_remove_tlb_entry(tlb, ptep, addr) \
-do { \
- tlb->need_flush = 1; \
- __tlb_remove_tlb_entry(tlb, ptep, addr); \
-} while (0)
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, address) \
-do { \
- tlb->need_flush = 1; \
- __pte_free_tlb(tlb, ptep, address); \
-} while (0)
-
-#define pmd_free_tlb(tlb, ptep, address) \
-do { \
- tlb->need_flush = 1; \
- __pmd_free_tlb(tlb, ptep, address); \
-} while (0)
-
-#define pud_free_tlb(tlb, pudp, address) \
-do { \
- tlb->need_flush = 1; \
- __pud_free_tlb(tlb, pudp, address); \
-} while (0)
+#include <asm-generic/tlb.h>
#endif /* _ASM_IA64_TLB_H */
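The entire private mmu_gather machinery above is replaced by the generic batching in <asm-generic/tlb.h>; the flush heuristics it contained move into flush_tlb_range() (see the arch/ia64/mm/tlb.c hunk below). A simplified sketch of the generic flow that core mm drives and that ia64 now shares, with the signatures as of this series:

        struct mmu_gather tlb;

        tlb_gather_mmu(&tlb, mm, start, end);
        /* page-table walk: tlb_remove_tlb_entry() / tlb_remove_page()
         * are called for each unmapped PTE and freed page ... */
        tlb_finish_mmu(&tlb, start, end);       /* flush the TLB, then free the batched pages */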
diff --git a/arch/ia64/include/asm/tlbflush.h b/arch/ia64/include/asm/tlbflush.h
index 25e280810f6c..ceac10c4d6e2 100644
--- a/arch/ia64/include/asm/tlbflush.h
+++ b/arch/ia64/include/asm/tlbflush.h
@@ -14,6 +14,31 @@
#include <asm/mmu_context.h>
#include <asm/page.h>
+struct ia64_tr_entry {
+ u64 ifa;
+ u64 itir;
+ u64 pte;
+ u64 rr;
+}; /*Record for tr entry!*/
+
+extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size);
+extern void ia64_ptr_entry(u64 target_mask, int slot);
+extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
+
+/*
+ region register macros
+*/
+#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001)
+#define RR_VE(val) (((val) & 0x0000000000000001) << 0)
+#define RR_VE_MASK 0x0000000000000001L
+#define RR_VE_SHIFT 0
+#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f)
+#define RR_PS(val) (((val) & 0x000000000000003f) << 2)
+#define RR_PS_MASK 0x00000000000000fcL
+#define RR_PS_SHIFT 2
+#define RR_RID_MASK 0x00000000ffffff00L
+#define RR_TO_RID(val) ((val >> 8) & 0xffffff)
+
/*
* Now for some TLB flushing routines. This is the kind of stuff that
* can be very expensive, so try to avoid them whenever possible.
diff --git a/arch/ia64/include/uapi/asm/sockios.h b/arch/ia64/include/uapi/asm/sockios.h
deleted file mode 100644
index f27a12f95d20..000000000000
--- a/arch/ia64/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_IA64_SOCKIOS_H
-#define _ASM_IA64_SOCKIOS_H
-
-/*
- * Socket-level I/O control calls.
- *
- * Based on <asm-i386/sockios.h>.
- *
- * Modified 1998, 1999
- * David Mosberger-Tang <davidm@hpl.hp.com>, Hewlett-Packard Co
- */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif /* _ASM_IA64_SOCKIOS_H */
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 41eb281709da..1435e7a1a8cd 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -177,7 +177,7 @@ struct acpi_table_madt *acpi_madt __initdata;
static u8 has_8259;
static int __init
-acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
+acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_local_apic_override *lapic;
@@ -195,7 +195,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
}
static int __init
-acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lsapic(union acpi_subtable_headers *header, const unsigned long end)
{
struct acpi_madt_local_sapic *lsapic;
@@ -216,7 +216,7 @@ acpi_parse_lsapic(struct acpi_subtable_header * header, const unsigned long end)
}
static int __init
-acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_local_apic_nmi *lacpi_nmi;
@@ -230,7 +230,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
}
static int __init
-acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_iosapic(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_io_sapic *iosapic;
@@ -245,7 +245,7 @@ acpi_parse_iosapic(struct acpi_subtable_header * header, const unsigned long end
static unsigned int __initdata acpi_madt_rev;
static int __init
-acpi_parse_plat_int_src(struct acpi_subtable_header * header,
+acpi_parse_plat_int_src(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_interrupt_source *plintsrc;
@@ -329,7 +329,7 @@ unsigned int get_cpei_target_cpu(void)
}
static int __init
-acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
+acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_interrupt_override *p;
@@ -350,7 +350,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
}
static int __init
-acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_nmi_source *nmi_src;
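These prototype changes track the generic ACPI table parser, which now hands sub-table callbacks a union acpi_subtable_headers instead of a bare struct acpi_subtable_header. Because the common header is the first member of the union, the body of each handler keeps casting the pointer exactly as before; a hedged sketch (the handler name is invented):

        static int __init
        acpi_parse_example(union acpi_subtable_headers *header, const unsigned long end)
        {
                struct acpi_madt_local_sapic *lsapic =
                        (struct acpi_madt_local_sapic *)header;

                if (BAD_MADT_ENTRY(lsapic, end))
                        return -EINVAL;
                /* ... */
                return 0;
        }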
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 583a3746d70b..c9cfa760cd57 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -1058,9 +1058,7 @@ check_bugs (void)
static int __init run_dmi_scan(void)
{
- dmi_scan_machine();
- dmi_memdev_walk();
- dmi_set_dump_stack_arch_desc();
+ dmi_setup();
return 0;
}
core_initcall(run_dmi_scan);
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 5fc89aabdce1..5158bd28de05 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -305,8 +305,8 @@ local_flush_tlb_all (void)
ia64_srlz_i(); /* srlz.i implies srlz.d */
}
-void
-flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
+static void
+__flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
unsigned long end)
{
struct mm_struct *mm = vma->vm_mm;
@@ -343,6 +343,25 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
preempt_enable();
ia64_srlz_i(); /* srlz.i implies srlz.d */
}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+ unsigned long start, unsigned long end)
+{
+ if (unlikely(end - start >= 1024*1024*1024*1024UL
+ || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) {
+ /*
+ * If we flush more than a tera-byte or across regions, we're
+ * probably better off just flushing the entire TLB(s). This
+ * should be very rare and is not worth optimizing for.
+ */
+ flush_tlb_all();
+ } else {
+ /* flush the address range from the tlb */
+ __flush_tlb_range(vma, start, end);
+ /* flush the virt. page-table area mapping the addr range */
+ __flush_tlb_range(vma, ia64_thash(start), ia64_thash(end));
+ }
+}
EXPORT_SYMBOL(flush_tlb_range);
void ia64_tlb_init(void)
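The range/region heuristic that used to live in the removed ia64 mmu_gather code is folded into flush_tlb_range() itself: 1024*1024*1024*1024UL is 2^40 bytes (1 TiB), so any flush at least that large, or one that crosses an ia64 region boundary, degrades to flush_tlb_all(); smaller ranges flush both the linear range and its VHPT alias via ia64_thash(). Callers are unchanged, e.g.:

        flush_tlb_range(vma, vma->vm_start, vma->vm_end);       /* VHPT alias handled internally */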
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
index b73b0ebf8214..b510f4f17fd4 100644
--- a/arch/ia64/sn/kernel/sn2/sn2_smp.c
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -120,13 +120,6 @@ void sn_migrate(struct task_struct *task)
cpu_relax();
}
-void sn_tlb_migrate_finish(struct mm_struct *mm)
-{
- /* flush_tlb_mm is inefficient if more than 1 users of mm */
- if (mm == current->mm && mm && atomic_read(&mm->mm_users) == 1)
- flush_tlb_mm(mm);
-}
-
static void
sn2_ipi_flush_all_tlb(struct mm_struct *mm)
{
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b54206408f91..fe5cc2da6d10 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -20,7 +20,6 @@ config M68K
select GENERIC_STRNCPY_FROM_USER if MMU
select GENERIC_STRNLEN_USER if MMU
select ARCH_WANT_IPC_PARSE_VERSION
- select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
select HAVE_FUTEX_CMPXCHG if MMU && FUTEX
select HAVE_MOD_ARCH_SPECIFIC
select MODULES_USE_ELF_REL
@@ -28,17 +27,11 @@ config M68K
select OLD_SIGSUSPEND3
select OLD_SIGACTION
select ARCH_DISCARD_MEMBLOCK
+ select MMU_GATHER_NO_RANGE if MMU
config CPU_BIG_ENDIAN
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/m68k/amiga/cia.c b/arch/m68k/amiga/cia.c
index 2081b8cd5591..b9aee983e6f4 100644
--- a/arch/m68k/amiga/cia.c
+++ b/arch/m68k/amiga/cia.c
@@ -88,10 +88,19 @@ static irqreturn_t cia_handler(int irq, void *dev_id)
struct ciabase *base = dev_id;
int mach_irq;
unsigned char ints;
+ unsigned long flags;
+ /* Interrupts get disabled while the timer irq flag is cleared and
+ * the timer interrupt serviced.
+ */
mach_irq = base->cia_irq;
+ local_irq_save(flags);
ints = cia_set_irq(base, CIA_ICR_ALL);
amiga_custom.intreq = base->int_mask;
+ if (ints & 1)
+ generic_handle_irq(mach_irq);
+ local_irq_restore(flags);
+ mach_irq++, ints >>= 1;
for (; ints; mach_irq++, ints >>= 1) {
if (ints & 1)
generic_handle_irq(mach_irq);
diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c
index 65f63a457130..c32ab8041cf6 100644
--- a/arch/m68k/amiga/config.c
+++ b/arch/m68k/amiga/config.c
@@ -17,6 +17,7 @@
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/tty.h>
+#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/rtc.h>
#include <linux/init.h>
@@ -95,8 +96,6 @@ static char amiga_model_name[13] = "Amiga ";
static void amiga_sched_init(irq_handler_t handler);
static void amiga_get_model(char *model);
static void amiga_get_hardware_list(struct seq_file *m);
-/* amiga specific timer functions */
-static u32 amiga_gettimeoffset(void);
extern void amiga_mksound(unsigned int count, unsigned int ticks);
static void amiga_reset(void);
extern void amiga_init_sound(void);
@@ -386,7 +385,6 @@ void __init config_amiga(void)
mach_init_IRQ = amiga_init_IRQ;
mach_get_model = amiga_get_model;
mach_get_hardware_list = amiga_get_hardware_list;
- arch_gettimeoffset = amiga_gettimeoffset;
/*
* default MAX_DMA=0xffffffff on all machines. If we don't do so, the SCSI
@@ -464,7 +462,29 @@ void __init config_amiga(void)
*(unsigned char *)ZTWO_VADDR(0xde0002) |= 0x80;
}
+static u64 amiga_read_clk(struct clocksource *cs);
+
+static struct clocksource amiga_clk = {
+ .name = "ciab",
+ .rating = 250,
+ .read = amiga_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
static unsigned short jiffy_ticks;
+static u32 clk_total, clk_offset;
+
+static irqreturn_t ciab_timer_handler(int irq, void *dev_id)
+{
+ irq_handler_t timer_routine = dev_id;
+
+ clk_total += jiffy_ticks;
+ clk_offset = 0;
+ timer_routine(0, NULL);
+
+ return IRQ_HANDLED;
+}
static void __init amiga_sched_init(irq_handler_t timer_routine)
{
@@ -484,19 +504,22 @@ static void __init amiga_sched_init(irq_handler_t timer_routine)
* Please don't change this to use ciaa, as it interferes with the
* SCSI code. We'll have to take a look at this later
*/
- if (request_irq(IRQ_AMIGA_CIAB_TA, timer_routine, 0, "timer", NULL))
+ if (request_irq(IRQ_AMIGA_CIAB_TA, ciab_timer_handler, IRQF_TIMER,
+ "timer", timer_routine))
pr_err("Couldn't register timer interrupt\n");
/* start timer */
ciab.cra |= 0x11;
-}
-#define TICK_SIZE 10000
+ clocksource_register_hz(&amiga_clk, amiga_eclock);
+}
-/* This is always executed with interrupts disabled. */
-static u32 amiga_gettimeoffset(void)
+static u64 amiga_read_clk(struct clocksource *cs)
{
unsigned short hi, lo, hi2;
- u32 ticks, offset = 0;
+ unsigned long flags;
+ u32 ticks;
+
+ local_irq_save(flags);
/* read CIA B timer A current value */
hi = ciab.tahi;
@@ -513,12 +536,14 @@ static u32 amiga_gettimeoffset(void)
if (ticks > jiffy_ticks / 2)
/* check for pending interrupt */
if (cia_set_irq(&ciab_base, 0) & CIA_ICR_TA)
- offset = 10000;
+ clk_offset = jiffy_ticks;
ticks = jiffy_ticks - ticks;
- ticks = (10000 * ticks) / jiffy_ticks;
+ ticks += clk_offset + clk_total;
+
+ local_irq_restore(flags);
- return (ticks + offset) * 1000;
+ return ticks;
}
static void amiga_reset(void) __noreturn;
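This is the first of several m68k boards converted from the deprecated arch_gettimeoffset hook to the clocksource API in this series; the Atari, BVME6000, HP300 and other conversions below follow the same shape. The common pattern, with hypothetical names (foo_*, FOO_*) standing in for the per-board pieces:

        static u32 clk_total;                   /* cycles accumulated at each tick IRQ */

        static u64 foo_read_clk(struct clocksource *cs)
        {
                /* total = completed periods + progress into the current one */
                return clk_total + (FOO_TIMER_CYCLES - foo_read_hw_counter());
        }

        static struct clocksource foo_clk = {
                .name   = "foo",
                .rating = 250,
                .read   = foo_read_clk,
                .mask   = CLOCKSOURCE_MASK(32),
                .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
        };

        /* in foo_sched_init(), after the timer IRQ has been requested: */
        clocksource_register_hz(&foo_clk, FOO_TIMER_CLOCK_FREQ);

The real read callbacks additionally latch the hardware counter with interrupts disabled and account for a pending-but-unserviced tick (the clk_offset bookkeeping), which is why they are more involved than this sketch.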
diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c
index aef8d42e078d..7d168e6dfb01 100644
--- a/arch/m68k/apollo/config.c
+++ b/arch/m68k/apollo/config.c
@@ -29,7 +29,6 @@ u_long apollo_model;
extern void dn_sched_init(irq_handler_t handler);
extern void dn_init_IRQ(void);
-extern u32 dn_gettimeoffset(void);
extern int dn_dummy_hwclk(int, struct rtc_time *);
extern void dn_dummy_reset(void);
#ifdef CONFIG_HEARTBEAT
@@ -152,7 +151,6 @@ void __init config_apollo(void)
mach_sched_init=dn_sched_init; /* */
mach_init_IRQ=dn_init_IRQ;
- arch_gettimeoffset = dn_gettimeoffset;
mach_max_dma_address = 0xffffffff;
mach_hwclk = dn_dummy_hwclk; /* */
mach_reset = dn_dummy_reset; /* */
@@ -205,11 +203,6 @@ void dn_sched_init(irq_handler_t timer_routine)
pr_err("Couldn't register timer interrupt\n");
}
-u32 dn_gettimeoffset(void)
-{
- return 0xdeadbeef;
-}
-
int dn_dummy_hwclk(int op, struct rtc_time *t) {
diff --git a/arch/m68k/atari/ataints.c b/arch/m68k/atari/ataints.c
index 3d2b63bedf05..56f02ea2c248 100644
--- a/arch/m68k/atari/ataints.c
+++ b/arch/m68k/atari/ataints.c
@@ -142,7 +142,7 @@ struct mfptimerbase {
.name = "MFP Timer D"
};
-static irqreturn_t mfptimer_handler(int irq, void *dev_id)
+static irqreturn_t mfp_timer_d_handler(int irq, void *dev_id)
{
struct mfptimerbase *base = dev_id;
int mach_irq;
@@ -344,7 +344,7 @@ void __init atari_init_IRQ(void)
st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 0xf0) | 0x6;
/* request timer D dispatch handler */
- if (request_irq(IRQ_MFP_TIMD, mfptimer_handler, IRQF_SHARED,
+ if (request_irq(IRQ_MFP_TIMD, mfp_timer_d_handler, IRQF_SHARED,
stmfp_base.name, &stmfp_base))
pr_err("Couldn't register %s interrupt\n", stmfp_base.name);
diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c
index 4fcc4b1df1c0..902255e7b5b2 100644
--- a/arch/m68k/atari/config.c
+++ b/arch/m68k/atari/config.c
@@ -78,7 +78,6 @@ static void atari_heartbeat(int on);
/* atari specific timer functions (in time.c) */
extern void atari_sched_init(irq_handler_t);
-extern u32 atari_gettimeoffset(void);
extern int atari_mste_hwclk (int, struct rtc_time *);
extern int atari_tt_hwclk (int, struct rtc_time *);
@@ -205,7 +204,6 @@ void __init config_atari(void)
mach_init_IRQ = atari_init_IRQ;
mach_get_model = atari_get_model;
mach_get_hardware_list = atari_get_hardware_list;
- arch_gettimeoffset = atari_gettimeoffset;
mach_reset = atari_reset;
mach_max_dma_address = 0xffffff;
#if IS_ENABLED(CONFIG_INPUT_M68K_BEEP)
diff --git a/arch/m68k/atari/time.c b/arch/m68k/atari/time.c
index 9cca64286464..ce923a523695 100644
--- a/arch/m68k/atari/time.c
+++ b/arch/m68k/atari/time.c
@@ -16,6 +16,7 @@
#include <linux/init.h>
#include <linux/rtc.h>
#include <linux/bcd.h>
+#include <linux/clocksource.h>
#include <linux/delay.h>
#include <linux/export.h>
@@ -24,6 +25,35 @@
DEFINE_SPINLOCK(rtc_lock);
EXPORT_SYMBOL_GPL(rtc_lock);
+static u64 atari_read_clk(struct clocksource *cs);
+
+static struct clocksource atari_clk = {
+ .name = "mfp",
+ .rating = 100,
+ .read = atari_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total;
+static u8 last_timer_count;
+
+static irqreturn_t mfp_timer_c_handler(int irq, void *dev_id)
+{
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ do {
+ last_timer_count = st_mfp.tim_dt_c;
+ } while (last_timer_count == 1);
+ clk_total += INT_TICKS;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+
+ return IRQ_HANDLED;
+}
+
void __init
atari_sched_init(irq_handler_t timer_routine)
{
@@ -32,31 +62,33 @@ atari_sched_init(irq_handler_t timer_routine)
/* start timer C, div = 1:100 */
st_mfp.tim_ct_cd = (st_mfp.tim_ct_cd & 15) | 0x60;
/* install interrupt service routine for MFP Timer C */
- if (request_irq(IRQ_MFP_TIMC, timer_routine, 0, "timer", timer_routine))
+ if (request_irq(IRQ_MFP_TIMC, mfp_timer_c_handler, IRQF_TIMER, "timer",
+ timer_routine))
pr_err("Couldn't register timer interrupt\n");
+
+ clocksource_register_hz(&atari_clk, INT_CLK);
}
/* ++andreas: gettimeoffset fixed to check for pending interrupt */
-#define TICK_SIZE 10000
-
-/* This is always executed with interrupts disabled. */
-u32 atari_gettimeoffset(void)
+static u64 atari_read_clk(struct clocksource *cs)
{
- u32 ticks, offset = 0;
-
- /* read MFP timer C current value */
- ticks = st_mfp.tim_dt_c;
- /* The probability of underflow is less than 2% */
- if (ticks > INT_TICKS - INT_TICKS / 50)
- /* Check for pending timer interrupt */
- if (st_mfp.int_pn_b & (1 << 5))
- offset = TICK_SIZE;
-
- ticks = INT_TICKS - ticks;
- ticks = ticks * 10000L / INT_TICKS;
-
- return (ticks + offset) * 1000;
+ unsigned long flags;
+ u8 count;
+ u32 ticks;
+
+ local_irq_save(flags);
+ /* Ensure that the count is monotonically decreasing, even though
+ * the result may briefly stop changing after counter wrap-around.
+ */
+ count = min(st_mfp.tim_dt_c, last_timer_count);
+ last_timer_count = count;
+
+ ticks = INT_TICKS - count;
+ ticks += clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
}
diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c
index 143ee9fa3893..8ebaabc931cd 100644
--- a/arch/m68k/bvme6000/config.c
+++ b/arch/m68k/bvme6000/config.c
@@ -18,6 +18,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
+#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/linkage.h>
#include <linux/init.h>
@@ -39,16 +40,10 @@
static void bvme6000_get_model(char *model);
extern void bvme6000_sched_init(irq_handler_t handler);
-extern u32 bvme6000_gettimeoffset(void);
extern int bvme6000_hwclk (int, struct rtc_time *);
extern void bvme6000_reset (void);
void bvme6000_set_vectors (void);
-/* Save tick handler routine pointer, will point to xtime_update() in
- * kernel/timer/timekeeping.c, called via bvme6000_process_int() */
-
-static irq_handler_t tick_handler;
-
int __init bvme6000_parse_bootinfo(const struct bi_record *bi)
{
@@ -110,7 +105,6 @@ void __init config_bvme6000(void)
mach_max_dma_address = 0xffffffff;
mach_sched_init = bvme6000_sched_init;
mach_init_IRQ = bvme6000_init_IRQ;
- arch_gettimeoffset = bvme6000_gettimeoffset;
mach_hwclk = bvme6000_hwclk;
mach_reset = bvme6000_reset;
mach_get_model = bvme6000_get_model;
@@ -154,15 +148,38 @@ irqreturn_t bvme6000_abort_int (int irq, void *dev_id)
return IRQ_HANDLED;
}
+static u64 bvme6000_read_clk(struct clocksource *cs);
+
+static struct clocksource bvme6000_clk = {
+ .name = "rtc",
+ .rating = 250,
+ .read = bvme6000_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total, clk_offset;
+
+#define RTC_TIMER_CLOCK_FREQ 8000000
+#define RTC_TIMER_CYCLES (RTC_TIMER_CLOCK_FREQ / HZ)
+#define RTC_TIMER_COUNT ((RTC_TIMER_CYCLES / 2) - 1)
static irqreturn_t bvme6000_timer_int (int irq, void *dev_id)
{
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE;
- unsigned char msr = rtc->msr & 0xc0;
+ unsigned char msr;
+ local_irq_save(flags);
+ msr = rtc->msr & 0xc0;
rtc->msr = msr | 0x20; /* Ack the interrupt */
+ clk_total += RTC_TIMER_CYCLES;
+ clk_offset = 0;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
- return tick_handler(irq, dev_id);
+ return IRQ_HANDLED;
}
/*
@@ -181,14 +198,13 @@ void bvme6000_sched_init (irq_handler_t timer_routine)
rtc->msr = 0; /* Ensure timer registers accessible */
- tick_handler = timer_routine;
- if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, 0,
- "timer", bvme6000_timer_int))
+ if (request_irq(BVME_IRQ_RTC, bvme6000_timer_int, IRQF_TIMER, "timer",
+ timer_routine))
panic ("Couldn't register timer int");
rtc->t1cr_omr = 0x04; /* Mode 2, ext clk */
- rtc->t1msb = 39999 >> 8;
- rtc->t1lsb = 39999 & 0xff;
+ rtc->t1msb = RTC_TIMER_COUNT >> 8;
+ rtc->t1lsb = RTC_TIMER_COUNT & 0xff;
rtc->irr_icr1 &= 0xef; /* Route timer 1 to INTR pin */
rtc->msr = 0x40; /* Access int.cntrl, etc */
rtc->pfr_icr0 = 0x80; /* Just timer 1 ints enabled */
@@ -200,14 +216,14 @@ void bvme6000_sched_init (irq_handler_t timer_routine)
rtc->msr = msr;
+ clocksource_register_hz(&bvme6000_clk, RTC_TIMER_CLOCK_FREQ);
+
if (request_irq(BVME_IRQ_ABORT, bvme6000_abort_int, 0,
"abort", bvme6000_abort_int))
panic ("Couldn't register abort int");
}
-/* This is always executed with interrupts disabled. */
-
/*
* NOTE: Don't accept any readings within 5us of rollover, as
* the T1INT bit may be a little slow getting set. There is also
@@ -215,14 +231,18 @@ void bvme6000_sched_init (irq_handler_t timer_routine)
* results...
*/
-u32 bvme6000_gettimeoffset(void)
+static u64 bvme6000_read_clk(struct clocksource *cs)
{
+ unsigned long flags;
volatile RtcPtr_t rtc = (RtcPtr_t)BVME_RTC_BASE;
volatile PitRegsPtr pit = (PitRegsPtr)BVME_PIT_BASE;
- unsigned char msr = rtc->msr & 0xc0;
+ unsigned char msr, msb;
unsigned char t1int, t1op;
u32 v = 800000, ov;
+ local_irq_save(flags);
+
+ msr = rtc->msr & 0xc0;
rtc->msr = 0; /* Ensure timer registers accessible */
do {
@@ -230,22 +250,25 @@ u32 bvme6000_gettimeoffset(void)
t1int = rtc->msr & 0x20;
t1op = pit->pcdr & 0x04;
rtc->t1cr_omr |= 0x40; /* Latch timer1 */
- v = rtc->t1msb << 8; /* Read timer1 */
- v |= rtc->t1lsb; /* Read timer1 */
+ msb = rtc->t1msb; /* Read timer1 */
+ v = (msb << 8) | rtc->t1lsb; /* Read timer1 */
} while (t1int != (rtc->msr & 0x20) ||
t1op != (pit->pcdr & 0x04) ||
abs(ov-v) > 80 ||
- v > 39960);
+ v > RTC_TIMER_COUNT - (RTC_TIMER_COUNT / 100));
- v = 39999 - v;
+ v = RTC_TIMER_COUNT - v;
if (!t1op) /* If in second half cycle.. */
- v += 40000;
- v /= 8; /* Convert ticks to microseconds */
- if (t1int)
- v += 10000; /* Int pending, + 10ms */
+ v += RTC_TIMER_CYCLES / 2;
+ if (msb > 0 && t1int)
+ clk_offset = RTC_TIMER_CYCLES;
rtc->msr = msr;
- return v * 1000;
+ v += clk_offset + clk_total;
+
+ local_irq_restore(flags);
+
+ return v;
}
/*
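As a sanity check on the new BVME6000 constants (assuming HZ = 100 on m68k), they reproduce the magic numbers they replace:

        RTC_TIMER_CYCLES = 8000000 / 100  = 80000
        RTC_TIMER_COUNT  = 80000 / 2 - 1  = 39999      /* the old hard-coded 39999 */
        RTC_TIMER_CYCLES / 2              = 40000      /* the old "+= 40000" half-cycle step */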
diff --git a/arch/m68k/configs/amcore_defconfig b/arch/m68k/configs/amcore_defconfig
index 0857cdbfde0c..d5e683dd885d 100644
--- a/arch/m68k/configs/amcore_defconfig
+++ b/arch/m68k/configs/amcore_defconfig
@@ -12,7 +12,6 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_SLUB_DEBUG is not set
# CONFIG_COMPAT_BRK is not set
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_CFQ is not set
# CONFIG_MMU is not set
diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig
index 525421ae277d..fea392cfcf1b 100644
--- a/arch/m68k/configs/amiga_defconfig
+++ b/arch/m68k/configs/amiga_defconfig
@@ -56,6 +56,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -210,9 +211,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -234,9 +232,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -313,7 +308,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -460,12 +454,12 @@ CONFIG_RTC_DRV_RP5C01=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -573,9 +567,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -640,6 +636,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -649,4 +646,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig
index db0e654a88d5..2474d267460e 100644
--- a/arch/m68k/configs/apollo_defconfig
+++ b/arch/m68k/configs/apollo_defconfig
@@ -52,6 +52,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -206,9 +207,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -230,9 +228,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -309,7 +304,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -420,12 +414,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -533,9 +527,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -600,6 +596,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -609,4 +606,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig
index 1451168eb789..0fc7d2992fe0 100644
--- a/arch/m68k/configs/atari_defconfig
+++ b/arch/m68k/configs/atari_defconfig
@@ -59,6 +59,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -213,9 +214,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -237,9 +235,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -316,7 +311,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -442,12 +436,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -555,9 +549,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -622,6 +618,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -631,4 +628,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig
index b0d3609f5bb3..699df9fdf866 100644
--- a/arch/m68k/configs/bvme6000_defconfig
+++ b/arch/m68k/configs/bvme6000_defconfig
@@ -49,6 +49,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -203,9 +204,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -227,9 +225,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -306,7 +301,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -413,12 +407,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -526,9 +520,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -593,6 +589,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -602,4 +599,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig
index 4ed7c151347c..b50802255324 100644
--- a/arch/m68k/configs/hp300_defconfig
+++ b/arch/m68k/configs/hp300_defconfig
@@ -51,6 +51,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -205,9 +206,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -229,9 +227,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -308,7 +303,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -422,12 +416,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -535,9 +529,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -602,6 +598,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -611,4 +608,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/m5475evb_defconfig b/arch/m68k/configs/m5475evb_defconfig
index 4f4ccd13c11b..434bd3750966 100644
--- a/arch/m68k/configs/m5475evb_defconfig
+++ b/arch/m68k/configs/m5475evb_defconfig
@@ -11,7 +11,6 @@ CONFIG_SYSCTL_SYSCALL=y
# CONFIG_AIO is not set
CONFIG_EMBEDDED=y
CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig
index 0dc544e1ce1f..04e7d70f6030 100644
--- a/arch/m68k/configs/mac_defconfig
+++ b/arch/m68k/configs/mac_defconfig
@@ -50,6 +50,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -204,9 +205,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -228,9 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -310,7 +305,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -444,12 +438,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -557,9 +551,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -624,6 +620,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -633,4 +630,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig
index 5a7b7b0d6e72..5e1cc4c17852 100644
--- a/arch/m68k/configs/multi_defconfig
+++ b/arch/m68k/configs/multi_defconfig
@@ -70,6 +70,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -224,9 +225,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -248,9 +246,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -330,7 +325,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -526,12 +520,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -639,9 +633,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -706,6 +702,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -715,4 +712,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig
index 71eb9be1803b..170ac8792c2d 100644
--- a/arch/m68k/configs/mvme147_defconfig
+++ b/arch/m68k/configs/mvme147_defconfig
@@ -48,6 +48,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -202,9 +203,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -226,9 +224,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -305,7 +300,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -412,12 +406,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -525,9 +519,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -592,6 +588,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -601,4 +598,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig
index ea2ebd4241c0..d865592a423e 100644
--- a/arch/m68k/configs/mvme16x_defconfig
+++ b/arch/m68k/configs/mvme16x_defconfig
@@ -49,6 +49,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -203,9 +204,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -227,9 +225,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -306,7 +301,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -413,12 +407,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -526,9 +520,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -593,6 +589,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -602,4 +599,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig
index cef6dc47c725..034a9de90484 100644
--- a/arch/m68k/configs/q40_defconfig
+++ b/arch/m68k/configs/q40_defconfig
@@ -50,6 +50,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -204,9 +205,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -228,9 +226,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -307,7 +302,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -431,12 +425,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -544,9 +538,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -611,6 +607,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -620,4 +617,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/configs/stmark2_defconfig b/arch/m68k/configs/stmark2_defconfig
index 69f23c7b0497..27fa9465d19d 100644
--- a/arch/m68k/configs/stmark2_defconfig
+++ b/arch/m68k/configs/stmark2_defconfig
@@ -17,7 +17,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_BLK_CMDLINE_PARSER=y
# CONFIG_MMU is not set
diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig
index 69f2282dc4e9..49be0f9fcd8d 100644
--- a/arch/m68k/configs/sun3_defconfig
+++ b/arch/m68k/configs/sun3_defconfig
@@ -46,6 +46,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -200,9 +201,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -224,9 +222,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -303,7 +298,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -415,12 +409,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -528,9 +522,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -595,6 +591,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -604,3 +601,4 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig
index e91267e868b2..a71acf4a6004 100644
--- a/arch/m68k/configs/sun3x_defconfig
+++ b/arch/m68k/configs/sun3x_defconfig
@@ -46,6 +46,7 @@ CONFIG_TLS=m
CONFIG_XFRM_MIGRATE=y
CONFIG_NET_KEY=y
CONFIG_XDP_SOCKETS=y
+CONFIG_XDP_SOCKETS_DIAG=m
CONFIG_INET=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
@@ -200,9 +201,6 @@ CONFIG_NFT_FIB_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NF_FLOW_TABLE_IPV4=m
CONFIG_NF_LOG_ARP=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
-CONFIG_NFT_MASQ_IPV4=m
-CONFIG_NFT_REDIR_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
@@ -224,9 +222,6 @@ CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_MASQ_IPV6=m
-CONFIG_NFT_REDIR_IPV6=m
CONFIG_NFT_DUP_IPV6=m
CONFIG_NFT_FIB_IPV6=m
CONFIG_NF_FLOW_TABLE_IPV6=m
@@ -303,7 +298,6 @@ CONFIG_AF_KCM=m
# CONFIG_WIRELESS is not set
CONFIG_PSAMPLE=m
CONFIG_NET_IFE=m
-CONFIG_NET_DEVLINK=m
# CONFIG_UEVENT_HELPER is not set
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
@@ -414,12 +408,12 @@ CONFIG_RTC_DRV_GENERIC=m
# CONFIG_VIRTIO_MENU is not set
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_DAX=m
+# CONFIG_VALIDATE_FS_PARSER is not set
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=m
CONFIG_JFS_FS=m
CONFIG_OCFS2_FS=m
# CONFIG_OCFS2_DEBUG_MASKLOG is not set
-CONFIG_FS_ENCRYPTION=m
CONFIG_FANOTIFY=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
@@ -527,9 +521,11 @@ CONFIG_CRYPTO_AEGIS256=m
CONFIG_CRYPTO_MORUS640=m
CONFIG_CRYPTO_MORUS1280=m
CONFIG_CRYPTO_CFB=m
+CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
+CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_ADIANTUM=m
CONFIG_CRYPTO_XCBC=m
@@ -594,6 +590,7 @@ CONFIG_TEST_OVERFLOW=m
CONFIG_TEST_RHASHTABLE=m
CONFIG_TEST_HASH=m
CONFIG_TEST_IDA=m
+CONFIG_TEST_VMALLOC=m
CONFIG_TEST_USER_COPY=m
CONFIG_TEST_BPF=m
CONFIG_FIND_BIT_BENCHMARK=m
@@ -603,4 +600,5 @@ CONFIG_TEST_UDELAY=m
CONFIG_TEST_STATIC_KEYS=m
CONFIG_TEST_KMOD=m
CONFIG_TEST_MEMCAT_P=m
+CONFIG_TEST_STACKINIT=m
CONFIG_EARLY_PRINTK=y
diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c
index a19bcd23f80b..a161d44fd20b 100644
--- a/arch/m68k/hp300/config.c
+++ b/arch/m68k/hp300/config.c
@@ -254,7 +254,6 @@ void __init config_hp300(void)
mach_sched_init = hp300_sched_init;
mach_init_IRQ = hp300_init_IRQ;
mach_get_model = hp300_get_model;
- arch_gettimeoffset = hp300_gettimeoffset;
mach_hwclk = hp300_hwclk;
mach_get_ss = hp300_get_ss;
mach_reset = hp300_reset;
diff --git a/arch/m68k/hp300/time.c b/arch/m68k/hp300/time.c
index 289d928a46cb..bfee13e1d0fe 100644
--- a/arch/m68k/hp300/time.c
+++ b/arch/m68k/hp300/time.c
@@ -8,6 +8,7 @@
*/
#include <asm/ptrace.h>
+#include <linux/clocksource.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/sched.h>
@@ -19,6 +20,18 @@
#include <asm/traps.h>
#include <asm/blinken.h>
+static u64 hp300_read_clk(struct clocksource *cs);
+
+static struct clocksource hp300_clk = {
+ .name = "timer",
+ .rating = 250,
+ .read = hp300_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total, clk_offset;
+
/* Clock hardware definitions */
#define CLOCKBASE 0xf05f8000
@@ -28,39 +41,61 @@
#define CLKCR3 CLKCR1
#define CLKSR CLKCR2
#define CLKMSB1 0x5
+#define CLKLSB1 0x7
#define CLKMSB2 0x9
#define CLKMSB3 0xD
+#define CLKSR_INT1 BIT(0)
+
/* This is for machines which generate the exact clock. */
-#define USECS_PER_JIFFY (1000000/HZ)
-#define INTVAL ((10000 / 4) - 1)
+#define HP300_TIMER_CLOCK_FREQ 250000
+#define HP300_TIMER_CYCLES (HP300_TIMER_CLOCK_FREQ / HZ)
+#define INTVAL (HP300_TIMER_CYCLES - 1)
static irqreturn_t hp300_tick(int irq, void *dev_id)
{
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
unsigned long tmp;
- irq_handler_t vector = dev_id;
+
+ local_irq_save(flags);
in_8(CLOCKBASE + CLKSR);
asm volatile ("movpw %1@(5),%0" : "=d" (tmp) : "a" (CLOCKBASE));
+ clk_total += INTVAL;
+ clk_offset = 0;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+
/* Turn off the network and SCSI leds */
blinken_leds(0, 0xe0);
- return vector(irq, NULL);
+ return IRQ_HANDLED;
}
-u32 hp300_gettimeoffset(void)
+static u64 hp300_read_clk(struct clocksource *cs)
{
- /* Read current timer 1 value */
- unsigned char lsb, msb1, msb2;
- unsigned short ticks;
-
- msb1 = in_8(CLOCKBASE + 5);
- lsb = in_8(CLOCKBASE + 7);
- msb2 = in_8(CLOCKBASE + 5);
- if (msb1 != msb2)
- /* A carry happened while we were reading. Read it again */
- lsb = in_8(CLOCKBASE + 7);
- ticks = INTVAL - ((msb2 << 8) | lsb);
- return ((USECS_PER_JIFFY * ticks) / INTVAL) * 1000;
+ unsigned long flags;
+ unsigned char lsb, msb, msb_new;
+ u32 ticks;
+
+ local_irq_save(flags);
+ /* Read current timer 1 value */
+ msb = in_8(CLOCKBASE + CLKMSB1);
+again:
+ if ((in_8(CLOCKBASE + CLKSR) & CLKSR_INT1) && msb > 0)
+ clk_offset = INTVAL;
+ lsb = in_8(CLOCKBASE + CLKLSB1);
+ msb_new = in_8(CLOCKBASE + CLKMSB1);
+ if (msb_new != msb) {
+ msb = msb_new;
+ goto again;
+ }
+
+ ticks = INTVAL - ((msb << 8) | lsb);
+ ticks += clk_offset + clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
}
void __init hp300_sched_init(irq_handler_t vector)
@@ -70,9 +105,11 @@ void __init hp300_sched_init(irq_handler_t vector)
asm volatile(" movpw %0,%1@(5)" : : "d" (INTVAL), "a" (CLOCKBASE));
- if (request_irq(IRQ_AUTO_6, hp300_tick, 0, "timer tick", vector))
+ if (request_irq(IRQ_AUTO_6, hp300_tick, IRQF_TIMER, "timer tick", vector))
pr_err("Couldn't register timer interrupt\n");
out_8(CLOCKBASE + CLKCR2, 0x1); /* select CR1 */
out_8(CLOCKBASE + CLKCR1, 0x40); /* enable irq */
+
+ clocksource_register_hz(&hp300_clk, HP300_TIMER_CLOCK_FREQ);
}
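
The hp300 conversion above establishes the pattern repeated by the other m68k boards in this series: the tick handler accumulates whole ticks of timer cycles in clk_total, and the clocksource read callback adds the live hardware count, plus clk_offset when a wrap has already been latched but the tick handler has not run yet. With the usual m68k HZ of 100, the 250 kHz hp300 timer gives 2500 cycles per tick, i.e. 4 us resolution. A minimal sketch of that shared shape (illustration only -- the platform_* helpers and TIMER_CYCLES are placeholders, not kernel APIs):

    #include <linux/clocksource.h>

    static u32 clk_total;   /* cycles consumed by completed ticks */
    static u32 clk_offset;  /* one wrapped-but-unhandled tick, if any */

    static u64 example_read_clk(struct clocksource *cs)
    {
            unsigned long flags;
            u32 count, ticks;

            local_irq_save(flags);
            count = platform_read_down_counter();   /* counts down from TIMER_CYCLES */
            if (platform_tick_irq_pending())
                    clk_offset = TIMER_CYCLES;      /* wrapped, tick handler not run yet */
            ticks = (TIMER_CYCLES - count) + clk_offset + clk_total;
            local_irq_restore(flags);

            return ticks;
    }

The matching tick handler does clk_total += TIMER_CYCLES and clears clk_offset before invoking the generic timer routine, so the value returned above stays monotonic across counter wrap-arounds.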
diff --git a/arch/m68k/hp300/time.h b/arch/m68k/hp300/time.h
index f5583ec4033d..1d77b55cc72a 100644
--- a/arch/m68k/hp300/time.h
+++ b/arch/m68k/hp300/time.h
@@ -1,2 +1 @@
extern void hp300_sched_init(irq_handler_t vector);
-extern u32 hp300_gettimeoffset(void);
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 2c359d9e80f6..0ddae4a74adb 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h
index 782b78f8a048..6c03ca5bc436 100644
--- a/arch/m68k/include/asm/io_mm.h
+++ b/arch/m68k/include/asm/io_mm.h
@@ -377,8 +377,6 @@ static inline void isa_delay(void)
#define writesw(port, buf, nr) raw_outsw((port), (u16 *)(buf), (nr))
#define writesl(port, buf, nr) raw_outsl((port), (u32 *)(buf), (nr))
-#define mmiowb()
-
#ifndef CONFIG_SUN3
#define IO_SPACE_LIMIT 0xffff
#else
diff --git a/arch/m68k/include/asm/mvme147hw.h b/arch/m68k/include/asm/mvme147hw.h
index 9c7ff67c5ffd..257b29184af9 100644
--- a/arch/m68k/include/asm/mvme147hw.h
+++ b/arch/m68k/include/asm/mvme147hw.h
@@ -66,7 +66,7 @@ struct pcc_regs {
#define PCC_INT_ENAB 0x08
#define PCC_TIMER_INT_CLR 0x80
-#define PCC_TIMER_PRELOAD 63936l
+#define PCC_TIMER_CLR_OVF 0x04
#define PCC_LEVEL_ABORT 0x07
#define PCC_LEVEL_SERIAL 0x04
diff --git a/arch/m68k/include/asm/syscall.h b/arch/m68k/include/asm/syscall.h
new file mode 100644
index 000000000000..465ac039be09
--- /dev/null
+++ b/arch/m68k/include/asm/syscall.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_M68K_SYSCALL_H
+#define _ASM_M68K_SYSCALL_H
+
+#include <uapi/linux/audit.h>
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return AUDIT_ARCH_M68K;
+}
+
+#endif /* _ASM_M68K_SYSCALL_H */
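
This new header is the m68k piece of a tree-wide interface change (also visible in the microblaze and MIPS hunks further down): syscall_get_arch() now takes the task being inspected instead of implicitly operating on current, so audit and seccomp can query another task's syscall ABI. A hypothetical caller, for illustration only:

    #include <linux/sched.h>
    #include <asm/syscall.h>

    static void example_fill_arch(struct task_struct *task, int *arch)
    {
            *arch = syscall_get_arch(task);   /* previously syscall_get_arch(void) */
    }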
diff --git a/arch/m68k/include/asm/tlb.h b/arch/m68k/include/asm/tlb.h
index b4b9efb6f963..3c81f6adfc8b 100644
--- a/arch/m68k/include/asm/tlb.h
+++ b/arch/m68k/include/asm/tlb.h
@@ -2,20 +2,6 @@
#ifndef _M68K_TLB_H
#define _M68K_TLB_H
-/*
- * m68k doesn't need any special per-pte or
- * per-vma handling..
- */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it
- * fills up.
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* _M68K_TLB_H */
diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
index cd9317d53276..11be08f4f750 100644
--- a/arch/m68k/mac/config.c
+++ b/arch/m68k/mac/config.c
@@ -54,8 +54,6 @@ struct mac_booter_data mac_bi_data;
/* The phys. video addr. - might be bogus on some machines */
static unsigned long mac_orig_videoaddr;
-/* Mac specific timer functions */
-extern u32 mac_gettimeoffset(void);
extern int mac_hwclk(int, struct rtc_time *);
extern void iop_preinit(void);
extern void iop_init(void);
@@ -155,7 +153,6 @@ void __init config_mac(void)
mach_sched_init = mac_sched_init;
mach_init_IRQ = mac_init_IRQ;
mach_get_model = mac_get_model;
- arch_gettimeoffset = mac_gettimeoffset;
mach_hwclk = mac_hwclk;
mach_reset = mac_reset;
mach_halt = mac_poweroff;
diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c
index 0b0289459173..3c2cfcb74982 100644
--- a/arch/m68k/mac/via.c
+++ b/arch/m68k/mac/via.c
@@ -23,6 +23,7 @@
*
*/
+#include <linux/clocksource.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
@@ -55,16 +56,6 @@ static __u8 rbv_clear;
static int gIER,gIFR,gBufA,gBufB;
/*
- * Timer defs.
- */
-
-#define TICK_SIZE 10000
-#define MAC_CLOCK_TICK (783300/HZ) /* ticks per HZ */
-#define MAC_CLOCK_LOW (MAC_CLOCK_TICK&0xFF)
-#define MAC_CLOCK_HIGH (MAC_CLOCK_TICK>>8)
-
-
-/*
* On Macs with a genuine VIA chip there is no way to mask an individual slot
* interrupt. This limitation also seems to apply to VIA clone logic cores in
* Quadra-like ASICs. (RBV and OSS machines don't have this limitation.)
@@ -272,22 +263,6 @@ void __init via_init(void)
}
/*
- * Start the 100 Hz clock
- */
-
-void __init via_init_clock(irq_handler_t func)
-{
- via1[vACR] |= 0x40;
- via1[vT1LL] = MAC_CLOCK_LOW;
- via1[vT1LH] = MAC_CLOCK_HIGH;
- via1[vT1CL] = MAC_CLOCK_LOW;
- via1[vT1CH] = MAC_CLOCK_HIGH;
-
- if (request_irq(IRQ_MAC_TIMER_1, func, 0, "timer", func))
- pr_err("Couldn't register %s interrupt\n", "timer");
-}
-
-/*
* Debugging dump, used in various places to see what's going on.
*/
@@ -315,29 +290,6 @@ void via_debug_dump(void)
}
/*
- * This is always executed with interrupts disabled.
- *
- * TBI: get time offset between scheduling timer ticks
- */
-
-u32 mac_gettimeoffset(void)
-{
- unsigned long ticks, offset = 0;
-
- /* read VIA1 timer 2 current value */
- ticks = via1[vT1CL] | (via1[vT1CH] << 8);
- /* The probability of underflow is less than 2% */
- if (ticks > MAC_CLOCK_TICK - MAC_CLOCK_TICK / 50)
- /* Check for pending timer interrupt in VIA1 IFR */
- if (via1[vIFR] & 0x40) offset = TICK_SIZE;
-
- ticks = MAC_CLOCK_TICK - ticks;
- ticks = ticks * 10000L / MAC_CLOCK_TICK;
-
- return (ticks + offset) * 1000;
-}
-
-/*
* Flush the L2 cache on Macs that have it by flipping
* the system into 24-bit mode for an instant.
*/
@@ -440,6 +392,8 @@ void via_nubus_irq_shutdown(int irq)
* via6522.c :-), disable/pending masks added.
*/
+#define VIA_TIMER_1_INT BIT(6)
+
void via1_irq(struct irq_desc *desc)
{
int irq_num;
@@ -449,6 +403,21 @@ void via1_irq(struct irq_desc *desc)
if (!events)
return;
+ irq_num = IRQ_MAC_TIMER_1;
+ irq_bit = VIA_TIMER_1_INT;
+ if (events & irq_bit) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ via1[vIFR] = irq_bit;
+ generic_handle_irq(irq_num);
+ local_irq_restore(flags);
+
+ events &= ~irq_bit;
+ if (!events)
+ return;
+ }
+
irq_num = VIA1_SOURCE_BASE;
irq_bit = 1;
do {
@@ -605,3 +574,82 @@ int via2_scsi_drq_pending(void)
return via2[gIFR] & (1 << IRQ_IDX(IRQ_MAC_SCSIDRQ));
}
EXPORT_SYMBOL(via2_scsi_drq_pending);
+
+/* timer and clock source */
+
+#define VIA_CLOCK_FREQ 783360 /* VIA "phase 2" clock in Hz */
+#define VIA_TIMER_CYCLES (VIA_CLOCK_FREQ / HZ) /* clock cycles per jiffy */
+
+#define VIA_TC (VIA_TIMER_CYCLES - 2) /* including 0 and -1 */
+#define VIA_TC_LOW (VIA_TC & 0xFF)
+#define VIA_TC_HIGH (VIA_TC >> 8)
+
+static u64 mac_read_clk(struct clocksource *cs);
+
+static struct clocksource mac_clk = {
+ .name = "via1",
+ .rating = 250,
+ .read = mac_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total, clk_offset;
+
+static irqreturn_t via_timer_handler(int irq, void *dev_id)
+{
+ irq_handler_t timer_routine = dev_id;
+
+ clk_total += VIA_TIMER_CYCLES;
+ clk_offset = 0;
+ timer_routine(0, NULL);
+
+ return IRQ_HANDLED;
+}
+
+void __init via_init_clock(irq_handler_t timer_routine)
+{
+ if (request_irq(IRQ_MAC_TIMER_1, via_timer_handler, IRQF_TIMER, "timer",
+ timer_routine)) {
+ pr_err("Couldn't register %s interrupt\n", "timer");
+ return;
+ }
+
+ via1[vT1LL] = VIA_TC_LOW;
+ via1[vT1LH] = VIA_TC_HIGH;
+ via1[vT1CL] = VIA_TC_LOW;
+ via1[vT1CH] = VIA_TC_HIGH;
+ via1[vACR] |= 0x40;
+
+ clocksource_register_hz(&mac_clk, VIA_CLOCK_FREQ);
+}
+
+static u64 mac_read_clk(struct clocksource *cs)
+{
+ unsigned long flags;
+ u8 count_high;
+ u16 count;
+ u32 ticks;
+
+ /*
+ * Timer counter wrap-around is detected with the timer interrupt flag
+ * but reading the counter low byte (vT1CL) would reset the flag.
+ * Also, accessing both counter registers is essentially a data race.
+ * These problems are avoided by ignoring the low byte. Clock accuracy
+ * is 256 times worse (error can reach 0.327 ms) but CPU overhead is
+ * reduced by avoiding slow VIA register accesses.
+ */
+
+ local_irq_save(flags);
+ count_high = via1[vT1CH];
+ if (count_high == 0xFF)
+ count_high = 0;
+ if (count_high > 0 && (via1[vIFR] & VIA_TIMER_1_INT))
+ clk_offset = VIA_TIMER_CYCLES;
+ count = count_high << 8;
+ ticks = VIA_TIMER_CYCLES - count;
+ ticks += clk_offset + clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
+}
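
The worst-case error quoted in the new mac_read_clk() comment can be checked directly: ignoring vT1CL leaves a granularity of 256 cycles of the 783360 Hz VIA clock, and

    256 / 783360 Hz ~= 326.8 us ~= 0.327 ms

which matches the figure in the comment, traded for one fewer slow VIA register access per clocksource read.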
diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c
index adea549d240e..545a1fe0e119 100644
--- a/arch/m68k/mvme147/config.c
+++ b/arch/m68k/mvme147/config.c
@@ -17,6 +17,7 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/tty.h>
+#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/linkage.h>
#include <linux/init.h>
@@ -38,18 +39,12 @@
static void mvme147_get_model(char *model);
extern void mvme147_sched_init(irq_handler_t handler);
-extern u32 mvme147_gettimeoffset(void);
extern int mvme147_hwclk (int, struct rtc_time *);
extern void mvme147_reset (void);
static int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to xtime_update() in
- * kernel/time/timekeeping.c, called via mvme147_process_int() */
-
-irq_handler_t tick_handler;
-
int __init mvme147_parse_bootinfo(const struct bi_record *bi)
{
@@ -89,7 +84,6 @@ void __init config_mvme147(void)
mach_max_dma_address = 0x01000000;
mach_sched_init = mvme147_sched_init;
mach_init_IRQ = mvme147_init_IRQ;
- arch_gettimeoffset = mvme147_gettimeoffset;
mach_hwclk = mvme147_hwclk;
mach_reset = mvme147_reset;
mach_get_model = mvme147_get_model;
@@ -99,45 +93,76 @@ void __init config_mvme147(void)
vme_brdtype = VME_TYPE_MVME147;
}
+static u64 mvme147_read_clk(struct clocksource *cs);
+
+static struct clocksource mvme147_clk = {
+ .name = "pcc",
+ .rating = 250,
+ .read = mvme147_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total;
+
+#define PCC_TIMER_CLOCK_FREQ 160000
+#define PCC_TIMER_CYCLES (PCC_TIMER_CLOCK_FREQ / HZ)
+#define PCC_TIMER_PRELOAD (0x10000 - PCC_TIMER_CYCLES)
/* Using pcc tick timer 1 */
static irqreturn_t mvme147_timer_int (int irq, void *dev_id)
{
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
+
+ local_irq_save(flags);
m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR;
- m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1;
- return tick_handler(irq, dev_id);
+ m147_pcc->t1_cntrl = PCC_TIMER_CLR_OVF;
+ clk_total += PCC_TIMER_CYCLES;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+
+ return IRQ_HANDLED;
}
void mvme147_sched_init (irq_handler_t timer_routine)
{
- tick_handler = timer_routine;
- if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, 0, "timer 1", NULL))
+ if (request_irq(PCC_IRQ_TIMER1, mvme147_timer_int, IRQF_TIMER,
+ "timer 1", timer_routine))
pr_err("Couldn't register timer interrupt\n");
/* Init the clock with a value */
- /* our clock goes off every 6.25us */
+ /* The clock counter increments until 0xFFFF then reloads */
m147_pcc->t1_preload = PCC_TIMER_PRELOAD;
m147_pcc->t1_cntrl = 0x0; /* clear timer */
m147_pcc->t1_cntrl = 0x3; /* start timer */
m147_pcc->t1_int_cntrl = PCC_TIMER_INT_CLR; /* clear pending ints */
m147_pcc->t1_int_cntrl = PCC_INT_ENAB|PCC_LEVEL_TIMER1;
+
+ clocksource_register_hz(&mvme147_clk, PCC_TIMER_CLOCK_FREQ);
}
-/* This is always executed with interrupts disabled. */
-/* XXX There are race hazards in this code XXX */
-u32 mvme147_gettimeoffset(void)
+static u64 mvme147_read_clk(struct clocksource *cs)
{
- volatile unsigned short *cp = (volatile unsigned short *)0xfffe1012;
- unsigned short n;
-
- n = *cp;
- while (n != *cp)
- n = *cp;
-
- n -= PCC_TIMER_PRELOAD;
- return ((unsigned long)n * 25 / 4) * 1000;
+ unsigned long flags;
+ u8 overflow, tmp;
+ u16 count;
+ u32 ticks;
+
+ local_irq_save(flags);
+ tmp = m147_pcc->t1_cntrl >> 4;
+ count = m147_pcc->t1_count;
+ overflow = m147_pcc->t1_cntrl >> 4;
+ if (overflow != tmp)
+ count = m147_pcc->t1_count;
+ count -= PCC_TIMER_PRELOAD;
+ ticks = count + overflow * PCC_TIMER_CYCLES;
+ ticks += clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
}
static int bcd2int (unsigned char b)
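
Both PCC-based boards sample the overflow counter, then the tick counter, then the overflow counter again, and re-read the tick counter if the overflow value moved in between; this avoids pairing a pre-wrap count with a post-wrap overflow (the race the old "XXX There are race hazards" comment complained about). A generic sketch of that sampling step (illustration only; the read_* helpers stand in for the PCC register accesses):

    static u32 example_sample_ticks(void)
    {
            u8 ovf_before, ovf_after;
            u32 count;

            ovf_before = read_overflow_nibble();
            count      = read_tick_counter();
            ovf_after  = read_overflow_nibble();
            if (ovf_after != ovf_before)         /* counter wrapped between reads */
                    count = read_tick_counter(); /* re-read, consistent with ovf_after */

            return count + ovf_after * TIMER_CYCLES;
    }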
diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c
index 6ee36a5b528d..9bc2da69f80c 100644
--- a/arch/m68k/mvme16x/config.c
+++ b/arch/m68k/mvme16x/config.c
@@ -19,6 +19,7 @@
#include <linux/mm.h>
#include <linux/seq_file.h>
#include <linux/tty.h>
+#include <linux/clocksource.h>
#include <linux/console.h>
#include <linux/linkage.h>
#include <linux/init.h>
@@ -44,17 +45,11 @@ static MK48T08ptr_t volatile rtc = (MK48T08ptr_t)MVME_RTC_BASE;
static void mvme16x_get_model(char *model);
extern void mvme16x_sched_init(irq_handler_t handler);
-extern u32 mvme16x_gettimeoffset(void);
extern int mvme16x_hwclk (int, struct rtc_time *);
extern void mvme16x_reset (void);
int bcd2int (unsigned char b);
-/* Save tick handler routine pointer, will point to xtime_update() in
- * kernel/time/timekeeping.c, called via mvme16x_process_int() */
-
-static irq_handler_t tick_handler;
-
unsigned short mvme16x_config;
EXPORT_SYMBOL(mvme16x_config);
@@ -120,11 +115,11 @@ static void __init mvme16x_init_IRQ (void)
m68k_setup_user_interrupt(VEC_USER, 192);
}
-#define pcc2chip ((volatile u_char *)0xfff42000)
-#define PccSCCMICR 0x1d
-#define PccSCCTICR 0x1e
-#define PccSCCRICR 0x1f
-#define PccTPIACKR 0x25
+#define PCC2CHIP (0xfff42000)
+#define PCCSCCMICR (PCC2CHIP + 0x1d)
+#define PCCSCCTICR (PCC2CHIP + 0x1e)
+#define PCCSCCRICR (PCC2CHIP + 0x1f)
+#define PCCTPIACKR (PCC2CHIP + 0x25)
#ifdef CONFIG_EARLY_PRINTK
@@ -232,10 +227,10 @@ void mvme16x_cons_write(struct console *co, const char *str, unsigned count)
base_addr[CyIER] = CyTxMpty;
while (1) {
- if (pcc2chip[PccSCCTICR] & 0x20)
+ if (in_8(PCCSCCTICR) & 0x20)
{
/* We have a Tx int. Acknowledge it */
- sink = pcc2chip[PccTPIACKR];
+ sink = in_8(PCCTPIACKR);
if ((base_addr[CyLICR] >> 2) == port) {
if (i == count) {
/* Last char of string is now output */
@@ -277,7 +272,6 @@ void __init config_mvme16x(void)
mach_max_dma_address = 0xffffffff;
mach_sched_init = mvme16x_sched_init;
mach_init_IRQ = mvme16x_init_IRQ;
- arch_gettimeoffset = mvme16x_gettimeoffset;
mach_hwclk = mvme16x_hwclk;
mach_reset = mvme16x_reset;
mach_get_model = mvme16x_get_model;
@@ -350,10 +344,46 @@ static irqreturn_t mvme16x_abort_int (int irq, void *dev_id)
return IRQ_HANDLED;
}
+static u64 mvme16x_read_clk(struct clocksource *cs);
+
+static struct clocksource mvme16x_clk = {
+ .name = "pcc",
+ .rating = 250,
+ .read = mvme16x_read_clk,
+ .mask = CLOCKSOURCE_MASK(32),
+ .flags = CLOCK_SOURCE_IS_CONTINUOUS,
+};
+
+static u32 clk_total;
+
+#define PCC_TIMER_CLOCK_FREQ 1000000
+#define PCC_TIMER_CYCLES (PCC_TIMER_CLOCK_FREQ / HZ)
+
+#define PCCTCMP1 (PCC2CHIP + 0x04)
+#define PCCTCNT1 (PCC2CHIP + 0x08)
+#define PCCTOVR1 (PCC2CHIP + 0x17)
+#define PCCTIC1 (PCC2CHIP + 0x1b)
+
+#define PCCTOVR1_TIC_EN 0x01
+#define PCCTOVR1_COC_EN 0x02
+#define PCCTOVR1_OVR_CLR 0x04
+
+#define PCCTIC1_INT_CLR 0x08
+#define PCCTIC1_INT_EN 0x10
+
static irqreturn_t mvme16x_timer_int (int irq, void *dev_id)
{
- *(volatile unsigned char *)0xfff4201b |= 8;
- return tick_handler(irq, dev_id);
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ out_8(PCCTIC1, in_8(PCCTIC1) | PCCTIC1_INT_CLR);
+ out_8(PCCTOVR1, PCCTOVR1_OVR_CLR);
+ clk_total += PCC_TIMER_CYCLES;
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+
+ return IRQ_HANDLED;
}
void mvme16x_sched_init (irq_handler_t timer_routine)
@@ -361,16 +391,17 @@ void mvme16x_sched_init (irq_handler_t timer_routine)
uint16_t brdno = be16_to_cpu(mvme_bdid.brdno);
int irq;
- tick_handler = timer_routine;
/* Using PCCchip2 or MC2 chip tick timer 1 */
- *(volatile unsigned long *)0xfff42008 = 0;
- *(volatile unsigned long *)0xfff42004 = 10000; /* 10ms */
- *(volatile unsigned char *)0xfff42017 |= 3;
- *(volatile unsigned char *)0xfff4201b = 0x16;
- if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, 0,
- "timer", mvme16x_timer_int))
+ out_be32(PCCTCNT1, 0);
+ out_be32(PCCTCMP1, PCC_TIMER_CYCLES);
+ out_8(PCCTOVR1, in_8(PCCTOVR1) | PCCTOVR1_TIC_EN | PCCTOVR1_COC_EN);
+ out_8(PCCTIC1, PCCTIC1_INT_EN | 6);
+ if (request_irq(MVME16x_IRQ_TIMER, mvme16x_timer_int, IRQF_TIMER, "timer",
+ timer_routine))
panic ("Couldn't register timer int");
+ clocksource_register_hz(&mvme16x_clk, PCC_TIMER_CLOCK_FREQ);
+
if (brdno == 0x0162 || brdno == 0x172)
irq = MVME162_IRQ_ABORT;
else
@@ -380,11 +411,23 @@ void mvme16x_sched_init (irq_handler_t timer_routine)
panic ("Couldn't register abort int");
}
-
-/* This is always executed with interrupts disabled. */
-u32 mvme16x_gettimeoffset(void)
+static u64 mvme16x_read_clk(struct clocksource *cs)
{
- return (*(volatile u32 *)0xfff42008) * 1000;
+ unsigned long flags;
+ u8 overflow, tmp;
+ u32 ticks;
+
+ local_irq_save(flags);
+ tmp = in_8(PCCTOVR1) >> 4;
+ ticks = in_be32(PCCTCNT1);
+ overflow = in_8(PCCTOVR1) >> 4;
+ if (overflow != tmp)
+ ticks = in_be32(PCCTCNT1);
+ ticks += overflow * PCC_TIMER_CYCLES;
+ ticks += clk_total;
+ local_irq_restore(flags);
+
+ return ticks;
}
int bcd2int (unsigned char b)
diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c
index 96810d91da2b..e63eb5f06999 100644
--- a/arch/m68k/q40/config.c
+++ b/arch/m68k/q40/config.c
@@ -40,7 +40,6 @@ extern void q40_init_IRQ(void);
static void q40_get_model(char *model);
extern void q40_sched_init(irq_handler_t handler);
-static u32 q40_gettimeoffset(void);
static int q40_hwclk(int, struct rtc_time *);
static unsigned int q40_get_ss(void);
static int q40_get_rtc_pll(struct rtc_pll_info *pll);
@@ -169,7 +168,6 @@ void __init config_q40(void)
mach_sched_init = q40_sched_init;
mach_init_IRQ = q40_init_IRQ;
- arch_gettimeoffset = q40_gettimeoffset;
mach_hwclk = q40_hwclk;
mach_get_ss = q40_get_ss;
mach_get_rtc_pll = q40_get_rtc_pll;
@@ -201,13 +199,6 @@ int __init q40_parse_bootinfo(const struct bi_record *rec)
return 1;
}
-
-static u32 q40_gettimeoffset(void)
-{
- return 5000 * (ql_ticks != 0) * 1000;
-}
-
-
/*
* Looks like op is non-zero for setting the clock, and zero for
* reading the clock.
diff --git a/arch/m68k/q40/q40ints.c b/arch/m68k/q40/q40ints.c
index 3e7603202977..1c696906c159 100644
--- a/arch/m68k/q40/q40ints.c
+++ b/arch/m68k/q40/q40ints.c
@@ -127,10 +127,10 @@ void q40_mksound(unsigned int hz, unsigned int ticks)
sound_ticks = ticks << 1;
}
-static irq_handler_t q40_timer_routine;
-
-static irqreturn_t q40_timer_int (int irq, void * dev)
+static irqreturn_t q40_timer_int(int irq, void *dev_id)
{
+ irq_handler_t timer_routine = dev_id;
+
ql_ticks = ql_ticks ? 0 : 1;
if (sound_ticks) {
unsigned char sval=(sound_ticks & 1) ? 128-SVOL : 128+SVOL;
@@ -139,8 +139,13 @@ static irqreturn_t q40_timer_int (int irq, void * dev)
*DAC_RIGHT=sval;
}
- if (!ql_ticks)
- q40_timer_routine(irq, dev);
+ if (!ql_ticks) {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
+ }
return IRQ_HANDLED;
}
@@ -148,11 +153,9 @@ void q40_sched_init (irq_handler_t timer_routine)
{
int timer_irq;
- q40_timer_routine = timer_routine;
timer_irq = Q40_IRQ_FRAME;
- if (request_irq(timer_irq, q40_timer_int, 0,
- "timer", q40_timer_int))
+ if (request_irq(timer_irq, q40_timer_int, 0, "timer", timer_routine))
panic("Couldn't register timer int");
master_outb(-1, FRAME_CLEAR_REG);
diff --git a/arch/m68k/sun3/config.c b/arch/m68k/sun3/config.c
index 542c4404861c..229ea37dfe1b 100644
--- a/arch/m68k/sun3/config.c
+++ b/arch/m68k/sun3/config.c
@@ -37,7 +37,6 @@
char sun3_reserved_pmeg[SUN3_PMEGS_NUM];
-extern u32 sun3_gettimeoffset(void);
static void sun3_sched_init(irq_handler_t handler);
extern void sun3_get_model (char* model);
extern int sun3_hwclk(int set, struct rtc_time *t);
@@ -138,7 +137,6 @@ void __init config_sun3(void)
mach_sched_init = sun3_sched_init;
mach_init_IRQ = sun3_init_IRQ;
mach_reset = sun3_reboot;
- arch_gettimeoffset = sun3_gettimeoffset;
mach_get_model = sun3_get_model;
mach_hwclk = sun3_hwclk;
mach_halt = sun3_halt;
diff --git a/arch/m68k/sun3/intersil.c b/arch/m68k/sun3/intersil.c
index d911070af02a..8fc74864de81 100644
--- a/arch/m68k/sun3/intersil.c
+++ b/arch/m68k/sun3/intersil.c
@@ -22,13 +22,6 @@
#define STOP_VAL (INTERSIL_STOP | INTERSIL_INT_ENABLE | INTERSIL_24H_MODE)
#define START_VAL (INTERSIL_RUN | INTERSIL_INT_ENABLE | INTERSIL_24H_MODE)
-/* does this need to be implemented? */
-u32 sun3_gettimeoffset(void)
-{
- return 1000;
-}
-
-
/* get/set hwclock */
int sun3_hwclk(int set, struct rtc_time *t)
diff --git a/arch/m68k/sun3/sun3ints.c b/arch/m68k/sun3/sun3ints.c
index 6bbca30c9188..a5824abb4a39 100644
--- a/arch/m68k/sun3/sun3ints.c
+++ b/arch/m68k/sun3/sun3ints.c
@@ -61,8 +61,10 @@ static irqreturn_t sun3_int7(int irq, void *dev_id)
static irqreturn_t sun3_int5(int irq, void *dev_id)
{
+ unsigned long flags;
unsigned int cnt;
+ local_irq_save(flags);
#ifdef CONFIG_SUN3
intersil_clear();
#endif
@@ -76,6 +78,7 @@ static irqreturn_t sun3_int5(int irq, void *dev_id)
cnt = kstat_irqs_cpu(irq, 0);
if (!(cnt % 20))
sun3_leds(led_pattern[cnt % 160 / 20]);
+ local_irq_restore(flags);
return IRQ_HANDLED;
}
diff --git a/arch/m68k/sun3x/config.c b/arch/m68k/sun3x/config.c
index 33d3a1c6fba0..03ce7f9facfe 100644
--- a/arch/m68k/sun3x/config.c
+++ b/arch/m68k/sun3x/config.c
@@ -49,7 +49,6 @@ void __init config_sun3x(void)
mach_sched_init = sun3x_sched_init;
mach_init_IRQ = sun3_init_IRQ;
- arch_gettimeoffset = sun3x_gettimeoffset;
mach_reset = sun3x_reboot;
mach_hwclk = sun3x_hwclk;
diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c
index 047e2bcee3d7..9163294b0fb6 100644
--- a/arch/m68k/sun3x/time.c
+++ b/arch/m68k/sun3x/time.c
@@ -73,22 +73,21 @@ int sun3x_hwclk(int set, struct rtc_time *t)
return 0;
}
-/* Not much we can do here */
-u32 sun3x_gettimeoffset(void)
-{
- return 0L;
-}
#if 0
-static void sun3x_timer_tick(int irq, void *dev_id, struct pt_regs *regs)
+static irqreturn_t sun3x_timer_tick(int irq, void *dev_id)
{
- void (*vector)(int, void *, struct pt_regs *) = dev_id;
+ irq_handler_t timer_routine = dev_id;
+ unsigned long flags;
- /* Clear the pending interrupt - pulse the enable line low */
- disable_irq(5);
- enable_irq(5);
+ local_irq_save(flags);
+ /* Clear the pending interrupt - pulse the enable line low */
+ disable_irq(5);
+ enable_irq(5);
+ timer_routine(0, NULL);
+ local_irq_restore(flags);
- vector(irq, NULL, regs);
+ return IRQ_HANDLED;
}
#endif
diff --git a/arch/m68k/sun3x/time.h b/arch/m68k/sun3x/time.h
index 496f406412ad..86ce78bb3c28 100644
--- a/arch/m68k/sun3x/time.h
+++ b/arch/m68k/sun3x/time.h
@@ -3,7 +3,6 @@
#define SUN3X_TIME_H
extern int sun3x_hwclk(int set, struct rtc_time *t);
-u32 sun3x_gettimeoffset(void);
void sun3x_sched_init(irq_handler_t vector);
struct mostek_dt {
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index a51b965b3b82..adb179f519f9 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -41,6 +41,7 @@ config MICROBLAZE
select TRACING_SUPPORT
select VIRT_TO_BUS
select CPU_NO_EFFICIENT_FFS
+ select MMU_GATHER_NO_RANGE if MMU
# Endianness selection
choice
@@ -58,15 +59,9 @@ config CPU_LITTLE_ENDIAN
endchoice
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config ZONE_DMA
def_bool y
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
def_bool n
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 1a8285c3f693..17a8d0a62038 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
index 833d3a53dab3..3a6924f3cbde 100644
--- a/arch/microblaze/include/asm/syscall.h
+++ b/arch/microblaze/include/asm/syscall.h
@@ -105,7 +105,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
asmlinkage unsigned long do_syscall_trace_enter(struct pt_regs *regs);
asmlinkage void do_syscall_trace_leave(struct pt_regs *regs);
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_MICROBLAZE;
}
diff --git a/arch/microblaze/include/asm/tlb.h b/arch/microblaze/include/asm/tlb.h
index 99b6ded54849..628a78ee0a72 100644
--- a/arch/microblaze/include/asm/tlb.h
+++ b/arch/microblaze/include/asm/tlb.h
@@ -11,16 +11,7 @@
#ifndef _ASM_MICROBLAZE_TLB_H
#define _ASM_MICROBLAZE_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <linux/pagemap.h>
-
-#ifdef CONFIG_MMU
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
-#endif
-
#include <asm-generic/tlb.h>
#endif /* _ASM_MICROBLAZE_TLB_H */
diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c
index c2ce1e42b888..8fe54fda31dc 100644
--- a/arch/microblaze/mm/pgtable.c
+++ b/arch/microblaze/mm/pgtable.c
@@ -75,7 +75,7 @@ static void __iomem *__ioremap(phys_addr_t addr, unsigned long size,
p >= memory_start && p < virt_to_phys(high_memory) &&
!(p >= __virt_to_phys((phys_addr_t)__bss_stop) &&
p < __virt_to_phys((phys_addr_t)__bss_stop))) {
- pr_warn("__ioremap(): phys addr "PTE_FMT" is RAM lr %pf\n",
+ pr_warn("__ioremap(): phys addr "PTE_FMT" is RAM lr %ps\n",
(unsigned long)p, __builtin_return_address(0));
return NULL;
}
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4a5f5b0ee9a9..ff8cff9fcf54 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -44,8 +44,7 @@ config MIPS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE if CPU_SUPPORTS_HUGEPAGES && 64BIT
- select HAVE_CBPF_JIT if (!64BIT && !CPU_MICROMIPS)
- select HAVE_EBPF_JIT if (64BIT && !CPU_MICROMIPS)
+ select HAVE_EBPF_JIT if (!CPU_MICROMIPS)
select HAVE_CONTEXT_TRACKING
select HAVE_COPY_THREAD_TLS
select HAVE_C_RECORDMCOUNT
@@ -276,7 +275,7 @@ config BCM47XX
select BCM47XX_SPROM
select BCM47XX_SSB if !BCM47XX_BCMA
help
- Support for BCM47XX based boards
+ Support for BCM47XX based boards
config BCM63XX
bool "Broadcom BCM63XX based boards"
@@ -295,7 +294,7 @@ config BCM63XX
select MIPS_L1_CACHE_SHIFT_4
select CLKDEV_LOOKUP
help
- Support for BCM63XX based boards
+ Support for BCM63XX based boards
config MIPS_COBALT
bool "Cobalt Server"
@@ -374,10 +373,10 @@ config MACH_JAZZ
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_100HZ
help
- This a family of machines based on the MIPS R4030 chipset which was
- used by several vendors to build RISC/os and Windows NT workstations.
- Members include the Acer PICA, MIPS Magnum 4000, MIPS Millennium and
- Olivetti M700-10 workstations.
+ This a family of machines based on the MIPS R4030 chipset which was
+ used by several vendors to build RISC/os and Windows NT workstations.
+ Members include the Acer PICA, MIPS Magnum 4000, MIPS Millennium and
+ Olivetti M700-10 workstations.
config MACH_INGENIC
bool "Ingenic SoC based machines"
@@ -573,14 +572,14 @@ config NXP_STB220
bool "NXP STB220 board"
select SOC_PNX833X
help
- Support for NXP Semiconductors STB220 Development Board.
+ Support for NXP Semiconductors STB220 Development Board.
config NXP_STB225
bool "NXP 225 board"
select SOC_PNX833X
select SOC_PNX8335
help
- Support for NXP Semiconductors STB225 Development Board.
+ Support for NXP Semiconductors STB225 Development Board.
config PMC_MSP
bool "PMC-Sierra MSP chipsets"
@@ -722,9 +721,9 @@ config SGI_IP28
select SYS_SUPPORTS_64BIT_KERNEL
select SYS_SUPPORTS_BIG_ENDIAN
select MIPS_L1_CACHE_SHIFT_7
- help
- This is the SGI Indigo2 with R10000 processor. To compile a Linux
- kernel that runs on these, say Y here.
+ help
+ This is the SGI Indigo2 with R10000 processor. To compile a Linux
+ kernel that runs on these, say Y here.
config SGI_IP32
bool "SGI IP32 (O2)"
@@ -1037,13 +1036,6 @@ source "arch/mips/paravirt/Kconfig"
endmenu
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config GENERIC_HWEIGHT
bool
default y
@@ -1175,9 +1167,9 @@ config HOLES_IN_ZONE
config SYS_SUPPORTS_RELOCATABLE
bool
help
- Selected if the platform supports relocating the kernel.
- The platform must provide plat_get_fdt() if it selects CONFIG_USE_OF
- to allow access to command line and entropy sources.
+ Selected if the platform supports relocating the kernel.
+ The platform must provide plat_get_fdt() if it selects CONFIG_USE_OF
+ to allow access to command line and entropy sources.
config MIPS_CBPF_JIT
def_bool y
@@ -2120,8 +2112,8 @@ config MIPS_PGD_C0_CONTEXT
# Set to y for ptrace access to watch registers.
#
config HARDWARE_WATCHPOINTS
- bool
- default y if CPU_MIPSR1 || CPU_MIPSR2 || CPU_MIPSR6
+ bool
+ default y if CPU_MIPSR1 || CPU_MIPSR2 || CPU_MIPSR6
menu "Kernel type"
@@ -2185,10 +2177,10 @@ config PAGE_SIZE_4KB
bool "4kB"
depends on !CPU_LOONGSON2 && !CPU_LOONGSON3
help
- This option select the standard 4kB Linux page size. On some
- R3000-family processors this is the only available page size. Using
- 4kB page size will minimize memory consumption and is therefore
- recommended for low memory systems.
+ This option select the standard 4kB Linux page size. On some
+ R3000-family processors this is the only available page size. Using
+ 4kB page size will minimize memory consumption and is therefore
+ recommended for low memory systems.
config PAGE_SIZE_8KB
bool "8kB"
@@ -2481,7 +2473,6 @@ config SB1_PASS_2_1_WORKAROUNDS
depends on CPU_SB1 && CPU_SB1_PASS_2
default y
-
choice
prompt "SmartMIPS or microMIPS ASE support"
@@ -2689,16 +2680,16 @@ config RANDOMIZE_BASE
bool "Randomize the address of the kernel image"
depends on RELOCATABLE
---help---
- Randomizes the physical and virtual address at which the
- kernel image is loaded, as a security feature that
- deters exploit attempts relying on knowledge of the location
- of kernel internals.
+ Randomizes the physical and virtual address at which the
+ kernel image is loaded, as a security feature that
+ deters exploit attempts relying on knowledge of the location
+ of kernel internals.
- Entropy is generated using any coprocessor 0 registers available.
+ Entropy is generated using any coprocessor 0 registers available.
- The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET.
+ The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET.
- If unsure, say N.
+ If unsure, say N.
config RANDOMIZE_BASE_MAX_OFFSET
hex "Maximum kASLR offset" if EXPERT
@@ -2828,7 +2819,7 @@ choice
prompt "Timer frequency"
default HZ_250
help
- Allows the configuration of the timer frequency.
+ Allows the configuration of the timer frequency.
config HZ_24
bool "24 HZ" if SYS_SUPPORTS_24HZ || SYS_SUPPORTS_ARBIT_HZ
@@ -3128,10 +3119,10 @@ config ARCH_MMAP_RND_BITS_MAX
default 15
config ARCH_MMAP_RND_COMPAT_BITS_MIN
- default 8
+ default 8
config ARCH_MMAP_RND_COMPAT_BITS_MAX
- default 15
+ default 15
config I8253
bool
diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c
index d129475fd40d..a95a894aceaf 100644
--- a/arch/mips/alchemy/common/clock.c
+++ b/arch/mips/alchemy/common/clock.c
@@ -160,7 +160,7 @@ static struct clk __init *alchemy_clk_setup_cpu(const char *parent_name,
id.name = ALCHEMY_CPU_CLK;
id.parent_names = &parent_name;
id.num_parents = 1;
- id.flags = CLK_IS_BASIC;
+ id.flags = 0;
id.ops = &alchemy_clkops_cpu;
h->init = &id;
diff --git a/arch/mips/bcm47xx/Kconfig b/arch/mips/bcm47xx/Kconfig
index 29471038d817..6889f74e06f5 100644
--- a/arch/mips/bcm47xx/Kconfig
+++ b/arch/mips/bcm47xx/Kconfig
@@ -15,9 +15,9 @@ config BCM47XX_SSB
select SSB_DRIVER_GPIO
default y
help
- Add support for old Broadcom BCM47xx boards with Sonics Silicon Backplane support.
+ Add support for old Broadcom BCM47xx boards with Sonics Silicon Backplane support.
- This will generate an image with support for SSB and MIPS32 R1 instruction set.
+ This will generate an image with support for SSB and MIPS32 R1 instruction set.
config BCM47XX_BCMA
bool "BCMA Support for Broadcom BCM47XX"
@@ -31,8 +31,8 @@ config BCM47XX_BCMA
select BCMA_DRIVER_GPIO
default y
help
- Add support for new Broadcom BCM47xx boards with Broadcom specific Advanced Microcontroller Bus.
+ Add support for new Broadcom BCM47xx boards with Broadcom specific Advanced Microcontroller Bus.
- This will generate an image with support for BCMA and MIPS32 R2 instruction set.
+ This will generate an image with support for BCMA and MIPS32 R2 instruction set.
endif
diff --git a/arch/mips/bcm63xx/boards/Kconfig b/arch/mips/bcm63xx/boards/Kconfig
index f60d96610ace..492c3bd005d5 100644
--- a/arch/mips/bcm63xx/boards/Kconfig
+++ b/arch/mips/bcm63xx/boards/Kconfig
@@ -5,7 +5,7 @@ choice
default BOARD_BCM963XX
config BOARD_BCM963XX
- bool "Generic Broadcom 963xx boards"
+ bool "Generic Broadcom 963xx boards"
select SSB
endchoice
diff --git a/arch/mips/configs/ar7_defconfig b/arch/mips/configs/ar7_defconfig
index 9fbfb6e5c7d2..c83fdf649327 100644
--- a/arch/mips/configs/ar7_defconfig
+++ b/arch/mips/configs/ar7_defconfig
@@ -18,7 +18,6 @@ CONFIG_KEXEC=y
# CONFIG_SECCOMP is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_BSD_DISKLABEL=y
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 0c86ed86266a..30a6eafdb1d0 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -17,7 +17,6 @@ CONFIG_TC=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_SRCVERSION_ALL=y
-# CONFIG_LBDAF is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_OSF_PARTITION=y
# CONFIG_EFI_PARTITION is not set
diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
index 0e54ab2680ce..e2b58dbf4aa9 100644
--- a/arch/mips/configs/decstation_r4k_defconfig
+++ b/arch/mips/configs/decstation_r4k_defconfig
@@ -16,7 +16,6 @@ CONFIG_TC=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_SRCVERSION_ALL=y
-# CONFIG_LBDAF is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_OSF_PARTITION=y
# CONFIG_EFI_PARTITION is not set
diff --git a/arch/mips/configs/generic_defconfig b/arch/mips/configs/generic_defconfig
index 5d80521e5d5a..714169e411cf 100644
--- a/arch/mips/configs/generic_defconfig
+++ b/arch/mips/configs/generic_defconfig
@@ -26,6 +26,7 @@ CONFIG_MIPS_CPS=y
CONFIG_HIGHMEM=y
CONFIG_NR_CPUS=16
CONFIG_MIPS_O32_FP64_SUPPORT=y
+CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_TRIM_UNUSED_KSYMS=y
diff --git a/arch/mips/configs/loongson1b_defconfig b/arch/mips/configs/loongson1b_defconfig
index 1628e6fcc405..3d390a7494d6 100644
--- a/arch/mips/configs/loongson1b_defconfig
+++ b/arch/mips/configs/loongson1b_defconfig
@@ -19,7 +19,6 @@ CONFIG_MACH_LOONGSON32=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_NET=y
diff --git a/arch/mips/configs/loongson1c_defconfig b/arch/mips/configs/loongson1c_defconfig
index 7aa5527e50b1..247d56e94c0a 100644
--- a/arch/mips/configs/loongson1c_defconfig
+++ b/arch/mips/configs/loongson1c_defconfig
@@ -20,7 +20,6 @@ CONFIG_LOONGSON1_LS1C=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODVERSIONS=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
CONFIG_NET=y
diff --git a/arch/mips/configs/rb532_defconfig b/arch/mips/configs/rb532_defconfig
index 4cbcf015a7ed..50632a3103dd 100644
--- a/arch/mips/configs/rb532_defconfig
+++ b/arch/mips/configs/rb532_defconfig
@@ -19,7 +19,6 @@ CONFIG_PCI=y
# CONFIG_PCI_QUIRKS is not set
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_MAC_PARTITION=y
diff --git a/arch/mips/configs/rbtx49xx_defconfig b/arch/mips/configs/rbtx49xx_defconfig
index 96114ca59016..5e389db35fa7 100644
--- a/arch/mips/configs/rbtx49xx_defconfig
+++ b/arch/mips/configs/rbtx49xx_defconfig
@@ -17,7 +17,6 @@ CONFIG_TOSHIBA_RBTX4938_MPLEX_KEEP=y
CONFIG_PCI=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_NET=y
CONFIG_PACKET=y
diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h
index a301a8f4bc66..235bc2f52113 100644
--- a/arch/mips/include/asm/bootinfo.h
+++ b/arch/mips/include/asm/bootinfo.h
@@ -92,6 +92,7 @@ extern unsigned long mips_machtype;
#define BOOT_MEM_ROM_DATA 2
#define BOOT_MEM_RESERVED 3
#define BOOT_MEM_INIT_RAM 4
+#define BOOT_MEM_NOMAP 5
/*
* A memory map that's built upon what was determined
diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h
index 845fbbc7a2e3..29997e42480e 100644
--- a/arch/mips/include/asm/io.h
+++ b/arch/mips/include/asm/io.h
@@ -102,9 +102,6 @@ static inline void set_io_port_base(unsigned long base)
#define iobarrier_w() wmb()
#define iobarrier_sync() iob()
-/* Some callers use this older API instead. */
-#define mmiowb() iobarrier_w()
-
/*
* virt_to_phys - map virtual addresses to physical
* @address: address to remap
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index e4456e450f94..3185fd3220ec 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -11,6 +11,7 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
+#include <asm/isa-rev.h>
#define JUMP_LABEL_NOP_SIZE 4
@@ -21,15 +22,20 @@
#endif
#ifdef CONFIG_CPU_MICROMIPS
-#define B_INSN "b32"
+# define B_INSN "b32"
+# define J_INSN "j32"
+#elif MIPS_ISA_REV >= 6
+# define B_INSN "bc"
+# define J_INSN "bc"
#else
-#define B_INSN "b"
+# define B_INSN "b"
+# define J_INSN "j"
#endif
static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
{
asm_volatile_goto("1:\t" B_INSN " 2f\n\t"
- "2:\tnop\n\t"
+ "2:\t.insn\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
".popsection\n\t"
@@ -42,8 +48,7 @@ l_yes:
static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
{
- asm_volatile_goto("1:\tj %l[l_yes]\n\t"
- "nop\n\t"
+ asm_volatile_goto("1:\t" J_INSN " %l[l_yes]\n\t"
".pushsection __jump_table, \"aw\"\n\t"
WORD_INSN " 1b, %l[l_yes], %0\n\t"
".popsection\n\t"
diff --git a/arch/mips/include/asm/mmiowb.h b/arch/mips/include/asm/mmiowb.h
new file mode 100644
index 000000000000..a40824e3ef8e
--- /dev/null
+++ b/arch/mips/include/asm/mmiowb.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_MMIOWB_H
+#define _ASM_MMIOWB_H
+
+#include <asm/io.h>
+
+#define mmiowb() iobarrier_w()
+
+#include <asm-generic/mmiowb.h>
+
+#endif /* _ASM_MMIOWB_H */
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index ee81297d9117..8a88eb265516 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -11,6 +11,21 @@
#include <asm/processor.h>
#include <asm/qrwlock.h>
+
+#include <asm-generic/qspinlock_types.h>
+
+#define queued_spin_unlock queued_spin_unlock
+/**
+ * queued_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ */
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+ /* This could be optimised with ARCH_HAS_MMIOWB */
+ mmiowb();
+ smp_store_release(&lock->locked, 0);
+}
+
#include <asm/qspinlock.h>
#endif /* _ASM_SPINLOCK_H */
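
These two hunks are the MIPS side of the mmiowb() rework: the barrier definition moves out of asm/io.h into a dedicated asm/mmiowb.h, and since MIPS does not yet opt into the ARCH_HAS_MMIOWB tracking scheme, queued_spin_unlock() is overridden to issue the barrier itself before the release store. Assuming that scheme, a driver on MIPS no longer needs an explicit mmiowb() between a posted MMIO write and the unlock -- illustration only, with a hypothetical device and register:

    spin_lock(&dev->lock);
    writel(val, dev->regs + EXAMPLE_DOORBELL);
    /* mmiowb();  -- previously required here on mips, now implied by the unlock */
    spin_unlock(&dev->lock);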
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index a2b4748655df..acf80ae0a430 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -141,14 +141,14 @@ extern const unsigned long sys_call_table[];
extern const unsigned long sys32_call_table[];
extern const unsigned long sysn32_call_table[];
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
int arch = AUDIT_ARCH_MIPS;
#ifdef CONFIG_64BIT
- if (!test_thread_flag(TIF_32BIT_REGS)) {
+ if (!test_tsk_thread_flag(task, TIF_32BIT_REGS)) {
arch |= __AUDIT_ARCH_64BIT;
/* N32 sets only TIF_32BIT_ADDR */
- if (test_thread_flag(TIF_32BIT_ADDR))
+ if (test_tsk_thread_flag(task, TIF_32BIT_ADDR))
arch |= __AUDIT_ARCH_CONVENTION_MIPS64_N32;
}
#endif
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h
index b6823b9e94da..90f3ad76d9e0 100644
--- a/arch/mips/include/asm/tlb.h
+++ b/arch/mips/include/asm/tlb.h
@@ -5,23 +5,6 @@
#include <asm/cpu-features.h>
#include <asm/mipsregs.h>
-/*
- * MIPS doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-
-/*
- * .. because we flush the whole mm when it fills up.
- */
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#define _UNIQUE_ENTRYHI(base, idx) \
(((base) + ((idx) << (PAGE_SHIFT + 1))) | \
(cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0))
diff --git a/arch/mips/include/asm/uasm.h b/arch/mips/include/asm/uasm.h
index b1990dd75f27..f7effca791a5 100644
--- a/arch/mips/include/asm/uasm.h
+++ b/arch/mips/include/asm/uasm.h
@@ -86,14 +86,18 @@ Ip_u2u1(_ctcmsa);
Ip_u2u1s3(_daddiu);
Ip_u3u1u2(_daddu);
Ip_u1u2(_ddivu);
+Ip_u3u1u2(_ddivu_r6);
Ip_u1(_di);
Ip_u2u1msbu3(_dins);
Ip_u2u1msbu3(_dinsm);
Ip_u2u1msbu3(_dinsu);
Ip_u1u2(_divu);
+Ip_u3u1u2(_divu_r6);
Ip_u1u2u3(_dmfc0);
+Ip_u3u1u2(_dmodu);
Ip_u1u2u3(_dmtc0);
Ip_u1u2(_dmultu);
+Ip_u3u1u2(_dmulu);
Ip_u2u1u3(_drotr);
Ip_u2u1u3(_drotr32);
Ip_u2u1(_dsbh);
@@ -131,6 +135,7 @@ Ip_u1u2u3(_mfc0);
Ip_u1u2u3(_mfhc0);
Ip_u1(_mfhi);
Ip_u1(_mflo);
+Ip_u3u1u2(_modu);
Ip_u3u1u2(_movn);
Ip_u3u1u2(_movz);
Ip_u1u2u3(_mtc0);
@@ -139,6 +144,7 @@ Ip_u1(_mthi);
Ip_u1(_mtlo);
Ip_u3u1u2(_mul);
Ip_u1u2(_multu);
+Ip_u3u1u2(_mulu);
Ip_u3u1u2(_nor);
Ip_u3u1u2(_or);
Ip_u2u1u3(_ori);
@@ -149,6 +155,8 @@ Ip_u2s3u1(_sb);
Ip_u2s3u1(_sc);
Ip_u2s3u1(_scd);
Ip_u2s3u1(_sd);
+Ip_u3u1u2(_seleqz);
+Ip_u3u1u2(_selnez);
Ip_u2s3u1(_sh);
Ip_u2u1u3(_sll);
Ip_u3u2u1(_sllv);
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 40fbb5dd66df..eaa3a80affdf 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -55,9 +55,9 @@ enum spec_op {
spec3_unused_op, spec4_unused_op, slt_op, sltu_op,
dadd_op, daddu_op, dsub_op, dsubu_op,
tge_op, tgeu_op, tlt_op, tltu_op,
- teq_op, spec5_unused_op, tne_op, spec6_unused_op,
- dsll_op, spec7_unused_op, dsrl_op, dsra_op,
- dsll32_op, spec8_unused_op, dsrl32_op, dsra32_op
+ teq_op, seleqz_op, tne_op, selnez_op,
+ dsll_op, spec5_unused_op, dsrl_op, dsra_op,
+ dsll32_op, spec6_unused_op, dsrl32_op, dsra32_op
};
/*
diff --git a/arch/mips/include/uapi/asm/sockios.h b/arch/mips/include/uapi/asm/sockios.h
index 5b40a88593fa..66f60234f290 100644
--- a/arch/mips/include/uapi/asm/sockios.h
+++ b/arch/mips/include/uapi/asm/sockios.h
@@ -21,7 +21,7 @@
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */
#endif /* _ASM_SOCKIOS_H */
diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S
index d7de8adcfcc8..5469d43b6966 100644
--- a/arch/mips/kernel/entry.S
+++ b/arch/mips/kernel/entry.S
@@ -58,15 +58,14 @@ resume_kernel:
local_irq_disable
lw t0, TI_PRE_COUNT($28)
bnez t0, restore_all
-need_resched:
LONG_L t0, TI_FLAGS($28)
andi t1, t0, _TIF_NEED_RESCHED
beqz t1, restore_all
LONG_L t0, PT_STATUS(sp) # Interrupts off?
andi t0, 1
beqz t0, restore_all
- jal preempt_schedule_irq
- b need_resched
+ PTR_LA ra, restore_all
+ j preempt_schedule_irq
#endif
FEXPORT(ret_from_kernel_thread)
diff --git a/arch/mips/kernel/jump_label.c b/arch/mips/kernel/jump_label.c
index ab943927f97a..662c8db9f45b 100644
--- a/arch/mips/kernel/jump_label.c
+++ b/arch/mips/kernel/jump_label.c
@@ -40,18 +40,38 @@ void arch_jump_label_transform(struct jump_entry *e,
{
union mips_instruction *insn_p;
union mips_instruction insn;
+ long offset;
insn_p = (union mips_instruction *)msk_isa16_mode(e->code);
- /* Jump only works within an aligned region its delay slot is in. */
- BUG_ON((e->target & ~J_RANGE_MASK) != ((e->code + 4) & ~J_RANGE_MASK));
-
/* Target must have the right alignment and ISA must be preserved. */
BUG_ON((e->target & J_ALIGN_MASK) != J_ISA_BIT);
if (type == JUMP_LABEL_JMP) {
- insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op;
- insn.j_format.target = e->target >> J_RANGE_SHIFT;
+ if (!IS_ENABLED(CONFIG_CPU_MICROMIPS) && MIPS_ISA_REV >= 6) {
+ offset = e->target - ((unsigned long)insn_p + 4);
+ offset >>= 2;
+
+ /*
+ * The branch offset must fit in the instruction's 26
+ * bit field.
+ */
+ WARN_ON((offset >= BIT(25)) ||
+ (offset < -(long)BIT(25)));
+
+ insn.j_format.opcode = bc6_op;
+ insn.j_format.target = offset;
+ } else {
+ /*
+ * Jump only works within an aligned region its delay
+ * slot is in.
+ */
+ WARN_ON((e->target & ~J_RANGE_MASK) !=
+ ((e->code + 4) & ~J_RANGE_MASK));
+
+ insn.j_format.opcode = J_ISA_BIT ? mm_j32_op : j_op;
+ insn.j_format.target = e->target >> J_RANGE_SHIFT;
+ }
} else {
insn.word = 0; /* nop */
}
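
On MIPS R6 the new path emits a compact, PC-relative bc instead of an absolute j, so the old requirement that the jump and its target share a 256 MB aligned region is replaced by a range check on the branch offset. The bound in the WARN_ON is the signed 26-bit word offset of bc:

    offset = (target - (insn + 4)) >> 2, so the reachable window is
    [-2^25, 2^25) words = [-2^27, 2^27) bytes, i.e. +/-128 MiB around the branch.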
diff --git a/arch/mips/kernel/prom.c b/arch/mips/kernel/prom.c
index 93b8e0b4332f..28bf01961bb2 100644
--- a/arch/mips/kernel/prom.c
+++ b/arch/mips/kernel/prom.c
@@ -41,13 +41,27 @@ char *mips_get_machine_name(void)
#ifdef CONFIG_USE_OF
void __init early_init_dt_add_memory_arch(u64 base, u64 size)
{
- return add_memory_region(base, size, BOOT_MEM_RAM);
+ if (base >= PHYS_ADDR_MAX) {
+ pr_warn("Trying to add an invalid memory region, skipped\n");
+ return;
+ }
+
+ /* Truncate the passed memory region instead of type casting */
+ if (base + size - 1 >= PHYS_ADDR_MAX || base + size < base) {
+ pr_warn("Truncate memory region %llx @ %llx to size %llx\n",
+ size, base, PHYS_ADDR_MAX - base);
+ size = PHYS_ADDR_MAX - base;
+ }
+
+ add_memory_region(base, size, BOOT_MEM_RAM);
}
int __init early_init_dt_reserve_memory_arch(phys_addr_t base,
phys_addr_t size, bool nomap)
{
- add_memory_region(base, size, BOOT_MEM_RESERVED);
+ add_memory_region(base, size,
+ nomap ? BOOT_MEM_NOMAP : BOOT_MEM_RESERVED);
+
return 0;
}
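
The new truncation path can be illustrated with a 32-bit phys_addr_t (PHYS_ADDR_MAX = 0xffffffff); this is a hypothetical example, not taken from the patch. A DT memory node with base 0xe0000000 and size 0x40000000 reaches past the addressable range, so instead of being type-cast into a bogus region it is clipped to

    size = PHYS_ADDR_MAX - base = 0xffffffff - 0xe0000000 = 0x1fffffff

with the pr_warn() logging the base, the original size and the clipped size; a node whose base is itself at or beyond PHYS_ADDR_MAX is skipped entirely.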
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 3a62f80958e1..414b6e9c900b 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1418,7 +1418,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
unsigned long args[6];
sd.nr = syscall;
- sd.arch = syscall_get_arch();
+ sd.arch = syscall_get_arch(current);
syscall_get_arguments(current, regs, args);
for (i = 0; i < 6; i++)
sd.args[i] = args[i];
diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c
index 8d1dc6c71173..ab349d2381c3 100644
--- a/arch/mips/kernel/setup.c
+++ b/arch/mips/kernel/setup.c
@@ -27,6 +27,7 @@
#include <linux/dma-contiguous.h>
#include <linux/decompress/generic.h>
#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
#include <asm/addrspace.h>
#include <asm/bootinfo.h>
@@ -178,6 +179,7 @@ static bool __init __maybe_unused memory_region_available(phys_addr_t start,
in_ram = true;
break;
case BOOT_MEM_RESERVED:
+ case BOOT_MEM_NOMAP:
if ((start >= start_ && start < end_) ||
(start < start_ && start + size >= start_))
free = false;
@@ -213,6 +215,9 @@ static void __init print_memory_map(void)
case BOOT_MEM_RESERVED:
printk(KERN_CONT "(reserved)\n");
break;
+ case BOOT_MEM_NOMAP:
+ printk(KERN_CONT "(nomap)\n");
+ break;
default:
printk(KERN_CONT "type %lu\n", boot_mem_map.map[i].type);
break;
@@ -371,7 +376,6 @@ static void __init bootmem_init(void)
static void __init bootmem_init(void)
{
- unsigned long reserved_end;
phys_addr_t ramstart = PHYS_ADDR_MAX;
int i;
@@ -382,10 +386,10 @@ static void __init bootmem_init(void)
* will reserve the area used for the initrd.
*/
init_initrd();
- reserved_end = (unsigned long) PFN_UP(__pa_symbol(&_end));
- memblock_reserve(PHYS_OFFSET,
- (reserved_end << PAGE_SHIFT) - PHYS_OFFSET);
+ /* Reserve memory occupied by kernel. */
+ memblock_reserve(__pa_symbol(&_text),
+ __pa_symbol(&_end) - __pa_symbol(&_text));
/*
* max_low_pfn is not a number of pages. The number of pages
@@ -394,10 +398,7 @@ static void __init bootmem_init(void)
min_low_pfn = ~0UL;
max_low_pfn = 0;
- /*
- * Find the highest page frame number we have available
- * and the lowest used RAM address
- */
+ /* Find the highest and lowest page frame numbers we have available. */
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
@@ -427,13 +428,6 @@ static void __init bootmem_init(void)
max_low_pfn = end;
if (start < min_low_pfn)
min_low_pfn = start;
- if (end <= reserved_end)
- continue;
-#ifdef CONFIG_BLK_DEV_INITRD
- /* Skip zones before initrd and initrd itself */
- if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end)))
- continue;
-#endif
}
if (min_low_pfn >= max_low_pfn)
@@ -474,6 +468,7 @@ static void __init bootmem_init(void)
max_low_pfn = PFN_DOWN(HIGHMEM_START);
}
+ /* Install all valid RAM ranges to the memblock memory region */
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
@@ -481,98 +476,38 @@ static void __init bootmem_init(void)
end = PFN_DOWN(boot_mem_map.map[i].addr
+ boot_mem_map.map[i].size);
- if (start <= min_low_pfn)
+ if (start < min_low_pfn)
start = min_low_pfn;
- if (start >= end)
- continue;
-
#ifndef CONFIG_HIGHMEM
+ /* Ignore highmem regions if highmem is unsupported */
if (end > max_low_pfn)
end = max_low_pfn;
-
- /*
- * ... finally, is the area going away?
- */
+#endif
if (end <= start)
continue;
-#endif
memblock_add_node(PFN_PHYS(start), PFN_PHYS(end - start), 0);
- }
-
- /*
- * Register fully available low RAM pages with the bootmem allocator.
- */
- for (i = 0; i < boot_mem_map.nr_map; i++) {
- unsigned long start, end, size;
- start = PFN_UP(boot_mem_map.map[i].addr);
- end = PFN_DOWN(boot_mem_map.map[i].addr
- + boot_mem_map.map[i].size);
-
- /*
- * Reserve usable memory.
- */
+ /* Reserve any memory except the ordinary RAM ranges. */
switch (boot_mem_map.map[i].type) {
case BOOT_MEM_RAM:
break;
- case BOOT_MEM_INIT_RAM:
- memory_present(0, start, end);
- continue;
- default:
- /* Not usable memory */
- if (start > min_low_pfn && end < max_low_pfn)
- memblock_reserve(boot_mem_map.map[i].addr,
- boot_mem_map.map[i].size);
-
+ case BOOT_MEM_NOMAP: /* Discard the range from the system. */
+ memblock_remove(PFN_PHYS(start), PFN_PHYS(end - start));
continue;
+ default: /* Reserve the rest of the memory types at boot time */
+ memblock_reserve(PFN_PHYS(start), PFN_PHYS(end - start));
+ break;
}
/*
- * We are rounding up the start address of usable memory
- * and at the end of the usable range downwards.
+ * In any case the added to the memblock memory regions
+ * (highmem/lowmem, available/reserved, etc) are considered
+ * as present, so inform sparsemem about them.
*/
- if (start >= max_low_pfn)
- continue;
- if (start < reserved_end)
- start = reserved_end;
- if (end > max_low_pfn)
- end = max_low_pfn;
-
- /*
- * ... finally, is the area going away?
- */
- if (end <= start)
- continue;
- size = end - start;
-
- /* Register lowmem ranges */
memory_present(0, start, end);
}
-#ifdef CONFIG_RELOCATABLE
- /*
- * The kernel reserves all memory below its _end symbol as bootmem,
- * but the kernel may now be at a much higher address. The memory
- * between the original and new locations may be returned to the system.
- */
- if (__pa_symbol(_text) > __pa_symbol(VMLINUX_LOAD_ADDRESS)) {
- unsigned long offset;
- extern void show_kernel_relocation(const char *level);
-
- offset = __pa_symbol(_text) - __pa_symbol(VMLINUX_LOAD_ADDRESS);
- memblock_free(__pa_symbol(VMLINUX_LOAD_ADDRESS), offset);
-
-#if defined(CONFIG_DEBUG_KERNEL) && defined(CONFIG_DEBUG_INFO)
- /*
- * This information is necessary when debugging the kernel
- * But is a security vulnerability otherwise!
- */
- show_kernel_relocation(KERN_INFO);
-#endif
- }
-#endif
-
/*
* Reserve initrd memory if needed.
*/
@@ -781,7 +716,6 @@ static void __init request_crashkernel(struct resource *res)
*/
static void __init arch_mem_init(char **cmdline_p)
{
- struct memblock_region *reg;
extern void plat_mem_setup(void);
/*
@@ -809,6 +743,9 @@ static void __init arch_mem_init(char **cmdline_p)
arch_mem_addpart(PFN_UP(__pa_symbol(&__init_begin)) << PAGE_SHIFT,
PFN_DOWN(__pa_symbol(&__init_end)) << PAGE_SHIFT,
BOOT_MEM_INIT_RAM);
+ arch_mem_addpart(PFN_DOWN(__pa_symbol(&__bss_start)) << PAGE_SHIFT,
+ PFN_UP(__pa_symbol(&__bss_stop)) << PAGE_SHIFT,
+ BOOT_MEM_RAM);
pr_info("Determined physical RAM map:\n");
print_memory_map();
@@ -884,13 +821,16 @@ static void __init arch_mem_init(char **cmdline_p)
plat_swiotlb_setup();
dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
- /* Tell bootmem about cma reserved memblock section */
- for_each_memblock(reserved, reg)
- if (reg->size != 0)
- memblock_reserve(reg->base, reg->size);
- reserve_bootmem_region(__pa_symbol(&__nosave_begin),
- __pa_symbol(&__nosave_end)); /* Reserve for hibernation */
+ /* Reserve for hibernation. */
+ memblock_reserve(__pa_symbol(&__nosave_begin),
+ __pa_symbol(&__nosave_end) - __pa_symbol(&__nosave_begin));
+
+ fdt_init_reserved_mem();
+
+ memblock_dump_all();
+
+ early_memtest(PFN_PHYS(min_low_pfn), PFN_PHYS(max_low_pfn));
}
static void __init resource_init(void)
@@ -935,6 +875,7 @@ static void __init resource_init(void)
res->flags |= IORESOURCE_SYSRAM;
break;
case BOOT_MEM_RESERVED:
+ case BOOT_MEM_NOMAP:
default:
res->name = "reserved";
}
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 98ca55d62201..c52766a5b85f 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2151,7 +2151,7 @@ static void configure_hwrena(void)
static void configure_exception_vector(void)
{
- if (cpu_has_veic || cpu_has_vint) {
+ if (cpu_has_mips_r2_r6) {
unsigned long sr = set_c0_status(ST0_BEV);
/* If available, use WG to set top bits of EBASE */
if (cpu_has_ebase_wg) {
@@ -2163,6 +2163,8 @@ static void configure_exception_vector(void)
}
write_c0_ebase(ebase);
write_c0_status(sr);
+ }
+ if (cpu_has_veic || cpu_has_vint) {
/* Setting vector spacing enables EI/VI mode */
change_c0_intctl(0x3e0, VECTORSPACING);
}
@@ -2193,22 +2195,6 @@ void per_cpu_trap_init(bool is_boot_cpu)
* o read IntCtl.IPFDC to determine the fast debug channel interrupt
*/
if (cpu_has_mips_r2_r6) {
- /*
- * We shouldn't trust a secondary core has a sane EBASE register
- * so use the one calculated by the boot CPU.
- */
- if (!is_boot_cpu) {
- /* If available, use WG to set top bits of EBASE */
- if (cpu_has_ebase_wg) {
-#ifdef CONFIG_64BIT
- write_c0_ebase_64(ebase | MIPS_EBASE_WG);
-#else
- write_c0_ebase(ebase | MIPS_EBASE_WG);
-#endif
- }
- write_c0_ebase(ebase);
- }
-
cp0_compare_irq_shift = CAUSEB_TI - CAUSEB_IP;
cp0_compare_irq = (read_c0_intctl() >> INTCTLB_IPTI) & 7;
cp0_perfcount_irq = (read_c0_intctl() >> INTCTLB_IPPCI) & 7;
@@ -2284,19 +2270,27 @@ void __init trap_init(void)
extern char except_vec3_generic;
extern char except_vec4;
extern char except_vec3_r4000;
- unsigned long i;
+ unsigned long i, vec_size;
+ phys_addr_t ebase_pa;
check_wait();
- if (cpu_has_veic || cpu_has_vint) {
- unsigned long size = 0x200 + VECTORSPACING*64;
- phys_addr_t ebase_pa;
+ if (!cpu_has_mips_r2_r6) {
+ ebase = CAC_BASE;
+ ebase_pa = virt_to_phys((void *)ebase);
+ vec_size = 0x400;
- ebase = (unsigned long)
- memblock_alloc(size, 1 << fls(size));
- if (!ebase)
+ memblock_reserve(ebase_pa, vec_size);
+ } else {
+ if (cpu_has_veic || cpu_has_vint)
+ vec_size = 0x200 + VECTORSPACING*64;
+ else
+ vec_size = PAGE_SIZE;
+
+ ebase_pa = memblock_phys_alloc(vec_size, 1 << fls(vec_size));
+ if (!ebase_pa)
panic("%s: Failed to allocate %lu bytes align=0x%x\n",
- __func__, size, 1 << fls(size));
+ __func__, vec_size, 1 << fls(vec_size));
/*
* Try to ensure ebase resides in KSeg0 if possible.
@@ -2309,23 +2303,10 @@ void __init trap_init(void)
* EVA is special though as it allows segments to be rearranged
* and to become uncached during cache error handling.
*/
- ebase_pa = __pa(ebase);
if (!IS_ENABLED(CONFIG_EVA) && !WARN_ON(ebase_pa >= 0x20000000))
ebase = CKSEG0ADDR(ebase_pa);
- } else {
- ebase = CAC_BASE;
-
- if (cpu_has_mips_r2_r6) {
- if (cpu_has_ebase_wg) {
-#ifdef CONFIG_64BIT
- ebase = (read_c0_ebase_64() & ~0xfff);
-#else
- ebase = (read_c0_ebase() & ~0xfff);
-#endif
- } else {
- ebase += (read_c0_ebase() & 0x3ffff000);
- }
- }
+ else
+ ebase = (unsigned long)phys_to_virt(ebase_pa);
}
if (cpu_has_mmips) {
@@ -2459,7 +2440,7 @@ void __init trap_init(void)
else
set_handler(0x080, &except_vec3_generic, 0x80);
- local_flush_icache_range(ebase, ebase + 0x400);
+ local_flush_icache_range(ebase, ebase + vec_size);
sort_extable(__start___dbe_table, __stop___dbe_table);
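For reference, the alignment handed to memblock_phys_alloc() in the hunk above is 1 << fls(vec_size), a power of two strictly larger than the vector area, which keeps the whole block inside one naturally aligned region. A small userspace sketch of that computation (the sizes are illustrative, not tied to a particular CPU configuration):

#include <stdio.h>

/* Userspace stand-in for the kernel's fls(): 1-based index of the highest set bit. */
static int fls(unsigned long x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long sizes[] = { 0x400, 0x1000 };	/* example vec_size values */

	for (int i = 0; i < 2; i++)
		printf("vec_size=%#lx -> align=%#lx\n",
		       sizes[i], 1UL << fls(sizes[i]));
	return 0;
}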
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 4528bc9c3cb1..eac25aef21e0 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -21,7 +21,6 @@ config KVM
depends on MIPS_FP_SUPPORT
select EXPORT_UASM
select PREEMPT_NOTIFIERS
- select ANON_INODES
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select HAVE_KVM_VCPU_ASYNC_IOCTL
select KVM_MMIO
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index 0074427b04fb..e5de6bac8197 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -1141,9 +1141,7 @@ enum emulation_result kvm_mips_emul_tlbwr(struct kvm_vcpu *vcpu)
unsigned long pc = vcpu->arch.pc;
int index;
- get_random_bytes(&index, sizeof(index));
- index &= (KVM_MIPS_GUEST_TLB_SIZE - 1);
-
+ index = prandom_u32_max(KVM_MIPS_GUEST_TLB_SIZE);
tlb = &vcpu->arch.guest_tlb[index];
kvm_mips_invalidate_guest_tlb(vcpu, tlb);
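The old code masked raw random bytes with (KVM_MIPS_GUEST_TLB_SIZE - 1), which is only bias-free because the TLB size is a power of two; prandom_u32_max() maps a pseudo-random 32-bit value into [0, bound), essentially a multiply-and-shift reduction, and works for any bound. A stand-alone model of that scaling step (sample inputs and the bound of 64 are chosen for illustration; the real helper draws from the kernel PRNG):

#include <stdint.h>
#include <stdio.h>

/* Multiply-and-shift reduction of a 32-bit value into [0, bound). */
static uint32_t scale_to_bound(uint32_t r, uint32_t bound)
{
	return (uint32_t)(((uint64_t)r * bound) >> 32);
}

int main(void)
{
	uint32_t samples[] = { 0x00000000, 0x40000000, 0x80000000, 0xffffffff };

	for (int i = 0; i < 4; i++)
		printf("%#010x -> %u\n", (unsigned)samples[i],
		       (unsigned)scale_to_bound(samples[i], 64));
	return 0;
}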
diff --git a/arch/mips/mm/uasm-mips.c b/arch/mips/mm/uasm-mips.c
index 6abe40fc413d..7154a1d99aad 100644
--- a/arch/mips/mm/uasm-mips.c
+++ b/arch/mips/mm/uasm-mips.c
@@ -76,14 +76,22 @@ static const struct insn insn_table[insn_invalid] = {
[insn_daddiu] = {M(daddiu_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
[insn_daddu] = {M(spec_op, 0, 0, 0, 0, daddu_op), RS | RT | RD},
[insn_ddivu] = {M(spec_op, 0, 0, 0, 0, ddivu_op), RS | RT},
+ [insn_ddivu_r6] = {M(spec_op, 0, 0, 0, ddivu_ddivu6_op, ddivu_op),
+ RS | RT | RD},
[insn_di] = {M(cop0_op, mfmc0_op, 0, 12, 0, 0), RT},
[insn_dins] = {M(spec3_op, 0, 0, 0, 0, dins_op), RS | RT | RD | RE},
[insn_dinsm] = {M(spec3_op, 0, 0, 0, 0, dinsm_op), RS | RT | RD | RE},
[insn_dinsu] = {M(spec3_op, 0, 0, 0, 0, dinsu_op), RS | RT | RD | RE},
[insn_divu] = {M(spec_op, 0, 0, 0, 0, divu_op), RS | RT},
+ [insn_divu_r6] = {M(spec_op, 0, 0, 0, divu_divu6_op, divu_op),
+ RS | RT | RD},
[insn_dmfc0] = {M(cop0_op, dmfc_op, 0, 0, 0, 0), RT | RD | SET},
+ [insn_dmodu] = {M(spec_op, 0, 0, 0, ddivu_dmodu_op, ddivu_op),
+ RS | RT | RD},
[insn_dmtc0] = {M(cop0_op, dmtc_op, 0, 0, 0, 0), RT | RD | SET},
[insn_dmultu] = {M(spec_op, 0, 0, 0, 0, dmultu_op), RS | RT},
+ [insn_dmulu] = {M(spec_op, 0, 0, 0, dmult_dmul_op, dmultu_op),
+ RS | RT | RD},
[insn_drotr] = {M(spec_op, 1, 0, 0, 0, dsrl_op), RT | RD | RE},
[insn_drotr32] = {M(spec_op, 1, 0, 0, 0, dsrl32_op), RT | RD | RE},
[insn_dsbh] = {M(spec3_op, 0, 0, 0, dsbh_op, dbshfl_op), RT | RD},
@@ -132,12 +140,16 @@ static const struct insn insn_table[insn_invalid] = {
[insn_mfhc0] = {M(cop0_op, mfhc0_op, 0, 0, 0, 0), RT | RD | SET},
[insn_mfhi] = {M(spec_op, 0, 0, 0, 0, mfhi_op), RD},
[insn_mflo] = {M(spec_op, 0, 0, 0, 0, mflo_op), RD},
+ [insn_modu] = {M(spec_op, 0, 0, 0, divu_modu_op, divu_op),
+ RS | RT | RD},
[insn_movn] = {M(spec_op, 0, 0, 0, 0, movn_op), RS | RT | RD},
[insn_movz] = {M(spec_op, 0, 0, 0, 0, movz_op), RS | RT | RD},
[insn_mtc0] = {M(cop0_op, mtc_op, 0, 0, 0, 0), RT | RD | SET},
[insn_mthc0] = {M(cop0_op, mthc0_op, 0, 0, 0, 0), RT | RD | SET},
[insn_mthi] = {M(spec_op, 0, 0, 0, 0, mthi_op), RS},
[insn_mtlo] = {M(spec_op, 0, 0, 0, 0, mtlo_op), RS},
+ [insn_mulu] = {M(spec_op, 0, 0, 0, multu_mulu_op, multu_op),
+ RS | RT | RD},
#ifndef CONFIG_CPU_MIPSR6
[insn_mul] = {M(spec2_op, 0, 0, 0, 0, mul_op), RS | RT | RD},
#else
@@ -163,6 +175,8 @@ static const struct insn insn_table[insn_invalid] = {
[insn_scd] = {M6(spec3_op, 0, 0, 0, scd6_op), RS | RT | SIMM9},
#endif
[insn_sd] = {M(sd_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
+ [insn_seleqz] = {M(spec_op, 0, 0, 0, 0, seleqz_op), RS | RT | RD},
+ [insn_selnez] = {M(spec_op, 0, 0, 0, 0, selnez_op), RS | RT | RD},
[insn_sh] = {M(sh_op, 0, 0, 0, 0, 0), RS | RT | SIMM},
[insn_sll] = {M(spec_op, 0, 0, 0, 0, sll_op), RT | RD | RE},
[insn_sllv] = {M(spec_op, 0, 0, 0, 0, sllv_op), RS | RT | RD},
diff --git a/arch/mips/mm/uasm.c b/arch/mips/mm/uasm.c
index 45b6264ff308..c56f129c9a4b 100644
--- a/arch/mips/mm/uasm.c
+++ b/arch/mips/mm/uasm.c
@@ -50,21 +50,22 @@ enum opcode {
insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bgtz, insn_blez,
insn_bltz, insn_bltzl, insn_bne, insn_break, insn_cache, insn_cfc1,
insn_cfcmsa, insn_ctc1, insn_ctcmsa, insn_daddiu, insn_daddu, insn_ddivu,
- insn_di, insn_dins, insn_dinsm, insn_dinsu, insn_divu, insn_dmfc0,
- insn_dmtc0, insn_dmultu, insn_drotr, insn_drotr32, insn_dsbh, insn_dshd,
- insn_dsll, insn_dsll32, insn_dsllv, insn_dsra, insn_dsra32, insn_dsrav,
- insn_dsrl, insn_dsrl32, insn_dsrlv, insn_dsubu, insn_eret, insn_ext,
- insn_ins, insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_lbu,
- insn_ld, insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu,
- insn_ll, insn_lld, insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0,
- insn_mfhc0, insn_mfhi, insn_mflo, insn_movn, insn_movz, insn_mtc0,
- insn_mthc0, insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_nor,
- insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb,
- insn_sc, insn_scd, insn_sd, insn_sh, insn_sll, insn_sllv,
- insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra, insn_srav,
- insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync, insn_syscall,
- insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait, insn_wsbh,
- insn_xor, insn_xori, insn_yield,
+ insn_ddivu_r6, insn_di, insn_dins, insn_dinsm, insn_dinsu, insn_divu,
+ insn_divu_r6, insn_dmfc0, insn_dmodu, insn_dmtc0, insn_dmultu,
+ insn_dmulu, insn_drotr, insn_drotr32, insn_dsbh, insn_dshd, insn_dsll,
+ insn_dsll32, insn_dsllv, insn_dsra, insn_dsra32, insn_dsrav, insn_dsrl,
+ insn_dsrl32, insn_dsrlv, insn_dsubu, insn_eret, insn_ext, insn_ins,
+ insn_j, insn_jal, insn_jalr, insn_jr, insn_lb, insn_lbu, insn_ld,
+ insn_lddir, insn_ldpte, insn_ldx, insn_lh, insn_lhu, insn_ll, insn_lld,
+ insn_lui, insn_lw, insn_lwu, insn_lwx, insn_mfc0, insn_mfhc0, insn_mfhi,
+ insn_mflo, insn_modu, insn_movn, insn_movz, insn_mtc0, insn_mthc0,
+ insn_mthi, insn_mtlo, insn_mul, insn_multu, insn_mulu, insn_nor,
+ insn_or, insn_ori, insn_pref, insn_rfe, insn_rotr, insn_sb, insn_sc,
+ insn_scd, insn_seleqz, insn_selnez, insn_sd, insn_sh, insn_sll,
+ insn_sllv, insn_slt, insn_slti, insn_sltiu, insn_sltu, insn_sra,
+ insn_srav, insn_srl, insn_srlv, insn_subu, insn_sw, insn_sync,
+ insn_syscall, insn_tlbp, insn_tlbr, insn_tlbwi, insn_tlbwr, insn_wait,
+ insn_wsbh, insn_xor, insn_xori, insn_yield,
insn_invalid /* insn_invalid must be last */
};
@@ -287,13 +288,17 @@ I_u2u1(_cfcmsa)
I_u1u2(_ctc1)
I_u2u1(_ctcmsa)
I_u1u2(_ddivu)
+I_u3u1u2(_ddivu_r6)
I_u1u2u3(_dmfc0)
+I_u3u1u2(_dmodu)
I_u1u2u3(_dmtc0)
I_u1u2(_dmultu)
+I_u3u1u2(_dmulu)
I_u2u1s3(_daddiu)
I_u3u1u2(_daddu)
I_u1(_di);
I_u1u2(_divu)
+I_u3u1u2(_divu_r6)
I_u2u1(_dsbh);
I_u2u1(_dshd);
I_u2u1u3(_dsll)
@@ -327,6 +332,7 @@ I_u2s3u1(_lw)
I_u2s3u1(_lwu)
I_u1u2u3(_mfc0)
I_u1u2u3(_mfhc0)
+I_u3u1u2(_modu)
I_u3u1u2(_movn)
I_u3u1u2(_movz)
I_u1(_mfhi)
@@ -337,6 +343,7 @@ I_u1(_mthi)
I_u1(_mtlo)
I_u3u1u2(_mul)
I_u1u2(_multu)
+I_u3u1u2(_mulu)
I_u3u1u2(_nor)
I_u3u1u2(_or)
I_u2u1u3(_ori)
@@ -345,6 +352,8 @@ I_u2s3u1(_sb)
I_u2s3u1(_sc)
I_u2s3u1(_scd)
I_u2s3u1(_sd)
+I_u3u1u2(_seleqz)
+I_u3u1u2(_selnez)
I_u2s3u1(_sh)
I_u2u1u3(_sll)
I_u3u2u1(_sllv)
diff --git a/arch/mips/net/Makefile b/arch/mips/net/Makefile
index 47d678416715..72a78462f872 100644
--- a/arch/mips/net/Makefile
+++ b/arch/mips/net/Makefile
@@ -1,4 +1,3 @@
# MIPS networking code
-obj-$(CONFIG_MIPS_CBPF_JIT) += bpf_jit.o bpf_jit_asm.o
obj-$(CONFIG_MIPS_EBPF_JIT) += ebpf_jit.o
diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c
deleted file mode 100644
index 3a0e34f4e615..000000000000
--- a/arch/mips/net/bpf_jit.c
+++ /dev/null
@@ -1,1270 +0,0 @@
-/*
- * Just-In-Time compiler for BPF filters on MIPS
- *
- * Copyright (c) 2014 Imagination Technologies Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
- */
-
-#include <linux/bitops.h>
-#include <linux/compiler.h>
-#include <linux/errno.h>
-#include <linux/filter.h>
-#include <linux/if_vlan.h>
-#include <linux/moduleloader.h>
-#include <linux/netdevice.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <asm/asm.h>
-#include <asm/bitops.h>
-#include <asm/cacheflush.h>
-#include <asm/cpu-features.h>
-#include <asm/uasm.h>
-
-#include "bpf_jit.h"
-
-/* ABI
- * r_skb_hl SKB header length
- * r_data SKB data pointer
- * r_off Offset
- * r_A BPF register A
- * r_X BPF register X
- * r_skb *skb
- * r_M *scratch memory
- * r_skb_len SKB length
- *
- * On entry (*bpf_func)(*skb, *filter)
- * a0 = MIPS_R_A0 = skb;
- * a1 = MIPS_R_A1 = filter;
- *
- * Stack
- * ...
- * M[15]
- * M[14]
- * M[13]
- * ...
- * M[0] <-- r_M
- * saved reg k-1
- * saved reg k-2
- * ...
- * saved reg 0 <-- r_sp
- * <no argument area>
- *
- * Packet layout
- *
- * <--------------------- len ------------------------>
- * <--skb-len(r_skb_hl)-->< ----- skb->data_len ------>
- * ----------------------------------------------------
- * | skb->data |
- * ----------------------------------------------------
- */
-
-#define ptr typeof(unsigned long)
-
-#define SCRATCH_OFF(k) (4 * (k))
-
-/* JIT flags */
-#define SEEN_CALL (1 << BPF_MEMWORDS)
-#define SEEN_SREG_SFT (BPF_MEMWORDS + 1)
-#define SEEN_SREG_BASE (1 << SEEN_SREG_SFT)
-#define SEEN_SREG(x) (SEEN_SREG_BASE << (x))
-#define SEEN_OFF SEEN_SREG(2)
-#define SEEN_A SEEN_SREG(3)
-#define SEEN_X SEEN_SREG(4)
-#define SEEN_SKB SEEN_SREG(5)
-#define SEEN_MEM SEEN_SREG(6)
-/* SEEN_SKB_DATA also implies skb_hl and skb_len */
-#define SEEN_SKB_DATA (SEEN_SREG(7) | SEEN_SREG(1) | SEEN_SREG(0))
-
-/* Arguments used by JIT */
-#define ARGS_USED_BY_JIT 2 /* only applicable to 64-bit */
-
-#define SBIT(x) (1 << (x)) /* Signed version of BIT() */
-
-/**
- * struct jit_ctx - JIT context
- * @skf: The sk_filter
- * @prologue_bytes: Number of bytes for prologue
- * @idx: Instruction index
- * @flags: JIT flags
- * @offsets: Instruction offsets
- * @target: Memory location for the compiled filter
- */
-struct jit_ctx {
- const struct bpf_prog *skf;
- unsigned int prologue_bytes;
- u32 idx;
- u32 flags;
- u32 *offsets;
- u32 *target;
-};
-
-
-static inline int optimize_div(u32 *k)
-{
- /* power of 2 divides can be implemented with right shift */
- if (!(*k & (*k-1))) {
- *k = ilog2(*k);
- return 1;
- }
-
- return 0;
-}
-
-static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx);
-
-/* Simply emit the instruction if the JIT memory space has been allocated */
-#define emit_instr(ctx, func, ...) \
-do { \
- if ((ctx)->target != NULL) { \
- u32 *p = &(ctx)->target[ctx->idx]; \
- uasm_i_##func(&p, ##__VA_ARGS__); \
- } \
- (ctx)->idx++; \
-} while (0)
-
-/*
- * Similar to emit_instr but it must be used when we need to emit
- * 32-bit or 64-bit instructions
- */
-#define emit_long_instr(ctx, func, ...) \
-do { \
- if ((ctx)->target != NULL) { \
- u32 *p = &(ctx)->target[ctx->idx]; \
- UASM_i_##func(&p, ##__VA_ARGS__); \
- } \
- (ctx)->idx++; \
-} while (0)
-
-/* Determine if immediate is within the 16-bit signed range */
-static inline bool is_range16(s32 imm)
-{
- return !(imm >= SBIT(15) || imm < -SBIT(15));
-}
-
-static inline void emit_addu(unsigned int dst, unsigned int src1,
- unsigned int src2, struct jit_ctx *ctx)
-{
- emit_instr(ctx, addu, dst, src1, src2);
-}
-
-static inline void emit_nop(struct jit_ctx *ctx)
-{
- emit_instr(ctx, nop);
-}
-
-/* Load a u32 immediate to a register */
-static inline void emit_load_imm(unsigned int dst, u32 imm, struct jit_ctx *ctx)
-{
- if (ctx->target != NULL) {
- /* addiu can only handle s16 */
- if (!is_range16(imm)) {
- u32 *p = &ctx->target[ctx->idx];
- uasm_i_lui(&p, r_tmp_imm, (s32)imm >> 16);
- p = &ctx->target[ctx->idx + 1];
- uasm_i_ori(&p, dst, r_tmp_imm, imm & 0xffff);
- } else {
- u32 *p = &ctx->target[ctx->idx];
- uasm_i_addiu(&p, dst, r_zero, imm);
- }
- }
- ctx->idx++;
-
- if (!is_range16(imm))
- ctx->idx++;
-}
-
-static inline void emit_or(unsigned int dst, unsigned int src1,
- unsigned int src2, struct jit_ctx *ctx)
-{
- emit_instr(ctx, or, dst, src1, src2);
-}
-
-static inline void emit_ori(unsigned int dst, unsigned src, u32 imm,
- struct jit_ctx *ctx)
-{
- if (imm >= BIT(16)) {
- emit_load_imm(r_tmp, imm, ctx);
- emit_or(dst, src, r_tmp, ctx);
- } else {
- emit_instr(ctx, ori, dst, src, imm);
- }
-}
-
-static inline void emit_daddiu(unsigned int dst, unsigned int src,
- int imm, struct jit_ctx *ctx)
-{
- /*
- * Only used for stack, so the imm is relatively small
- * and it fits in 15-bits
- */
- emit_instr(ctx, daddiu, dst, src, imm);
-}
-
-static inline void emit_addiu(unsigned int dst, unsigned int src,
- u32 imm, struct jit_ctx *ctx)
-{
- if (!is_range16(imm)) {
- emit_load_imm(r_tmp, imm, ctx);
- emit_addu(dst, r_tmp, src, ctx);
- } else {
- emit_instr(ctx, addiu, dst, src, imm);
- }
-}
-
-static inline void emit_and(unsigned int dst, unsigned int src1,
- unsigned int src2, struct jit_ctx *ctx)
-{
- emit_instr(ctx, and, dst, src1, src2);
-}
-
-static inline void emit_andi(unsigned int dst, unsigned int src,
- u32 imm, struct jit_ctx *ctx)
-{
- /* If imm does not fit in u16 then load it to register */
- if (imm >= BIT(16)) {
- emit_load_imm(r_tmp, imm, ctx);
- emit_and(dst, src, r_tmp, ctx);
- } else {
- emit_instr(ctx, andi, dst, src, imm);
- }
-}
-
-static inline void emit_xor(unsigned int dst, unsigned int src1,
- unsigned int src2, struct jit_ctx *ctx)
-{
- emit_instr(ctx, xor, dst, src1, src2);
-}
-
-static inline void emit_xori(ptr dst, ptr src, u32 imm, struct jit_ctx *ctx)
-{
- /* If imm does not fit in u16 then load it to register */
- if (imm >= BIT(16)) {
- emit_load_imm(r_tmp, imm, ctx);
- emit_xor(dst, src, r_tmp, ctx);
- } else {
- emit_instr(ctx, xori, dst, src, imm);
- }
-}
-
-static inline void emit_stack_offset(int offset, struct jit_ctx *ctx)
-{
- emit_long_instr(ctx, ADDIU, r_sp, r_sp, offset);
-}
-
-static inline void emit_subu(unsigned int dst, unsigned int src1,
- unsigned int src2, struct jit_ctx *ctx)
-{
- emit_instr(ctx, subu, dst, src1, src2);
-}
-
-static inline void emit_neg(unsigned int reg, struct jit_ctx *ctx)
-{
- emit_subu(reg, r_zero, reg, ctx);
-}
-
-static inline void emit_sllv(unsigned int dst, unsigned int src,
- unsigned int sa, struct jit_ctx *ctx)
-{
- emit_instr(ctx, sllv, dst, src, sa);
-}
-
-static inline void emit_sll(unsigned int dst, unsigned int src,
- unsigned int sa, struct jit_ctx *ctx)
-{
- /* sa is 5-bits long */
- if (sa >= BIT(5))
- /* Shifting >= 32 results in zero */
- emit_jit_reg_move(dst, r_zero, ctx);
- else
- emit_instr(ctx, sll, dst, src, sa);
-}
-
-static inline void emit_srlv(unsigned int dst, unsigned int src,
- unsigned int sa, struct jit_ctx *ctx)
-{
- emit_instr(ctx, srlv, dst, src, sa);
-}
-
-static inline void emit_srl(unsigned int dst, unsigned int src,
- unsigned int sa, struct jit_ctx *ctx)
-{
- /* sa is 5-bits long */
- if (sa >= BIT(5))
- /* Shifting >= 32 results in zero */
- emit_jit_reg_move(dst, r_zero, ctx);
- else
- emit_instr(ctx, srl, dst, src, sa);
-}
-
-static inline void emit_slt(unsigned int dst, unsigned int src1,
- unsigned int src2, struct jit_ctx *ctx)
-{
- emit_instr(ctx, slt, dst, src1, src2);
-}
-
-static inline void emit_sltu(unsigned int dst, unsigned int src1,
- unsigned int src2, struct jit_ctx *ctx)
-{
- emit_instr(ctx, sltu, dst, src1, src2);
-}
-
-static inline void emit_sltiu(unsigned dst, unsigned int src,
- unsigned int imm, struct jit_ctx *ctx)
-{
- /* 16 bit immediate */
- if (!is_range16((s32)imm)) {
- emit_load_imm(r_tmp, imm, ctx);
- emit_sltu(dst, src, r_tmp, ctx);
- } else {
- emit_instr(ctx, sltiu, dst, src, imm);
- }
-
-}
-
-/* Store register on the stack */
-static inline void emit_store_stack_reg(ptr reg, ptr base,
- unsigned int offset,
- struct jit_ctx *ctx)
-{
- emit_long_instr(ctx, SW, reg, offset, base);
-}
-
-static inline void emit_store(ptr reg, ptr base, unsigned int offset,
- struct jit_ctx *ctx)
-{
- emit_instr(ctx, sw, reg, offset, base);
-}
-
-static inline void emit_load_stack_reg(ptr reg, ptr base,
- unsigned int offset,
- struct jit_ctx *ctx)
-{
- emit_long_instr(ctx, LW, reg, offset, base);
-}
-
-static inline void emit_load(unsigned int reg, unsigned int base,
- unsigned int offset, struct jit_ctx *ctx)
-{
- emit_instr(ctx, lw, reg, offset, base);
-}
-
-static inline void emit_load_byte(unsigned int reg, unsigned int base,
- unsigned int offset, struct jit_ctx *ctx)
-{
- emit_instr(ctx, lb, reg, offset, base);
-}
-
-static inline void emit_half_load(unsigned int reg, unsigned int base,
- unsigned int offset, struct jit_ctx *ctx)
-{
- emit_instr(ctx, lh, reg, offset, base);
-}
-
-static inline void emit_half_load_unsigned(unsigned int reg, unsigned int base,
- unsigned int offset, struct jit_ctx *ctx)
-{
- emit_instr(ctx, lhu, reg, offset, base);
-}
-
-static inline void emit_mul(unsigned int dst, unsigned int src1,
- unsigned int src2, struct jit_ctx *ctx)
-{
- emit_instr(ctx, mul, dst, src1, src2);
-}
-
-static inline void emit_div(unsigned int dst, unsigned int src,
- struct jit_ctx *ctx)
-{
- if (ctx->target != NULL) {
- u32 *p = &ctx->target[ctx->idx];
- uasm_i_divu(&p, dst, src);
- p = &ctx->target[ctx->idx + 1];
- uasm_i_mflo(&p, dst);
- }
- ctx->idx += 2; /* 2 insts */
-}
-
-static inline void emit_mod(unsigned int dst, unsigned int src,
- struct jit_ctx *ctx)
-{
- if (ctx->target != NULL) {
- u32 *p = &ctx->target[ctx->idx];
- uasm_i_divu(&p, dst, src);
- p = &ctx->target[ctx->idx + 1];
- uasm_i_mfhi(&p, dst);
- }
- ctx->idx += 2; /* 2 insts */
-}
-
-static inline void emit_dsll(unsigned int dst, unsigned int src,
- unsigned int sa, struct jit_ctx *ctx)
-{
- emit_instr(ctx, dsll, dst, src, sa);
-}
-
-static inline void emit_dsrl32(unsigned int dst, unsigned int src,
- unsigned int sa, struct jit_ctx *ctx)
-{
- emit_instr(ctx, dsrl32, dst, src, sa);
-}
-
-static inline void emit_wsbh(unsigned int dst, unsigned int src,
- struct jit_ctx *ctx)
-{
- emit_instr(ctx, wsbh, dst, src);
-}
-
-/* load pointer to register */
-static inline void emit_load_ptr(unsigned int dst, unsigned int src,
- int imm, struct jit_ctx *ctx)
-{
-	/* src contains the base addr of the 32/64-bit pointer */
- emit_long_instr(ctx, LW, dst, imm, src);
-}
-
-/* load a function pointer to register */
-static inline void emit_load_func(unsigned int reg, ptr imm,
- struct jit_ctx *ctx)
-{
- if (IS_ENABLED(CONFIG_64BIT)) {
- /* At this point imm is always 64-bit */
- emit_load_imm(r_tmp, (u64)imm >> 32, ctx);
- emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
- emit_ori(r_tmp, r_tmp_imm, (imm >> 16) & 0xffff, ctx);
- emit_dsll(r_tmp_imm, r_tmp, 16, ctx); /* left shift by 16 */
- emit_ori(reg, r_tmp_imm, imm & 0xffff, ctx);
- } else {
- emit_load_imm(reg, imm, ctx);
- }
-}
-
-/* Move to real MIPS register */
-static inline void emit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
-{
- emit_long_instr(ctx, ADDU, dst, src, r_zero);
-}
-
-/* Move to JIT (32-bit) register */
-static inline void emit_jit_reg_move(ptr dst, ptr src, struct jit_ctx *ctx)
-{
- emit_addu(dst, src, r_zero, ctx);
-}
-
-/* Compute the immediate value for PC-relative branches. */
-static inline u32 b_imm(unsigned int tgt, struct jit_ctx *ctx)
-{
- if (ctx->target == NULL)
- return 0;
-
- /*
- * We want a pc-relative branch. We only do forward branches
- * so tgt is always after pc. tgt is the instruction offset
- * we want to jump to.
-	 *
- * Branch on MIPS:
- * I: target_offset <- sign_extend(offset)
- * I+1: PC += target_offset (delay slot)
- *
- * ctx->idx currently points to the branch instruction
- * but the offset is added to the delay slot so we need
- * to subtract 4.
- */
- return ctx->offsets[tgt] -
- (ctx->idx * 4 - ctx->prologue_bytes) - 4;
-}
-
-static inline void emit_bcond(int cond, unsigned int reg1, unsigned int reg2,
- unsigned int imm, struct jit_ctx *ctx)
-{
- if (ctx->target != NULL) {
- u32 *p = &ctx->target[ctx->idx];
-
- switch (cond) {
- case MIPS_COND_EQ:
- uasm_i_beq(&p, reg1, reg2, imm);
- break;
- case MIPS_COND_NE:
- uasm_i_bne(&p, reg1, reg2, imm);
- break;
- case MIPS_COND_ALL:
- uasm_i_b(&p, imm);
- break;
- default:
- pr_warn("%s: Unhandled branch conditional: %d\n",
- __func__, cond);
- }
- }
- ctx->idx++;
-}
-
-static inline void emit_b(unsigned int imm, struct jit_ctx *ctx)
-{
- emit_bcond(MIPS_COND_ALL, r_zero, r_zero, imm, ctx);
-}
-
-static inline void emit_jalr(unsigned int link, unsigned int reg,
- struct jit_ctx *ctx)
-{
- emit_instr(ctx, jalr, link, reg);
-}
-
-static inline void emit_jr(unsigned int reg, struct jit_ctx *ctx)
-{
- emit_instr(ctx, jr, reg);
-}
-
-static inline u16 align_sp(unsigned int num)
-{
- /* Double word alignment for 32-bit, quadword for 64-bit */
- unsigned int align = IS_ENABLED(CONFIG_64BIT) ? 16 : 8;
- num = (num + (align - 1)) & -align;
- return num;
-}
-
-static void save_bpf_jit_regs(struct jit_ctx *ctx, unsigned offset)
-{
- int i = 0, real_off = 0;
- u32 sflags, tmp_flags;
-
- /* Adjust the stack pointer */
- if (offset)
- emit_stack_offset(-align_sp(offset), ctx);
-
- tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
- /* sflags is essentially a bitmap */
- while (tmp_flags) {
- if ((sflags >> i) & 0x1) {
- emit_store_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
- ctx);
- real_off += SZREG;
- }
- i++;
- tmp_flags >>= 1;
- }
-
- /* save return address */
- if (ctx->flags & SEEN_CALL) {
- emit_store_stack_reg(r_ra, r_sp, real_off, ctx);
- real_off += SZREG;
- }
-
- /* Setup r_M leaving the alignment gap if necessary */
- if (ctx->flags & SEEN_MEM) {
- if (real_off % (SZREG * 2))
- real_off += SZREG;
- emit_long_instr(ctx, ADDIU, r_M, r_sp, real_off);
- }
-}
-
-static void restore_bpf_jit_regs(struct jit_ctx *ctx,
- unsigned int offset)
-{
- int i, real_off = 0;
- u32 sflags, tmp_flags;
-
- tmp_flags = sflags = ctx->flags >> SEEN_SREG_SFT;
- /* sflags is a bitmap */
- i = 0;
- while (tmp_flags) {
- if ((sflags >> i) & 0x1) {
- emit_load_stack_reg(MIPS_R_S0 + i, r_sp, real_off,
- ctx);
- real_off += SZREG;
- }
- i++;
- tmp_flags >>= 1;
- }
-
- /* restore return address */
- if (ctx->flags & SEEN_CALL)
- emit_load_stack_reg(r_ra, r_sp, real_off, ctx);
-
-	/* Restore the sp and discard the scratch memory */
- if (offset)
- emit_stack_offset(align_sp(offset), ctx);
-}
-
-static unsigned int get_stack_depth(struct jit_ctx *ctx)
-{
- int sp_off = 0;
-
-
-	/* How many s* regs do we need to preserve? */
- sp_off += hweight32(ctx->flags >> SEEN_SREG_SFT) * SZREG;
-
- if (ctx->flags & SEEN_MEM)
- sp_off += 4 * BPF_MEMWORDS; /* BPF_MEMWORDS are 32-bit */
-
- if (ctx->flags & SEEN_CALL)
- sp_off += SZREG; /* Space for our ra register */
-
- return sp_off;
-}
-
-static void build_prologue(struct jit_ctx *ctx)
-{
- int sp_off;
-
- /* Calculate the total offset for the stack pointer */
- sp_off = get_stack_depth(ctx);
- save_bpf_jit_regs(ctx, sp_off);
-
- if (ctx->flags & SEEN_SKB)
- emit_reg_move(r_skb, MIPS_R_A0, ctx);
-
- if (ctx->flags & SEEN_SKB_DATA) {
- /* Load packet length */
- emit_load(r_skb_len, r_skb, offsetof(struct sk_buff, len),
- ctx);
- emit_load(r_tmp, r_skb, offsetof(struct sk_buff, data_len),
- ctx);
- /* Load the data pointer */
- emit_load_ptr(r_skb_data, r_skb,
- offsetof(struct sk_buff, data), ctx);
- /* Load the header length */
- emit_subu(r_skb_hl, r_skb_len, r_tmp, ctx);
- }
-
- if (ctx->flags & SEEN_X)
- emit_jit_reg_move(r_X, r_zero, ctx);
-
- /*
- * Do not leak kernel data to userspace, we only need to clear
- * r_A if it is ever used. In fact if it is never used, we
- * will not save/restore it, so clearing it in this case would
- * corrupt the state of the caller.
- */
- if (bpf_needs_clear_a(&ctx->skf->insns[0]) &&
- (ctx->flags & SEEN_A))
- emit_jit_reg_move(r_A, r_zero, ctx);
-}
-
-static void build_epilogue(struct jit_ctx *ctx)
-{
- unsigned int sp_off;
-
- /* Calculate the total offset for the stack pointer */
-
- sp_off = get_stack_depth(ctx);
- restore_bpf_jit_regs(ctx, sp_off);
-
- /* Return */
- emit_jr(r_ra, ctx);
- emit_nop(ctx);
-}
-
-#define CHOOSE_LOAD_FUNC(K, func) \
- ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative : func) : \
- func##_positive)
-
-static int build_body(struct jit_ctx *ctx)
-{
- const struct bpf_prog *prog = ctx->skf;
- const struct sock_filter *inst;
- unsigned int i, off, condt;
- u32 k, b_off __maybe_unused;
- u8 (*sk_load_func)(unsigned long *skb, int offset);
-
- for (i = 0; i < prog->len; i++) {
- u16 code;
-
- inst = &(prog->insns[i]);
- pr_debug("%s: code->0x%02x, jt->0x%x, jf->0x%x, k->0x%x\n",
- __func__, inst->code, inst->jt, inst->jf, inst->k);
- k = inst->k;
- code = bpf_anc_helper(inst);
-
- if (ctx->target == NULL)
- ctx->offsets[i] = ctx->idx * 4;
-
- switch (code) {
- case BPF_LD | BPF_IMM:
- /* A <- k ==> li r_A, k */
- ctx->flags |= SEEN_A;
- emit_load_imm(r_A, k, ctx);
- break;
- case BPF_LD | BPF_W | BPF_LEN:
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
- /* A <- len ==> lw r_A, offset(skb) */
- ctx->flags |= SEEN_SKB | SEEN_A;
- off = offsetof(struct sk_buff, len);
- emit_load(r_A, r_skb, off, ctx);
- break;
- case BPF_LD | BPF_MEM:
- /* A <- M[k] ==> lw r_A, offset(M) */
- ctx->flags |= SEEN_MEM | SEEN_A;
- emit_load(r_A, r_M, SCRATCH_OFF(k), ctx);
- break;
- case BPF_LD | BPF_W | BPF_ABS:
- /* A <- P[k:4] */
- sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_word);
- goto load;
- case BPF_LD | BPF_H | BPF_ABS:
- /* A <- P[k:2] */
- sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_half);
- goto load;
- case BPF_LD | BPF_B | BPF_ABS:
- /* A <- P[k:1] */
- sk_load_func = CHOOSE_LOAD_FUNC(k, sk_load_byte);
-load:
- emit_load_imm(r_off, k, ctx);
-load_common:
- ctx->flags |= SEEN_CALL | SEEN_OFF |
- SEEN_SKB | SEEN_A | SEEN_SKB_DATA;
-
- emit_load_func(r_s0, (ptr)sk_load_func, ctx);
- emit_reg_move(MIPS_R_A0, r_skb, ctx);
- emit_jalr(MIPS_R_RA, r_s0, ctx);
- /* Load second argument to delay slot */
- emit_reg_move(MIPS_R_A1, r_off, ctx);
- /* Check the error value */
- emit_bcond(MIPS_COND_EQ, r_ret, 0, b_imm(i + 1, ctx),
- ctx);
- /* Load return register on DS for failures */
- emit_reg_move(r_ret, r_zero, ctx);
- /* Return with error */
- emit_b(b_imm(prog->len, ctx), ctx);
- emit_nop(ctx);
- break;
- case BPF_LD | BPF_W | BPF_IND:
- /* A <- P[X + k:4] */
- sk_load_func = sk_load_word;
- goto load_ind;
- case BPF_LD | BPF_H | BPF_IND:
- /* A <- P[X + k:2] */
- sk_load_func = sk_load_half;
- goto load_ind;
- case BPF_LD | BPF_B | BPF_IND:
- /* A <- P[X + k:1] */
- sk_load_func = sk_load_byte;
-load_ind:
- ctx->flags |= SEEN_OFF | SEEN_X;
- emit_addiu(r_off, r_X, k, ctx);
- goto load_common;
- case BPF_LDX | BPF_IMM:
- /* X <- k */
- ctx->flags |= SEEN_X;
- emit_load_imm(r_X, k, ctx);
- break;
- case BPF_LDX | BPF_MEM:
- /* X <- M[k] */
- ctx->flags |= SEEN_X | SEEN_MEM;
- emit_load(r_X, r_M, SCRATCH_OFF(k), ctx);
- break;
- case BPF_LDX | BPF_W | BPF_LEN:
- /* X <- len */
- ctx->flags |= SEEN_X | SEEN_SKB;
- off = offsetof(struct sk_buff, len);
- emit_load(r_X, r_skb, off, ctx);
- break;
- case BPF_LDX | BPF_B | BPF_MSH:
- /* X <- 4 * (P[k:1] & 0xf) */
- ctx->flags |= SEEN_X | SEEN_CALL | SEEN_SKB;
- /* Load offset to a1 */
- emit_load_func(r_s0, (ptr)sk_load_byte, ctx);
- /*
- * This may emit two instructions so it may not fit
- * in the delay slot. So use a0 in the delay slot.
- */
- emit_load_imm(MIPS_R_A1, k, ctx);
- emit_jalr(MIPS_R_RA, r_s0, ctx);
- emit_reg_move(MIPS_R_A0, r_skb, ctx); /* delay slot */
- /* Check the error value */
- emit_bcond(MIPS_COND_NE, r_ret, 0,
- b_imm(prog->len, ctx), ctx);
- emit_reg_move(r_ret, r_zero, ctx);
- /* We are good */
-			/* X <- P[k:1] & 0xf */
- emit_andi(r_X, r_A, 0xf, ctx);
- /* X << 2 */
- emit_b(b_imm(i + 1, ctx), ctx);
- emit_sll(r_X, r_X, 2, ctx); /* delay slot */
- break;
- case BPF_ST:
- /* M[k] <- A */
- ctx->flags |= SEEN_MEM | SEEN_A;
- emit_store(r_A, r_M, SCRATCH_OFF(k), ctx);
- break;
- case BPF_STX:
- /* M[k] <- X */
- ctx->flags |= SEEN_MEM | SEEN_X;
- emit_store(r_X, r_M, SCRATCH_OFF(k), ctx);
- break;
- case BPF_ALU | BPF_ADD | BPF_K:
- /* A += K */
- ctx->flags |= SEEN_A;
- emit_addiu(r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_ADD | BPF_X:
- /* A += X */
- ctx->flags |= SEEN_A | SEEN_X;
- emit_addu(r_A, r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_SUB | BPF_K:
- /* A -= K */
- ctx->flags |= SEEN_A;
- emit_addiu(r_A, r_A, -k, ctx);
- break;
- case BPF_ALU | BPF_SUB | BPF_X:
- /* A -= X */
- ctx->flags |= SEEN_A | SEEN_X;
- emit_subu(r_A, r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_MUL | BPF_K:
- /* A *= K */
- /* Load K to scratch register before MUL */
- ctx->flags |= SEEN_A;
- emit_load_imm(r_s0, k, ctx);
- emit_mul(r_A, r_A, r_s0, ctx);
- break;
- case BPF_ALU | BPF_MUL | BPF_X:
- /* A *= X */
- ctx->flags |= SEEN_A | SEEN_X;
- emit_mul(r_A, r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_DIV | BPF_K:
- /* A /= k */
- if (k == 1)
- break;
- if (optimize_div(&k)) {
- ctx->flags |= SEEN_A;
- emit_srl(r_A, r_A, k, ctx);
- break;
- }
- ctx->flags |= SEEN_A;
- emit_load_imm(r_s0, k, ctx);
- emit_div(r_A, r_s0, ctx);
- break;
- case BPF_ALU | BPF_MOD | BPF_K:
- /* A %= k */
- if (k == 1) {
- ctx->flags |= SEEN_A;
- emit_jit_reg_move(r_A, r_zero, ctx);
- } else {
- ctx->flags |= SEEN_A;
- emit_load_imm(r_s0, k, ctx);
- emit_mod(r_A, r_s0, ctx);
- }
- break;
- case BPF_ALU | BPF_DIV | BPF_X:
- /* A /= X */
- ctx->flags |= SEEN_X | SEEN_A;
- /* Check if r_X is zero */
- emit_bcond(MIPS_COND_EQ, r_X, r_zero,
- b_imm(prog->len, ctx), ctx);
- emit_load_imm(r_ret, 0, ctx); /* delay slot */
- emit_div(r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_MOD | BPF_X:
- /* A %= X */
- ctx->flags |= SEEN_X | SEEN_A;
- /* Check if r_X is zero */
- emit_bcond(MIPS_COND_EQ, r_X, r_zero,
- b_imm(prog->len, ctx), ctx);
- emit_load_imm(r_ret, 0, ctx); /* delay slot */
- emit_mod(r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_OR | BPF_K:
- /* A |= K */
- ctx->flags |= SEEN_A;
- emit_ori(r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_OR | BPF_X:
- /* A |= X */
- ctx->flags |= SEEN_A;
- emit_ori(r_A, r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_XOR | BPF_K:
- /* A ^= k */
- ctx->flags |= SEEN_A;
- emit_xori(r_A, r_A, k, ctx);
- break;
- case BPF_ANC | SKF_AD_ALU_XOR_X:
- case BPF_ALU | BPF_XOR | BPF_X:
- /* A ^= X */
- ctx->flags |= SEEN_A;
- emit_xor(r_A, r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_AND | BPF_K:
- /* A &= K */
- ctx->flags |= SEEN_A;
- emit_andi(r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_AND | BPF_X:
- /* A &= X */
- ctx->flags |= SEEN_A | SEEN_X;
- emit_and(r_A, r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_LSH | BPF_K:
- /* A <<= K */
- ctx->flags |= SEEN_A;
- emit_sll(r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_LSH | BPF_X:
- /* A <<= X */
- ctx->flags |= SEEN_A | SEEN_X;
- emit_sllv(r_A, r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_RSH | BPF_K:
- /* A >>= K */
- ctx->flags |= SEEN_A;
- emit_srl(r_A, r_A, k, ctx);
- break;
- case BPF_ALU | BPF_RSH | BPF_X:
- ctx->flags |= SEEN_A | SEEN_X;
- emit_srlv(r_A, r_A, r_X, ctx);
- break;
- case BPF_ALU | BPF_NEG:
- /* A = -A */
- ctx->flags |= SEEN_A;
- emit_neg(r_A, ctx);
- break;
- case BPF_JMP | BPF_JA:
- /* pc += K */
- emit_b(b_imm(i + k + 1, ctx), ctx);
- emit_nop(ctx);
- break;
- case BPF_JMP | BPF_JEQ | BPF_K:
- /* pc += ( A == K ) ? pc->jt : pc->jf */
- condt = MIPS_COND_EQ | MIPS_COND_K;
- goto jmp_cmp;
- case BPF_JMP | BPF_JEQ | BPF_X:
- ctx->flags |= SEEN_X;
- /* pc += ( A == X ) ? pc->jt : pc->jf */
- condt = MIPS_COND_EQ | MIPS_COND_X;
- goto jmp_cmp;
- case BPF_JMP | BPF_JGE | BPF_K:
- /* pc += ( A >= K ) ? pc->jt : pc->jf */
- condt = MIPS_COND_GE | MIPS_COND_K;
- goto jmp_cmp;
- case BPF_JMP | BPF_JGE | BPF_X:
- ctx->flags |= SEEN_X;
- /* pc += ( A >= X ) ? pc->jt : pc->jf */
- condt = MIPS_COND_GE | MIPS_COND_X;
- goto jmp_cmp;
- case BPF_JMP | BPF_JGT | BPF_K:
- /* pc += ( A > K ) ? pc->jt : pc->jf */
- condt = MIPS_COND_GT | MIPS_COND_K;
- goto jmp_cmp;
- case BPF_JMP | BPF_JGT | BPF_X:
- ctx->flags |= SEEN_X;
- /* pc += ( A > X ) ? pc->jt : pc->jf */
- condt = MIPS_COND_GT | MIPS_COND_X;
-jmp_cmp:
- /* Greater or Equal */
- if ((condt & MIPS_COND_GE) ||
- (condt & MIPS_COND_GT)) {
- if (condt & MIPS_COND_K) { /* K */
- ctx->flags |= SEEN_A;
- emit_sltiu(r_s0, r_A, k, ctx);
- } else { /* X */
- ctx->flags |= SEEN_A |
- SEEN_X;
- emit_sltu(r_s0, r_A, r_X, ctx);
- }
-				/* A < (K|X) ? r_scratch = 1 */
- b_off = b_imm(i + inst->jf + 1, ctx);
- emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off,
- ctx);
- emit_nop(ctx);
- /* A > (K|X) ? scratch = 0 */
- if (condt & MIPS_COND_GT) {
- /* Checking for equality */
- ctx->flags |= SEEN_A | SEEN_X;
- if (condt & MIPS_COND_K)
- emit_load_imm(r_s0, k, ctx);
- else
- emit_jit_reg_move(r_s0, r_X,
- ctx);
- b_off = b_imm(i + inst->jf + 1, ctx);
- emit_bcond(MIPS_COND_EQ, r_A, r_s0,
- b_off, ctx);
- emit_nop(ctx);
- /* Finally, A > K|X */
- b_off = b_imm(i + inst->jt + 1, ctx);
- emit_b(b_off, ctx);
- emit_nop(ctx);
- } else {
- /* A >= (K|X) so jump */
- b_off = b_imm(i + inst->jt + 1, ctx);
- emit_b(b_off, ctx);
- emit_nop(ctx);
- }
- } else {
- /* A == K|X */
- if (condt & MIPS_COND_K) { /* K */
- ctx->flags |= SEEN_A;
- emit_load_imm(r_s0, k, ctx);
- /* jump true */
- b_off = b_imm(i + inst->jt + 1, ctx);
- emit_bcond(MIPS_COND_EQ, r_A, r_s0,
- b_off, ctx);
- emit_nop(ctx);
- /* jump false */
- b_off = b_imm(i + inst->jf + 1,
- ctx);
- emit_bcond(MIPS_COND_NE, r_A, r_s0,
- b_off, ctx);
- emit_nop(ctx);
- } else { /* X */
- /* jump true */
- ctx->flags |= SEEN_A | SEEN_X;
- b_off = b_imm(i + inst->jt + 1,
- ctx);
- emit_bcond(MIPS_COND_EQ, r_A, r_X,
- b_off, ctx);
- emit_nop(ctx);
- /* jump false */
- b_off = b_imm(i + inst->jf + 1, ctx);
- emit_bcond(MIPS_COND_NE, r_A, r_X,
- b_off, ctx);
- emit_nop(ctx);
- }
- }
- break;
- case BPF_JMP | BPF_JSET | BPF_K:
- ctx->flags |= SEEN_A;
- /* pc += (A & K) ? pc -> jt : pc -> jf */
- emit_load_imm(r_s1, k, ctx);
- emit_and(r_s0, r_A, r_s1, ctx);
- /* jump true */
- b_off = b_imm(i + inst->jt + 1, ctx);
- emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
- emit_nop(ctx);
- /* jump false */
- b_off = b_imm(i + inst->jf + 1, ctx);
- emit_b(b_off, ctx);
- emit_nop(ctx);
- break;
- case BPF_JMP | BPF_JSET | BPF_X:
- ctx->flags |= SEEN_X | SEEN_A;
- /* pc += (A & X) ? pc -> jt : pc -> jf */
- emit_and(r_s0, r_A, r_X, ctx);
- /* jump true */
- b_off = b_imm(i + inst->jt + 1, ctx);
- emit_bcond(MIPS_COND_NE, r_s0, r_zero, b_off, ctx);
- emit_nop(ctx);
- /* jump false */
- b_off = b_imm(i + inst->jf + 1, ctx);
- emit_b(b_off, ctx);
- emit_nop(ctx);
- break;
- case BPF_RET | BPF_A:
- ctx->flags |= SEEN_A;
- if (i != prog->len - 1)
- /*
- * If this is not the last instruction
- * then jump to the epilogue
- */
- emit_b(b_imm(prog->len, ctx), ctx);
- emit_reg_move(r_ret, r_A, ctx); /* delay slot */
- break;
- case BPF_RET | BPF_K:
- /*
- * It can emit two instructions so it does not fit on
- * the delay slot.
- */
- emit_load_imm(r_ret, k, ctx);
- if (i != prog->len - 1) {
- /*
- * If this is not the last instruction
- * then jump to the epilogue
- */
- emit_b(b_imm(prog->len, ctx), ctx);
- emit_nop(ctx);
- }
- break;
- case BPF_MISC | BPF_TAX:
- /* X = A */
- ctx->flags |= SEEN_X | SEEN_A;
- emit_jit_reg_move(r_X, r_A, ctx);
- break;
- case BPF_MISC | BPF_TXA:
- /* A = X */
- ctx->flags |= SEEN_A | SEEN_X;
- emit_jit_reg_move(r_A, r_X, ctx);
- break;
- /* AUX */
- case BPF_ANC | SKF_AD_PROTOCOL:
-			/* A = ntohs(skb->protocol) */
- ctx->flags |= SEEN_SKB | SEEN_OFF | SEEN_A;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- protocol) != 2);
- off = offsetof(struct sk_buff, protocol);
- emit_half_load(r_A, r_skb, off, ctx);
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
- /* This needs little endian fixup */
- if (cpu_has_wsbh) {
- /* R2 and later have the wsbh instruction */
- emit_wsbh(r_A, r_A, ctx);
- } else {
- /* Get first byte */
- emit_andi(r_tmp_imm, r_A, 0xff, ctx);
- /* Shift it */
- emit_sll(r_tmp, r_tmp_imm, 8, ctx);
- /* Get second byte */
- emit_srl(r_tmp_imm, r_A, 8, ctx);
- emit_andi(r_tmp_imm, r_tmp_imm, 0xff, ctx);
-				/* Put everything together in r_A */
- emit_or(r_A, r_tmp, r_tmp_imm, ctx);
- }
-#endif
- break;
- case BPF_ANC | SKF_AD_CPU:
- ctx->flags |= SEEN_A | SEEN_OFF;
- /* A = current_thread_info()->cpu */
- BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info,
- cpu) != 4);
- off = offsetof(struct thread_info, cpu);
- /* $28/gp points to the thread_info struct */
- emit_load(r_A, 28, off, ctx);
- break;
- case BPF_ANC | SKF_AD_IFINDEX:
- /* A = skb->dev->ifindex */
- case BPF_ANC | SKF_AD_HATYPE:
- /* A = skb->dev->type */
- ctx->flags |= SEEN_SKB | SEEN_A;
- off = offsetof(struct sk_buff, dev);
- /* Load *dev pointer */
- emit_load_ptr(r_s0, r_skb, off, ctx);
- /* error (0) in the delay slot */
- emit_bcond(MIPS_COND_EQ, r_s0, r_zero,
- b_imm(prog->len, ctx), ctx);
- emit_reg_move(r_ret, r_zero, ctx);
- if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
- off = offsetof(struct net_device, ifindex);
- emit_load(r_A, r_s0, off, ctx);
-			} else { /* code == (BPF_ANC | SKF_AD_HATYPE) */
- BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
- off = offsetof(struct net_device, type);
- emit_half_load_unsigned(r_A, r_s0, off, ctx);
- }
- break;
- case BPF_ANC | SKF_AD_MARK:
- ctx->flags |= SEEN_SKB | SEEN_A;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
- off = offsetof(struct sk_buff, mark);
- emit_load(r_A, r_skb, off, ctx);
- break;
- case BPF_ANC | SKF_AD_RXHASH:
- ctx->flags |= SEEN_SKB | SEEN_A;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
- off = offsetof(struct sk_buff, hash);
- emit_load(r_A, r_skb, off, ctx);
- break;
- case BPF_ANC | SKF_AD_VLAN_TAG:
- ctx->flags |= SEEN_SKB | SEEN_A;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- vlan_tci) != 2);
- off = offsetof(struct sk_buff, vlan_tci);
- emit_half_load_unsigned(r_A, r_skb, off, ctx);
- break;
- case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
- ctx->flags |= SEEN_SKB | SEEN_A;
- emit_load_byte(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET(), ctx);
- if (PKT_VLAN_PRESENT_BIT)
- emit_srl(r_A, r_A, PKT_VLAN_PRESENT_BIT, ctx);
- if (PKT_VLAN_PRESENT_BIT < 7)
- emit_andi(r_A, r_A, 1, ctx);
- break;
- case BPF_ANC | SKF_AD_PKTTYPE:
- ctx->flags |= SEEN_SKB;
-
- emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx);
- /* Keep only the last 3 bits */
- emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx);
-#ifdef __BIG_ENDIAN_BITFIELD
- /* Get the actual packet type to the lower 3 bits */
- emit_srl(r_A, r_A, 5, ctx);
-#endif
- break;
- case BPF_ANC | SKF_AD_QUEUE:
- ctx->flags |= SEEN_SKB | SEEN_A;
- BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff,
- queue_mapping) != 2);
- BUILD_BUG_ON(offsetof(struct sk_buff,
- queue_mapping) > 0xff);
- off = offsetof(struct sk_buff, queue_mapping);
- emit_half_load_unsigned(r_A, r_skb, off, ctx);
- break;
- default:
- pr_debug("%s: Unhandled opcode: 0x%02x\n", __FILE__,
- inst->code);
- return -1;
- }
- }
-
- /* compute offsets only during the first pass */
- if (ctx->target == NULL)
- ctx->offsets[i] = ctx->idx * 4;
-
- return 0;
-}
-
-void bpf_jit_compile(struct bpf_prog *fp)
-{
- struct jit_ctx ctx;
- unsigned int alloc_size, tmp_idx;
-
- if (!bpf_jit_enable)
- return;
-
- memset(&ctx, 0, sizeof(ctx));
-
- ctx.offsets = kcalloc(fp->len + 1, sizeof(*ctx.offsets), GFP_KERNEL);
- if (ctx.offsets == NULL)
- return;
-
- ctx.skf = fp;
-
- if (build_body(&ctx))
- goto out;
-
- tmp_idx = ctx.idx;
- build_prologue(&ctx);
- ctx.prologue_bytes = (ctx.idx - tmp_idx) * 4;
- /* just to complete the ctx.idx count */
- build_epilogue(&ctx);
-
- alloc_size = 4 * ctx.idx;
- ctx.target = module_alloc(alloc_size);
- if (ctx.target == NULL)
- goto out;
-
- /* Clean it */
- memset(ctx.target, 0, alloc_size);
-
- ctx.idx = 0;
-
- /* Generate the actual JIT code */
- build_prologue(&ctx);
- build_body(&ctx);
- build_epilogue(&ctx);
-
- /* Update the icache */
- flush_icache_range((ptr)ctx.target, (ptr)(ctx.target + ctx.idx));
-
- if (bpf_jit_enable > 1)
- /* Dump JIT code */
- bpf_jit_dump(fp->len, alloc_size, 2, ctx.target);
-
- fp->bpf_func = (void *)ctx.target;
- fp->jited = 1;
-
-out:
- kfree(ctx.offsets);
-}
-
-void bpf_jit_free(struct bpf_prog *fp)
-{
- if (fp->jited)
- module_memfree(fp->bpf_func);
-
- bpf_prog_unlock_free(fp);
-}
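The removed cBPF JIT sized its image with a dry run: build_body()/build_prologue()/build_epilogue() were first called with ctx.target == NULL so that emit_instr() only advanced ctx.idx, then the buffer was allocated and everything was emitted again for real. A toy stand-alone model of that two-pass scheme (the opcode words below are placeholders, not meaningful MIPS encodings):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct ctx {
	uint32_t *buf;		/* NULL during the sizing pass */
	unsigned int idx;	/* instruction count / write index */
};

static void emit(struct ctx *c, uint32_t word)
{
	if (c->buf)		/* second pass only: actually store */
		c->buf[c->idx] = word;
	c->idx++;		/* both passes: count */
}

static void build(struct ctx *c)
{
	emit(c, 0x27bdfff8);	/* placeholder words */
	emit(c, 0xafbf0004);
	emit(c, 0x03e00008);
}

int main(void)
{
	struct ctx c = { NULL, 0 };

	build(&c);				/* pass 1: size the image */
	c.buf = calloc(c.idx, sizeof(*c.buf));
	if (!c.buf)
		return 1;
	c.idx = 0;
	build(&c);				/* pass 2: emit into the buffer */
	printf("emitted %u words\n", c.idx);
	free(c.buf);
	return 0;
}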
diff --git a/arch/mips/net/bpf_jit_asm.S b/arch/mips/net/bpf_jit_asm.S
deleted file mode 100644
index 57154c5883b6..000000000000
--- a/arch/mips/net/bpf_jit_asm.S
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * bpf_jit_asm.S: Packet/header access helper functions for MIPS/MIPS64 BPF
- * compiler.
- *
- * Copyright (C) 2015 Imagination Technologies Ltd.
- * Author: Markos Chandras <markos.chandras@imgtec.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; version 2 of the License.
- */
-
-#include <asm/asm.h>
-#include <asm/isa-rev.h>
-#include <asm/regdef.h>
-#include "bpf_jit.h"
-
-/* ABI
- *
- * r_skb_hl skb header length
- * r_skb_data skb data
- * r_off(a1) offset register
- * r_A BPF register A
- * r_X		BPF register X
- * r_skb(a0) *skb
- * r_M *scratch memory
- * r_skb_len	skb length
- * r_s0 Scratch register 0
- * r_s1 Scratch register 1
- *
- * On entry:
- * a0: *skb
- * a1: offset (imm or imm + X)
- *
- * All non-BPF-ABI registers are free for use. On return, we only
- * care about r_ret. The BPF-ABI registers are assumed to remain
- * unmodified during the entire filter operation.
- */
-
-#define skb a0
-#define offset a1
-#define SKF_LL_OFF (-0x200000) /* Can't include linux/filter.h in assembly */
-
- /* We know better :) so prevent assembler reordering etc */
- .set noreorder
-
-#define is_offset_negative(TYPE) \
- /* If offset is negative we have more work to do */ \
- slti t0, offset, 0; \
- bgtz t0, bpf_slow_path_##TYPE##_neg; \
- /* Be careful what follows in DS. */
-
-#define is_offset_in_header(SIZE, TYPE) \
- /* Reading from header? */ \
- addiu $r_s0, $r_skb_hl, -SIZE; \
- slt t0, $r_s0, offset; \
- bgtz t0, bpf_slow_path_##TYPE; \
-
-LEAF(sk_load_word)
- is_offset_negative(word)
-FEXPORT(sk_load_word_positive)
- is_offset_in_header(4, word)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- .set reorder
- lw $r_A, 0(t1)
- .set noreorder
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh t0, $r_A
- rotr $r_A, t0, 16
-# else
- sll t0, $r_A, 24
- srl t1, $r_A, 24
- srl t2, $r_A, 8
- or t0, t0, t1
- andi t2, t2, 0xff00
- andi t1, $r_A, 0xff00
- or t0, t0, t2
- sll t1, t1, 8
- or $r_A, t0, t1
-# endif
-#endif
- jr $r_ra
- move $r_ret, zero
- END(sk_load_word)
-
-LEAF(sk_load_half)
- is_offset_negative(half)
-FEXPORT(sk_load_half_positive)
- is_offset_in_header(2, half)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- lhu $r_A, 0(t1)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh $r_A, $r_A
-# else
- sll t0, $r_A, 8
- srl t1, $r_A, 8
- andi t0, t0, 0xff00
- or $r_A, t0, t1
-# endif
-#endif
- jr $r_ra
- move $r_ret, zero
- END(sk_load_half)
-
-LEAF(sk_load_byte)
- is_offset_negative(byte)
-FEXPORT(sk_load_byte_positive)
- is_offset_in_header(1, byte)
- /* Offset within header boundaries */
- PTR_ADDU t1, $r_skb_data, offset
- lbu $r_A, 0(t1)
- jr $r_ra
- move $r_ret, zero
- END(sk_load_byte)
-
-/*
- * call skb_copy_bits:
- * (prototype in linux/skbuff.h)
- *
- * int skb_copy_bits(sk_buff *skb, int offset, void *to, int len)
- *
- * o32 mandates we leave 4 spaces for argument registers in case
- * the callee needs to use them. Even though we don't care about
- * the argument registers ourselves, we need to allocate that space
- * to remain ABI compliant since the callee may want to use that space.
- * We also allocate 2 more spaces for $r_ra and our return register (*to).
- *
- * n64 is a bit different. The *caller* will allocate the space to preserve
- * the arguments. So in 64-bit kernels, we allocate the 4-arg space for no
- * good reason but it does not matter that much really.
- *
- * (void *to) is returned in r_s0
- *
- */
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-#define DS_OFFSET(SIZE) (4 * SZREG)
-#else
-#define DS_OFFSET(SIZE) ((4 * SZREG) + (4 - SIZE))
-#endif
-#define bpf_slow_path_common(SIZE) \
- /* Quick check. Are we within reasonable boundaries? */ \
- LONG_ADDIU $r_s1, $r_skb_len, -SIZE; \
- sltu $r_s0, offset, $r_s1; \
- beqz $r_s0, fault; \
- /* Load 4th argument in DS */ \
- LONG_ADDIU a3, zero, SIZE; \
- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
- PTR_LA t0, skb_copy_bits; \
- PTR_S $r_ra, (5 * SZREG)($r_sp); \
- /* Assign low slot to a2 */ \
- PTR_ADDIU a2, $r_sp, DS_OFFSET(SIZE); \
- jalr t0; \
- /* Reset our destination slot (DS but it's ok) */ \
- INT_S zero, (4 * SZREG)($r_sp); \
- /* \
- * skb_copy_bits returns 0 on success and -EFAULT \
- * on error. Our data live in a2. Do not bother with \
- * our data if an error has been returned. \
- */ \
- /* Restore our frame */ \
- PTR_L $r_ra, (5 * SZREG)($r_sp); \
- INT_L $r_s0, (4 * SZREG)($r_sp); \
- bltz v0, fault; \
- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
- move $r_ret, zero; \
-
-NESTED(bpf_slow_path_word, (6 * SZREG), $r_sp)
- bpf_slow_path_common(4)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- wsbh t0, $r_s0
- jr $r_ra
- rotr $r_A, t0, 16
-# else
- sll t0, $r_s0, 24
- srl t1, $r_s0, 24
- srl t2, $r_s0, 8
- or t0, t0, t1
- andi t2, t2, 0xff00
- andi t1, $r_s0, 0xff00
- or t0, t0, t2
- sll t1, t1, 8
- jr $r_ra
- or $r_A, t0, t1
-# endif
-#else
- jr $r_ra
- move $r_A, $r_s0
-#endif
-
- END(bpf_slow_path_word)
-
-NESTED(bpf_slow_path_half, (6 * SZREG), $r_sp)
- bpf_slow_path_common(2)
-#ifdef CONFIG_CPU_LITTLE_ENDIAN
-# if MIPS_ISA_REV >= 2
- jr $r_ra
- wsbh $r_A, $r_s0
-# else
- sll t0, $r_s0, 8
- andi t1, $r_s0, 0xff00
- andi t0, t0, 0xff00
- srl t1, t1, 8
- jr $r_ra
- or $r_A, t0, t1
-# endif
-#else
- jr $r_ra
- move $r_A, $r_s0
-#endif
-
- END(bpf_slow_path_half)
-
-NESTED(bpf_slow_path_byte, (6 * SZREG), $r_sp)
- bpf_slow_path_common(1)
- jr $r_ra
- move $r_A, $r_s0
-
- END(bpf_slow_path_byte)
-
-/*
- * Negative entry points
- */
- .macro bpf_is_end_of_data
- li t0, SKF_LL_OFF
- /* Reading link layer data? */
- slt t1, offset, t0
- bgtz t1, fault
- /* Be careful what follows in DS. */
- .endm
-/*
- * call skb_copy_bits:
- * (prototype in linux/filter.h)
- *
- * void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
- * int k, unsigned int size)
- *
- * see above (bpf_slow_path_common) for ABI restrictions
- */
-#define bpf_negative_common(SIZE) \
- PTR_ADDIU $r_sp, $r_sp, -(6 * SZREG); \
- PTR_LA t0, bpf_internal_load_pointer_neg_helper; \
- PTR_S $r_ra, (5 * SZREG)($r_sp); \
- jalr t0; \
- li a2, SIZE; \
- PTR_L $r_ra, (5 * SZREG)($r_sp); \
- /* Check return pointer */ \
- beqz v0, fault; \
- PTR_ADDIU $r_sp, $r_sp, 6 * SZREG; \
- /* Preserve our pointer */ \
- move $r_s0, v0; \
- /* Set return value */ \
- move $r_ret, zero; \
-
-bpf_slow_path_word_neg:
- bpf_is_end_of_data
-NESTED(sk_load_word_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(4)
- jr $r_ra
- lw $r_A, 0($r_s0)
- END(sk_load_word_negative)
-
-bpf_slow_path_half_neg:
- bpf_is_end_of_data
-NESTED(sk_load_half_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(2)
- jr $r_ra
- lhu $r_A, 0($r_s0)
- END(sk_load_half_negative)
-
-bpf_slow_path_byte_neg:
- bpf_is_end_of_data
-NESTED(sk_load_byte_negative, (6 * SZREG), $r_sp)
- bpf_negative_common(1)
- jr $r_ra
- lbu $r_A, 0($r_s0)
- END(sk_load_byte_negative)
-
-fault:
- jr $r_ra
- addiu $r_ret, zero, 1
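On little-endian CPUs the deleted helpers byte-swapped loaded words by hand when the R2+ wsbh/rotr pair was unavailable. The pre-R2 shift-and-mask sequence in sk_load_word/bpf_slow_path_word corresponds to the following C, shown here only to make the dataflow explicit:

#include <stdint.h>
#include <stdio.h>

/* C equivalent of the pre-R2 sll/srl/andi/or sequence above. */
static uint32_t swab32_model(uint32_t a)
{
	uint32_t hi   = a << 24;		/* byte 0 -> byte 3 */
	uint32_t lo   = a >> 24;		/* byte 3 -> byte 0 */
	uint32_t mid1 = (a >> 8) & 0xff00;	/* byte 2 -> byte 1 */
	uint32_t mid2 = (a & 0xff00) << 8;	/* byte 1 -> byte 2 */

	return hi | lo | mid1 | mid2;
}

int main(void)
{
	printf("%#010x\n", (unsigned)swab32_model(0x11223344));	/* prints 0x44332211 */
	return 0;
}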
diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c
index 0effd3cba9a7..dfd5a4b1b779 100644
--- a/arch/mips/net/ebpf_jit.c
+++ b/arch/mips/net/ebpf_jit.c
@@ -22,6 +22,7 @@
#include <asm/byteorder.h>
#include <asm/cacheflush.h>
#include <asm/cpu-features.h>
+#include <asm/isa-rev.h>
#include <asm/uasm.h>
/* Registers used by JIT */
@@ -125,15 +126,21 @@ static enum reg_val_type get_reg_val_type(const struct jit_ctx *ctx,
}
/* Simply emit the instruction if the JIT memory space has been allocated */
-#define emit_instr(ctx, func, ...) \
-do { \
- if ((ctx)->target != NULL) { \
- u32 *p = &(ctx)->target[ctx->idx]; \
- uasm_i_##func(&p, ##__VA_ARGS__); \
- } \
- (ctx)->idx++; \
+#define emit_instr_long(ctx, func64, func32, ...) \
+do { \
+ if ((ctx)->target != NULL) { \
+ u32 *p = &(ctx)->target[ctx->idx]; \
+ if (IS_ENABLED(CONFIG_64BIT)) \
+ uasm_i_##func64(&p, ##__VA_ARGS__); \
+ else \
+ uasm_i_##func32(&p, ##__VA_ARGS__); \
+ } \
+ (ctx)->idx++; \
} while (0)
+#define emit_instr(ctx, func, ...) \
+ emit_instr_long(ctx, func, func, ##__VA_ARGS__)
+
static unsigned int j_target(struct jit_ctx *ctx, int target_idx)
{
unsigned long target_va, base_va;
@@ -186,8 +193,9 @@ enum which_ebpf_reg {
* separate frame pointer, so BPF_REG_10 relative accesses are
* adjusted to be $sp relative.
*/
-int ebpf_to_mips_reg(struct jit_ctx *ctx, const struct bpf_insn *insn,
- enum which_ebpf_reg w)
+static int ebpf_to_mips_reg(struct jit_ctx *ctx,
+ const struct bpf_insn *insn,
+ enum which_ebpf_reg w)
{
int ebpf_reg = (w == src_reg || w == src_reg_no_fp) ?
insn->src_reg : insn->dst_reg;
@@ -273,17 +281,17 @@ static int gen_int_prologue(struct jit_ctx *ctx)
* If RA we are doing a function call and may need
* extra 8-byte tmp area.
*/
- stack_adjust += 16;
+ stack_adjust += 2 * sizeof(long);
if (ctx->flags & EBPF_SAVE_S0)
- stack_adjust += 8;
+ stack_adjust += sizeof(long);
if (ctx->flags & EBPF_SAVE_S1)
- stack_adjust += 8;
+ stack_adjust += sizeof(long);
if (ctx->flags & EBPF_SAVE_S2)
- stack_adjust += 8;
+ stack_adjust += sizeof(long);
if (ctx->flags & EBPF_SAVE_S3)
- stack_adjust += 8;
+ stack_adjust += sizeof(long);
if (ctx->flags & EBPF_SAVE_S4)
- stack_adjust += 8;
+ stack_adjust += sizeof(long);
BUILD_BUG_ON(MAX_BPF_STACK & 7);
locals_size = (ctx->flags & EBPF_SEEN_FP) ? MAX_BPF_STACK : 0;
@@ -297,41 +305,49 @@ static int gen_int_prologue(struct jit_ctx *ctx)
* On tail call we skip this instruction, and the TCC is
* passed in $v1 from the caller.
*/
- emit_instr(ctx, daddiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
+ emit_instr(ctx, addiu, MIPS_R_V1, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
if (stack_adjust)
- emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, -stack_adjust);
+ emit_instr_long(ctx, daddiu, addiu,
+ MIPS_R_SP, MIPS_R_SP, -stack_adjust);
else
return 0;
- store_offset = stack_adjust - 8;
+ store_offset = stack_adjust - sizeof(long);
if (ctx->flags & EBPF_SAVE_RA) {
- emit_instr(ctx, sd, MIPS_R_RA, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, sd, sw,
+ MIPS_R_RA, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S0) {
- emit_instr(ctx, sd, MIPS_R_S0, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, sd, sw,
+ MIPS_R_S0, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S1) {
- emit_instr(ctx, sd, MIPS_R_S1, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, sd, sw,
+ MIPS_R_S1, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S2) {
- emit_instr(ctx, sd, MIPS_R_S2, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, sd, sw,
+ MIPS_R_S2, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S3) {
- emit_instr(ctx, sd, MIPS_R_S3, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, sd, sw,
+ MIPS_R_S3, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S4) {
- emit_instr(ctx, sd, MIPS_R_S4, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, sd, sw,
+ MIPS_R_S4, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if ((ctx->flags & EBPF_SEEN_TC) && !(ctx->flags & EBPF_TCC_IN_V1))
- emit_instr(ctx, daddu, MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO);
+ emit_instr_long(ctx, daddu, addu,
+ MIPS_R_S4, MIPS_R_V1, MIPS_R_ZERO);
return 0;
}
@@ -340,7 +356,7 @@ static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg)
{
const struct bpf_prog *prog = ctx->skf;
int stack_adjust = ctx->stack_size;
- int store_offset = stack_adjust - 8;
+ int store_offset = stack_adjust - sizeof(long);
enum reg_val_type td;
int r0 = MIPS_R_V0;
@@ -352,33 +368,40 @@ static int build_int_epilogue(struct jit_ctx *ctx, int dest_reg)
}
if (ctx->flags & EBPF_SAVE_RA) {
- emit_instr(ctx, ld, MIPS_R_RA, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, ld, lw,
+ MIPS_R_RA, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S0) {
- emit_instr(ctx, ld, MIPS_R_S0, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, ld, lw,
+ MIPS_R_S0, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S1) {
- emit_instr(ctx, ld, MIPS_R_S1, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, ld, lw,
+ MIPS_R_S1, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S2) {
- emit_instr(ctx, ld, MIPS_R_S2, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, ld, lw,
+ MIPS_R_S2, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S3) {
- emit_instr(ctx, ld, MIPS_R_S3, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, ld, lw,
+ MIPS_R_S3, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
if (ctx->flags & EBPF_SAVE_S4) {
- emit_instr(ctx, ld, MIPS_R_S4, store_offset, MIPS_R_SP);
- store_offset -= 8;
+ emit_instr_long(ctx, ld, lw,
+ MIPS_R_S4, store_offset, MIPS_R_SP);
+ store_offset -= sizeof(long);
}
emit_instr(ctx, jr, dest_reg);
if (stack_adjust)
- emit_instr(ctx, daddiu, MIPS_R_SP, MIPS_R_SP, stack_adjust);
+ emit_instr_long(ctx, daddiu, addiu,
+ MIPS_R_SP, MIPS_R_SP, stack_adjust);
else
emit_instr(ctx, nop);
@@ -645,6 +668,10 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
s64 t64s;
int bpf_op = BPF_OP(insn->code);
+ if (IS_ENABLED(CONFIG_32BIT) && ((BPF_CLASS(insn->code) == BPF_ALU64)
+ || (bpf_op == BPF_DW)))
+ return -EINVAL;
+
switch (insn->code) {
case BPF_ALU64 | BPF_ADD | BPF_K: /* ALU64_IMM */
case BPF_ALU64 | BPF_SUB | BPF_K: /* ALU64_IMM */
@@ -677,8 +704,12 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
if (insn->imm == 1) /* Mult by 1 is a nop */
break;
gen_imm_to_reg(insn, MIPS_R_AT, ctx);
- emit_instr(ctx, dmultu, MIPS_R_AT, dst);
- emit_instr(ctx, mflo, dst);
+ if (MIPS_ISA_REV >= 6) {
+ emit_instr(ctx, dmulu, dst, dst, MIPS_R_AT);
+ } else {
+ emit_instr(ctx, dmultu, MIPS_R_AT, dst);
+ emit_instr(ctx, mflo, dst);
+ }
break;
case BPF_ALU64 | BPF_NEG | BPF_K: /* ALU64_IMM */
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
@@ -700,8 +731,12 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
if (insn->imm == 1) /* Mult by 1 is a nop */
break;
gen_imm_to_reg(insn, MIPS_R_AT, ctx);
- emit_instr(ctx, multu, dst, MIPS_R_AT);
- emit_instr(ctx, mflo, dst);
+ if (MIPS_ISA_REV >= 6) {
+ emit_instr(ctx, mulu, dst, dst, MIPS_R_AT);
+ } else {
+ emit_instr(ctx, multu, dst, MIPS_R_AT);
+ emit_instr(ctx, mflo, dst);
+ }
break;
case BPF_ALU | BPF_NEG | BPF_K: /* ALU_IMM */
dst = ebpf_to_mips_reg(ctx, insn, dst_reg);
@@ -732,6 +767,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
}
gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ if (MIPS_ISA_REV >= 6) {
+ if (bpf_op == BPF_DIV)
+ emit_instr(ctx, divu_r6, dst, dst, MIPS_R_AT);
+ else
+ emit_instr(ctx, modu, dst, dst, MIPS_R_AT);
+ break;
+ }
emit_instr(ctx, divu, dst, MIPS_R_AT);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
@@ -754,6 +796,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
}
gen_imm_to_reg(insn, MIPS_R_AT, ctx);
+ if (MIPS_ISA_REV >= 6) {
+ if (bpf_op == BPF_DIV)
+ emit_instr(ctx, ddivu_r6, dst, dst, MIPS_R_AT);
+ else
+ emit_instr(ctx, modu, dst, dst, MIPS_R_AT);
+ break;
+ }
emit_instr(ctx, ddivu, dst, MIPS_R_AT);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
@@ -819,11 +868,23 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, and, dst, dst, src);
break;
case BPF_MUL:
- emit_instr(ctx, dmultu, dst, src);
- emit_instr(ctx, mflo, dst);
+ if (MIPS_ISA_REV >= 6) {
+ emit_instr(ctx, dmulu, dst, dst, src);
+ } else {
+ emit_instr(ctx, dmultu, dst, src);
+ emit_instr(ctx, mflo, dst);
+ }
break;
case BPF_DIV:
case BPF_MOD:
+ if (MIPS_ISA_REV >= 6) {
+ if (bpf_op == BPF_DIV)
+ emit_instr(ctx, ddivu_r6,
+ dst, dst, src);
+ else
+ emit_instr(ctx, modu, dst, dst, src);
+ break;
+ }
emit_instr(ctx, ddivu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
@@ -903,6 +964,13 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
break;
case BPF_DIV:
case BPF_MOD:
+ if (MIPS_ISA_REV >= 6) {
+ if (bpf_op == BPF_DIV)
+ emit_instr(ctx, divu_r6, dst, dst, src);
+ else
+ emit_instr(ctx, modu, dst, dst, src);
+ break;
+ }
emit_instr(ctx, divu, dst, src);
if (bpf_op == BPF_DIV)
emit_instr(ctx, mflo, dst);
@@ -1006,8 +1074,15 @@ static int build_one_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
emit_instr(ctx, dsubu, MIPS_R_T8, dst, src);
emit_instr(ctx, sltu, MIPS_R_AT, dst, src);
/* SP known to be non-zero, movz becomes boolean not */
- emit_instr(ctx, movz, MIPS_R_T9, MIPS_R_SP, MIPS_R_T8);
- emit_instr(ctx, movn, MIPS_R_T9, MIPS_R_ZERO, MIPS_R_T8);
+ if (MIPS_ISA_REV >= 6) {
+ emit_instr(ctx, seleqz, MIPS_R_T9,
+ MIPS_R_SP, MIPS_R_T8);
+ } else {
+ emit_instr(ctx, movz, MIPS_R_T9,
+ MIPS_R_SP, MIPS_R_T8);
+ emit_instr(ctx, movn, MIPS_R_T9,
+ MIPS_R_ZERO, MIPS_R_T8);
+ }
emit_instr(ctx, or, MIPS_R_AT, MIPS_R_T9, MIPS_R_AT);
cmp_eq = bpf_op == BPF_JGT;
dst = MIPS_R_AT;
@@ -1234,7 +1309,7 @@ jeq_common:
case BPF_JMP | BPF_CALL:
ctx->flags |= EBPF_SAVE_RA;
- t64s = (s64)insn->imm + (s64)__bpf_call_base;
+ t64s = (s64)insn->imm + (long)__bpf_call_base;
emit_const_to_reg(ctx, MIPS_R_T9, (u64)t64s);
emit_instr(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
/* delay slot */
@@ -1366,6 +1441,17 @@ jeq_common:
if (src < 0)
return src;
if (BPF_MODE(insn->code) == BPF_XADD) {
+ /*
+ * If mem_off does not fit within the 9 bit ll/sc
+ * instruction immediate field, use a temp reg.
+ */
+ if (MIPS_ISA_REV >= 6 &&
+ (mem_off >= BIT(8) || mem_off < -BIT(8))) {
+ emit_instr(ctx, daddiu, MIPS_R_T6,
+ dst, mem_off);
+ mem_off = 0;
+ dst = MIPS_R_T6;
+ }
switch (BPF_SIZE(insn->code)) {
case BPF_W:
if (get_reg_val_type(ctx, this_idx, insn->src_reg) == REG_32BIT) {
@@ -1720,7 +1806,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
unsigned int image_size;
u8 *image_ptr;
- if (!prog->jit_requested || !cpu_has_mips64r2)
+ if (!prog->jit_requested || MIPS_ISA_REV < 2)
return prog;
tmp = bpf_jit_blind_constants(prog);
diff --git a/arch/mips/pic32/Kconfig b/arch/mips/pic32/Kconfig
index e284e89183cc..7acbb50c1dcd 100644
--- a/arch/mips/pic32/Kconfig
+++ b/arch/mips/pic32/Kconfig
@@ -39,12 +39,12 @@ choice
Select the devicetree.
config DTB_PIC32_NONE
- bool "None"
+ bool "None"
config DTB_PIC32_MZDA_SK
- bool "PIC32MZDA Starter Kit"
- depends on PIC32MZDA
- select BUILTIN_DTB
+ bool "PIC32MZDA Starter Kit"
+ depends on PIC32MZDA
+ select BUILTIN_DTB
endchoice
diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile
index 0ede4deb8181..7221df24cb23 100644
--- a/arch/mips/vdso/Makefile
+++ b/arch/mips/vdso/Makefile
@@ -46,9 +46,7 @@ endif
VDSO_LDFLAGS := \
-Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \
$(addprefix -Wl$(comma),$(filter -E%,$(KBUILD_CFLAGS))) \
- -nostdlib -shared \
- $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \
- $(call cc-ldoption, -Wl$(comma)--build-id)
+ -nostdlib -shared -Wl,--hash-style=sysv -Wl,--build-id
GCOV_PROFILE := n
UBSAN_SANITIZE := n
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index addb7f5f5264..55559ca0efe4 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -60,9 +60,6 @@ config GENERIC_LOCKBREAK
def_bool y
depends on PREEMPT
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config TRACE_IRQFLAGS_SUPPORT
def_bool y
diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild
index 64ceff7ab99b..688b6ed26227 100644
--- a/arch/nds32/include/asm/Kbuild
+++ b/arch/nds32/include/asm/Kbuild
@@ -31,6 +31,7 @@ generic-y += limits.h
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += parport.h
generic-y += pci.h
generic-y += percpu.h
diff --git a/arch/nds32/include/asm/elf.h b/arch/nds32/include/asm/elf.h
index 95f3ea253e4c..02250626b9f0 100644
--- a/arch/nds32/include/asm/elf.h
+++ b/arch/nds32/include/asm/elf.h
@@ -10,14 +10,13 @@
#include <asm/ptrace.h>
#include <asm/fpu.h>
+#include <linux/elf-em.h>
typedef unsigned long elf_greg_t;
typedef unsigned long elf_freg_t[3];
extern unsigned int elf_hwcap;
-#define EM_NDS32 167
-
#define R_NDS32_NONE 0
#define R_NDS32_16_RELA 19
#define R_NDS32_32_RELA 20
diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h
index 71cd226d6863..5ef8ae5ba833 100644
--- a/arch/nds32/include/asm/io.h
+++ b/arch/nds32/include/asm/io.h
@@ -55,8 +55,6 @@ static inline u32 __raw_readl(const volatile void __iomem *addr)
#define __iormb() rmb()
#define __iowmb() wmb()
-#define mmiowb() __asm__ __volatile__ ("msync all" : : : "memory");
-
/*
* {read,write}{b,w,l,q}_relaxed() are like the regular version, but
* are not guaranteed to provide ordering against spinlocks or memory
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index 671ebd357496..174b8571d362 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -5,6 +5,7 @@
#ifndef _ASM_NDS32_SYSCALL_H
#define _ASM_NDS32_SYSCALL_H 1
+#include <uapi/linux/audit.h>
#include <linux/err.h>
struct task_struct;
struct pt_regs;
@@ -145,4 +146,12 @@ void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
memcpy(&regs->uregs[0] + 1, args, 5 * sizeof(args[0]));
}
+
+static inline int
+syscall_get_arch(struct task_struct *task)
+{
+ return IS_ENABLED(CONFIG_CPU_BIG_ENDIAN)
+ ? AUDIT_ARCH_NDS32BE : AUDIT_ARCH_NDS32;
+}
+
#endif /* _ASM_NDS32_SYSCALL_H */
diff --git a/arch/nds32/include/asm/tlb.h b/arch/nds32/include/asm/tlb.h
index b35ae5eae3ab..d5ae571c8d30 100644
--- a/arch/nds32/include/asm/tlb.h
+++ b/arch/nds32/include/asm/tlb.h
@@ -4,22 +4,6 @@
#ifndef __ASMNDS32_TLB_H
#define __ASMNDS32_TLB_H
-#define tlb_start_vma(tlb,vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb,vma) \
- do { \
- if(!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte)
diff --git a/arch/nds32/include/asm/tlbflush.h b/arch/nds32/include/asm/tlbflush.h
index 9b411f401903..38ee769b18d8 100644
--- a/arch/nds32/include/asm/tlbflush.h
+++ b/arch/nds32/include/asm/tlbflush.h
@@ -42,6 +42,5 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
void update_mmu_cache(struct vm_area_struct *vma,
unsigned long address, pte_t * pte);
-void tlb_migrate_finish(struct mm_struct *mm);
#endif
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 4ef15a61b7bc..ea37394ff3ea 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -24,6 +24,7 @@ config NIOS2
select USB_ARCH_HAS_HCD if USB_SUPPORT
select CPU_NO_EFFICIENT_FFS
select ARCH_DISCARD_MEMBLOCK
+ select MMU_GATHER_NO_RANGE if MMU
config GENERIC_CSUM
def_bool y
@@ -40,9 +41,6 @@ config NO_IOPORT_MAP
config FPU
def_bool n
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config TRACE_IRQFLAGS_SUPPORT
def_bool n
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 88a667d12aaa..d7ef3512504a 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -27,6 +27,7 @@ generic-y += kvm_para.h
generic-y += local.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += module.h
generic-y += pci.h
generic-y += percpu.h
diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h
index d7624ed06efb..c4f3f8b86f28 100644
--- a/arch/nios2/include/asm/syscall.h
+++ b/arch/nios2/include/asm/syscall.h
@@ -17,6 +17,7 @@
#ifndef __ASM_NIOS2_SYSCALL_H__
#define __ASM_NIOS2_SYSCALL_H__
+#include <uapi/linux/audit.h>
#include <linux/err.h>
#include <linux/sched.h>
@@ -79,4 +80,9 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->r9 = *args;
}
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return AUDIT_ARCH_NIOS2;
+}
+
#endif
diff --git a/arch/nios2/include/asm/tlb.h b/arch/nios2/include/asm/tlb.h
index d3bc648e08b5..f9f2e27e32dd 100644
--- a/arch/nios2/include/asm/tlb.h
+++ b/arch/nios2/include/asm/tlb.h
@@ -11,22 +11,12 @@
#ifndef _ASM_NIOS2_TLB_H
#define _ASM_NIOS2_TLB_H
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
extern void set_mmu_pid(unsigned long pid);
/*
- * NiosII doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for the area to be unmapped.
+ * NiosII does have flush_tlb_range(), but it lacks a limit and fallback to
+ * full mm invalidation. So use flush_tlb_mm() for everything.
*/
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index a5e361fbb75a..7cfb20555b10 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -36,6 +36,7 @@ config OPENRISC
select OMPIC if SMP
select ARCH_WANT_FRAME_POINTERS
select GENERIC_IRQ_MULTI_HANDLER
+ select MMU_GATHER_NO_RANGE if MMU
config CPU_BIG_ENDIAN
def_bool y
@@ -43,12 +44,6 @@ config CPU_BIG_ENDIAN
config MMU
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool n
-
config GENERIC_HWEIGHT
def_bool y
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 22aa97136c01..1919cc5e0f11 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -24,6 +24,7 @@ generic-y += kvm_para.h
generic-y += local.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += module.h
generic-y += pci.h
generic-y += percpu.h
diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h
index b4ff07c1baed..61de227f53a1 100644
--- a/arch/openrisc/include/asm/syscall.h
+++ b/arch/openrisc/include/asm/syscall.h
@@ -68,7 +68,7 @@ syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_OPENRISC;
}
diff --git a/arch/openrisc/include/asm/tlb.h b/arch/openrisc/include/asm/tlb.h
index fa4376a4515d..92d8a4209884 100644
--- a/arch/openrisc/include/asm/tlb.h
+++ b/arch/openrisc/include/asm/tlb.h
@@ -20,14 +20,10 @@
#define __ASM_OPENRISC_TLB_H__
/*
- * or32 doesn't need any special per-pte or
- * per-vma handling..
+ * OpenRISC doesn't have an efficient flush_tlb_range() so use flush_tlb_mm()
+ * for everything.
*/
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
#include <linux/pagemap.h>
#include <asm-generic/tlb.h>
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index c8e621296092..09407ed1aacd 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -36,6 +36,7 @@ config PARISC
select GENERIC_STRNCPY_FROM_USER
select SYSCTL_ARCH_UNALIGN_ALLOW
select SYSCTL_EXCEPTION_TRACE
+ select ARCH_DISCARD_MEMBLOCK
select HAVE_MOD_ARCH_SPECIFIC
select VIRT_TO_BUS
select MODULES_USE_ELF_RELA
@@ -44,6 +45,8 @@ config PARISC
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_HASH
+ select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_REGS_AND_STACK_ACCESS_API
@@ -54,6 +57,9 @@ config PARISC
select CPU_NO_EFFICIENT_FFS
select NEED_DMA_MAP_STATE
select NEED_SG_DMA_LENGTH
+ select HAVE_ARCH_KGDB
+ select HAVE_KPROBES
+ select HAVE_KRETPROBES
help
The PA-RISC microprocessor is designed by Hewlett-Packard and used
@@ -75,12 +81,6 @@ config GENERIC_LOCKBREAK
default y
depends on SMP && PREEMPT
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
default n
@@ -311,21 +311,16 @@ config ARCH_SELECT_MEMORY_MODEL
def_bool y
depends on 64BIT
-config ARCH_DISCONTIGMEM_ENABLE
+config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on 64BIT
config ARCH_FLATMEM_ENABLE
def_bool y
-config ARCH_DISCONTIGMEM_DEFAULT
+config ARCH_SPARSEMEM_DEFAULT
def_bool y
- depends on ARCH_DISCONTIGMEM_ENABLE
-
-config NODES_SHIFT
- int
- default "3"
- depends on NEED_MULTIPLE_NODES
+ depends on ARCH_SPARSEMEM_ENABLE
source "kernel/Kconfig.hz"
diff --git a/arch/parisc/boot/compressed/head.S b/arch/parisc/boot/compressed/head.S
index 5aba20fa48aa..e8b798fd0cf0 100644
--- a/arch/parisc/boot/compressed/head.S
+++ b/arch/parisc/boot/compressed/head.S
@@ -22,7 +22,7 @@
__HEAD
ENTRY(startup)
- .level LEVEL
+ .level PA_ASM_LEVEL
#define PSW_W_SM 0x200
#define PSW_W_BIT 36
@@ -63,7 +63,7 @@ $bss_loop:
load32 BOOTADDR(decompress_kernel),%r3
#ifdef CONFIG_64BIT
- .level LEVEL
+ .level PA_ASM_LEVEL
ssm PSW_W_SM, %r0 /* set W-bit */
depdi 0, 31, 32, %r3
#endif
@@ -72,7 +72,7 @@ $bss_loop:
startup_continue:
#ifdef CONFIG_64BIT
- .level LEVEL
+ .level PA_ASM_LEVEL
rsm PSW_W_SM, %r0 /* clear W-bit */
#endif
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 2556bb181813..2d395998f524 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -145,14 +145,13 @@ static int putchar(int c)
void __noreturn error(char *x)
{
- puts("\n\n");
- puts(x);
- puts("\n\n -- System halted");
+ if (x) puts(x);
+ puts("\n -- System halted\n");
while (1) /* wait forever */
;
}
-static int print_hex(unsigned long num)
+static int print_num(unsigned long num, int base)
{
const char hex[] = "0123456789abcdef";
char str[40];
@@ -160,12 +159,14 @@ static int print_hex(unsigned long num)
str[i--] = '\0';
do {
- str[i--] = hex[num & 0x0f];
- num >>= 4;
+ str[i--] = hex[num % base];
+ num = num / base;
} while (num);
- str[i--] = 'x';
- str[i] = '0';
+ if (base == 16) {
+ str[i--] = 'x';
+ str[i] = '0';
+ } else i++;
puts(&str[i]);
return 0;
@@ -187,8 +188,9 @@ put:
if (fmt[++i] == '%')
goto put;
+ print_num(va_arg(args, unsigned long),
+ fmt[i] == 'x' ? 16:10);
++i;
- print_hex(va_arg(args, unsigned long));
}
va_end(args);
@@ -327,8 +329,15 @@ unsigned long decompress_kernel(unsigned int started_wide,
free_mem_end_ptr = rd_start;
#endif
- if (free_mem_ptr >= free_mem_end_ptr)
- error("Kernel too big for machine.");
+ if (free_mem_ptr >= free_mem_end_ptr) {
+ int free_ram;
+ free_ram = (free_mem_ptr >> 20) + 1;
+ if (free_ram < 32)
+ free_ram = 32;
+ printf("\nKernel requires at least %d MB RAM.\n",
+ free_ram);
+ error(NULL);
+ }
#ifdef DEBUG
printf("\n");
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 37ae4b57c001..a8f9bbef0975 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -14,7 +14,6 @@ CONFIG_SLAB=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PA7100LC=y
CONFIG_SMP=y
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 9bcd0c903dbb..ed2d8cc94909 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -10,12 +10,12 @@ generic-y += hw_irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kdebug.h
-generic-y += kprobes.h
generic-y += kvm_para.h
generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += seccomp.h
diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h
index c17ec0ee6e7c..d85738a7bbe6 100644
--- a/arch/parisc/include/asm/assembly.h
+++ b/arch/parisc/include/asm/assembly.h
@@ -61,14 +61,14 @@
#define LDCW ldcw,co
#define BL b,l
# ifdef CONFIG_64BIT
-# define LEVEL 2.0w
+# define PA_ASM_LEVEL 2.0w
# else
-# define LEVEL 2.0
+# define PA_ASM_LEVEL 2.0
# endif
#else
#define LDCW ldcw
#define BL bl
-#define LEVEL 1.1
+#define PA_ASM_LEVEL 1.1
#endif
#ifdef __ASSEMBLY__
diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h
index 006fb939cac8..4016fe1c65a9 100644
--- a/arch/parisc/include/asm/cache.h
+++ b/arch/parisc/include/asm/cache.h
@@ -44,22 +44,22 @@ void parisc_setup_cache_timing(void);
#define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" \
ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
- : : "r" (addr))
+ : : "r" (addr) : "memory")
#define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" \
ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \
- : : "r" (addr))
+ : : "r" (addr) : "memory")
#define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" \
ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \
- : : "r" (addr))
+ : : "r" (addr) : "memory")
#define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \
ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \
ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) \
- : : "r" (addr))
+ : : "r" (addr) : "memory")
#define asm_io_sync() asm volatile("sync" \
ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \
- ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :: )
+ ALTERNATIVE(ALT_COND_NO_IOC_FDC, INSN_NOP) :::"memory")
#endif /* ! __ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/fixmap.h b/arch/parisc/include/asm/fixmap.h
index f7c3a0905de4..288da73d4cc0 100644
--- a/arch/parisc/include/asm/fixmap.h
+++ b/arch/parisc/include/asm/fixmap.h
@@ -15,17 +15,34 @@
* from areas congruently mapped with user space. It is 8MB large
* and must be 16MB aligned */
#define TMPALIAS_MAP_START ((__PAGE_OFFSET) - 16*1024*1024)
+
+#define FIXMAP_SIZE (FIX_BITMAP_COUNT << PAGE_SHIFT)
+#define FIXMAP_START (TMPALIAS_MAP_START - FIXMAP_SIZE)
/* This is the kernel area for all maps (vmalloc, dma etc.) most
* usually, it extends up to TMPALIAS_MAP_START. Virtual addresses
* 0..GATEWAY_PAGE_SIZE are reserved for the gateway page */
#define KERNEL_MAP_START (GATEWAY_PAGE_SIZE)
-#define KERNEL_MAP_END (TMPALIAS_MAP_START)
+#define KERNEL_MAP_END (FIXMAP_START)
#ifndef __ASSEMBLY__
+
+
+enum fixed_addresses {
+ /* Support writing RO kernel text via kprobes, jump labels, etc. */
+ FIX_TEXT_POKE0,
+ FIX_BITMAP_COUNT
+};
+
extern void *parisc_vmalloc_start;
#define PCXL_DMA_MAP_SIZE (8*1024*1024)
#define VMALLOC_START ((unsigned long)parisc_vmalloc_start)
#define VMALLOC_END (KERNEL_MAP_END)
+
+#define __fix_to_virt(_x) (FIXMAP_START + ((_x) << PAGE_SHIFT))
+
+void set_fixmap(enum fixed_addresses idx, phys_addr_t phys);
+void clear_fixmap(enum fixed_addresses idx);
+
#endif /*__ASSEMBLY__*/
#endif /*_ASM_FIXMAP_H*/
diff --git a/arch/parisc/include/asm/hardware.h b/arch/parisc/include/asm/hardware.h
index d6e1ed145031..9d3d7737c58b 100644
--- a/arch/parisc/include/asm/hardware.h
+++ b/arch/parisc/include/asm/hardware.h
@@ -120,7 +120,7 @@ extern void get_pci_node_path(struct pci_dev *dev, struct hardware_path *path);
extern void init_parisc_bus(void);
extern struct device *hwpath_to_device(struct hardware_path *modpath);
extern void device_to_hwpath(struct device *dev, struct hardware_path *path);
-
+extern int machine_has_merced_bus(void);
/* inventory.c: */
extern void do_memory_inventory(void);
diff --git a/arch/parisc/include/asm/io.h b/arch/parisc/include/asm/io.h
index 30a8315d5c07..93d37010b375 100644
--- a/arch/parisc/include/asm/io.h
+++ b/arch/parisc/include/asm/io.h
@@ -229,8 +229,6 @@ static inline void writeq(unsigned long long q, volatile void __iomem *addr)
#define writel_relaxed(l, addr) writel(l, addr)
#define writeq_relaxed(q, addr) writeq(q, addr)
-#define mmiowb() do { } while (0)
-
void memset_io(volatile void __iomem *addr, unsigned char val, int count);
void memcpy_fromio(void *dst, const volatile void __iomem *src, int count);
void memcpy_toio(volatile void __iomem *dst, const void *src, int count);
diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h
new file mode 100644
index 000000000000..7efb1aa2f7f8
--- /dev/null
+++ b/arch/parisc/include/asm/jump_label.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_PARISC_JUMP_LABEL_H
+#define _ASM_PARISC_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/assembly.h>
+
+#define JUMP_LABEL_NOP_SIZE 4
+
+static __always_inline bool arch_static_branch(struct static_key *key, bool branch)
+{
+ asm_volatile_goto("1:\n\t"
+ "nop\n\t"
+ ".pushsection __jump_table, \"aw\"\n\t"
+ ".word 1b - ., %l[l_yes] - .\n\t"
+ __stringify(ASM_ULONG_INSN) " %c0 - .\n\t"
+ ".popsection\n\t"
+ : : "i" (&((char *)key)[branch]) : : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+static __always_inline bool arch_static_branch_jump(struct static_key *key, bool branch)
+{
+ asm_volatile_goto("1:\n\t"
+ "b,n %l[l_yes]\n\t"
+ ".pushsection __jump_table, \"aw\"\n\t"
+ ".word 1b - ., %l[l_yes] - .\n\t"
+ __stringify(ASM_ULONG_INSN) " %c0 - .\n\t"
+ ".popsection\n\t"
+ : : "i" (&((char *)key)[branch]) : : l_yes);
+
+ return false;
+l_yes:
+ return true;
+}
+
+#endif /* __ASSEMBLY__ */
+#endif
diff --git a/arch/parisc/include/asm/kgdb.h b/arch/parisc/include/asm/kgdb.h
new file mode 100644
index 000000000000..f23e7f8f13a5
--- /dev/null
+++ b/arch/parisc/include/asm/kgdb.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PA-RISC KGDB support
+ *
+ * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ *
+ */
+
+#ifndef __PARISC_KGDB_H__
+#define __PARISC_KGDB_H__
+
+#define BREAK_INSTR_SIZE 4
+#define PARISC_KGDB_COMPILED_BREAK_INSN 0x3ffc01f
+#define PARISC_KGDB_BREAK_INSN 0x3ffa01f
+
+
+#define NUMREGBYTES sizeof(struct parisc_gdb_regs)
+#define BUFMAX 4096
+
+#define CACHE_FLUSH_IS_SAFE 1
+
+#ifndef __ASSEMBLY__
+
+static inline void arch_kgdb_breakpoint(void)
+{
+ asm(".word %0" : : "i"(PARISC_KGDB_COMPILED_BREAK_INSN) : "memory");
+}
+
+struct parisc_gdb_regs {
+ unsigned long gpr[32];
+ unsigned long sar;
+ unsigned long iaoq_f;
+ unsigned long iasq_f;
+ unsigned long iaoq_b;
+ unsigned long iasq_b;
+ unsigned long eiem;
+ unsigned long iir;
+ unsigned long isr;
+ unsigned long ior;
+ unsigned long ipsw;
+ unsigned long __unused0;
+ unsigned long sr4;
+ unsigned long sr0;
+ unsigned long sr1;
+ unsigned long sr2;
+ unsigned long sr3;
+ unsigned long sr5;
+ unsigned long sr6;
+ unsigned long sr7;
+ unsigned long cr0;
+ unsigned long pid1;
+ unsigned long pid2;
+ unsigned long scrccr;
+ unsigned long pid3;
+ unsigned long pid4;
+ unsigned long cr24;
+ unsigned long cr25;
+ unsigned long cr26;
+ unsigned long cr27;
+ unsigned long cr28;
+ unsigned long cr29;
+ unsigned long cr30;
+
+ u64 fr[32];
+};
+
+#endif
+#endif
diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h
new file mode 100644
index 000000000000..e09cf2deeafe
--- /dev/null
+++ b/arch/parisc/include/asm/kprobes.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/parisc/include/asm/kprobes.h
+ *
+ * PA-RISC kprobes implementation
+ *
+ * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ */
+
+#ifndef _PARISC_KPROBES_H
+#define _PARISC_KPROBES_H
+
+#ifdef CONFIG_KPROBES
+
+#include <asm-generic/kprobes.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/notifier.h>
+
+#define PARISC_KPROBES_BREAK_INSN 0x3ff801f
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE 1
+
+typedef u32 kprobe_opcode_t;
+struct kprobe;
+
+void arch_remove_kprobe(struct kprobe *p);
+
+#define flush_insn_slot(p) \
+ flush_icache_range((unsigned long)&(p)->ainsn.insn[0], \
+ (unsigned long)&(p)->ainsn.insn[0] + \
+ sizeof(kprobe_opcode_t))
+
+#define kretprobe_blacklist_size 0
+
+struct arch_specific_insn {
+ kprobe_opcode_t *insn;
+};
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned long status;
+};
+
+struct kprobe_ctlblk {
+ unsigned int kprobe_status;
+ struct prev_kprobe prev_kprobe;
+ unsigned long iaoq[2];
+};
+
+int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs);
+int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
+#endif /* _PARISC_KPROBES_H */
diff --git a/arch/parisc/include/asm/mmzone.h b/arch/parisc/include/asm/mmzone.h
index fafa3893fd70..8d390406d862 100644
--- a/arch/parisc/include/asm/mmzone.h
+++ b/arch/parisc/include/asm/mmzone.h
@@ -2,62 +2,6 @@
#ifndef _PARISC_MMZONE_H
#define _PARISC_MMZONE_H
-#define MAX_PHYSMEM_RANGES 8 /* Fix the size for now (current known max is 3) */
+#define MAX_PHYSMEM_RANGES 4 /* Fix the size for now (current known max is 3) */
-#ifdef CONFIG_DISCONTIGMEM
-
-extern int npmem_ranges;
-
-struct node_map_data {
- pg_data_t pg_data;
-};
-
-extern struct node_map_data node_data[];
-
-#define NODE_DATA(nid) (&node_data[nid].pg_data)
-
-/* We have these possible memory map layouts:
- * Astro: 0-3.75, 67.75-68, 4-64
- * zx1: 0-1, 257-260, 4-256
- * Stretch (N-class): 0-2, 4-32, 34-xxx
- */
-
-/* Since each 1GB can only belong to one region (node), we can create
- * an index table for pfn to nid lookup; each entry in pfnnid_map
- * represents 1GB, and contains the node that the memory belongs to. */
-
-#define PFNNID_SHIFT (30 - PAGE_SHIFT)
-#define PFNNID_MAP_MAX 512 /* support 512GB */
-extern signed char pfnnid_map[PFNNID_MAP_MAX];
-
-#ifndef CONFIG_64BIT
-#define pfn_is_io(pfn) ((pfn & (0xf0000000UL >> PAGE_SHIFT)) == (0xf0000000UL >> PAGE_SHIFT))
-#else
-/* io can be 0xf0f0f0f0f0xxxxxx or 0xfffffffff0000000 */
-#define pfn_is_io(pfn) ((pfn & (0xf000000000000000UL >> PAGE_SHIFT)) == (0xf000000000000000UL >> PAGE_SHIFT))
-#endif
-
-static inline int pfn_to_nid(unsigned long pfn)
-{
- unsigned int i;
-
- if (unlikely(pfn_is_io(pfn)))
- return 0;
-
- i = pfn >> PFNNID_SHIFT;
- BUG_ON(i >= ARRAY_SIZE(pfnnid_map));
-
- return pfnnid_map[i];
-}
-
-static inline int pfn_valid(int pfn)
-{
- int nid = pfn_to_nid(pfn);
-
- if (nid >= 0)
- return (pfn < node_end_pfn(nid));
- return 0;
-}
-
-#endif
#endif /* _PARISC_MMZONE_H */
diff --git a/arch/parisc/include/asm/page.h b/arch/parisc/include/asm/page.h
index b77f49ce6220..93caf17ac5e2 100644
--- a/arch/parisc/include/asm/page.h
+++ b/arch/parisc/include/asm/page.h
@@ -147,9 +147,9 @@ extern int npmem_ranges;
#define __pa(x) ((unsigned long)(x)-PAGE_OFFSET)
#define __va(x) ((void *)((unsigned long)(x)+PAGE_OFFSET))
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
#define pfn_valid(pfn) ((pfn) < max_mapnr)
-#endif /* CONFIG_DISCONTIGMEM */
+#endif
#ifdef CONFIG_HUGETLB_PAGE
#define HPAGE_SHIFT PMD_SHIFT /* fixed for transparent huge pages */
diff --git a/arch/parisc/include/asm/patch.h b/arch/parisc/include/asm/patch.h
new file mode 100644
index 000000000000..685b58a13968
--- /dev/null
+++ b/arch/parisc/include/asm/patch.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PARISC_KERNEL_PATCH_H
+#define _PARISC_KERNEL_PATCH_H
+
+/* stop machine and patch kernel text */
+void patch_text(void *addr, unsigned int insn);
+
+/* patch kernel text with machine already stopped (e.g. in kgdb) */
+void __patch_text(void *addr, unsigned int insn);
+
+#endif
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index d05c678c77c4..ea75cc966dae 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -41,6 +41,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
__pgd_val_set(*pgd, PxD_FLAG_ATTACHED);
#endif
}
+ spin_lock_init(pgd_spinlock(actual_pgd));
return actual_pgd;
}
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index c7bb74e22436..a39b079e73f2 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -17,7 +17,7 @@
#include <asm/processor.h>
#include <asm/cache.h>
-extern spinlock_t pa_tlb_lock;
+static inline spinlock_t *pgd_spinlock(pgd_t *);
/*
* kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -34,16 +34,46 @@ extern spinlock_t pa_tlb_lock;
*/
#define kern_addr_valid(addr) (1)
-/* Purge data and instruction TLB entries. Must be called holding
- * the pa_tlb_lock. The TLB purge instructions are slow on SMP
- * machines since the purge must be broadcast to all CPUs.
+/* This is for the serialization of PxTLB broadcasts. At least on the N class
+ * systems, only one PxTLB inter processor broadcast can be active at any one
+ * time on the Merced bus.
+
+ * PTE updates are protected by locks in the PMD.
+ */
+extern spinlock_t pa_tlb_flush_lock;
+extern spinlock_t pa_swapper_pg_lock;
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+extern int pa_serialize_tlb_flushes;
+#else
+#define pa_serialize_tlb_flushes (0)
+#endif
+
+#define purge_tlb_start(flags) do { \
+ if (pa_serialize_tlb_flushes) \
+ spin_lock_irqsave(&pa_tlb_flush_lock, flags); \
+ else \
+ local_irq_save(flags); \
+ } while (0)
+#define purge_tlb_end(flags) do { \
+ if (pa_serialize_tlb_flushes) \
+ spin_unlock_irqrestore(&pa_tlb_flush_lock, flags); \
+ else \
+ local_irq_restore(flags); \
+ } while (0)
+
+/* Purge data and instruction TLB entries. The TLB purge instructions
+ * are slow on SMP machines since the purge must be broadcast to all CPUs.
*/
static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
{
+ unsigned long flags;
+
+ purge_tlb_start(flags);
mtsp(mm->context, 1);
pdtlb(addr);
pitlb(addr);
+ purge_tlb_end(flags);
}
/* Certain architectures need to do special things when PTEs
@@ -59,11 +89,11 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
do { \
pte_t old_pte; \
unsigned long flags; \
- spin_lock_irqsave(&pa_tlb_lock, flags); \
+ spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\
old_pte = *ptep; \
set_pte(ptep, pteval); \
purge_tlb_entries(mm, addr); \
- spin_unlock_irqrestore(&pa_tlb_lock, flags); \
+ spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\
} while (0)
#endif /* !__ASSEMBLY__ */
@@ -88,10 +118,10 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
#if CONFIG_PGTABLE_LEVELS == 3
#define PGD_ORDER 1 /* Number of pages per pgd */
#define PMD_ORDER 1 /* Number of pages per pmd */
-#define PGD_ALLOC_ORDER 2 /* first pgd contains pmd */
+#define PGD_ALLOC_ORDER (2 + 1) /* first pgd contains pmd */
#else
#define PGD_ORDER 1 /* Number of pages per pgd */
-#define PGD_ALLOC_ORDER PGD_ORDER
+#define PGD_ALLOC_ORDER (PGD_ORDER + 1)
#endif
/* Definitions for 3rd level (we use PLD here for Page Lower directory
@@ -459,6 +489,15 @@ extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+static inline spinlock_t *pgd_spinlock(pgd_t *pgd)
+{
+ if (unlikely(pgd == swapper_pg_dir))
+ return &pa_swapper_pg_lock;
+ return (spinlock_t *)((char *)pgd + (PAGE_SIZE << (PGD_ALLOC_ORDER - 1)));
+}
+
+
static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
{
pte_t pte;
@@ -467,15 +506,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned
if (!pte_young(*ptep))
return 0;
- spin_lock_irqsave(&pa_tlb_lock, flags);
+ spin_lock_irqsave(pgd_spinlock(vma->vm_mm->pgd), flags);
pte = *ptep;
if (!pte_young(pte)) {
- spin_unlock_irqrestore(&pa_tlb_lock, flags);
+ spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
return 0;
}
set_pte(ptep, pte_mkold(pte));
purge_tlb_entries(vma->vm_mm, addr);
- spin_unlock_irqrestore(&pa_tlb_lock, flags);
+ spin_unlock_irqrestore(pgd_spinlock(vma->vm_mm->pgd), flags);
return 1;
}
@@ -485,11 +524,11 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
pte_t old_pte;
unsigned long flags;
- spin_lock_irqsave(&pa_tlb_lock, flags);
+ spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
old_pte = *ptep;
set_pte(ptep, __pte(0));
purge_tlb_entries(mm, addr);
- spin_unlock_irqrestore(&pa_tlb_lock, flags);
+ spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
return old_pte;
}
@@ -497,10 +536,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
unsigned long flags;
- spin_lock_irqsave(&pa_tlb_lock, flags);
+ spin_lock_irqsave(pgd_spinlock(mm->pgd), flags);
set_pte(ptep, pte_wrprotect(*ptep));
purge_tlb_entries(mm, addr);
- spin_unlock_irqrestore(&pa_tlb_lock, flags);
+ spin_unlock_irqrestore(pgd_spinlock(mm->pgd), flags);
}
#define pte_same(A,B) (pte_val(A) == pte_val(B))
diff --git a/arch/parisc/include/asm/ptrace.h b/arch/parisc/include/asm/ptrace.h
index 9ff033d261ab..143fb2a89dd8 100644
--- a/arch/parisc/include/asm/ptrace.h
+++ b/arch/parisc/include/asm/ptrace.h
@@ -37,4 +37,17 @@ extern int regs_query_register_offset(const char *name);
extern const char *regs_query_register_name(unsigned int offset);
#define MAX_REG_OFFSET (offsetof(struct pt_regs, ipsw))
+#define kernel_stack_pointer(regs) ((regs)->gr[30])
+
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+ unsigned int offset)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return 0;
+ return *(unsigned long *)((unsigned long)regs + offset);
+}
+
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n);
+int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr);
+
#endif
diff --git a/arch/parisc/include/asm/sparsemem.h b/arch/parisc/include/asm/sparsemem.h
new file mode 100644
index 000000000000..b5c3a79045b4
--- /dev/null
+++ b/arch/parisc/include/asm/sparsemem.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_PARISC_SPARSEMEM_H
+#define ASM_PARISC_SPARSEMEM_H
+
+/* We have these possible memory map layouts:
+ * Astro: 0-3.75, 67.75-68, 4-64
+ * zx1: 0-1, 257-260, 4-256
+ * Stretch (N-class): 0-2, 4-32, 34-xxx
+ */
+
+#define MAX_PHYSMEM_BITS 39 /* 512 GB */
+#define SECTION_SIZE_BITS 27 /* 128 MB */
+
+#endif
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index 8a63515f03bf..197d2247e4db 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -37,7 +37,11 @@ static inline void arch_spin_unlock(arch_spinlock_t *x)
volatile unsigned int *a;
a = __ldcw_align(x);
+#ifdef CONFIG_SMP
+ (void) __ldcw(a);
+#else
mb();
+#endif
*a = 1;
}
diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h
index 62a6d477fae0..80757e43cf2c 100644
--- a/arch/parisc/include/asm/syscall.h
+++ b/arch/parisc/include/asm/syscall.h
@@ -48,11 +48,11 @@ static inline void syscall_rollback(struct task_struct *task,
/* do nothing */
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
int arch = AUDIT_ARCH_PARISC;
#ifdef CONFIG_64BIT
- if (!is_compat_task())
+ if (!__is_compat_task(task))
arch = AUDIT_ARCH_PARISC64;
#endif
return arch;
diff --git a/arch/parisc/include/asm/tlb.h b/arch/parisc/include/asm/tlb.h
index 0c881e74d8a6..8c0446b04c9e 100644
--- a/arch/parisc/include/asm/tlb.h
+++ b/arch/parisc/include/asm/tlb.h
@@ -2,24 +2,6 @@
#ifndef _PARISC_TLB_H
#define _PARISC_TLB_H
-#define tlb_flush(tlb) \
-do { if ((tlb)->fullmm) \
- flush_tlb_mm((tlb)->mm);\
-} while (0)
-
-#define tlb_start_vma(tlb, vma) \
-do { if (!(tlb)->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define tlb_end_vma(tlb, vma) \
-do { if (!(tlb)->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, address) \
- do { } while (0)
-
#include <asm-generic/tlb.h>
#define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd)
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index 6804374efa66..c5ded01d45be 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -8,21 +8,6 @@
#include <linux/sched.h>
#include <asm/mmu_context.h>
-
-/* This is for the serialisation of PxTLB broadcasts. At least on the
- * N class systems, only one PxTLB inter processor broadcast can be
- * active at any one time on the Merced bus. This tlb purge
- * synchronisation is fairly lightweight and harmless so we activate
- * it on all systems not just the N class.
-
- * It is also used to ensure PTE updates are atomic and consistent
- * with the TLB.
- */
-extern spinlock_t pa_tlb_lock;
-
-#define purge_tlb_start(flags) spin_lock_irqsave(&pa_tlb_lock, flags)
-#define purge_tlb_end(flags) spin_unlock_irqrestore(&pa_tlb_lock, flags)
-
extern void flush_tlb_all(void);
extern void flush_tlb_all_local(void *);
@@ -79,13 +64,6 @@ static inline void flush_tlb_mm(struct mm_struct *mm)
static inline void flush_tlb_page(struct vm_area_struct *vma,
unsigned long addr)
{
- unsigned long flags, sid;
-
- sid = vma->vm_mm->context;
- purge_tlb_start(flags);
- mtsp(sid, 1);
- pdtlb(addr);
- pitlb(addr);
- purge_tlb_end(flags);
+ purge_tlb_entries(vma->vm_mm, addr);
}
#endif
diff --git a/arch/parisc/include/uapi/asm/sockios.h b/arch/parisc/include/uapi/asm/sockios.h
deleted file mode 100644
index 66a3ba64d53f..000000000000
--- a/arch/parisc/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef __ARCH_PARISC_SOCKIOS__
-#define __ARCH_PARISC_SOCKIOS__
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif
diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile
index 8e5f1ab65c68..fc0df5c44468 100644
--- a/arch/parisc/kernel/Makefile
+++ b/arch/parisc/kernel/Makefile
@@ -9,7 +9,8 @@ obj-y := cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \
pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \
ptrace.o hardware.o inventory.o drivers.o alternative.o \
signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \
- process.o processor.o pdc_cons.o pdc_chassis.o unwind.o
+ process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \
+ patch.o
ifdef CONFIG_FUNCTION_TRACER
# Do not profile debug and lowlevel utilities
@@ -32,3 +33,6 @@ obj-$(CONFIG_64BIT) += perf.o perf_asm.o $(obj64-y)
obj-$(CONFIG_PARISC_CPU_TOPOLOGY) += topology.o
obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
+obj-$(CONFIG_JUMP_LABEL) += jump_label.o
+obj-$(CONFIG_KGDB) += kgdb.o
+obj-$(CONFIG_KPROBES) += kprobes.o
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 804880efa11e..0338561968a4 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -40,12 +40,19 @@ void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr);
void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr);
-/* On some machines (e.g. ones with the Merced bus), there can be
+/* On some machines (i.e., ones with the Merced bus), there can be
* only a single PxTLB broadcast at a time; this must be guaranteed
- * by software. We put a spinlock around all TLB flushes to
- * ensure this.
+ * by software. We need a spinlock around all TLB flushes to ensure
+ * this.
*/
-DEFINE_SPINLOCK(pa_tlb_lock);
+DEFINE_SPINLOCK(pa_tlb_flush_lock);
+
+/* Swapper page setup lock. */
+DEFINE_SPINLOCK(pa_swapper_pg_lock);
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+int pa_serialize_tlb_flushes __read_mostly;
+#endif
struct pdc_cache_info cache_info __read_mostly;
#ifndef CONFIG_PA20
diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
index 5eb979d04b90..15e7b3be7b6b 100644
--- a/arch/parisc/kernel/drivers.c
+++ b/arch/parisc/kernel/drivers.c
@@ -38,6 +38,7 @@
#include <asm/io.h>
#include <asm/pdc.h>
#include <asm/parisc-device.h>
+#include <asm/ropes.h>
/* See comments in include/asm-parisc/pci.h */
const struct dma_map_ops *hppa_dma_ops __read_mostly;
@@ -257,6 +258,30 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa)
return ret ? d.dev : NULL;
}
+static int __init is_IKE_device(struct device *dev, void *data)
+{
+ struct parisc_device *pdev = to_parisc_device(dev);
+
+ if (!check_dev(dev))
+ return 0;
+ if (pdev->id.hw_type != HPHW_BCPORT)
+ return 0;
+ if (IS_IKE(pdev) ||
+ (pdev->id.hversion == REO_MERCED_PORT) ||
+ (pdev->id.hversion == REOG_MERCED_PORT)) {
+ return 1;
+ }
+ return 0;
+}
+
+int __init machine_has_merced_bus(void)
+{
+ int ret;
+
+ ret = for_each_padev(is_IKE_device, NULL);
+ return ret ? 1 : 0;
+}
+
/**
* find_pa_parent_type - Find a parent of a specific type
* @dev: The device to start searching from
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index d5eb19efa65b..a1fc04570ade 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -50,12 +50,8 @@
.import pa_tlb_lock,data
.macro load_pa_tlb_lock reg
-#if __PA_LDCW_ALIGNMENT > 4
- load32 PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
- depi 0,31,__PA_LDCW_ALIGN_ORDER, \reg
-#else
- load32 PA(pa_tlb_lock), \reg
-#endif
+ mfctl %cr25,\reg
+ addil L%(PAGE_SIZE << (PGD_ALLOC_ORDER - 1)),\reg
.endm
/* space_to_prot macro creates a prot id from a space id */
@@ -471,8 +467,9 @@
nop
LDREG 0(\ptp),\pte
bb,<,n \pte,_PAGE_PRESENT_BIT,3f
+ LDCW 0(\tmp),\tmp1
b \fault
- stw,ma \spc,0(\tmp)
+ stw \spc,0(\tmp)
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
2: LDREG 0(\ptp),\pte
@@ -481,20 +478,22 @@
.endm
/* Release pa_tlb_lock lock without reloading lock address. */
- .macro tlb_unlock0 spc,tmp
+ .macro tlb_unlock0 spc,tmp,tmp1
#ifdef CONFIG_SMP
98: or,COND(=) %r0,\spc,%r0
- stw,ma \spc,0(\tmp)
+ LDCW 0(\tmp),\tmp1
+ or,COND(=) %r0,\spc,%r0
+ stw \spc,0(\tmp)
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
.endm
/* Release pa_tlb_lock lock. */
- .macro tlb_unlock1 spc,tmp
+ .macro tlb_unlock1 spc,tmp,tmp1
#ifdef CONFIG_SMP
98: load_pa_tlb_lock \tmp
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
- tlb_unlock0 \spc,\tmp
+ tlb_unlock0 \spc,\tmp,\tmp1
#endif
.endm
@@ -1177,7 +1176,7 @@ dtlb_miss_20w:
idtlbt pte,prot
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1203,7 +1202,7 @@ nadtlb_miss_20w:
idtlbt pte,prot
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1237,7 +1236,7 @@ dtlb_miss_11:
mtsp t1, %sr1 /* Restore sr1 */
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1270,7 +1269,7 @@ nadtlb_miss_11:
mtsp t1, %sr1 /* Restore sr1 */
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1299,7 +1298,7 @@ dtlb_miss_20:
idtlbt pte,prot
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1327,7 +1326,7 @@ nadtlb_miss_20:
idtlbt pte,prot
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1434,7 +1433,7 @@ itlb_miss_20w:
iitlbt pte,prot
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1458,7 +1457,7 @@ naitlb_miss_20w:
iitlbt pte,prot
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1492,7 +1491,7 @@ itlb_miss_11:
mtsp t1, %sr1 /* Restore sr1 */
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1516,7 +1515,7 @@ naitlb_miss_11:
mtsp t1, %sr1 /* Restore sr1 */
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1546,7 +1545,7 @@ itlb_miss_20:
iitlbt pte,prot
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1566,7 +1565,7 @@ naitlb_miss_20:
iitlbt pte,prot
- tlb_unlock1 spc,t0
+ tlb_unlock1 spc,t0,t1
rfir
nop
@@ -1596,7 +1595,7 @@ dbit_trap_20w:
idtlbt pte,prot
- tlb_unlock0 spc,t0
+ tlb_unlock0 spc,t0,t1
rfir
nop
#else
@@ -1622,7 +1621,7 @@ dbit_trap_11:
mtsp t1, %sr1 /* Restore sr1 */
- tlb_unlock0 spc,t0
+ tlb_unlock0 spc,t0,t1
rfir
nop
@@ -1642,7 +1641,7 @@ dbit_trap_20:
idtlbt pte,prot
- tlb_unlock0 spc,t0
+ tlb_unlock0 spc,t0,t1
rfir
nop
#endif
diff --git a/arch/parisc/kernel/head.S b/arch/parisc/kernel/head.S
index fbb4e43fda05..d12de2a13753 100644
--- a/arch/parisc/kernel/head.S
+++ b/arch/parisc/kernel/head.S
@@ -22,7 +22,7 @@
#include <linux/linkage.h>
#include <linux/init.h>
- .level LEVEL
+ .level PA_ASM_LEVEL
__INITDATA
ENTRY(boot_args)
@@ -258,7 +258,7 @@ stext_pdc_ret:
ldo R%PA(fault_vector_11)(%r10),%r10
$is_pa20:
- .level LEVEL /* restore 1.1 || 2.0w */
+ .level PA_ASM_LEVEL /* restore 1.1 || 2.0w */
#endif /*!CONFIG_64BIT*/
load32 PA(fault_vector_20),%r10
@@ -329,6 +329,19 @@ smp_slave_stext:
mtsp %r0,%sr6
mtsp %r0,%sr7
+#ifdef CONFIG_64BIT
+ /*
+ * Enable Wide mode early, in case the task_struct for the idle
+ * task in smp_init_current_idle_task was allocated above 4GB.
+ */
+1: mfia %rp /* clear upper part of pcoq */
+ ldo 2f-1b(%rp),%rp
+ depdi 0,31,32,%rp
+ bv (%rp)
+ ssm PSW_SM_W,%r0
+2:
+#endif
+
/* Initialize the SP - monarch sets up smp_init_current_idle_task */
load32 PA(smp_init_current_idle_task),%sp
LDREG 0(%sp),%sp /* load task address */
diff --git a/arch/parisc/kernel/inventory.c b/arch/parisc/kernel/inventory.c
index 35d05fdd7483..6f2d611347a1 100644
--- a/arch/parisc/kernel/inventory.c
+++ b/arch/parisc/kernel/inventory.c
@@ -31,6 +31,7 @@
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/parisc-device.h>
+#include <asm/tlbflush.h>
/*
** Debug options
@@ -638,4 +639,10 @@ void __init do_device_inventory(void)
}
printk(KERN_INFO "Found devices:\n");
print_parisc_devices();
+
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+ pa_serialize_tlb_flushes = machine_has_merced_bus();
+ if (pa_serialize_tlb_flushes)
+ pr_info("Merced bus found: Enable PxTLB serialization.\n");
+#endif
}
diff --git a/arch/parisc/kernel/jump_label.c b/arch/parisc/kernel/jump_label.c
new file mode 100644
index 000000000000..d2f3cb12e282
--- /dev/null
+++ b/arch/parisc/kernel/jump_label.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 Helge Deller <deller@gmx.de>
+ *
+ * Based on arch/arm64/kernel/jump_label.c
+ */
+#include <linux/kernel.h>
+#include <linux/jump_label.h>
+#include <linux/bug.h>
+#include <asm/alternative.h>
+#include <asm/patch.h>
+
+static inline int reassemble_17(int as17)
+{
+ return (((as17 & 0x10000) >> 16) |
+ ((as17 & 0x0f800) << 5) |
+ ((as17 & 0x00400) >> 8) |
+ ((as17 & 0x003ff) << 3));
+}
+
+void arch_jump_label_transform(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ void *addr = (void *)jump_entry_code(entry);
+ u32 insn;
+
+ if (type == JUMP_LABEL_JMP) {
+ void *target = (void *)jump_entry_target(entry);
+ int distance = target - addr;
+ /*
+ * Encode the PA1.1 "b,n" instruction with a 17-bit
+ * displacement. In case we hit the BUG(), we could use
+ * another branch instruction with a 22-bit displacement on
+ * 64-bit CPUs instead. But this seems sufficient for now.
+ */
+ distance -= 8;
+ BUG_ON(distance > 262143 || distance < -262144);
+ insn = 0xe8000002 | reassemble_17(distance >> 2);
+ } else {
+ insn = INSN_NOP;
+ }
+
+ patch_text(addr, insn);
+}
+
+void arch_jump_label_transform_static(struct jump_entry *entry,
+ enum jump_label_type type)
+{
+ /*
+ * We use the architected NOP in arch_static_branch, so there's no
+ * need to patch an identical NOP over the top of it here. The core
+ * will call arch_jump_label_transform from a module notifier if the
+ * NOP needs to be replaced by a branch.
+ */
+}
diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c
new file mode 100644
index 000000000000..664278db9b97
--- /dev/null
+++ b/arch/parisc/kernel/kgdb.c
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PA-RISC KGDB support
+ *
+ * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ *
+ */
+
+#include <linux/kgdb.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/notifier.h>
+#include <linux/kdebug.h>
+#include <linux/uaccess.h>
+#include <asm/ptrace.h>
+#include <asm/traps.h>
+#include <asm/processor.h>
+#include <asm/patch.h>
+#include <asm/cacheflush.h>
+
+const struct kgdb_arch arch_kgdb_ops = {
+ .gdb_bpt_instr = { 0x03, 0xff, 0xa0, 0x1f }
+};
+
+static int __kgdb_notify(struct die_args *args, unsigned long cmd)
+{
+ struct pt_regs *regs = args->regs;
+
+ if (kgdb_handle_exception(1, args->signr, cmd, regs))
+ return NOTIFY_DONE;
+ return NOTIFY_STOP;
+}
+
+static int kgdb_notify(struct notifier_block *self,
+ unsigned long cmd, void *ptr)
+{
+ unsigned long flags;
+ int ret;
+
+ local_irq_save(flags);
+ ret = __kgdb_notify(ptr, cmd);
+ local_irq_restore(flags);
+
+ return ret;
+}
+
+static struct notifier_block kgdb_notifier = {
+ .notifier_call = kgdb_notify,
+ .priority = -INT_MAX,
+};
+
+int kgdb_arch_init(void)
+{
+ return register_die_notifier(&kgdb_notifier);
+}
+
+void kgdb_arch_exit(void)
+{
+ unregister_die_notifier(&kgdb_notifier);
+}
+
+void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ struct parisc_gdb_regs *gr = (struct parisc_gdb_regs *)gdb_regs;
+
+ memset(gr, 0, sizeof(struct parisc_gdb_regs));
+
+ memcpy(gr->gpr, regs->gr, sizeof(gr->gpr));
+ memcpy(gr->fr, regs->fr, sizeof(gr->fr));
+
+ gr->sr0 = regs->sr[0];
+ gr->sr1 = regs->sr[1];
+ gr->sr2 = regs->sr[2];
+ gr->sr3 = regs->sr[3];
+ gr->sr4 = regs->sr[4];
+ gr->sr5 = regs->sr[5];
+ gr->sr6 = regs->sr[6];
+ gr->sr7 = regs->sr[7];
+
+ gr->sar = regs->sar;
+ gr->iir = regs->iir;
+ gr->isr = regs->isr;
+ gr->ior = regs->ior;
+ gr->ipsw = regs->ipsw;
+ gr->cr27 = regs->cr27;
+
+ gr->iaoq_f = regs->iaoq[0];
+ gr->iasq_f = regs->iasq[0];
+
+ gr->iaoq_b = regs->iaoq[1];
+ gr->iasq_b = regs->iasq[1];
+}
+
+void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
+{
+ struct parisc_gdb_regs *gr = (struct parisc_gdb_regs *)gdb_regs;
+
+
+ memcpy(regs->gr, gr->gpr, sizeof(regs->gr));
+ memcpy(regs->fr, gr->fr, sizeof(regs->fr));
+
+ regs->sr[0] = gr->sr0;
+ regs->sr[1] = gr->sr1;
+ regs->sr[2] = gr->sr2;
+ regs->sr[3] = gr->sr3;
+ regs->sr[4] = gr->sr4;
+ regs->sr[5] = gr->sr5;
+ regs->sr[6] = gr->sr6;
+ regs->sr[7] = gr->sr7;
+
+ regs->sar = gr->sar;
+ regs->iir = gr->iir;
+ regs->isr = gr->isr;
+ regs->ior = gr->ior;
+ regs->ipsw = gr->ipsw;
+ regs->cr27 = gr->cr27;
+
+ regs->iaoq[0] = gr->iaoq_f;
+ regs->iasq[0] = gr->iasq_f;
+
+ regs->iaoq[1] = gr->iaoq_b;
+ regs->iasq[1] = gr->iasq_b;
+}
+
+void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs,
+ struct task_struct *task)
+{
+ struct pt_regs *regs = task_pt_regs(task);
+ unsigned long gr30, iaoq;
+
+ gr30 = regs->gr[30];
+ iaoq = regs->iaoq[0];
+
+ regs->gr[30] = regs->ksp;
+ regs->iaoq[0] = regs->kpc;
+ pt_regs_to_gdb_regs(gdb_regs, regs);
+
+ regs->gr[30] = gr30;
+ regs->iaoq[0] = iaoq;
+
+}
+
+static void step_instruction_queue(struct pt_regs *regs)
+{
+ regs->iaoq[0] = regs->iaoq[1];
+ regs->iaoq[1] += 4;
+}
+
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+ regs->iaoq[0] = ip;
+ regs->iaoq[1] = ip + 4;
+}
+
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int ret = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
+ BREAK_INSTR_SIZE);
+ if (ret)
+ return ret;
+
+ __patch_text((void *)bpt->bpt_addr,
+ *(unsigned int *)&arch_kgdb_ops.gdb_bpt_instr);
+ return ret;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ __patch_text((void *)bpt->bpt_addr, *(unsigned int *)&bpt->saved_instr);
+ return 0;
+}
+
+int kgdb_arch_handle_exception(int trap, int signo,
+ int err_code, char *inbuf, char *outbuf,
+ struct pt_regs *regs)
+{
+ unsigned long addr;
+ char *p = inbuf + 1;
+
+ switch (inbuf[0]) {
+ case 'D':
+ case 'c':
+ case 'k':
+ kgdb_contthread = NULL;
+ kgdb_single_step = 0;
+
+ if (kgdb_hex2long(&p, &addr))
+ kgdb_arch_set_pc(regs, addr);
+ else if (trap == 9 && regs->iir ==
+ PARISC_KGDB_COMPILED_BREAK_INSN)
+ step_instruction_queue(regs);
+ return 0;
+ case 's':
+ kgdb_single_step = 1;
+ if (kgdb_hex2long(&p, &addr)) {
+ kgdb_arch_set_pc(regs, addr);
+ } else if (trap == 9 && regs->iir ==
+ PARISC_KGDB_COMPILED_BREAK_INSN) {
+ step_instruction_queue(regs);
+ mtctl(-1, 0);
+ } else {
+ mtctl(0, 0);
+ }
+ regs->gr[0] |= PSW_R;
+ return 0;
+
+ }
+ return -1;
+}
diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c
new file mode 100644
index 000000000000..d58960b33bda
--- /dev/null
+++ b/arch/parisc/kernel/kprobes.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch/parisc/kernel/kprobes.c
+ *
+ * PA-RISC kprobes implementation
+ *
+ * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ */
+
+#include <linux/types.h>
+#include <linux/kprobes.h>
+#include <linux/slab.h>
+#include <asm/cacheflush.h>
+#include <asm/patch.h>
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ if ((unsigned long)p->addr & 3UL)
+ return -EINVAL;
+
+ p->ainsn.insn = get_insn_slot();
+ if (!p->ainsn.insn)
+ return -ENOMEM;
+
+ memcpy(p->ainsn.insn, p->addr,
+ MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ p->opcode = *p->addr;
+ flush_insn_slot(p);
+ return 0;
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+ if (!p->ainsn.insn)
+ return;
+
+ free_insn_slot(p->ainsn.insn, 0);
+ p->ainsn.insn = NULL;
+}
+
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ patch_text(p->addr, PARISC_KPROBES_BREAK_INSN);
+}
+
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ patch_text(p->addr, p->opcode);
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static inline void __kprobes set_current_kprobe(struct kprobe *p)
+{
+ __this_cpu_write(current_kprobe, p);
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p,
+ struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+{
+ kcb->iaoq[0] = regs->iaoq[0];
+ kcb->iaoq[1] = regs->iaoq[1];
+ regs->iaoq[0] = (unsigned long)p->ainsn.insn;
+ mtctl(0, 0);
+ regs->gr[0] |= PSW_R;
+}
+
+int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+
+ preempt_disable();
+
+ kcb = get_kprobe_ctlblk();
+ p = get_kprobe((unsigned long *)regs->iaoq[0]);
+
+ if (!p) {
+ preempt_enable_no_resched();
+ return 0;
+ }
+
+ if (kprobe_running()) {
+ /*
+ * We have reentered the kprobe_handler, since another kprobe
+ * was hit while within the handler, we save the original
+ * kprobes and single step on the instruction of the new probe
+ * without calling any user handlers to avoid recursive
+ * kprobes.
+ */
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p);
+ kprobes_inc_nmissed_count(p);
+ setup_singlestep(p, kcb, regs);
+ kcb->kprobe_status = KPROBE_REENTER;
+ return 1;
+ }
+
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /* If we have no pre-handler or it returned 0, we continue with
+ * normal processing. If we have a pre-handler and it returned
+ * non-zero (meaning the user handler set up registers to resume
+ * at another instruction), we must skip the single stepping.
+ */
+
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
+ setup_singlestep(p, kcb, regs);
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ } else {
+ reset_current_kprobe();
+ preempt_enable_no_resched();
+ }
+ return 1;
+}
+
+int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe *p = kprobe_running();
+
+ if (regs->iaoq[0] != (unsigned long)p->ainsn.insn+4)
+ return 0;
+
+ /* Restore the original saved kprobe variables and continue */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ return 1;
+ }
+
+ /* For absolute branch instructions we can copy iaoq_b. For relative
+ * branch instructions we need to calculate the new address based on
+ * the difference between iaoq_f and iaoq_b. We cannot use iaoq_b
+ * without modification because it is based on our ainsn.insn address.
+ */
+
+ if (p->post_handler)
+ p->post_handler(p, regs, 0);
+
+ switch (regs->iir >> 26) {
+ case 0x38: /* BE */
+ case 0x39: /* BE,L */
+ case 0x3a: /* BV */
+ case 0x3b: /* BVE */
+ /* For absolute branches, regs->iaoq[1] already has the
+ * right address
+ */
+ regs->iaoq[0] = kcb->iaoq[1];
+ break;
+ default:
+ regs->iaoq[1] = kcb->iaoq[0];
+ regs->iaoq[1] += (regs->iaoq[1] - regs->iaoq[0]) + 4;
+ regs->iaoq[0] = kcb->iaoq[1];
+ break;
+ }
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ reset_current_kprobe();
+ return 1;
+}
+
+static inline void kretprobe_trampoline(void)
+{
+ asm volatile("nop");
+ asm volatile("nop");
+}
+
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+ struct pt_regs *regs);
+
+static struct kprobe trampoline_p = {
+ .pre_handler = trampoline_probe_handler
+};
+
+static int __kprobes trampoline_probe_handler(struct kprobe *p,
+ struct pt_regs *regs)
+{
+ struct kretprobe_instance *ri = NULL;
+ struct hlist_head *head, empty_rp;
+ struct hlist_node *tmp;
+ unsigned long flags, orig_ret_address = 0;
+ unsigned long trampoline_address = (unsigned long)trampoline_p.addr;
+ kprobe_opcode_t *correct_ret_addr = NULL;
+
+ INIT_HLIST_HEAD(&empty_rp);
+ kretprobe_hash_lock(current, &head, &flags);
+
+ /*
+ * It is possible to have multiple instances associated with a given
+ * task either because multiple functions in the call path have
+ * a return probe installed on them, and/or more than one return
+ * probe was registered for a target function.
+ *
+ * We can handle this because:
+ * - instances are always inserted at the head of the list
+ * - when multiple return probes are registered for the same
+ * function, the first instance's ret_addr will point to the
+ * real return address, and all the rest will point to
+ * kretprobe_trampoline
+ */
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_assert(ri, orig_ret_address, trampoline_address);
+
+ correct_ret_addr = ri->ret_addr;
+ hlist_for_each_entry_safe(ri, tmp, head, hlist) {
+ if (ri->task != current)
+ /* another task is sharing our hash bucket */
+ continue;
+
+ orig_ret_address = (unsigned long)ri->ret_addr;
+ if (ri->rp && ri->rp->handler) {
+ __this_cpu_write(current_kprobe, &ri->rp->kp);
+ get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE;
+ ri->ret_addr = correct_ret_addr;
+ ri->rp->handler(ri, regs);
+ __this_cpu_write(current_kprobe, NULL);
+ }
+
+ recycle_rp_inst(ri, &empty_rp);
+
+ if (orig_ret_address != trampoline_address)
+ /*
+ * This is the real return address. Any other
+ * instances associated with this task are for
+ * other calls deeper on the call stack
+ */
+ break;
+ }
+
+ kretprobe_hash_unlock(current, &flags);
+
+ hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) {
+ hlist_del(&ri->hlist);
+ kfree(ri);
+ }
+ instruction_pointer_set(regs, orig_ret_address);
+ return 1;
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)regs->gr[2];
+
+ /* Replace the return addr with trampoline addr. */
+ regs->gr[2] = (unsigned long)trampoline_p.addr;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ return p->addr == trampoline_p.addr;
+}
+bool arch_kprobe_on_func_entry(unsigned long offset)
+{
+ return !offset;
+}
+
+int __init arch_init_kprobes(void)
+{
+ trampoline_p.addr = (kprobe_opcode_t *)
+ dereference_function_descriptor(kretprobe_trampoline);
+ return register_kprobe(&trampoline_p);
+}
diff --git a/arch/parisc/kernel/pacache.S b/arch/parisc/kernel/pacache.S
index 187f032c9dd8..4e4e8eb25874 100644
--- a/arch/parisc/kernel/pacache.S
+++ b/arch/parisc/kernel/pacache.S
@@ -311,39 +311,6 @@ fdsync:
nop
ENDPROC_CFI(flush_data_cache_local)
-/* Macros to serialize TLB purge operations on SMP. */
-
- .macro tlb_lock la,flags,tmp
-#ifdef CONFIG_SMP
-98:
-#if __PA_LDCW_ALIGNMENT > 4
- load32 pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
- depi 0,31,__PA_LDCW_ALIGN_ORDER, \la
-#else
- load32 pa_tlb_lock, \la
-#endif
- rsm PSW_SM_I,\flags
-1: LDCW 0(\la),\tmp
- cmpib,<>,n 0,\tmp,3f
-2: ldw 0(\la),\tmp
- cmpb,<> %r0,\tmp,1b
- nop
- b,n 2b
-3:
-99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
-#endif
- .endm
-
- .macro tlb_unlock la,flags,tmp
-#ifdef CONFIG_SMP
-98: ldi 1,\tmp
- sync
- stw \tmp,0(\la)
- mtsm \flags
-99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
-#endif
- .endm
-
/* Clear page using kernel mapping. */
ENTRY_CFI(clear_page_asm)
@@ -601,10 +568,8 @@ ENTRY_CFI(copy_user_page_asm)
pdtlb,l %r0(%r28)
pdtlb,l %r0(%r29)
#else
- tlb_lock %r20,%r21,%r22
0: pdtlb %r0(%r28)
1: pdtlb %r0(%r29)
- tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
@@ -743,9 +708,7 @@ ENTRY_CFI(clear_user_page_asm)
#ifdef CONFIG_PA20
pdtlb,l %r0(%r28)
#else
- tlb_lock %r20,%r21,%r22
0: pdtlb %r0(%r28)
- tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
@@ -821,9 +784,7 @@ ENTRY_CFI(flush_dcache_page_asm)
#ifdef CONFIG_PA20
pdtlb,l %r0(%r28)
#else
- tlb_lock %r20,%r21,%r22
0: pdtlb %r0(%r28)
- tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
@@ -882,9 +843,7 @@ ENTRY_CFI(purge_dcache_page_asm)
#ifdef CONFIG_PA20
pdtlb,l %r0(%r28)
#else
- tlb_lock %r20,%r21,%r22
0: pdtlb %r0(%r28)
- tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
#endif
@@ -948,10 +907,8 @@ ENTRY_CFI(flush_icache_page_asm)
1: pitlb,l %r0(%sr4,%r28)
ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
#else
- tlb_lock %r20,%r21,%r22
0: pdtlb %r0(%r28)
1: pitlb %r0(%sr4,%r28)
- tlb_unlock %r20,%r21,%r22
ALTERNATIVE(0b, 0b+4, ALT_COND_NO_SMP, INSN_PxTLB)
ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SMP, INSN_PxTLB)
ALTERNATIVE(1b, 1b+4, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 7baa2265d439..174213b1716e 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -138,12 +138,6 @@ extern void $$dyncall(void);
EXPORT_SYMBOL($$dyncall);
#endif
-#ifdef CONFIG_DISCONTIGMEM
-#include <asm/mmzone.h>
-EXPORT_SYMBOL(node_data);
-EXPORT_SYMBOL(pfnnid_map);
-#endif
-
#ifdef CONFIG_FUNCTION_TRACER
extern void _mcount(void);
EXPORT_SYMBOL(_mcount);
diff --git a/arch/parisc/kernel/patch.c b/arch/parisc/kernel/patch.c
new file mode 100644
index 000000000000..cdcd981278b3
--- /dev/null
+++ b/arch/parisc/kernel/patch.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Functions to patch read-only (RO) kernel text at runtime.
+ *
+ * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/kprobes.h>
+#include <linux/mm.h>
+#include <linux/stop_machine.h>
+
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+#include <asm/patch.h>
+
+struct patch {
+ void *addr;
+ unsigned int insn;
+};
+
+static void __kprobes *patch_map(void *addr, int fixmap)
+{
+ unsigned long uintaddr = (uintptr_t) addr;
+ bool module = !core_kernel_text(uintaddr);
+ struct page *page;
+
+ if (module && IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ page = vmalloc_to_page(addr);
+ else if (!module && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ page = virt_to_page(addr);
+ else
+ return addr;
+
+ set_fixmap(fixmap, page_to_phys(page));
+
+ return (void *) (__fix_to_virt(fixmap) + (uintaddr & ~PAGE_MASK));
+}
+
+static void __kprobes patch_unmap(int fixmap)
+{
+ clear_fixmap(fixmap);
+}
+
+void __kprobes __patch_text(void *addr, unsigned int insn)
+{
+ void *waddr = addr;
+ int size;
+
+ waddr = patch_map(addr, FIX_TEXT_POKE0);
+ *(u32 *)waddr = insn;
+ size = sizeof(u32);
+ flush_kernel_vmap_range(waddr, size);
+ patch_unmap(FIX_TEXT_POKE0);
+ flush_icache_range((uintptr_t)(addr),
+ (uintptr_t)(addr) + size);
+}
+
+static int __kprobes patch_text_stop_machine(void *data)
+{
+ struct patch *patch = data;
+
+ __patch_text(patch->addr, patch->insn);
+
+ return 0;
+}
+
+void __kprobes patch_text(void *addr, unsigned int insn)
+{
+ struct patch patch = {
+ .addr = addr,
+ .insn = insn,
+ };
+
+ stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL);
+}
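
The patching helpers above write to otherwise read-only kernel text by mapping the target page a second time through a fixmap slot and writing through that temporary alias. As a rough user-space analogue (not the kernel API), the same idea can be sketched with two mappings of one memfd page; the names and values below are illustrative only.

/* User-space analogue of patching a "read-only" page through an alias
 * mapping. This only illustrates the idea behind patch_map()/__patch_text();
 * it uses no kernel-internal APIs. Linux-specific (memfd_create, glibc >= 2.27).
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	int fd = memfd_create("text", 0);

	if (fd < 0 || ftruncate(fd, page) < 0)
		return 1;

	/* "Text": a read-only mapping we are not allowed to write through. */
	unsigned int *ro = mmap(NULL, page, PROT_READ, MAP_SHARED, fd, 0);
	/* Temporary writable alias of the same page (the "fixmap" window). */
	unsigned int *rw = mmap(NULL, page, PROT_READ | PROT_WRITE,
				MAP_SHARED, fd, 0);
	if (ro == MAP_FAILED || rw == MAP_FAILED)
		return 1;

	rw[0] = 0xdeadbeef;	/* patch through the alias */
	munmap(rw, page);	/* tear the window down again */

	printf("read back through RO alias: 0x%x\n", ro[0]);
	return 0;
}

In the kernel, the alias is set up with set_fixmap() and removed with clear_fixmap(), and the instruction cache is flushed afterwards; the user-space sketch needs none of that.
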
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index 841db71958cd..97c206734e24 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -193,6 +193,7 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
*/
int running_on_qemu __read_mostly;
+EXPORT_SYMBOL(running_on_qemu);
void __cpuidle arch_cpu_idle_dead(void)
{
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 7f4d042856b5..e0a81dedc366 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -305,7 +305,8 @@ void __init collect_boot_cpu_data(void)
if (pdc_model_platform_info(orig_prod_num, current_prod_num, serial_no) == PDC_OK) {
printk(KERN_INFO "product %s, original product %s, S/N: %s\n",
- current_prod_num, orig_prod_num, serial_no);
+ current_prod_num[0] ? current_prod_num : "n/a",
+ orig_prod_num, serial_no);
add_device_randomness(orig_prod_num, strlen(orig_prod_num));
add_device_randomness(current_prod_num, strlen(current_prod_num));
add_device_randomness(serial_no, strlen(serial_no));
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 0964c236e3e5..a3d2fb4e6dd2 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -789,3 +789,38 @@ const char *regs_query_register_name(unsigned int offset)
return roff->name;
return NULL;
}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @addr: address which is checked.
+ *
+ * regs_within_kernel_stack() checks whether @addr is within the kernel
+ * stack page(s). If @addr is within the kernel stack, it returns true;
+ * if not, it returns false.
+ */
+int regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ return ((addr & ~(THREAD_SIZE - 1)) ==
+ (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1)));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns @n th entry of the kernel stack which
+ * is specified by @regs. If the @n th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+ addr -= n;
+
+ if (!regs_within_kernel_stack(regs, (unsigned long)addr))
+ return 0;
+
+ return *addr;
+}
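
The check in regs_within_kernel_stack() relies only on THREAD_SIZE being a power of two: two addresses are on the same kernel stack when they share the same THREAD_SIZE-aligned block. A minimal standalone sketch, assuming a 16KB THREAD_SIZE purely for illustration:

/* Illustration of the stack-range check used above. THREAD_SIZE is assumed
 * to be 16KB here only for the example; the real value is arch-specific.
 */
#include <stdio.h>
#include <stdbool.h>

#define THREAD_SIZE (16 * 1024UL)

static bool within_stack(unsigned long sp, unsigned long addr)
{
	/* Same THREAD_SIZE-aligned block as the stack pointer? */
	return (addr & ~(THREAD_SIZE - 1)) == (sp & ~(THREAD_SIZE - 1));
}

int main(void)
{
	unsigned long sp = 0x4000a000;

	printf("%d\n", within_stack(sp, 0x4000bff8));	/* 1: same 16KB block */
	printf("%d\n", within_stack(sp, 0x4000c000));	/* 0: next block */
	return 0;
}
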
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index d908058d05c1..e05cb2a5c16d 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -343,6 +343,12 @@ static int __init parisc_init(void)
boot_cpu_data.cpu_hz / 1000000,
boot_cpu_data.cpu_hz % 1000000 );
+#if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+ /* Don't serialize TLB flushes if we run on one CPU only. */
+ if (num_online_cpus() == 1)
+ pa_serialize_tlb_flushes = 0;
+#endif
+
apply_alternatives_all();
parisc_setup_cache_timing();
diff --git a/arch/parisc/kernel/stacktrace.c b/arch/parisc/kernel/stacktrace.c
index ec5835e83a7a..6f0b9c8d8052 100644
--- a/arch/parisc/kernel/stacktrace.c
+++ b/arch/parisc/kernel/stacktrace.c
@@ -29,22 +29,17 @@ static void dump_trace(struct task_struct *task, struct stack_trace *trace)
}
}
-
/*
* Save stack-backtrace addresses into a stack_trace buffer.
*/
void save_stack_trace(struct stack_trace *trace)
{
dump_trace(current, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
dump_trace(tsk, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 376ea0d1b275..4407ac4c1d84 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -86,7 +86,8 @@ static unsigned long mmap_upper_limit(struct rlimit *rlim_stack)
stack_base = STACK_SIZE_MAX;
/* Add space for stack randomization. */
- stack_base += (STACK_RND_MASK << PAGE_SHIFT);
+ if (current->flags & PF_RANDOMIZE)
+ stack_base += (STACK_RND_MASK << PAGE_SHIFT);
return PAGE_ALIGN(STACK_TOP - stack_base);
}
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 4f77bd9be66b..e54d5e4d3489 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -48,7 +48,7 @@ registers).
*/
#define KILL_INSN break 0,0
- .level LEVEL
+ .level PA_ASM_LEVEL
.text
@@ -640,7 +640,9 @@ cas_action:
sub,<> %r28, %r25, %r0
2: stw %r24, 0(%r26)
/* Free lock */
- sync
+#ifdef CONFIG_SMP
+ LDCW 0(%sr2,%r20), %r1 /* Barrier */
+#endif
stw %r20, 0(%sr2,%r20)
#if ENABLE_LWS_DEBUG
/* Clear thread register indicator */
@@ -655,7 +657,9 @@ cas_action:
3:
/* Error occurred on load or store */
/* Free lock */
- sync
+#ifdef CONFIG_SMP
+ LDCW 0(%sr2,%r20), %r1 /* Barrier */
+#endif
stw %r20, 0(%sr2,%r20)
#if ENABLE_LWS_DEBUG
stw %r0, 4(%sr2,%r20)
@@ -857,7 +861,9 @@ cas2_action:
cas2_end:
/* Free lock */
- sync
+#ifdef CONFIG_SMP
+ LDCW 0(%sr2,%r20), %r1 /* Barrier */
+#endif
stw %r20, 0(%sr2,%r20)
/* Enable interrupts */
ssm PSW_SM_I, %r0
@@ -868,7 +874,9 @@ cas2_end:
22:
/* Error occurred on load or store */
/* Free lock */
- sync
+#ifdef CONFIG_SMP
+ LDCW 0(%sr2,%r20), %r1 /* Barrier */
+#endif
stw %r20, 0(%sr2,%r20)
ssm PSW_SM_I, %r0
ldo 1(%r0),%r28
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 7e1ccafadf57..096e319adeb3 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -42,6 +42,8 @@
#include <asm/unwind.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
+#include <linux/kgdb.h>
+#include <linux/kprobes.h>
#include "../math-emu/math-emu.h" /* for handle_fpe() */
@@ -293,6 +295,22 @@ static void handle_break(struct pt_regs *regs)
(tt == BUG_TRAP_TYPE_NONE) ? 9 : 0);
}
+#ifdef CONFIG_KPROBES
+ if (unlikely(iir == PARISC_KPROBES_BREAK_INSN)) {
+ parisc_kprobe_break_handler(regs);
+ return;
+ }
+
+#endif
+
+#ifdef CONFIG_KGDB
+ if (unlikely(iir == PARISC_KGDB_COMPILED_BREAK_INSN ||
+ iir == PARISC_KGDB_BREAK_INSN)) {
+ kgdb_handle_exception(9, SIGTRAP, 0, regs);
+ return;
+ }
+#endif
+
if (unlikely(iir != GDB_BREAK_INSN))
parisc_printk_ratelimited(0, regs,
KERN_DEBUG "break %d,%d: pid=%d command='%s'\n",
@@ -518,6 +536,19 @@ void notrace handle_interruption(int code, struct pt_regs *regs)
case 3:
/* Recovery counter trap */
regs->gr[0] &= ~PSW_R;
+
+#ifdef CONFIG_KPROBES
+ if (parisc_kprobe_ss_handler(regs))
+ return;
+#endif
+
+#ifdef CONFIG_KGDB
+ if (kgdb_single_step) {
+ kgdb_handle_exception(0, SIGTRAP, 0, regs);
+ return;
+ }
+#endif
+
if (user_space(regs))
handle_gdb_break(regs, TRAP_TRACE);
/* else this must be the start of a syscall - just let it run */
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index c3b1b9c24ede..a8be7a47fcc0 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -18,6 +18,9 @@
*(.data..vm0.pgd) \
*(.data..vm0.pte)
+/* No __ro_after_init data in the .rodata section - which will always be ro */
+#define RO_AFTER_INIT_DATA
+
#include <asm-generic/vmlinux.lds.h>
/* needed for the processor specific cache alignment size */
diff --git a/arch/parisc/mm/Makefile b/arch/parisc/mm/Makefile
index 134393de69d2..20e39b043a60 100644
--- a/arch/parisc/mm/Makefile
+++ b/arch/parisc/mm/Makefile
@@ -2,5 +2,5 @@
# Makefile for arch/parisc/mm
#
-obj-y := init.o fault.o ioremap.o
+obj-y := init.o fault.o ioremap.o fixmap.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c
new file mode 100644
index 000000000000..c8d41b54fb19
--- /dev/null
+++ b/arch/parisc/mm/fixmap.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fixmaps for parisc
+ *
+ * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org>
+ */
+
+#include <linux/kprobes.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
+#include <asm/fixmap.h>
+
+void set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
+{
+ unsigned long vaddr = __fix_to_virt(idx);
+ pgd_t *pgd = pgd_offset_k(vaddr);
+ pmd_t *pmd = pmd_offset(pgd, vaddr);
+ pte_t *pte;
+
+ if (pmd_none(*pmd))
+ pmd = pmd_alloc(NULL, pgd, vaddr);
+
+ pte = pte_offset_kernel(pmd, vaddr);
+ if (pte_none(*pte))
+ pte = pte_alloc_kernel(pmd, vaddr);
+
+ set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX));
+ flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+}
+
+void clear_fixmap(enum fixed_addresses idx)
+{
+ unsigned long vaddr = __fix_to_virt(idx);
+ pgd_t *pgd = pgd_offset_k(vaddr);
+ pmd_t *pmd = pmd_offset(pgd, vaddr);
+ pte_t *pte = pte_offset_kernel(pmd, vaddr);
+
+ pte_clear(&init_mm, vaddr, pte);
+
+ flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
+}
diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c
index d77479ae3af2..d578809e55cf 100644
--- a/arch/parisc/mm/hugetlbpage.c
+++ b/arch/parisc/mm/hugetlbpage.c
@@ -139,9 +139,9 @@ void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
{
unsigned long flags;
- purge_tlb_start(flags);
+ spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);
__set_huge_pte_at(mm, addr, ptep, entry);
- purge_tlb_end(flags);
+ spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);
}
@@ -151,10 +151,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
unsigned long flags;
pte_t entry;
- purge_tlb_start(flags);
+ spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);
entry = *ptep;
__set_huge_pte_at(mm, addr, ptep, __pte(0));
- purge_tlb_end(flags);
+ spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);
return entry;
}
@@ -166,10 +166,10 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm,
unsigned long flags;
pte_t old_pte;
- purge_tlb_start(flags);
+ spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);
old_pte = *ptep;
__set_huge_pte_at(mm, addr, ptep, pte_wrprotect(old_pte));
- purge_tlb_end(flags);
+ spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);
}
int huge_ptep_set_access_flags(struct vm_area_struct *vma,
@@ -178,13 +178,14 @@ int huge_ptep_set_access_flags(struct vm_area_struct *vma,
{
unsigned long flags;
int changed;
+ struct mm_struct *mm = vma->vm_mm;
- purge_tlb_start(flags);
+ spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);
changed = !pte_same(*ptep, pte);
if (changed) {
- __set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
+ __set_huge_pte_at(mm, addr, ptep, pte);
}
- purge_tlb_end(flags);
+ spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);
return changed;
}
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index d0b166256f1a..3b0f9eab7f2c 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -32,6 +32,7 @@
#include <asm/mmzone.h>
#include <asm/sections.h>
#include <asm/msgbuf.h>
+#include <asm/sparsemem.h>
extern int data_start;
extern void parisc_kernel_start(void); /* Kernel entry point in head.S */
@@ -48,11 +49,6 @@ pmd_t pmd0[PTRS_PER_PMD] __attribute__ ((__section__ (".data..vm0.pmd"), aligned
pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__ ((__section__ (".data..vm0.pgd"), aligned(PAGE_SIZE)));
pte_t pg0[PT_INITIAL * PTRS_PER_PTE] __attribute__ ((__section__ (".data..vm0.pte"), aligned(PAGE_SIZE)));
-#ifdef CONFIG_DISCONTIGMEM
-struct node_map_data node_data[MAX_NUMNODES] __read_mostly;
-signed char pfnnid_map[PFNNID_MAP_MAX] __read_mostly;
-#endif
-
static struct resource data_resource = {
.name = "Kernel data",
.flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
@@ -76,11 +72,11 @@ static struct resource sysram_resources[MAX_PHYSMEM_RANGES] __read_mostly;
* information retrieved in kernel/inventory.c.
*/
-physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __read_mostly;
-int npmem_ranges __read_mostly;
+physmem_range_t pmem_ranges[MAX_PHYSMEM_RANGES] __initdata;
+int npmem_ranges __initdata;
#ifdef CONFIG_64BIT
-#define MAX_MEM (~0UL)
+#define MAX_MEM (1UL << MAX_PHYSMEM_BITS)
#else /* !CONFIG_64BIT */
#define MAX_MEM (3584U*1024U*1024U)
#endif /* !CONFIG_64BIT */
@@ -119,7 +115,7 @@ static void __init mem_limit_func(void)
static void __init setup_bootmem(void)
{
unsigned long mem_max;
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
physmem_range_t pmem_holes[MAX_PHYSMEM_RANGES - 1];
int npmem_holes;
#endif
@@ -137,23 +133,20 @@ static void __init setup_bootmem(void)
int j;
for (j = i; j > 0; j--) {
- unsigned long tmp;
+ physmem_range_t tmp;
if (pmem_ranges[j-1].start_pfn <
pmem_ranges[j].start_pfn) {
break;
}
- tmp = pmem_ranges[j-1].start_pfn;
- pmem_ranges[j-1].start_pfn = pmem_ranges[j].start_pfn;
- pmem_ranges[j].start_pfn = tmp;
- tmp = pmem_ranges[j-1].pages;
- pmem_ranges[j-1].pages = pmem_ranges[j].pages;
- pmem_ranges[j].pages = tmp;
+ tmp = pmem_ranges[j-1];
+ pmem_ranges[j-1] = pmem_ranges[j];
+ pmem_ranges[j] = tmp;
}
}
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
/*
* Throw out ranges that are too far apart (controlled by
* MAX_GAP).
@@ -165,7 +158,7 @@ static void __init setup_bootmem(void)
pmem_ranges[i-1].pages) > MAX_GAP) {
npmem_ranges = i;
printk("Large gap in memory detected (%ld pages). "
- "Consider turning on CONFIG_DISCONTIGMEM\n",
+ "Consider turning on CONFIG_SPARSEMEM\n",
pmem_ranges[i].start_pfn -
(pmem_ranges[i-1].start_pfn +
pmem_ranges[i-1].pages));
@@ -230,9 +223,8 @@ static void __init setup_bootmem(void)
printk(KERN_INFO "Total Memory: %ld MB\n",mem_max >> 20);
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
/* Merge the ranges, keeping track of the holes */
-
{
unsigned long end_pfn;
unsigned long hole_pages;
@@ -255,18 +247,6 @@ static void __init setup_bootmem(void)
}
#endif
-#ifdef CONFIG_DISCONTIGMEM
- for (i = 0; i < MAX_PHYSMEM_RANGES; i++) {
- memset(NODE_DATA(i), 0, sizeof(pg_data_t));
- }
- memset(pfnnid_map, 0xff, sizeof(pfnnid_map));
-
- for (i = 0; i < npmem_ranges; i++) {
- node_set_state(i, N_NORMAL_MEMORY);
- node_set_online(i);
- }
-#endif
-
/*
* Initialize and free the full range of memory in each range.
*/
@@ -314,7 +294,7 @@ static void __init setup_bootmem(void)
memblock_reserve(__pa(KERNEL_BINARY_TEXT_START),
(unsigned long)(_end - KERNEL_BINARY_TEXT_START));
-#ifndef CONFIG_DISCONTIGMEM
+#ifndef CONFIG_SPARSEMEM
/* reserve the holes */
@@ -360,6 +340,9 @@ static void __init setup_bootmem(void)
/* Initialize Page Deallocation Table (PDT) and check for bad memory. */
pdc_pdt_init();
+
+ memblock_allow_resize();
+ memblock_dump_all();
}
static int __init parisc_text_address(unsigned long vaddr)
@@ -495,7 +478,7 @@ static void __init map_pages(unsigned long start_vaddr,
void __init set_kernel_text_rw(int enable_read_write)
{
- unsigned long start = (unsigned long) _text;
+ unsigned long start = (unsigned long) __init_begin;
unsigned long end = (unsigned long) &data_start;
map_pages(start, __pa(start), end-start,
@@ -622,15 +605,19 @@ void __init mem_init(void)
* But keep code for debugging purposes.
*/
printk("virtual kernel memory layout:\n"
- " vmalloc : 0x%px - 0x%px (%4ld MB)\n"
- " memory : 0x%px - 0x%px (%4ld MB)\n"
- " .init : 0x%px - 0x%px (%4ld kB)\n"
- " .data : 0x%px - 0x%px (%4ld kB)\n"
- " .text : 0x%px - 0x%px (%4ld kB)\n",
+ " vmalloc : 0x%px - 0x%px (%4ld MB)\n"
+ " fixmap : 0x%px - 0x%px (%4ld kB)\n"
+ " memory : 0x%px - 0x%px (%4ld MB)\n"
+ " .init : 0x%px - 0x%px (%4ld kB)\n"
+ " .data : 0x%px - 0x%px (%4ld kB)\n"
+ " .text : 0x%px - 0x%px (%4ld kB)\n",
(void*)VMALLOC_START, (void*)VMALLOC_END,
(VMALLOC_END - VMALLOC_START) >> 20,
+ (void *)FIXMAP_START, (void *)(FIXMAP_START + FIXMAP_SIZE),
+ (unsigned long)(FIXMAP_SIZE / 1024),
+
__va(0), high_memory,
((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
@@ -709,37 +696,46 @@ static void __init gateway_init(void)
PAGE_SIZE, PAGE_GATEWAY, 1);
}
-void __init paging_init(void)
+static void __init parisc_bootmem_free(void)
{
+ unsigned long zones_size[MAX_NR_ZONES] = { 0, };
+ unsigned long holes_size[MAX_NR_ZONES] = { 0, };
+ unsigned long mem_start_pfn = ~0UL, mem_end_pfn = 0, mem_size_pfn = 0;
int i;
+ for (i = 0; i < npmem_ranges; i++) {
+ unsigned long start = pmem_ranges[i].start_pfn;
+ unsigned long size = pmem_ranges[i].pages;
+ unsigned long end = start + size;
+
+ if (mem_start_pfn > start)
+ mem_start_pfn = start;
+ if (mem_end_pfn < end)
+ mem_end_pfn = end;
+ mem_size_pfn += size;
+ }
+
+ zones_size[0] = mem_end_pfn - mem_start_pfn;
+ holes_size[0] = zones_size[0] - mem_size_pfn;
+
+ free_area_init_node(0, zones_size, mem_start_pfn, holes_size);
+}
+
+void __init paging_init(void)
+{
setup_bootmem();
pagetable_init();
gateway_init();
flush_cache_all_local(); /* start with known state */
flush_tlb_all_local(NULL);
- for (i = 0; i < npmem_ranges; i++) {
- unsigned long zones_size[MAX_NR_ZONES] = { 0, };
-
- zones_size[ZONE_NORMAL] = pmem_ranges[i].pages;
-
-#ifdef CONFIG_DISCONTIGMEM
- /* Need to initialize the pfnnid_map before we can initialize
- the zone */
- {
- int j;
- for (j = (pmem_ranges[i].start_pfn >> PFNNID_SHIFT);
- j <= ((pmem_ranges[i].start_pfn + pmem_ranges[i].pages) >> PFNNID_SHIFT);
- j++) {
- pfnnid_map[j] = i;
- }
- }
-#endif
-
- free_area_init_node(i, zones_size,
- pmem_ranges[i].start_pfn, NULL);
- }
+ /*
+ * Mark all memblocks as present for sparsemem using
+ * memory_present() and then initialize sparsemem.
+ */
+ memblocks_present();
+ sparse_init();
+ parisc_bootmem_free();
}
#ifdef CONFIG_PA20
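
Two of the init.c changes are easy to check in isolation: the bubble sort of pmem_ranges now swaps whole physmem_range_t entries instead of individual fields, and parisc_bootmem_free() collapses all ranges into a single zone whose hole size is the span minus the pages actually present. A standalone sketch with made-up range values:

/* Standalone check of the range sort and the zone/hole computation from
 * setup_bootmem()/parisc_bootmem_free(). The ranges below are made up.
 */
#include <stdio.h>

typedef struct {
	unsigned long start_pfn;
	unsigned long pages;
} physmem_range_t;

int main(void)
{
	physmem_range_t r[] = {
		{ .start_pfn = 0x40000, .pages = 0x10000 },
		{ .start_pfn = 0x00000, .pages = 0x20000 },
	};
	int n = 2, i, j;

	/* Sort by start_pfn; struct assignment replaces field-by-field swaps. */
	for (i = 1; i < n; i++) {
		for (j = i; j > 0; j--) {
			physmem_range_t tmp;

			if (r[j - 1].start_pfn < r[j].start_pfn)
				break;
			tmp = r[j - 1];
			r[j - 1] = r[j];
			r[j] = tmp;
		}
	}

	/* One zone spanning all ranges; the gap between them becomes a hole. */
	unsigned long start = ~0UL, end = 0, present = 0;

	for (i = 0; i < n; i++) {
		unsigned long e = r[i].start_pfn + r[i].pages;

		if (r[i].start_pfn < start)
			start = r[i].start_pfn;
		if (e > end)
			end = e;
		present += r[i].pages;
	}
	printf("zone span %#lx pfns, holes %#lx pfns\n",
	       end - start, (end - start) - present);
	return 0;
}
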
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2d0be82c3061..d7996cfaceca 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -103,13 +103,6 @@ config LOCKDEP_SUPPORT
bool
default y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y
-
config GENERIC_LOCKBREAK
bool
default y
@@ -132,6 +125,7 @@ config PPC
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV
+ select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_PTE_SPECIAL
@@ -173,6 +167,7 @@ config PPC
select GENERIC_TIME_VSYSCALL
select HAVE_ARCH_AUDITSYSCALL
select HAVE_ARCH_JUMP_LABEL
+ select HAVE_ARCH_KASAN if PPC32
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
@@ -218,6 +213,8 @@ config PPC
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
+ select HAVE_MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
select HAVE_SYSCALL_TRACEPOINTS
@@ -318,6 +315,10 @@ config ARCH_SUSPEND_POSSIBLE
(PPC_85xx && !PPC_E500MC) || PPC_86xx || PPC_PSERIES \
|| 44x || 40x
+config ARCH_SUSPEND_NONZERO_CPU
+ def_bool y
+ depends on PPC_POWERNV || PPC_PSERIES
+
config PPC_DCR_NATIVE
bool
@@ -375,7 +376,6 @@ config ZONE_DMA
config PGTABLE_LEVELS
int
default 2 if !PPC64
- default 3 if PPC_64K_PAGES && !PPC_BOOK3S_64
default 4
source "arch/powerpc/sysdev/Kconfig"
@@ -391,7 +391,7 @@ source "kernel/Kconfig.hz"
config HUGETLB_PAGE_SIZE_VARIABLE
bool
- depends on HUGETLB_PAGE
+ depends on HUGETLB_PAGE && PPC_BOOK3S_64
default y
config MATH_EMULATION
@@ -832,9 +832,9 @@ config CMDLINE_BOOL
bool "Default bootloader kernel arguments"
config CMDLINE
- string "Initial kernel command string"
- depends on CMDLINE_BOOL
- default "console=ttyS0,9600 console=tty0 root=/dev/sda2"
+ string "Initial kernel command string" if CMDLINE_BOOL
+ default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL
+ default ""
help
On some platforms, there is currently no way for the boot loader to
pass arguments to the kernel. For these platforms, you can supply
diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
index 4e00cb0a5464..c59920920ddc 100644
--- a/arch/powerpc/Kconfig.debug
+++ b/arch/powerpc/Kconfig.debug
@@ -117,6 +117,14 @@ config XMON_DISASSEMBLY
to say Y here, unless you're building for a memory-constrained
system.
+config XMON_DEFAULT_RO_MODE
+ bool "Restrict xmon to read-only operations by default"
+ depends on XMON
+ default y
+ help
+ Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
+ 'xmon=ro' override this default.
+
config DEBUGGER
bool
depends on KGDB || XMON
@@ -361,8 +369,32 @@ config PPC_PTDUMP
If you are unsure, say N.
+config PPC_DEBUG_WX
+ bool "Warn on W+X mappings at boot"
+ depends on PPC_PTDUMP
+ help
+ Generate a warning if any W+X mappings are found at boot.
+
+ This is useful for discovering cases where the kernel is leaving
+ W+X mappings after applying NX, as such mappings are a security risk.
+
+ Note that even if the check fails, your kernel is probably
+ still fine: W+X mappings are not a security hole in
+ themselves, but they make it easier to exploit other
+ unfixed kernel bugs.
+
+ This option has no runtime or memory cost once the kernel
+ has booted; it is a one-time check.
+
+ If in doubt, say "Y".
+
config PPC_FAST_ENDIAN_SWITCH
bool "Deprecated fast endian-switch syscall"
depends on DEBUG_KERNEL && PPC_BOOK3S_64
help
If you're unsure what this is, say N.
+
+config KASAN_SHADOW_OFFSET
+ hex
+ depends on KASAN
+ default 0xe0000000
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 7de49889bd5d..258ea6b2f2e7 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -34,11 +34,10 @@ ifdef CONFIG_PPC_BOOK3S_32
KBUILD_CFLAGS += -mcpu=powerpc
endif
-ifeq ($(CROSS_COMPILE),)
-KBUILD_DEFCONFIG := $(shell uname -m)_defconfig
-else
-KBUILD_DEFCONFIG := ppc64_defconfig
-endif
+# If we're on a ppc/ppc64/ppc64le machine use that defconfig, otherwise just use
+# ppc64_defconfig because we have nothing better to go on.
+uname := $(shell uname -m)
+KBUILD_DEFCONFIG := $(if $(filter ppc%,$(uname)),$(uname),ppc64)_defconfig
ifdef CONFIG_PPC64
new_nm := $(shell if $(NM) --help 2>&1 | grep -- '--synthetic' > /dev/null; then echo y; else echo n; fi)
@@ -367,6 +366,10 @@ ppc32_allmodconfig:
$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/book3s_32.config \
-f $(srctree)/Makefile allmodconfig
+PHONY += ppc_defconfig
+ppc_defconfig:
+ $(call merge_into_defconfig,book3s_32.config,)
+
PHONY += ppc64le_allmodconfig
ppc64le_allmodconfig:
$(Q)$(MAKE) KCONFIG_ALLCONFIG=$(srctree)/arch/powerpc/configs/le.config \
@@ -406,7 +409,9 @@ vdso_install:
ifdef CONFIG_PPC64
$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso64 $@
endif
+ifdef CONFIG_VDSO32
$(Q)$(MAKE) $(build)=arch/$(ARCH)/kernel/vdso32 $@
+endif
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/powerpc/boot/addnote.c b/arch/powerpc/boot/addnote.c
index 9d9f6f334d3c..3da3e2b1b51b 100644
--- a/arch/powerpc/boot/addnote.c
+++ b/arch/powerpc/boot/addnote.c
@@ -223,7 +223,11 @@ main(int ac, char **av)
PUT_16(E_PHNUM, np + 2);
/* write back */
- lseek(fd, (long) 0, SEEK_SET);
+ i = lseek(fd, (long) 0, SEEK_SET);
+ if (i < 0) {
+ perror("lseek");
+ exit(1);
+ }
i = write(fd, buf, n);
if (i < 0) {
perror("write");
diff --git a/arch/powerpc/boot/dts/fsl/b4qds.dtsi b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
index 999efd3bc167..05be919f3545 100644
--- a/arch/powerpc/boot/dts/fsl/b4qds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/b4qds.dtsi
@@ -40,6 +40,7 @@
interrupt-parent = <&mpic>;
aliases {
+ crypto = &crypto;
phy_sgmii_10 = &phy_sgmii_10;
phy_sgmii_11 = &phy_sgmii_11;
phy_sgmii_1c = &phy_sgmii_1c;
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index ea79c519863d..62e12f61a3b2 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -217,6 +217,7 @@ CONFIG_USB_MON=m
CONFIG_USB_EHCI_HCD=y
# CONFIG_USB_EHCI_HCD_PPC_OF is not set
CONFIG_USB_OHCI_HCD=y
+CONFIG_USB_XHCI_HCD=y
CONFIG_USB_STORAGE=m
CONFIG_NEW_LEDS=y
CONFIG_LEDS_CLASS=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index 1bcd468ab422..a887616e35a2 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -163,6 +163,8 @@ CONFIG_S2IO=m
CONFIG_MLX4_EN=m
# CONFIG_MLX4_CORE_GEN2 is not set
CONFIG_MLX5_CORE=m
+CONFIG_MLX5_CORE_EN=y
+# CONFIG_MLX5_EN_RXNFC is not set
# CONFIG_NET_VENDOR_MICREL is not set
# CONFIG_NET_VENDOR_MICROSEMI is not set
CONFIG_MYRI10GE=m
diff --git a/arch/powerpc/crypto/crc-vpmsum_test.c b/arch/powerpc/crypto/crc-vpmsum_test.c
index 0153a9c6f4af..98ea4f4d3dde 100644
--- a/arch/powerpc/crypto/crc-vpmsum_test.c
+++ b/arch/powerpc/crypto/crc-vpmsum_test.c
@@ -78,16 +78,12 @@ static int __init crc_test_init(void)
pr_info("crc-vpmsum_test begins, %lu iterations\n", iterations);
for (i=0; i<iterations; i++) {
- size_t len, offset;
+ size_t offset = prandom_u32_max(16);
+ size_t len = prandom_u32_max(MAX_CRC_LENGTH);
- get_random_bytes(data, MAX_CRC_LENGTH);
- get_random_bytes(&len, sizeof(len));
- get_random_bytes(&offset, sizeof(offset));
-
- len %= MAX_CRC_LENGTH;
- offset &= 15;
if (len <= offset)
continue;
+ prandom_bytes(data, len);
len -= offset;
crypto_shash_update(crct10dif_shash, data+offset, len);
diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
index fd1d6c83f0c0..c4fa242dd652 100644
--- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crc32c-vpmsum_glue.c
@@ -1,10 +1,12 @@
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
+#include <asm/simd.h>
#include <asm/switch_to.h>
#define CHKSUM_BLOCK_SIZE 1
@@ -22,7 +24,7 @@ static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len)
unsigned int prealign;
unsigned int tail;
- if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt())
+ if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable())
return __crc32c_le(crc, p, len);
if ((unsigned long)p & VMX_ALIGN_MASK) {
diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c
index 02ea277863d1..e27ff16573b5 100644
--- a/arch/powerpc/crypto/crct10dif-vpmsum_glue.c
+++ b/arch/powerpc/crypto/crct10dif-vpmsum_glue.c
@@ -12,11 +12,13 @@
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <linux/cpufeature.h>
+#include <asm/simd.h>
#include <asm/switch_to.h>
#define VMX_ALIGN 16
@@ -32,7 +34,7 @@ static u16 crct10dif_vpmsum(u16 crci, unsigned char const *p, size_t len)
unsigned int tail;
u32 crc = crci;
- if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || in_interrupt())
+ if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable())
return crc_t10dif_generic(crc, p, len);
if ((unsigned long)p & VMX_ALIGN_MASK) {
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a0c132bedfae..b9f6e72bf4e5 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -8,6 +8,6 @@ generic-y += irq_regs.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += vtime.h
generic-y += msi.h
+generic-y += simd.h
diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h
new file mode 100644
index 000000000000..677e9babef80
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -0,0 +1,145 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_32_KUP_H
+#define _ASM_POWERPC_BOOK3S_32_KUP_H
+
+#include <asm/book3s/32/mmu-hash.h>
+
+#ifdef __ASSEMBLY__
+
+.macro kuep_update_sr gpr1, gpr2 /* NEVER use r0 as gpr2 due to addis */
+101: mtsrin \gpr1, \gpr2
+ addi \gpr1, \gpr1, 0x111 /* next VSID */
+ rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */
+ addis \gpr2, \gpr2, 0x1000 /* address of next segment */
+ bdnz 101b
+ isync
+.endm
+
+.macro kuep_lock gpr1, gpr2
+#ifdef CONFIG_PPC_KUEP
+ li \gpr1, NUM_USER_SEGMENTS
+ li \gpr2, 0
+ mtctr \gpr1
+ mfsrin \gpr1, \gpr2
+ oris \gpr1, \gpr1, SR_NX@h /* set Nx */
+ kuep_update_sr \gpr1, \gpr2
+#endif
+.endm
+
+.macro kuep_unlock gpr1, gpr2
+#ifdef CONFIG_PPC_KUEP
+ li \gpr1, NUM_USER_SEGMENTS
+ li \gpr2, 0
+ mtctr \gpr1
+ mfsrin \gpr1, \gpr2
+ rlwinm \gpr1, \gpr1, 0, ~SR_NX /* Clear Nx */
+ kuep_update_sr \gpr1, \gpr2
+#endif
+.endm
+
+#ifdef CONFIG_PPC_KUAP
+
+.macro kuap_update_sr gpr1, gpr2, gpr3 /* NEVER use r0 as gpr2 due to addis */
+101: mtsrin \gpr1, \gpr2
+ addi \gpr1, \gpr1, 0x111 /* next VSID */
+ rlwinm \gpr1, \gpr1, 0, 0xf0ffffff /* clear VSID overflow */
+ addis \gpr2, \gpr2, 0x1000 /* address of next segment */
+ cmplw \gpr2, \gpr3
+ blt- 101b
+ isync
+.endm
+
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+ lwz \gpr2, KUAP(\thread)
+ rlwinm. \gpr3, \gpr2, 28, 0xf0000000
+ stw \gpr2, STACK_REGS_KUAP(\sp)
+ beq+ 102f
+ li \gpr1, 0
+ stw \gpr1, KUAP(\thread)
+ mfsrin \gpr1, \gpr2
+ oris \gpr1, \gpr1, SR_KS@h /* set Ks */
+ kuap_update_sr \gpr1, \gpr2, \gpr3
+102:
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+ lwz \gpr2, STACK_REGS_KUAP(\sp)
+ rlwinm. \gpr3, \gpr2, 28, 0xf0000000
+ stw \gpr2, THREAD + KUAP(\current)
+ beq+ 102f
+ mfsrin \gpr1, \gpr2
+ rlwinm \gpr1, \gpr1, 0, ~SR_KS /* Clear Ks */
+ kuap_update_sr \gpr1, \gpr2, \gpr3
+102:
+.endm
+
+.macro kuap_check current, gpr
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ lwz \gpr2, KUAP(thread)
+999: twnei \gpr, 0
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+.endm
+
+#endif /* CONFIG_PPC_KUAP */
+
+#else /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <linux/sched.h>
+
+static inline void kuap_update_sr(u32 sr, u32 addr, u32 end)
+{
+ barrier(); /* make sure thread.kuap is updated before playing with SRs */
+ while (addr < end) {
+ mtsrin(sr, addr);
+ sr += 0x111; /* next VSID */
+ sr &= 0xf0ffffff; /* clear VSID overflow */
+ addr += 0x10000000; /* address of next segment */
+ }
+ isync(); /* Context sync required after mtsrin() */
+}
+
+static inline void allow_user_access(void __user *to, const void __user *from, u32 size)
+{
+ u32 addr, end;
+
+ if (__builtin_constant_p(to) && to == NULL)
+ return;
+
+ addr = (__force u32)to;
+
+ if (!addr || addr >= TASK_SIZE || !size)
+ return;
+
+ end = min(addr + size, TASK_SIZE);
+ current->thread.kuap = (addr & 0xf0000000) | ((((end - 1) >> 28) + 1) & 0xf);
+ kuap_update_sr(mfsrin(addr) & ~SR_KS, addr, end); /* Clear Ks */
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from, u32 size)
+{
+ u32 addr = (__force u32)to;
+ u32 end = min(addr + size, TASK_SIZE);
+
+ if (!addr || addr >= TASK_SIZE || !size)
+ return;
+
+ current->thread.kuap = 0;
+ kuap_update_sr(mfsrin(addr) | SR_KS, addr, end); /* set Ks */
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ if (!is_write)
+ return false;
+
+ return WARN(!regs->kuap, "Bug: write fault blocked by segment registers !");
+}
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_32_KUP_H */
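
The 32-bit KUAP helpers above walk the user buffer one 256MB segment at a time, stepping the VSID by 0x111 per segment and masking off the overflow, while thread.kuap records the start segment and an end-segment count in a single word. A standalone sketch of just that arithmetic, with made-up register and address values:

/* Illustration of the segment-register walk and thread.kuap encoding used by
 * kuap_update_sr()/allow_user_access() above. All values are made up.
 */
#include <stdio.h>

int main(void)
{
	unsigned int sr = 0x2000;		/* starting VSID, made up */
	unsigned long addr = 0x10000000;	/* user buffer start */
	unsigned long end  = 0x30000001;	/* crosses three segments */

	/* Pack start segment and end-segment count like thread.kuap. */
	unsigned long kuap = (addr & 0xf0000000) |
			     ((((end - 1) >> 28) + 1) & 0xf);
	printf("thread.kuap = %#lx\n", kuap);

	/* One segment-register update per 256MB segment in [addr, end). */
	while (addr < end) {
		printf("SR for segment %#lx gets VSID %#x\n",
		       addr >> 28, sr);
		sr += 0x111;		/* next VSID */
		sr &= 0xf0ffffff;	/* clear VSID overflow */
		addr += 0x10000000;	/* next 256MB segment */
	}
	return 0;
}
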
diff --git a/arch/powerpc/include/asm/book3s/32/mmu-hash.h b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
index 5cb588395fdc..2e277ca0170f 100644
--- a/arch/powerpc/include/asm/book3s/32/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/32/mmu-hash.h
@@ -10,8 +10,6 @@
* BATs
*/
-#include <asm/page.h>
-
/* Block size masks */
#define BL_128K 0x000
#define BL_256K 0x001
@@ -49,8 +47,6 @@ struct ppc_bat {
u32 batu;
u32 batl;
};
-
-typedef pte_t *pgtable_t;
#endif /* !__ASSEMBLY__ */
/*
@@ -63,6 +59,11 @@ typedef pte_t *pgtable_t;
#define PP_RWRW 2 /* Supervisor read/write, User read/write */
#define PP_RXRX 3 /* Supervisor read, User read */
+/* Values for Segment Registers */
+#define SR_NX 0x10000000 /* No Execute */
+#define SR_KP 0x20000000 /* User key */
+#define SR_KS 0x40000000 /* Supervisor key */
+
#ifndef __ASSEMBLY__
/*
diff --git a/arch/powerpc/include/asm/book3s/32/pgalloc.h b/arch/powerpc/include/asm/book3s/32/pgalloc.h
index 3633502e102c..998317702630 100644
--- a/arch/powerpc/include/asm/book3s/32/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/32/pgalloc.h
@@ -5,28 +5,6 @@
#include <linux/threads.h>
#include <linux/slab.h>
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern void __bad_pte(pmd_t *pmd);
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
{
return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
@@ -59,24 +37,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
*pmdp = __pmd(__pa(pte_page) | _PMD_PRESENT);
}
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm);
-void pte_frag_destroy(void *pte_frag);
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void pgtable_free(void *table, unsigned index_size)
{
if (!index_size) {
@@ -87,7 +47,6 @@ static inline void pgtable_free(void *table, unsigned index_size)
}
}
-#define check_pgt_cache() do { } while (0)
#define get_hugepd_cache_index(x) (x)
#ifdef CONFIG_SMP
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index aa8406b8f7ba..838de59f6754 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -134,15 +134,24 @@ static inline bool pte_user(pte_t pte)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
+
+#ifndef __ASSEMBLY__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+
+#endif /* !__ASSEMBLY__ */
+
/*
* This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
* value (for now) on others, from where we can start layout kernel
* virtual space that goes below PKMAP and FIXMAP
*/
+#include <asm/fixmap.h>
+
#ifdef CONFIG_HIGHMEM
#define KVIRT_TOP PKMAP_BASE
#else
-#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */
+#define KVIRT_TOP FIXADDR_START
#endif
/*
@@ -373,8 +382,6 @@ static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
-
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
static inline int pte_read(pte_t pte) { return 1; }
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index cf5ba5254299..8fd8599c9395 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -2,10 +2,10 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_4K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_4K_H
-#define H_PTE_INDEX_SIZE 9
-#define H_PMD_INDEX_SIZE 7
-#define H_PUD_INDEX_SIZE 9
-#define H_PGD_INDEX_SIZE 9
+#define H_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 4KB = 2MB
+#define H_PMD_INDEX_SIZE 7 // size: 8B << 7 = 1KB, maps: 2^7 x 2MB = 256MB
+#define H_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 256MB = 128GB
+#define H_PGD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps: 2^9 x 128GB = 64TB
/*
* Each context is 512TB. But on 4k we restrict our max TASK size to 64TB
@@ -13,6 +13,21 @@
*/
#define MAX_EA_BITS_PER_CONTEXT 46
+#define REGION_SHIFT (MAX_EA_BITS_PER_CONTEXT - 2)
+
+/*
+ * Our page tables limit us to 64TB. Hence for the kernel mapping,
+ * each MAP area is limited to 16TB.
+ * The four map areas are: linear mapping, vmap, IO and vmemmap.
+ */
+#define H_KERN_MAP_SIZE (ASM_CONST(1) << REGION_SHIFT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 16TB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc000100000000000)
+
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)
#define H_PMD_TABLE_SIZE (sizeof(pmd_t) << H_PMD_INDEX_SIZE)
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index f82ee8a3b561..d1d9177d9ebd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -2,16 +2,29 @@
#ifndef _ASM_POWERPC_BOOK3S_64_HASH_64K_H
#define _ASM_POWERPC_BOOK3S_64_HASH_64K_H
-#define H_PTE_INDEX_SIZE 8
-#define H_PMD_INDEX_SIZE 10
-#define H_PUD_INDEX_SIZE 10
-#define H_PGD_INDEX_SIZE 8
+#define H_PTE_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 64KB = 16MB
+#define H_PMD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16MB = 16GB
+#define H_PUD_INDEX_SIZE 10 // size: 8B << 10 = 8KB, maps 2^10 x 16GB = 16TB
+#define H_PGD_INDEX_SIZE 8 // size: 8B << 8 = 2KB, maps 2^8 x 16TB = 4PB
+
/*
* Each context is 512TB size. SLB miss for first context/default context
* is handled in the hotpath.
*/
#define MAX_EA_BITS_PER_CONTEXT 49
+#define REGION_SHIFT MAX_EA_BITS_PER_CONTEXT
+
+/*
+ * We use one context for each MAP area.
+ */
+#define H_KERN_MAP_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+
+/*
+ * Define the address range of the kernel non-linear virtual area
+ * 2PB
+ */
+#define H_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
* 64k aligned address free up few of the lower bits of RPN for us
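
The new annotations on the hash page-table index sizes follow one rule: each level's table occupies 8 bytes << INDEX_SIZE and covers 2^INDEX_SIZE times what the level below covers. A quick standalone check of the 64K-page numbers (the 4K case works the same way):

/* Verify the "size/maps" annotations on the 64K hash page-table geometry. */
#include <stdio.h>

int main(void)
{
	unsigned int idx[] = { 8, 10, 10, 8 };		/* PTE, PMD, PUD, PGD */
	const char *name[] = { "PTE", "PMD", "PUD", "PGD" };
	unsigned long long cover = 64 * 1024;		/* 64KB base page */

	for (int i = 0; i < 4; i++) {
		unsigned long long table = 8ULL << idx[i];	/* 8B entries */
		cover <<= idx[i];			/* coverage grows per level */
		printf("%s: table %llu KB, maps %llu MB\n",
		       name[i], table >> 10, cover >> 20);
	}
	return 0;
}

This prints 2KB/16MB, 8KB/16GB, 8KB/16TB and 2KB/4PB for PTE, PMD, PUD and PGD, matching the comments added in hash-64k.h.
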
diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h
index 54b7af6cd27f..1d1183048cfd 100644
--- a/arch/powerpc/include/asm/book3s/64/hash.h
+++ b/arch/powerpc/include/asm/book3s/64/hash.h
@@ -29,6 +29,10 @@
#define H_PGTABLE_EADDR_SIZE (H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + \
H_PUD_INDEX_SIZE + H_PGD_INDEX_SIZE + PAGE_SHIFT)
#define H_PGTABLE_RANGE (ASM_CONST(1) << H_PGTABLE_EADDR_SIZE)
+/*
+ * Top 2 bits are ignored in page table walk.
+ */
+#define EA_MASK (~(0xcUL << 60))
/*
* We store the slot details in the second half of page table.
@@ -42,59 +46,63 @@
#endif
/*
- * Define the address range of the kernel non-linear virtual area. In contrast
- * to the linear mapping, this is managed using the kernel page tables and then
- * inserted into the hash page table to actually take effect, similarly to user
- * mappings.
+ * +------------------------------+
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel virtual map end (0xc00e000000000000)
+ * | |
+ * | |
+ * | 512TB/16TB of vmemmap |
+ * | |
+ * | |
+ * +------------------------------+ Kernel vmemmap start
+ * | |
+ * | 512TB/16TB of IO map |
+ * | |
+ * +------------------------------+ Kernel IO map start
+ * | |
+ * | 512TB/16TB of vmap |
+ * | |
+ * +------------------------------+ Kernel virt start (0xc008000000000000)
+ * | |
+ * | |
+ * | |
+ * +------------------------------+ Kernel linear (0xc.....)
*/
-#define H_KERN_VIRT_START ASM_CONST(0xD000000000000000)
-/*
- * Allow virtual mapping of one context size.
- * 512TB for 64K page size
- * 64TB for 4K page size
- */
-#define H_KERN_VIRT_SIZE (1UL << MAX_EA_BITS_PER_CONTEXT)
+#define H_VMALLOC_START H_KERN_VIRT_START
+#define H_VMALLOC_SIZE H_KERN_MAP_SIZE
+#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
-/*
- * 8TB IO mapping size
- */
-#define H_KERN_IO_SIZE ASM_CONST(0x80000000000) /* 8T */
+#define H_KERN_IO_START H_VMALLOC_END
+#define H_KERN_IO_SIZE H_KERN_MAP_SIZE
+#define H_KERN_IO_END (H_KERN_IO_START + H_KERN_IO_SIZE)
-/*
- * The vmalloc space starts at the beginning of the kernel non-linear virtual
- * region, and occupies 504T (64K) or 56T (4K)
- */
-#define H_VMALLOC_START H_KERN_VIRT_START
-#define H_VMALLOC_SIZE (H_KERN_VIRT_SIZE - H_KERN_IO_SIZE)
-#define H_VMALLOC_END (H_VMALLOC_START + H_VMALLOC_SIZE)
+#define H_VMEMMAP_START H_KERN_IO_END
+#define H_VMEMMAP_SIZE H_KERN_MAP_SIZE
+#define H_VMEMMAP_END (H_VMEMMAP_START + H_VMEMMAP_SIZE)
-#define H_KERN_IO_START H_VMALLOC_END
+#define NON_LINEAR_REGION_ID(ea) ((((unsigned long)ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2)
/*
* Region IDs
*/
-#define REGION_SHIFT 60UL
-#define REGION_MASK (0xfUL << REGION_SHIFT)
-#define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT)
-
-#define VMALLOC_REGION_ID (REGION_ID(H_VMALLOC_START))
-#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
-#define USER_REGION_ID (0UL)
+#define USER_REGION_ID 0
+#define LINEAR_MAP_REGION_ID 1
+#define VMALLOC_REGION_ID NON_LINEAR_REGION_ID(H_VMALLOC_START)
+#define IO_REGION_ID NON_LINEAR_REGION_ID(H_KERN_IO_START)
+#define VMEMMAP_REGION_ID NON_LINEAR_REGION_ID(H_VMEMMAP_START)
/*
* Defines the address of the vmemap area, in its own region on
* hash table CPUs.
*/
-#define H_VMEMMAP_BASE (VMEMMAP_REGION_ID << REGION_SHIFT)
-
#ifdef CONFIG_PPC_MM_SLICES
#define HAVE_ARCH_UNMAPPED_AREA
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
#endif /* CONFIG_PPC_MM_SLICES */
-
/* PTEIDX nibble */
#define _PTEIDX_SECONDARY 0x8
#define _PTEIDX_GROUP_IX 0x7
@@ -103,6 +111,25 @@
#define H_PUD_BAD_BITS (PMD_TABLE_SIZE-1)
#ifndef __ASSEMBLY__
+static inline int get_region_id(unsigned long ea)
+{
+ int region_id;
+ int id = (ea >> 60UL);
+
+ if (id == 0)
+ return USER_REGION_ID;
+
+ if (ea < H_KERN_VIRT_START)
+ return LINEAR_MAP_REGION_ID;
+
+ VM_BUG_ON(id != 0xc);
+ BUILD_BUG_ON(NON_LINEAR_REGION_ID(H_VMALLOC_START) != 2);
+
+ region_id = NON_LINEAR_REGION_ID(ea);
+ VM_BUG_ON(region_id > VMEMMAP_REGION_ID);
+ return region_id;
+}
+
#define hash__pmd_bad(pmd) (pmd_val(pmd) & H_PMD_BAD_BITS)
#define hash__pud_bad(pud) (pud_val(pud) & H_PUD_BAD_BITS)
static inline int hash__pgd_bad(pgd_t pgd)
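
With the kernel non-linear area split into equal-sized maps, get_region_id() reduces to simple arithmetic: user addresses are region 0, kernel addresses below H_KERN_VIRT_START are the linear map (region 1), and everything above is the offset from H_KERN_VIRT_START shifted right by REGION_SHIFT, plus 2. A standalone check using the 64K-page constants quoted above (REGION_SHIFT = 49, H_KERN_VIRT_START = 0xc008000000000000):

/* Check the NON_LINEAR_REGION_ID()/get_region_id() arithmetic with the
 * 64K-page constants from hash-64k.h. Pure arithmetic, no kernel headers.
 */
#include <stdio.h>

#define REGION_SHIFT		49UL
#define H_KERN_VIRT_START	0xc008000000000000UL
#define H_KERN_MAP_SIZE		(1UL << REGION_SHIFT)

static int region_id(unsigned long ea)
{
	if ((ea >> 60) == 0)
		return 0;				/* user */
	if (ea < H_KERN_VIRT_START)
		return 1;				/* linear map */
	return ((ea - H_KERN_VIRT_START) >> REGION_SHIFT) + 2;
}

int main(void)
{
	unsigned long vmalloc = H_KERN_VIRT_START;
	unsigned long io      = vmalloc + H_KERN_MAP_SIZE;
	unsigned long vmemmap = io + H_KERN_MAP_SIZE;

	printf("vmalloc start %#lx -> region %d\n", vmalloc, region_id(vmalloc));
	printf("io start      %#lx -> region %d\n", io, region_id(io));
	printf("vmemmap start %#lx -> region %d\n", vmemmap, region_id(vmemmap));
	printf("linear        %#lx -> region %d\n", 0xc000000000000000UL,
	       region_id(0xc000000000000000UL));
	return 0;
}

The printed IDs line up with the new macros (vmalloc 2, IO 3, vmemmap 4, linear 1), and the vmemmap area ends at 0xc00e000000000000, matching the "Kernel virtual map end" line in the diagram above.
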
diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index ec2a55a553c7..56140d19c85f 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -62,4 +62,76 @@ extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
pte_t old_pte, pte_t new_pte);
+/*
+ * This should work for other subarchs too. But right now we use the
+ * new format only for 64-bit book3s.
+ */
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+ BUG_ON(!hugepd_ok(hpd));
+ /*
+ * We have only four bits to encode the MMU page size
+ */
+ BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
+ return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK);
+}
+
+static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
+{
+ return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2;
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+ return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
+}
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ if (radix_enabled())
+ return radix__flush_hugetlb_page(vma, vmaddr);
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+ unsigned int pdshift)
+{
+ unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
+
+ return hugepd_page(hpd) + idx;
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+ *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS | (shift_to_mmu_psize(pshift) << 2));
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ int mmu_psize;
+
+ if (shift > SLICE_HIGH_SHIFT)
+ return -EINVAL;
+
+ mmu_psize = shift_to_mmu_psize(shift);
+
+ /*
+ * Of the page sizes reported by firmware, we only add hugetlb
+ * support for those that the Linux page table layout can
+ * represent. For now these are:
+ * Radix: 2M and 1G
+ * Hash: 16M and 16G
+ */
+ if (radix_enabled()) {
+ if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
+ return -EINVAL;
+ } else {
+ if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
+ return -EINVAL;
+ }
+ return mmu_psize;
+}
+
#endif
diff --git a/arch/powerpc/include/asm/book3s/64/kup-radix.h b/arch/powerpc/include/asm/book3s/64/kup-radix.h
new file mode 100644
index 000000000000..f254de956d6a
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kup-radix.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
+#define _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H
+
+#include <linux/const.h>
+
+#define AMR_KUAP_BLOCK_READ UL(0x4000000000000000)
+#define AMR_KUAP_BLOCK_WRITE UL(0x8000000000000000)
+#define AMR_KUAP_BLOCKED (AMR_KUAP_BLOCK_READ | AMR_KUAP_BLOCK_WRITE)
+#define AMR_KUAP_SHIFT 62
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_restore_amr gpr
+#ifdef CONFIG_PPC_KUAP
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ ld \gpr, STACK_REGS_KUAP(r1)
+ mtspr SPRN_AMR, \gpr
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+.macro kuap_check_amr gpr1, gpr2
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ mfspr \gpr1, SPRN_AMR
+ li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
+ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
+999: tdne \gpr1, \gpr2
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+.macro kuap_save_amr_and_lock gpr1, gpr2, use_cr, msr_pr_cr
+#ifdef CONFIG_PPC_KUAP
+ BEGIN_MMU_FTR_SECTION_NESTED(67)
+ .ifnb \msr_pr_cr
+ bne \msr_pr_cr, 99f
+ .endif
+ mfspr \gpr1, SPRN_AMR
+ std \gpr1, STACK_REGS_KUAP(r1)
+ li \gpr2, (AMR_KUAP_BLOCKED >> AMR_KUAP_SHIFT)
+ sldi \gpr2, \gpr2, AMR_KUAP_SHIFT
+ cmpd \use_cr, \gpr1, \gpr2
+ beq \use_cr, 99f
+ // We don't isync here because we very recently entered via rfid
+ mtspr SPRN_AMR, \gpr2
+ isync
+99:
+ END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_RADIX_KUAP, 67)
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#ifdef CONFIG_PPC_KUAP
+
+#include <asm/reg.h>
+
+/*
+ * We support individually allowing read or write, but we don't support nesting
+ * because that would require an expensive read-modify-write of the AMR.
+ */
+
+static inline void set_kuap(unsigned long value)
+{
+ if (!early_mmu_has_feature(MMU_FTR_RADIX_KUAP))
+ return;
+
+ /*
+ * ISA v3.0B says we need a CSI (Context Synchronising Instruction) both
+ * before and after the move to AMR. See table 6 on page 1134.
+ */
+ isync();
+ mtspr(SPRN_AMR, value);
+ isync();
+}
+
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ // This is written so we can resolve to a single case at build time
+ if (__builtin_constant_p(to) && to == NULL)
+ set_kuap(AMR_KUAP_BLOCK_WRITE);
+ else if (__builtin_constant_p(from) && from == NULL)
+ set_kuap(AMR_KUAP_BLOCK_READ);
+ else
+ set_kuap(0);
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ set_kuap(AMR_KUAP_BLOCKED);
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ return WARN(mmu_has_feature(MMU_FTR_RADIX_KUAP) &&
+ (regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ)),
+ "Bug: %s fault blocked by AMR!", is_write ? "Write" : "Read");
+}
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_BOOK3S_64_KUP_RADIX_H */
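
A side note on the assembly above: kuap_save_amr_and_lock builds the blocked AMR value with li + sldi, which works because both block bits sit at the very top of the register. A minimal standalone check of that arithmetic (plain C, not kernel code, values copied from the defines above):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint64_t block_read  = 0x4000000000000000ULL;	/* AMR_KUAP_BLOCK_READ */
	uint64_t block_write = 0x8000000000000000ULL;	/* AMR_KUAP_BLOCK_WRITE */
	uint64_t blocked = block_read | block_write;	/* AMR_KUAP_BLOCKED */
	uint64_t imm = blocked >> 62;			/* 0x3, small enough for li */

	assert(imm == 0x3);
	assert((imm << 62) == blocked);			/* sldi restores the full mask */
	return 0;
}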
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index a28a28079edb..1e4705516a54 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -588,7 +588,8 @@ extern void slb_set_size(u16 size);
#endif
#define MAX_VMALLOC_CTX_CNT 1
-#define MAX_MEMMAP_CTX_CNT 1
+#define MAX_IO_CTX_CNT 1
+#define MAX_VMEMMAP_CTX_CNT 1
/*
* 256MB segment
@@ -601,13 +602,10 @@ extern void slb_set_size(u16 size);
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
* because of the modulo operation in vsid scramble.
*
- * We add one extra context to MIN_USER_CONTEXT so that we can map kernel
- * context easily. The +1 is to map the unused 0xe region mapping.
*/
#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
- MAX_MEMMAP_CTX_CNT + 2)
-
+ MAX_IO_CTX_CNT + MAX_VMEMMAP_CTX_CNT)
/*
* For platforms that support on 65bit VA we limit the context bits
*/
@@ -657,8 +655,8 @@ extern void slb_set_size(u16 size);
/* 4 bits per slice and we have one slice per 1TB */
#define SLICE_ARRAY_SIZE (H_PGTABLE_RANGE >> 41)
-#define TASK_SLICE_ARRAY_SZ(x) ((x)->context.slb_addr_limit >> 41)
-
+#define LOW_SLICE_ARRAY_SZ (BITS_PER_LONG / BITS_PER_BYTE)
+#define TASK_SLICE_ARRAY_SZ(x) ((x)->hash_context->slb_addr_limit >> 41)
#ifndef __ASSEMBLY__
#ifdef CONFIG_PPC_SUBPAGE_PROT
@@ -687,12 +685,41 @@ struct subpage_prot_table {
#define SBP_L3_SHIFT (SBP_L2_SHIFT + SBP_L2_BITS)
extern void subpage_prot_free(struct mm_struct *mm);
-extern void subpage_prot_init_new_context(struct mm_struct *mm);
#else
static inline void subpage_prot_free(struct mm_struct *mm) {}
-static inline void subpage_prot_init_new_context(struct mm_struct *mm) { }
#endif /* CONFIG_PPC_SUBPAGE_PROT */
+/*
+ * One bit per slice. We have lower slices which cover 256MB segments
+ * up to the 4G range. That gets us 16 low slices. For the rest we track slices
+ * in 1TB size.
+ */
+struct slice_mask {
+ u64 low_slices;
+ DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
+};
+
+struct hash_mm_context {
+ u16 user_psize; /* page size index */
+
+ /* SLB page size encodings*/
+ unsigned char low_slices_psize[LOW_SLICE_ARRAY_SZ];
+ unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
+ unsigned long slb_addr_limit;
+#ifdef CONFIG_PPC_64K_PAGES
+ struct slice_mask mask_64k;
+#endif
+ struct slice_mask mask_4k;
+#ifdef CONFIG_HUGETLB_PAGE
+ struct slice_mask mask_16m;
+ struct slice_mask mask_16g;
+#endif
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ struct subpage_prot_table *spt;
+#endif /* CONFIG_PPC_SUBPAGE_PROT */
+};
+
#if 0
/*
* The code below is equivalent to this function for arguments
@@ -747,7 +774,7 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
/*
* Bad address. We return VSID 0 for that
*/
- if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
+ if ((ea & EA_MASK) >= H_PGTABLE_RANGE)
return 0;
if (!mmu_has_feature(MMU_FTR_68_BIT_VA))
@@ -774,28 +801,29 @@ static inline unsigned long get_vsid(unsigned long context, unsigned long ea,
* 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
* 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
* 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
-
- * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
- * 0x00006 - Not used - Can map 0xe000000000000000 range.
- * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
*
- * So we can compute the context from the region (top nibble) by
- * subtracting 11, or 0xc - 1.
+ * vmap, IO, vmemmap
+ *
+ * 0x00005 - [ 0xc008000000000000 - 0xc009ffffffffffff]
+ * 0x00006 - [ 0xc00a000000000000 - 0xc00bffffffffffff]
+ * 0x00007 - [ 0xc00c000000000000 - 0xc00dffffffffffff]
+ *
*/
static inline unsigned long get_kernel_context(unsigned long ea)
{
- unsigned long region_id = REGION_ID(ea);
+ unsigned long region_id = get_region_id(ea);
unsigned long ctx;
/*
- * For linear mapping we do support multiple context
+ * Depending on the kernel config, a kernel region can have one or more
+ * contexts.
*/
- if (region_id == KERNEL_REGION_ID) {
+ if (region_id == LINEAR_MAP_REGION_ID) {
/*
* We already verified ea to be not beyond the addr limit.
*/
- ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+ ctx = 1 + ((ea & EA_MASK) >> MAX_EA_BITS_PER_CONTEXT);
} else
- ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
+ ctx = region_id + MAX_KERNEL_CTX_CNT - 1;
return ctx;
}
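
A worked example of the linear-map branch, assuming EA_MASK strips the 0xc region bits and MAX_EA_BITS_PER_CONTEXT is 49 as noted elsewhere in this series:

/*
 * ea  = 0xc003000000000000                (linear mapping)
 * off = ea & EA_MASK    = 0x0003000000000000
 * ctx = 1 + (off >> 49) = 1 + 1 = 2
 *
 * which matches the table above: context 2 covers
 * 0xc002000000000000 - 0xc003ffffffffffff.  The vmalloc, IO and vmemmap
 * regions fall through to the else branch and get contexts 5, 6 and 7.
 */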
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 1ceee000c18d..74d24201fc4f 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -25,15 +25,22 @@ struct mmu_psize_def {
};
};
extern struct mmu_psize_def mmu_psize_defs[MMU_PAGE_COUNT];
+#endif /* __ASSEMBLY__ */
/*
- * For BOOK3s 64 with 4k and 64K linux page size
- * we want to use pointers, because the page table
- * actually store pfn
+ * If we store section details in page->flags we can't increase MAX_PHYSMEM_BITS:
+ * if SECTIONS_WIDTH grows, node details no longer fit in page->flags and
+ * page_to_nid() falls back to a page->section->node lookup.
+ * Hence only increase it for VMEMMAP. Further, SPARSEMEM_EXTREME reduces the
+ * memory requirements when there is a large number of sections.
+ * 51 bits is the max physical real address on POWER9.
*/
-typedef pte_t *pgtable_t;
-
-#endif /* __ASSEMBLY__ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
+ defined(CONFIG_PPC_64K_PAGES)
+#define MAX_PHYSMEM_BITS 51
+#else
+#define MAX_PHYSMEM_BITS 46
+#endif
/* 64-bit classic hash table MMU */
#include <asm/book3s/64/mmu-hash.h>
@@ -89,16 +96,6 @@ struct spinlock;
/* Maximum possible number of NPUs in a system. */
#define NV_MAX_NPUS 8
-/*
- * One bit per slice. We have lower slices which cover 256MB segments
- * upto 4G range. That gets us 16 low slices. For the rest we track slices
- * in 1TB size.
- */
-struct slice_mask {
- u64 low_slices;
- DECLARE_BITMAP(high_slices, SLICE_NUM_HIGH);
-};
-
typedef struct {
union {
/*
@@ -112,7 +109,6 @@ typedef struct {
mm_context_id_t id;
mm_context_id_t extended_id[TASK_SIZE_USER64/TASK_CONTEXT_SIZE];
};
- u16 user_psize; /* page size index */
/* Number of bits in the mm_cpumask */
atomic_t active_cpus;
@@ -122,27 +118,9 @@ typedef struct {
/* NPU NMMU context */
struct npu_context *npu_context;
+ struct hash_mm_context *hash_context;
-#ifdef CONFIG_PPC_MM_SLICES
- /* SLB page size encodings*/
- unsigned char low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
- unsigned char high_slices_psize[SLICE_ARRAY_SIZE];
- unsigned long slb_addr_limit;
-# ifdef CONFIG_PPC_64K_PAGES
- struct slice_mask mask_64k;
-# endif
- struct slice_mask mask_4k;
-# ifdef CONFIG_HUGETLB_PAGE
- struct slice_mask mask_16m;
- struct slice_mask mask_16g;
-# endif
-#else
- u16 sllp; /* SLB page size encoding */
-#endif
unsigned long vdso_base;
-#ifdef CONFIG_PPC_SUBPAGE_PROT
- struct subpage_prot_table spt;
-#endif /* CONFIG_PPC_SUBPAGE_PROT */
/*
* pagetable fragment support
*/
@@ -163,6 +141,60 @@ typedef struct {
#endif
} mm_context_t;
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+ return ctx->hash_context->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+ ctx->hash_context->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+ return ctx->hash_context->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+ return ctx->hash_context->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+ ctx->hash_context->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
+{
+#ifdef CONFIG_PPC_64K_PAGES
+ if (psize == MMU_PAGE_64K)
+ return &ctx->hash_context->mask_64k;
+#endif
+#ifdef CONFIG_HUGETLB_PAGE
+ if (psize == MMU_PAGE_16M)
+ return &ctx->hash_context->mask_16m;
+ if (psize == MMU_PAGE_16G)
+ return &ctx->hash_context->mask_16g;
+#endif
+ BUG_ON(psize != MMU_PAGE_4K);
+
+ return &ctx->hash_context->mask_4k;
+}
+
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+static inline struct subpage_prot_table *mm_ctx_subpage_prot(mm_context_t *ctx)
+{
+ return ctx->hash_context->spt;
+}
+#endif
+
/*
* The current system page and segment sizes
*/
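
Callers are expected to go through these accessors rather than reaching into mm->context fields directly; a hedged sketch of the intended usage (the function names here are illustrative, not part of the patch):

/* Illustrative only: how slice code would use the new accessors. */
static unsigned long example_addr_limit(struct mm_struct *mm)
{
	/* was mm->context.slb_addr_limit before this series */
	return mm_ctx_slb_addr_limit(&mm->context);
}

static struct slice_mask *example_mask(struct mm_struct *mm, int psize)
{
	/* resolves to mask_64k/mask_16m/mask_16g or the 4k fallback */
	return slice_mask_for_size(&mm->context, psize);
}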
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 138bc2ecc0c4..d45e4449619f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -19,29 +19,7 @@ struct vmemmap_backing {
};
extern struct vmemmap_backing *vmemmap_list;
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-extern pte_t *pte_fragment_alloc(struct mm_struct *, int);
extern pmd_t *pmd_fragment_alloc(struct mm_struct *, unsigned long);
-extern void pte_fragment_free(unsigned long *, int);
extern void pmd_fragment_free(unsigned long *);
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift);
#ifdef CONFIG_SMP
@@ -81,6 +59,9 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
pgd = kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
pgtable_gfp_flags(mm, GFP_KERNEL));
+ if (unlikely(!pgd))
+ return pgd;
+
/*
* Don't scan the PGD for pointers, it contains references to PUDs but
* those references are not full pointers and so can't be recognised by
@@ -185,31 +166,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
*pmd = __pmd(__pgtable_ptr_val(pte_page) | PMD_VAL_BITS);
}
-static inline pgtable_t pmd_pgtable(pmd_t pmd)
-{
- return (pgtable_t)pmd_page_vaddr(pmd);
-}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
unsigned long address)
{
@@ -221,8 +177,6 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
pgtable_free_tlb(tlb, table, PTE_INDEX);
}
-#define check_pgt_cache() do { } while (0)
-
extern atomic_long_t direct_pages_count[MMU_PAGE_COUNT];
static inline void update_page_count(int psize, long count)
{
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 581f91be9dd4..7dede2e34b70 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -277,9 +277,11 @@ extern unsigned long __vmalloc_end;
extern unsigned long __kernel_virt_start;
extern unsigned long __kernel_virt_size;
extern unsigned long __kernel_io_start;
+extern unsigned long __kernel_io_end;
#define KERN_VIRT_START __kernel_virt_start
-#define KERN_VIRT_SIZE __kernel_virt_size
#define KERN_IO_START __kernel_io_start
+#define KERN_IO_END __kernel_io_end
+
extern struct page *vmemmap;
extern unsigned long ioremap_bot;
extern unsigned long pci_io_base;
@@ -296,8 +298,7 @@ extern unsigned long pci_io_base;
#include <asm/barrier.h>
/*
- * The second half of the kernel virtual space is used for IO mappings,
- * it's itself carved into the PIO region (ISA and PHB IO space) and
+ * IO space itself is carved into the PIO region (ISA and PHB IO space) and
* the ioremap space
*
* ISA_IO_BASE = KERN_IO_START, 64K reserved area
@@ -310,7 +311,7 @@ extern unsigned long pci_io_base;
#define PHB_IO_BASE (ISA_IO_END)
#define PHB_IO_END (KERN_IO_START + FULL_IO_SIZE)
#define IOREMAP_BASE (PHB_IO_END)
-#define IOREMAP_END (KERN_VIRT_START + KERN_VIRT_SIZE)
+#define IOREMAP_END (KERN_IO_END)
/* Advertise special mapping type for AGP */
#define HAVE_PAGE_AGP
@@ -992,7 +993,8 @@ extern struct page *pgd_page(pgd_t pgd);
(((pte_t *) pmd_page_vaddr(*(dir))) + pte_index(addr))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
+
+static inline void pte_unmap(pte_t *pte) { }
/* to find an entry in a kernel page-table-directory */
/* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index 863c3e8286fb..d5f5ab73dc7f 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -5,10 +5,11 @@
/*
* For 4K page size supported index is 13/9/9/9
*/
-#define RADIX_PTE_INDEX_SIZE 9 /* 2MB huge page */
-#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
-#define RADIX_PUD_INDEX_SIZE 9
-#define RADIX_PGD_INDEX_SIZE 13
+#define RADIX_PTE_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 4K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
+
/*
 * One fragment per page
*/
diff --git a/arch/powerpc/include/asm/book3s/64/radix-64k.h b/arch/powerpc/include/asm/book3s/64/radix-64k.h
index ccb78ca9d0c5..54e33828b0fb 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-64k.h
@@ -5,10 +5,10 @@
/*
* For 64K page size supported index is 13/9/9/5
*/
-#define RADIX_PTE_INDEX_SIZE 5 /* 2MB huge page */
-#define RADIX_PMD_INDEX_SIZE 9 /* 1G huge page */
-#define RADIX_PUD_INDEX_SIZE 9
-#define RADIX_PGD_INDEX_SIZE 13
+#define RADIX_PTE_INDEX_SIZE 5 // size: 8B << 5 = 256B, maps 2^5 x 64K = 2MB
+#define RADIX_PMD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 2MB = 1GB
+#define RADIX_PUD_INDEX_SIZE 9 // size: 8B << 9 = 4KB, maps 2^9 x 1GB = 512GB
+#define RADIX_PGD_INDEX_SIZE 13 // size: 8B << 13 = 64KB, maps 2^13 x 512GB = 4PB
/*
* We use a 256 byte PTE page fragment in radix
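
The new comments encode the same arithmetic for both page sizes; spelled out once for the 64K case (plain arithmetic, nothing new is defined):

/*
 * PTE level: 1 << 5  = 32 entries,   32   * 8B = 256B table,  32   * 64K   = 2MB span
 * PMD level: 1 << 9  = 512 entries,  512  * 8B = 4KB table,   512  * 2MB   = 1GB span
 * PUD level: 1 << 9  = 512 entries,  512  * 8B = 4KB table,   512  * 1GB   = 512GB span
 * PGD level: 1 << 13 = 8192 entries, 8192 * 8B = 64KB table,  8192 * 512GB = 4PB span
 */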
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index 5ab134eeed20..574eca33f893 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -72,19 +72,17 @@
* | |
* | |
* | |
- * +------------------------------+ Kernel IO map end (0xc010000000000000)
+ * +------------------------------+ Kernel vmemmap end (0xc010000000000000)
* | |
+ * | 512TB |
* | |
- * | 1/2 of virtual map |
+ * +------------------------------+ Kernel IO map end/vmemmap start
* | |
+ * | 512TB |
* | |
- * +------------------------------+ Kernel IO map start
+ * +------------------------------+ Kernel vmap end/ IO map start
* | |
- * | 1/4 of virtual map |
- * | |
- * +------------------------------+ Kernel vmemap start
- * | |
- * | 1/4 of virtual map |
+ * | 512TB |
* | |
* +------------------------------+ Kernel virt start (0xc008000000000000)
* | |
@@ -93,24 +91,24 @@
* +------------------------------+ Kernel linear (0xc.....)
*/
-#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
-#define RADIX_KERN_VIRT_SIZE ASM_CONST(0x0008000000000000)
-
+#define RADIX_KERN_VIRT_START ASM_CONST(0xc008000000000000)
/*
- * The vmalloc space starts at the beginning of that region, and
- * occupies a quarter of it on radix config.
- * (we keep a quarter for the virtual memmap)
+ * 49 = MAX_EA_BITS_PER_CONTEXT (hash specific), so that radix uses the
+ * same per-region size as hash.
*/
+#define RADIX_KERN_MAP_SIZE (1UL << 49)
+
#define RADIX_VMALLOC_START RADIX_KERN_VIRT_START
-#define RADIX_VMALLOC_SIZE (RADIX_KERN_VIRT_SIZE >> 2)
+#define RADIX_VMALLOC_SIZE RADIX_KERN_MAP_SIZE
#define RADIX_VMALLOC_END (RADIX_VMALLOC_START + RADIX_VMALLOC_SIZE)
-/*
- * Defines the address of the vmemap area, in its own region on
- * hash table CPUs.
- */
-#define RADIX_VMEMMAP_BASE (RADIX_VMALLOC_END)
-#define RADIX_KERN_IO_START (RADIX_KERN_VIRT_START + (RADIX_KERN_VIRT_SIZE >> 1))
+#define RADIX_KERN_IO_START RADIX_VMALLOC_END
+#define RADIX_KERN_IO_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_KERN_IO_END (RADIX_KERN_IO_START + RADIX_KERN_IO_SIZE)
+
+#define RADIX_VMEMMAP_START RADIX_KERN_IO_END
+#define RADIX_VMEMMAP_SIZE RADIX_KERN_MAP_SIZE
+#define RADIX_VMEMMAP_END (RADIX_VMEMMAP_START + RADIX_VMEMMAP_SIZE)
#ifndef __ASSEMBLY__
#define RADIX_PTE_TABLE_SIZE (sizeof(pte_t) << RADIX_PTE_INDEX_SIZE)
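
For reference, the region start addresses that fall straight out of these macros (plain arithmetic, 1UL << 49 = 0x0002000000000000):

/*
 * RADIX_VMALLOC_START = 0xc008000000000000
 * RADIX_KERN_IO_START = 0xc00a000000000000   (== RADIX_VMALLOC_END)
 * RADIX_VMEMMAP_START = 0xc00c000000000000   (== RADIX_KERN_IO_END)
 *
 * each region being RADIX_KERN_MAP_SIZE = 512TB.
 */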
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index db0dedab65ee..f0d3194ba41b 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,8 +2,6 @@
#ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
#define _ASM_POWERPC_BOOK3S_64_SLICE_H
-#ifdef CONFIG_PPC_MM_SLICES
-
#define SLICE_LOW_SHIFT 28
#define SLICE_LOW_TOP (0x100000000ul)
#define SLICE_NUM_LOW (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,15 +11,6 @@
#define SLICE_NUM_HIGH (H_PGTABLE_RANGE >> SLICE_HIGH_SHIFT)
#define GET_HIGH_SLICE_INDEX(addr) ((addr) >> SLICE_HIGH_SHIFT)
-#else /* CONFIG_PPC_MM_SLICES */
-
-#define get_slice_psize(mm, addr) ((mm)->context.user_psize)
-#define slice_set_user_psize(mm, psize) \
-do { \
- (mm)->context.user_psize = (psize); \
- (mm)->context.sllp = SLB_VSID_USER | mmu_psize_defs[(psize)].sllp; \
-} while (0)
-
-#endif /* CONFIG_PPC_MM_SLICES */
+#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64
#endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 43e5f31fe64d..9844b3ded187 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -27,10 +27,11 @@
* the THREAD_WINKLE_BITS are set, which indicate which threads have not
* yet woken from the winkle state.
*/
-#define PNV_CORE_IDLE_LOCK_BIT 0x10000000
+#define NR_PNV_CORE_IDLE_LOCK_BIT 28
+#define PNV_CORE_IDLE_LOCK_BIT (1ULL << NR_PNV_CORE_IDLE_LOCK_BIT)
+#define PNV_CORE_IDLE_WINKLE_COUNT_SHIFT 16
#define PNV_CORE_IDLE_WINKLE_COUNT 0x00010000
-#define PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT 0x00080000
#define PNV_CORE_IDLE_WINKLE_COUNT_BITS 0x000F0000
#define PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT 8
#define PNV_CORE_IDLE_THREAD_WINKLE_BITS 0x0000FF00
@@ -68,16 +69,6 @@
#define ERR_DEEP_STATE_ESL_MISMATCH -2
#ifndef __ASSEMBLY__
-/* Additional SPRs that need to be saved/restored during stop */
-struct stop_sprs {
- u64 pid;
- u64 ldbar;
- u64 fscr;
- u64 hfscr;
- u64 mmcr1;
- u64 mmcr2;
- u64 mmcra;
-};
#define PNV_IDLE_NAME_LEN 16
struct pnv_idle_states_t {
@@ -92,10 +83,6 @@ struct pnv_idle_states_t {
extern struct pnv_idle_states_t *pnv_idle_states;
extern int nr_pnv_idle_states;
-extern u32 pnv_fastsleep_workaround_at_entry[];
-extern u32 pnv_fastsleep_workaround_at_exit[];
-
-extern u64 pnv_first_deep_stop_state;
unsigned long pnv_cpu_offline(unsigned int cpu);
int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags);
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 7c1d8e74b25d..7f3279b014db 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -17,6 +17,9 @@ struct drmem_lmb {
u32 drc_index;
u32 aa_index;
u32 flags;
+#ifdef CONFIG_MEMORY_HOTPLUG
+ int nid;
+#endif
};
struct drmem_lmb_info {
@@ -104,4 +107,22 @@ static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
lmb->aa_index = 0xffffffff;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static inline void lmb_set_nid(struct drmem_lmb *lmb)
+{
+ lmb->nid = memory_add_physaddr_to_nid(lmb->base_addr);
+}
+static inline void lmb_clear_nid(struct drmem_lmb *lmb)
+{
+ lmb->nid = -1;
+}
+#else
+static inline void lmb_set_nid(struct drmem_lmb *lmb)
+{
+}
+static inline void lmb_clear_nid(struct drmem_lmb *lmb)
+{
+}
+#endif
+
#endif /* _ASM_POWERPC_LMB_H */
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 937bb630093f..bef4e05a6823 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -497,6 +497,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
RESTORE_CTR(r1, area); \
b bad_stack; \
3: EXCEPTION_PROLOG_COMMON_1(); \
+ kuap_save_amr_and_lock r9, r10, cr1, cr0; \
beq 4f; /* if from kernel mode */ \
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
SAVE_PPR(area, r9); \
@@ -691,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
*/
#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \
EXCEPTION_PROLOG_COMMON_1(); \
+ kuap_save_amr_and_lock r9, r10, cr1; \
EXCEPTION_PROLOG_COMMON_2(area); \
EXCEPTION_PROLOG_COMMON_3(trap); \
/* Volatile regs are potentially clobbered here */ \
diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h
index 188776befaf9..e2099c0a15c3 100644
--- a/arch/powerpc/include/asm/fadump.h
+++ b/arch/powerpc/include/asm/fadump.h
@@ -219,5 +219,6 @@ extern void fadump_cleanup(void);
static inline int is_fadump_active(void) { return 0; }
static inline int should_fadump_crash(void) { return 0; }
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
+static inline void fadump_cleanup(void) { }
#endif
#endif
diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h
index 40a6c9261a6b..f6fc31f8baff 100644
--- a/arch/powerpc/include/asm/feature-fixups.h
+++ b/arch/powerpc/include/asm/feature-fixups.h
@@ -100,6 +100,9 @@ label##5: \
#define END_MMU_FTR_SECTION(msk, val) \
END_MMU_FTR_SECTION_NESTED(msk, val, 97)
+#define END_MMU_FTR_SECTION_NESTED_IFSET(msk, label) \
+ END_MMU_FTR_SECTION_NESTED((msk), (msk), label)
+
#define END_MMU_FTR_SECTION_IFSET(msk) END_MMU_FTR_SECTION((msk), (msk))
#define END_MMU_FTR_SECTION_IFCLR(msk) END_MMU_FTR_SECTION((msk), 0)
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index b9fbed84ddca..0cfc365d814b 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -22,7 +22,12 @@
#include <asm/kmap_types.h>
#endif
+#ifdef CONFIG_KASAN
+#include <asm/kasan.h>
+#define FIXADDR_TOP (KASAN_SHADOW_START - PAGE_SIZE)
+#else
#define FIXADDR_TOP ((unsigned long)(-PAGE_SIZE))
+#endif
/*
* Here we define all the compile-time 'special' virtual
diff --git a/arch/powerpc/include/asm/futex.h b/arch/powerpc/include/asm/futex.h
index 88b38b37c21b..3a6aa57b9d90 100644
--- a/arch/powerpc/include/asm/futex.h
+++ b/arch/powerpc/include/asm/futex.h
@@ -35,6 +35,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
{
int oldval = 0, ret;
+ allow_write_to_user(uaddr, sizeof(*uaddr));
pagefault_disable();
switch (op) {
@@ -62,6 +63,7 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
if (!ret)
*oval = oldval;
+ prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;
}
@@ -75,6 +77,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
+ allow_write_to_user(uaddr, sizeof(*uaddr));
__asm__ __volatile__ (
PPC_ATOMIC_ENTRY_BARRIER
"1: lwarx %1,0,%3 # futex_atomic_cmpxchg_inatomic\n\
@@ -95,6 +98,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
: "cc", "memory");
*uval = prev;
+ prevent_write_to_user(uaddr, sizeof(*uaddr));
return ret;
}
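
The futex changes above follow the general KUAP pattern: open the user-access window, do the access, close the window again. A minimal sketch of that pattern for a single user store (the helper name is made up; __put_user is the generic kernel accessor):

static int example_store_to_user(u32 __user *uaddr, u32 val)
{
	int ret;

	allow_write_to_user(uaddr, sizeof(*uaddr));	/* open the window */
	ret = __put_user(val, uaddr);			/* only place user memory is touched */
	prevent_write_to_user(uaddr, sizeof(*uaddr));	/* close it again */

	return ret;
}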
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 8d40565ad0c3..20a101046cff 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -6,82 +6,16 @@
#include <asm/page.h>
#ifdef CONFIG_PPC_BOOK3S_64
-
#include <asm/book3s/64/hugetlb.h>
-/*
- * This should work for other subarchs too. But right now we use the
- * new format only for 64bit book3s
- */
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
- BUG_ON(!hugepd_ok(hpd));
- /*
- * We have only four bits to encode, MMU page size
- */
- BUILD_BUG_ON((MMU_PAGE_COUNT - 1) > 0xf);
- return __va(hpd_val(hpd) & HUGEPD_ADDR_MASK);
-}
-
-static inline unsigned int hugepd_mmu_psize(hugepd_t hpd)
-{
- return (hpd_val(hpd) & HUGEPD_SHIFT_MASK) >> 2;
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
- return mmu_psize_to_shift(hugepd_mmu_psize(hpd));
-}
-static inline void flush_hugetlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- if (radix_enabled())
- return radix__flush_hugetlb_page(vma, vmaddr);
-}
-
-#else
-
-static inline pte_t *hugepd_page(hugepd_t hpd)
-{
- BUG_ON(!hugepd_ok(hpd));
-#ifdef CONFIG_PPC_8xx
- return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
-#else
- return (pte_t *)((hpd_val(hpd) &
- ~HUGEPD_SHIFT_MASK) | PD_HUGE);
-#endif
-}
-
-static inline unsigned int hugepd_shift(hugepd_t hpd)
-{
-#ifdef CONFIG_PPC_8xx
- return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17;
-#else
- return hpd_val(hpd) & HUGEPD_SHIFT_MASK;
-#endif
-}
-
+#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#include <asm/nohash/hugetlb-book3e.h>
+#elif defined(CONFIG_PPC_8xx)
+#include <asm/nohash/32/hugetlb-8xx.h>
#endif /* CONFIG_PPC_BOOK3S_64 */
+extern bool hugetlb_disabled;
-static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
- unsigned pdshift)
-{
- /*
- * On FSL BookE, we have multiple higher-level table entries that
- * point to the same hugepte. Just use the first one since they're all
- * identical. So for that case, idx=0.
- */
- unsigned long idx = 0;
-
- pte_t *dir = hugepd_page(hpd);
-#ifdef CONFIG_PPC_8xx
- idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT;
-#elif !defined(CONFIG_PPC_FSL_BOOK3E)
- idx = (addr & ((1UL << pdshift) - 1)) >> hugepd_shift(hpd);
-#endif
-
- return dir + idx;
-}
+void hugetlbpage_init_default(void);
void flush_dcache_icache_hugepage(struct page *page);
@@ -99,15 +33,6 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
pte_t pte);
-#ifdef CONFIG_PPC_8xx
-static inline void flush_hugetlb_page(struct vm_area_struct *vma,
- unsigned long vmaddr)
-{
- flush_tlb_page(vma, vmaddr);
-}
-#else
-void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-#endif
#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE
void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index ece4dc89c90b..0fe8c1e46bbc 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -90,10 +90,18 @@ static inline void hw_breakpoint_disable(void)
extern void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs);
int hw_breakpoint_handler(struct die_args *args);
+extern int set_dawr(struct arch_hw_breakpoint *brk);
+extern bool dawr_force_enable;
+static inline bool dawr_enabled(void)
+{
+ return dawr_force_enable;
+}
+
#else /* CONFIG_HAVE_HW_BREAKPOINT */
static inline void hw_breakpoint_disable(void) { }
static inline void thread_change_pc(struct task_struct *tsk,
struct pt_regs *regs) { }
+static inline bool dawr_enabled(void) { return false; }
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
#endif /* __KERNEL__ */
#endif /* _PPC_BOOK3S_64_HW_BREAKPOINT_H */
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 69f516ecb2fd..7c2ef0e42661 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -33,6 +33,7 @@
*/
#define THREAD_IMC_LDBAR_MASK 0x0003ffffffffe000ULL
#define THREAD_IMC_ENABLE 0x8000000000000000ULL
+#define TRACE_IMC_ENABLE 0x4000000000000000ULL
/*
* For debugfs interface for imc-mode and imc-command
@@ -59,6 +60,34 @@ struct imc_events {
char *scale;
};
+/*
+ * Trace IMC hardware updates a 64-byte record on
+ * Core Performance Monitoring Counter (CPMC)
+ * overflow. Here is the layout of the trace IMC record:
+ *
+ * DW 0 : Timebase
+ * DW 1 : Program Counter
+ * DW 2 : PIDR information
+ * DW 3 : CPMC1
+ * DW 4 : CPMC2
+ * DW 5 : CPMC3
+ * DW 6 : CPMC4
+ * DW 7 : Timebase
+ * .....
+ *
+ * The following is the data structure to hold trace imc data.
+ */
+struct trace_imc_data {
+ u64 tb1;
+ u64 ip;
+ u64 val;
+ u64 cpmc1;
+ u64 cpmc2;
+ u64 cpmc3;
+ u64 cpmc4;
+ u64 tb2;
+};
+
/* Event attribute array index */
#define IMC_FORMAT_ATTR 0
#define IMC_EVENT_ATTR 1
@@ -69,6 +98,13 @@ struct imc_events {
#define IMC_EVENT_OFFSET_MASK 0xffffffffULL
/*
+ * Macro to mask bits 0:21 of the first double word (which is the timebase) to
+ * compare with 8th double word (timebase) of trace imc record data.
+ */
+#define IMC_TRACE_RECORD_TB1_MASK 0x3ffffffffffULL
+
+
+/*
* Device tree parser code detects IMC pmu support and
* registers new IMC pmus. This structure will hold the
* pmu functions, events, counter memory information
@@ -113,6 +149,7 @@ struct imc_pmu_ref {
enum {
IMC_TYPE_THREAD = 0x1,
+ IMC_TYPE_TRACE = 0x2,
IMC_TYPE_CORE = 0x4,
IMC_TYPE_CHIP = 0x10,
};
@@ -123,6 +160,8 @@ enum {
#define IMC_DOMAIN_NEST 1
#define IMC_DOMAIN_CORE 2
#define IMC_DOMAIN_THREAD 3
+/* For trace-imc the domain is still thread but it operates in trace-mode */
+#define IMC_DOMAIN_TRACE 4
extern int init_imc_pmu(struct device_node *parent,
struct imc_pmu *pmu_ptr, int pmu_id);
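
A hedged sketch of how a consumer might use IMC_TRACE_RECORD_TB1_MASK: since tb1 is written at the start of a record and tb2 at the end, comparing the masked timebases is one way to spot a record that is still being filled in. This is illustrative only, not the driver's actual logic:

static bool example_record_looks_complete(const struct trace_imc_data *rec)
{
	/* drop bits 0:21 (IBM numbering), keeping the low 42 timebase bits */
	u64 head = rec->tb1 & IMC_TRACE_RECORD_TB1_MASK;
	u64 tail = rec->tb2 & IMC_TRACE_RECORD_TB1_MASK;

	return head <= tail;
}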
diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h
index 4b73847e9b95..1fad67b46409 100644
--- a/arch/powerpc/include/asm/io.h
+++ b/arch/powerpc/include/asm/io.h
@@ -34,14 +34,11 @@ extern struct pci_dev *isa_bridge_pcidev;
#include <asm/byteorder.h>
#include <asm/synch.h>
#include <asm/delay.h>
+#include <asm/mmiowb.h>
#include <asm/mmu.h>
#include <asm/ppc_asm.h>
#include <asm/pgtable.h>
-#ifdef CONFIG_PPC64
-#include <asm/paca.h>
-#endif
-
#define SIO_CONFIG_RA 0x398
#define SIO_CONFIG_RD 0x399
@@ -107,12 +104,6 @@ extern bool isa_io_special;
*
*/
-#ifdef CONFIG_PPC64
-#define IO_SET_SYNC_FLAG() do { local_paca->io_sync = 1; } while(0)
-#else
-#define IO_SET_SYNC_FLAG()
-#endif
-
#define DEF_MMIO_IN_X(name, size, insn) \
static inline u##size name(const volatile u##size __iomem *addr) \
{ \
@@ -127,7 +118,7 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn" %1,%y0" \
: "=Z" (*addr) : "r" (val) : "memory"); \
- IO_SET_SYNC_FLAG(); \
+ mmiowb_set_pending(); \
}
#define DEF_MMIO_IN_D(name, size, insn) \
@@ -144,7 +135,7 @@ static inline void name(volatile u##size __iomem *addr, u##size val) \
{ \
__asm__ __volatile__("sync;"#insn"%U0%X0 %1,%0" \
: "=m" (*addr) : "r" (val) : "memory"); \
- IO_SET_SYNC_FLAG(); \
+ mmiowb_set_pending(); \
}
DEF_MMIO_IN_D(in_8, 8, lbz);
@@ -652,24 +643,6 @@ static inline void name at \
#include <asm-generic/iomap.h>
-#ifdef CONFIG_PPC32
-#define mmiowb()
-#else
-/*
- * Enforce synchronisation of stores vs. spin_unlock
- * (this does it explicitly, though our implementation of spin_unlock
- * does it implicitely too)
- */
-static inline void mmiowb(void)
-{
- unsigned long tmp;
-
- __asm__ __volatile__("sync; li %0,0; stb %0,%1(13)"
- : "=&r" (tmp) : "i" (offsetof(struct paca_struct, io_sync))
- : "memory");
-}
-#endif /* !CONFIG_PPC32 */
-
static inline void iosync(void)
{
__asm__ __volatile__ ("sync" : : : "memory");
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
new file mode 100644
index 000000000000..296e51c2f066
--- /dev/null
+++ b/arch/powerpc/include/asm/kasan.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_KASAN_H
+#define __ASM_KASAN_H
+
+#ifdef CONFIG_KASAN
+#define _GLOBAL_KASAN(fn) _GLOBAL(__##fn)
+#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(__##fn)
+#define EXPORT_SYMBOL_KASAN(fn) EXPORT_SYMBOL(__##fn)
+#else
+#define _GLOBAL_KASAN(fn) _GLOBAL(fn)
+#define _GLOBAL_TOC_KASAN(fn) _GLOBAL_TOC(fn)
+#define EXPORT_SYMBOL_KASAN(fn)
+#endif
+
+#ifndef __ASSEMBLY__
+
+#include <asm/page.h>
+
+#define KASAN_SHADOW_SCALE_SHIFT 3
+
+#define KASAN_SHADOW_START (KASAN_SHADOW_OFFSET + \
+ (PAGE_OFFSET >> KASAN_SHADOW_SCALE_SHIFT))
+
+#define KASAN_SHADOW_OFFSET ASM_CONST(CONFIG_KASAN_SHADOW_OFFSET)
+
+#define KASAN_SHADOW_END 0UL
+
+#define KASAN_SHADOW_SIZE (KASAN_SHADOW_END - KASAN_SHADOW_START)
+
+#ifdef CONFIG_KASAN
+void kasan_early_init(void);
+void kasan_mmu_init(void);
+void kasan_init(void);
+#else
+static inline void kasan_init(void) { }
+static inline void kasan_mmu_init(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif
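
The shadow mapping is the usual one-shadow-byte-per-eight-bytes scheme; a worked example, assuming a ppc32 PAGE_OFFSET of 0xc0000000 (the actual base depends on CONFIG_KASAN_SHADOW_OFFSET chosen by the platform):

/*
 * shadow(addr)       = KASAN_SHADOW_OFFSET + (addr >> KASAN_SHADOW_SCALE_SHIFT)
 * shadow(0xc0000000) = KASAN_SHADOW_OFFSET + 0x18000000
 *
 * so KASAN_SHADOW_START above is simply the shadow of the first lowmem byte,
 * and each shadow byte describes 2^3 = 8 bytes of kernel memory.
 */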
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
new file mode 100644
index 000000000000..5b5e39643a27
--- /dev/null
+++ b/arch/powerpc/include/asm/kup.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_H_
+#define _ASM_POWERPC_KUP_H_
+
+#ifdef CONFIG_PPC64
+#include <asm/book3s/64/kup-radix.h>
+#endif
+#ifdef CONFIG_PPC_8xx
+#include <asm/nohash/32/kup-8xx.h>
+#endif
+#ifdef CONFIG_PPC_BOOK3S_32
+#include <asm/book3s/32/kup.h>
+#endif
+
+#ifdef __ASSEMBLY__
+#ifndef CONFIG_PPC_KUAP
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+.endm
+
+.macro kuap_check current, gpr
+.endm
+
+#endif
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/pgtable.h>
+
+void setup_kup(void);
+
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled);
+#else
+static inline void setup_kuep(bool disabled) { }
+#endif /* CONFIG_PPC_KUEP */
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled);
+#else
+static inline void setup_kuap(bool disabled) { }
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size) { }
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size) { }
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write) { return false; }
+#endif /* CONFIG_PPC_KUAP */
+
+static inline void allow_read_from_user(const void __user *from, unsigned long size)
+{
+ allow_user_access(NULL, from, size);
+}
+
+static inline void allow_write_to_user(void __user *to, unsigned long size)
+{
+ allow_user_access(to, NULL, size);
+}
+
+static inline void prevent_read_from_user(const void __user *from, unsigned long size)
+{
+ prevent_user_access(NULL, from, size);
+}
+
+static inline void prevent_write_to_user(void __user *to, unsigned long size)
+{
+ prevent_user_access(to, NULL, size);
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* _ASM_POWERPC_KUP_H_ */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 17996bc9382b..23247a132ce8 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -31,7 +31,7 @@ enum MCE_Version {
enum MCE_Severity {
MCE_SEV_NO_ERROR = 0,
MCE_SEV_WARNING = 1,
- MCE_SEV_ERROR_SYNC = 2,
+ MCE_SEV_SEVERE = 2,
MCE_SEV_FATAL = 3,
};
@@ -56,6 +56,14 @@ enum MCE_ErrorType {
MCE_ERROR_TYPE_LINK = 7,
};
+enum MCE_ErrorClass {
+ MCE_ECLASS_UNKNOWN = 0,
+ MCE_ECLASS_HARDWARE,
+ MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_ECLASS_SOFTWARE,
+ MCE_ECLASS_SOFT_INDETERMINATE,
+};
+
enum MCE_UeErrorType {
MCE_UE_ERROR_INDETERMINATE = 0,
MCE_UE_ERROR_IFETCH = 1,
@@ -110,73 +118,75 @@ enum MCE_LinkErrorType {
};
struct machine_check_event {
- enum MCE_Version version:8; /* 0x00 */
- uint8_t in_use; /* 0x01 */
- enum MCE_Severity severity:8; /* 0x02 */
- enum MCE_Initiator initiator:8; /* 0x03 */
- enum MCE_ErrorType error_type:8; /* 0x04 */
- enum MCE_Disposition disposition:8; /* 0x05 */
- uint8_t reserved_1[2]; /* 0x06 */
- uint64_t gpr3; /* 0x08 */
- uint64_t srr0; /* 0x10 */
- uint64_t srr1; /* 0x18 */
- union { /* 0x20 */
+ enum MCE_Version version:8;
+ u8 in_use;
+ enum MCE_Severity severity:8;
+ enum MCE_Initiator initiator:8;
+ enum MCE_ErrorType error_type:8;
+ enum MCE_ErrorClass error_class:8;
+ enum MCE_Disposition disposition:8;
+ bool sync_error;
+ u16 cpu;
+ u64 gpr3;
+ u64 srr0;
+ u64 srr1;
+ union {
struct {
enum MCE_UeErrorType ue_error_type:8;
- uint8_t effective_address_provided;
- uint8_t physical_address_provided;
- uint8_t reserved_1[5];
- uint64_t effective_address;
- uint64_t physical_address;
- uint8_t reserved_2[8];
+ u8 effective_address_provided;
+ u8 physical_address_provided;
+ u8 reserved_1[5];
+ u64 effective_address;
+ u64 physical_address;
+ u8 reserved_2[8];
} ue_error;
struct {
enum MCE_SlbErrorType slb_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} slb_error;
struct {
enum MCE_EratErrorType erat_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} erat_error;
struct {
enum MCE_TlbErrorType tlb_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} tlb_error;
struct {
enum MCE_UserErrorType user_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} user_error;
struct {
enum MCE_RaErrorType ra_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} ra_error;
struct {
enum MCE_LinkErrorType link_error_type:8;
- uint8_t effective_address_provided;
- uint8_t reserved_1[6];
- uint64_t effective_address;
- uint8_t reserved_2[16];
+ u8 effective_address_provided;
+ u8 reserved_1[6];
+ u64 effective_address;
+ u8 reserved_2[16];
} link_error;
} u;
};
@@ -194,6 +204,8 @@ struct mce_error_info {
} u;
enum MCE_Severity severity:8;
enum MCE_Initiator initiator:8;
+ enum MCE_ErrorClass error_class:8;
+ bool sync_error;
};
#define MAX_MC_EVT 100
@@ -210,6 +222,7 @@ extern void release_mce_event(void);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest);
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
#ifdef CONFIG_PPC_BOOK3S_64
void flush_and_reload_slb(void);
#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mmiowb.h b/arch/powerpc/include/asm/mmiowb.h
new file mode 100644
index 000000000000..74a00127eb20
--- /dev/null
+++ b/arch/powerpc/include/asm/mmiowb.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_MMIOWB_H
+#define _ASM_POWERPC_MMIOWB_H
+
+#ifdef CONFIG_MMIOWB
+
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <asm/paca.h>
+
+#define arch_mmiowb_state() (&local_paca->mmiowb_state)
+#define mmiowb() mb()
+
+#endif /* CONFIG_MMIOWB */
+
+#include <asm-generic/mmiowb.h>
+
+#endif /* _ASM_POWERPC_MMIOWB_H */
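
The driver-visible behaviour stays the same: an MMIO write done under a spinlock must be ordered before the unlock is observed by the next lock holder. The accessors in io.h above mark the paca mmiowb state as pending and the generic mmiowb code issues the barrier at unlock time. A small illustrative driver pattern (the device and lock names are made up):

struct example_dev {
	spinlock_t lock;
	u32 __iomem *doorbell;
};

static void example_ring_doorbell(struct example_dev *dev, u32 val)
{
	spin_lock(&dev->lock);
	out_be32(dev->doorbell, val);	/* sets the pending mmiowb state */
	spin_unlock(&dev->lock);	/* barrier emitted here if pending */
}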
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 8ddd4a91bdc1..ba94ce8c22d7 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -107,6 +107,11 @@
*/
#define MMU_FTR_1T_SEGMENT ASM_CONST(0x40000000)
+/*
+ * Supports KUAP (key 0 controlling userspace addresses) on radix
+ */
+#define MMU_FTR_RADIX_KUAP ASM_CONST(0x80000000)
+
/* MMU feature bit sets for various CPUs */
#define MMU_FTRS_DEFAULT_HPTE_ARCH_V2 \
MMU_FTR_HPTE_TABLE | MMU_FTR_PPCAS_ARCH_V2
@@ -124,6 +129,9 @@
#ifndef __ASSEMBLY__
#include <linux/bug.h>
#include <asm/cputable.h>
+#include <asm/page.h>
+
+typedef pte_t *pgtable_t;
#ifdef CONFIG_PPC_FSL_BOOK3E
#include <asm/percpu.h>
@@ -164,7 +172,10 @@ enum {
#endif
#ifdef CONFIG_PPC_RADIX_MMU
MMU_FTR_TYPE_RADIX |
-#endif
+#ifdef CONFIG_PPC_KUAP
+ MMU_FTR_RADIX_KUAP |
+#endif /* CONFIG_PPC_KUAP */
+#endif /* CONFIG_PPC_RADIX_MMU */
0,
};
@@ -341,21 +352,6 @@ static inline bool strict_kernel_rwx_enabled(void)
*/
#define MMU_PAGE_COUNT 16
-/*
- * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
- * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
- * page_to_nid does a page->section->node lookup
- * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
- * memory requirements with large number of sections.
- * 51 bits is the max physical real address on POWER9
- */
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
- defined (CONFIG_PPC_64K_PAGES)
-#define MAX_PHYSMEM_BITS 51
-#elif defined(CONFIG_PPC64)
-#define MAX_PHYSMEM_BITS 46
-#endif
-
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/mmu.h>
#else /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 6ee8195a2ffb..611204e588b9 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -52,6 +52,7 @@ static inline bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
{
return false;
}
+static inline void mm_iommu_init(struct mm_struct *mm) { }
#endif
extern void switch_slb(struct task_struct *tsk, struct mm_struct *mm);
extern void set_context(unsigned long id, pgd_t *pgd);
@@ -228,13 +229,7 @@ static inline void enter_lazy_tlb(struct mm_struct *mm,
#endif
}
-#ifdef CONFIG_PPC_BOOK3E_64
-static inline void arch_exit_mmap(struct mm_struct *mm)
-{
-}
-#else
extern void arch_exit_mmap(struct mm_struct *mm);
-#endif
static inline void arch_unmap(struct mm_struct *mm,
struct vm_area_struct *vma,
diff --git a/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
new file mode 100644
index 000000000000..a46616937d20
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+#define _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H
+
+#define PAGE_SHIFT_8M 23
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+ BUG_ON(!hugepd_ok(hpd));
+
+ return (pte_t *)__va(hpd_val(hpd) & ~HUGEPD_SHIFT_MASK);
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+ return ((hpd_val(hpd) & _PMD_PAGE_MASK) >> 1) + 17;
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+ unsigned int pdshift)
+{
+ unsigned long idx = (addr & ((1UL << pdshift) - 1)) >> PAGE_SHIFT;
+
+ return hugepd_page(hpd) + idx;
+}
+
+static inline void flush_hugetlb_page(struct vm_area_struct *vma,
+ unsigned long vmaddr)
+{
+ flush_tlb_page(vma, vmaddr);
+}
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+ *hpdp = __hugepd(__pa(new) | _PMD_USER | _PMD_PRESENT |
+ (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M : _PMD_PAGE_512K));
+}
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ return shift_to_mmu_psize(shift);
+}
+
+#endif /* _ASM_POWERPC_NOHASH_32_HUGETLB_8XX_H */
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
new file mode 100644
index 000000000000..1c3133b5f86a
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_KUP_8XX_H_
+#define _ASM_POWERPC_KUP_8XX_H_
+
+#include <asm/bug.h>
+
+#ifdef CONFIG_PPC_KUAP
+
+#ifdef __ASSEMBLY__
+
+.macro kuap_save_and_lock sp, thread, gpr1, gpr2, gpr3
+ lis \gpr2, MD_APG_KUAP@h /* only APG0 and APG1 are used */
+ mfspr \gpr1, SPRN_MD_AP
+ mtspr SPRN_MD_AP, \gpr2
+ stw \gpr1, STACK_REGS_KUAP(\sp)
+.endm
+
+.macro kuap_restore sp, current, gpr1, gpr2, gpr3
+ lwz \gpr1, STACK_REGS_KUAP(\sp)
+ mtspr SPRN_MD_AP, \gpr1
+.endm
+
+.macro kuap_check current, gpr
+#ifdef CONFIG_PPC_KUAP_DEBUG
+ mfspr \gpr, SPRN_MD_AP
+ rlwinm \gpr, \gpr, 16, 0xffff
+999: twnei \gpr, MD_APG_KUAP@h
+ EMIT_BUG_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE)
+#endif
+.endm
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/reg.h>
+
+static inline void allow_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ mtspr(SPRN_MD_AP, MD_APG_INIT);
+}
+
+static inline void prevent_user_access(void __user *to, const void __user *from,
+ unsigned long size)
+{
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+
+static inline bool bad_kuap_fault(struct pt_regs *regs, bool is_write)
+{
+ return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
+ "Bug: fault blocked by AP register !");
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* CONFIG_PPC_KUAP */
+
+#endif /* _ASM_POWERPC_KUP_8XX_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 0a1a3fc54e54..76af5b0cb16e 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -35,11 +35,18 @@
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
* Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * We define all 16 groups so that all other bits of APG can take any value
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
+ */
+#define MI_APG_INIT 0x4fffffff
+
+/*
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
+ * 1 => User => 10 (all accesses performed according to swapped page definition)
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
*/
-#define MI_APG_INIT 0x44444444
+#define MI_APG_KUEP 0x6fffffff
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
@@ -108,11 +115,18 @@
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
* Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => No user => 01 (all accesses performed according to page definition)
+ * 0 => Kernel => 01 (all accesses performed according to page definition)
* 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * We define all 16 groups so that all other bits of APG can take any value
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
+ */
+#define MD_APG_INIT 0x4fffffff
+
+/*
+ * 0 => No user => 01 (all accesses performed according to page definition)
+ * 1 => User => 10 (all accesses performed according to swapped page definition)
+ * 2-16 => NA => 11 (all accesses performed as user iaw page definition)
*/
-#define MD_APG_INIT 0x44444444
+#define MD_APG_KUAP 0x6fffffff
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
@@ -167,9 +181,26 @@
#ifdef CONFIG_PPC_MM_SLICES
#include <asm/nohash/32/slice.h>
#define SLICE_ARRAY_SIZE (1 << (32 - SLICE_LOW_SHIFT - 1))
+#define LOW_SLICE_ARRAY_SZ SLICE_ARRAY_SIZE
#endif
+#if defined(CONFIG_PPC_4K_PAGES)
+#define mmu_virtual_psize MMU_PAGE_4K
+#elif defined(CONFIG_PPC_16K_PAGES)
+#define mmu_virtual_psize MMU_PAGE_16K
+#define PTE_FRAG_NR 4
+#define PTE_FRAG_SIZE_SHIFT 12
+#define PTE_FRAG_SIZE (1UL << 12)
+#else
+#error "Unsupported PAGE_SIZE"
+#endif
+
+#define mmu_linear_psize MMU_PAGE_8M
+
#ifndef __ASSEMBLY__
+
+#include <linux/mmdebug.h>
+
struct slice_mask {
u64 low_slices;
DECLARE_BITMAP(high_slices, 0);
@@ -185,14 +216,56 @@ typedef struct {
unsigned char high_slices_psize[0];
unsigned long slb_addr_limit;
struct slice_mask mask_base_psize; /* 4k or 16k */
-# ifdef CONFIG_HUGETLB_PAGE
struct slice_mask mask_512k;
struct slice_mask mask_8m;
-# endif
#endif
void *pte_frag;
} mm_context_t;
+#ifdef CONFIG_PPC_MM_SLICES
+static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
+{
+ return ctx->user_psize;
+}
+
+static inline void mm_ctx_set_user_psize(mm_context_t *ctx, u16 user_psize)
+{
+ ctx->user_psize = user_psize;
+}
+
+static inline unsigned char *mm_ctx_low_slices(mm_context_t *ctx)
+{
+ return ctx->low_slices_psize;
+}
+
+static inline unsigned char *mm_ctx_high_slices(mm_context_t *ctx)
+{
+ return ctx->high_slices_psize;
+}
+
+static inline unsigned long mm_ctx_slb_addr_limit(mm_context_t *ctx)
+{
+ return ctx->slb_addr_limit;
+}
+
+static inline void mm_ctx_set_slb_addr_limit(mm_context_t *ctx, unsigned long limit)
+{
+ ctx->slb_addr_limit = limit;
+}
+
+static inline struct slice_mask *slice_mask_for_size(mm_context_t *ctx, int psize)
+{
+ if (psize == MMU_PAGE_512K)
+ return &ctx->mask_512k;
+ if (psize == MMU_PAGE_8M)
+ return &ctx->mask_8m;
+
+ BUG_ON(psize != mmu_virtual_psize);
+
+ return &ctx->mask_base_psize;
+}
+#endif /* CONFIG_PPC_MM_SLICES */
+
#define PHYS_IMMR_BASE (mfspr(SPRN_IMMR) & 0xfff80000)
#define VIRT_IMMR_BASE (__fix_to_virt(FIX_IMMR_BASE))
@@ -242,17 +315,4 @@ extern s32 patch__itlbmiss_perf, patch__dtlbmiss_perf;
#endif /* !__ASSEMBLY__ */
-#if defined(CONFIG_PPC_4K_PAGES)
-#define mmu_virtual_psize MMU_PAGE_4K
-#elif defined(CONFIG_PPC_16K_PAGES)
-#define mmu_virtual_psize MMU_PAGE_16K
-#define PTE_FRAG_NR 4
-#define PTE_FRAG_SIZE_SHIFT 12
-#define PTE_FRAG_SIZE (1UL << 12)
-#else
-#error "Unsupported PAGE_SIZE"
-#endif
-
-#define mmu_linear_psize MMU_PAGE_8M
-
#endif /* _ASM_POWERPC_MMU_8XX_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/mmu.h b/arch/powerpc/include/asm/nohash/32/mmu.h
deleted file mode 100644
index 7d94a36d57d2..000000000000
--- a/arch/powerpc/include/asm/nohash/32/mmu.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_32_MMU_H_
-#define _ASM_POWERPC_NOHASH_32_MMU_H_
-
-#include <asm/page.h>
-
-#if defined(CONFIG_40x)
-/* 40x-style software loaded TLB */
-#include <asm/nohash/32/mmu-40x.h>
-#elif defined(CONFIG_44x)
-/* 44x-style software loaded TLB */
-#include <asm/nohash/32/mmu-44x.h>
-#elif defined(CONFIG_PPC_BOOK3E_MMU)
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
-#elif defined (CONFIG_PPC_8xx)
-/* Motorola/Freescale 8xx software loaded TLB */
-#include <asm/nohash/32/mmu-8xx.h>
-#endif
-
-#ifndef __ASSEMBLY__
-typedef pte_t *pgtable_t;
-#endif
-
-#endif /* _ASM_POWERPC_NOHASH_32_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/32/pgalloc.h b/arch/powerpc/include/asm/nohash/32/pgalloc.h
index bd186e85b4f7..11eac371e7e0 100644
--- a/arch/powerpc/include/asm/nohash/32/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/32/pgalloc.h
@@ -6,39 +6,6 @@
#include <linux/slab.h>
/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern void __bad_pte(pmd_t *pmd);
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
-/*
* We don't have any real pmd's, and this code never triggers because
* the pgd will always be present..
*/
@@ -47,96 +14,22 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
#define __pmd_free_tlb(tlb,x,a) do { } while (0)
/* #define pgd_populate(mm, pmd, pte) BUG() */
-#ifndef CONFIG_BOOKE
-
-static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
- pte_t *pte)
-{
- *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
-}
-
-static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
- pgtable_t pte_page)
-{
- *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
-}
-
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-#else
-
static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmdp,
pte_t *pte)
{
- *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+ if (IS_ENABLED(CONFIG_BOOKE))
+ *pmdp = __pmd((unsigned long)pte | _PMD_PRESENT);
+ else
+ *pmdp = __pmd(__pa(pte) | _PMD_PRESENT);
}
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pte_page)
{
- *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
+ if (IS_ENABLED(CONFIG_BOOKE))
+ *pmdp = __pmd((unsigned long)pte_page | _PMD_PRESENT);
+ else
+ *pmdp = __pmd(__pa(pte_page) | _PMD_USER | _PMD_PRESENT);
}
-#define pmd_pgtable(pmd) ((pgtable_t)pmd_page_vaddr(pmd))
-#endif
-
-extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
-extern pgtable_t pte_alloc_one(struct mm_struct *mm);
-void pte_frag_destroy(void *pte_frag);
-pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
-void pte_fragment_free(unsigned long *table, int kernel);
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- pte_fragment_free((unsigned long *)pte, 1);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pte_fragment_free((unsigned long *)ptepage, 0);
-}
-
-static inline void pgtable_free(void *table, unsigned index_size)
-{
- if (!index_size) {
- pte_fragment_free((unsigned long *)table, 0);
- } else {
- BUG_ON(index_size > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(index_size), table);
- }
-}
-
-#define check_pgt_cache() do { } while (0)
-#define get_hugepd_cache_index(x) (x)
-
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb,
- void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, table, 0);
-}
#endif /* _ASM_POWERPC_PGALLOC_32_H */
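For illustration, the hunk above folds the CONFIG_BOOKE and non-BOOKE variants of pmd_populate_kernel()/pmd_populate() into one body using IS_ENABLED(): both arms are always compiled (so they must build on every config) and the constant-folded dead arm is discarded by the compiler. A minimal sketch of the same pattern with a hypothetical config symbol, not part of the patch:

	/* Sketch only: CONFIG_DEMO_FEATURE and the flag value are illustrative. */
	static inline unsigned long demo_make_entry(void *table)
	{
		if (IS_ENABLED(CONFIG_DEMO_FEATURE))
			/* one flavour stores the virtual address directly */
			return (unsigned long)table | 0x1;
		else
			/* the other stores the physical address instead */
			return __pa(table) | 0x1;
	}
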
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index bed433358260..0284f8f5305f 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -64,15 +64,24 @@ extern int icache_44x_need_flush;
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+#ifndef __ASSEMBLY__
+
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
+
+#endif /* !__ASSEMBLY__ */
+
+
/*
* This is the bottom of the PKMAP area with HIGHMEM or an arbitrary
* value (for now) on others, from where we can start layout kernel
* virtual space that goes below PKMAP and FIXMAP
*/
+#include <asm/fixmap.h>
+
#ifdef CONFIG_HIGHMEM
#define KVIRT_TOP PKMAP_BASE
#else
-#define KVIRT_TOP (0xfe000000UL) /* for now, could be FIXMAP_BASE ? */
+#define KVIRT_TOP FIXADDR_START
#endif
/*
@@ -379,8 +388,6 @@ static inline int pte_young(pte_t pte)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
-
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_POWERPC_NOHASH_32_PGTABLE_H */
diff --git a/arch/powerpc/include/asm/nohash/32/slice.h b/arch/powerpc/include/asm/nohash/32/slice.h
index 777d62e40ac0..39eb0154ae2d 100644
--- a/arch/powerpc/include/asm/nohash/32/slice.h
+++ b/arch/powerpc/include/asm/nohash/32/slice.h
@@ -13,6 +13,8 @@
#define SLICE_NUM_HIGH 0ul
#define GET_HIGH_SLICE_INDEX(addr) (addr & 0)
+#define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW
+
#endif /* CONFIG_PPC_MM_SLICES */
#endif /* _ASM_POWERPC_NOHASH_32_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/64/mmu.h b/arch/powerpc/include/asm/nohash/64/mmu.h
deleted file mode 100644
index e6585480dfc4..000000000000
--- a/arch/powerpc/include/asm/nohash/64/mmu.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_64_MMU_H_
-#define _ASM_POWERPC_NOHASH_64_MMU_H_
-
-/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
-#include <asm/nohash/mmu-book3e.h>
-
-#ifndef __ASSEMBLY__
-typedef struct page *pgtable_t;
-#endif
-
-#endif /* _ASM_POWERPC_NOHASH_64_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/64/pgalloc.h b/arch/powerpc/include/asm/nohash/64/pgalloc.h
index 66d086f85bd5..62321cd12da9 100644
--- a/arch/powerpc/include/asm/nohash/64/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/64/pgalloc.h
@@ -18,37 +18,6 @@ struct vmemmap_backing {
};
extern struct vmemmap_backing *vmemmap_list;
-/*
- * Functions that deal with pagetables that could be at any level of
- * the table need to be passed an "index_size" so they know how to
- * handle allocation. For PTE pages (which are linked to a struct
- * page for now, and drawn from the main get_free_pages() pool), the
- * allocation size will be (2^index_size * sizeof(pointer)) and
- * allocations are drawn from the kmem_cache in PGT_CACHE(index_size).
- *
- * The maximum index size needs to be big enough to allow any
- * pagetable sizes we need, but small enough to fit in the low bits of
- * any page table pointer. In other words all pagetables, even tiny
- * ones, must be aligned to allow at least enough low 0 bits to
- * contain this value. This value is also used as a mask, so it must
- * be one less than a power of two.
- */
-#define MAX_PGTABLE_INDEX_SIZE 0xf
-
-extern struct kmem_cache *pgtable_cache[];
-#define PGT_CACHE(shift) pgtable_cache[shift]
-
-static inline pgd_t *pgd_alloc(struct mm_struct *mm)
-{
- return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
- pgtable_gfp_flags(mm, GFP_KERNEL));
-}
-
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
- kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
-}
-
#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, (unsigned long)PUD)
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
@@ -76,11 +45,9 @@ static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte_page)
{
- pmd_set(pmd, (unsigned long)page_address(pte_page));
+ pmd_set(pmd, (unsigned long)pte_page);
}
-#define pmd_pgtable(pmd) pmd_page(pmd)
-
static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr)
{
return kmem_cache_alloc(PGT_CACHE(PMD_CACHE_INDEX),
@@ -92,91 +59,9 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
kmem_cache_free(PGT_CACHE(PMD_CACHE_INDEX), pmd);
}
-
-static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- return (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- struct page *page;
- pte_t *pte;
-
- pte = (pte_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO | __GFP_ACCOUNT);
- if (!pte)
- return NULL;
- page = virt_to_page(pte);
- if (!pgtable_page_ctor(page)) {
- __free_page(page);
- return NULL;
- }
- return page;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
- free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
-{
- pgtable_page_dtor(ptepage);
- __free_page(ptepage);
-}
-
-static inline void pgtable_free(void *table, int shift)
-{
- if (!shift) {
- pgtable_page_dtor(virt_to_page(table));
- free_page((unsigned long)table);
- } else {
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- kmem_cache_free(PGT_CACHE(shift), table);
- }
-}
-
-#define get_hugepd_cache_index(x) (x)
-#ifdef CONFIG_SMP
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- unsigned long pgf = (unsigned long)table;
-
- BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
- pgf |= shift;
- tlb_remove_table(tlb, (void *)pgf);
-}
-
-static inline void __tlb_remove_table(void *_table)
-{
- void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
- unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
-
- pgtable_free(table, shift);
-}
-
-#else
-static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
-{
- pgtable_free(table, shift);
-}
-#endif
-
-static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
- unsigned long address)
-{
- tlb_flush_pgtable(tlb, address);
- pgtable_free_tlb(tlb, page_address(table), 0);
-}
-
#define __pmd_free_tlb(tlb, pmd, addr) \
pgtable_free_tlb(tlb, pmd, PMD_CACHE_INDEX)
-#ifndef CONFIG_PPC_64K_PAGES
#define __pud_free_tlb(tlb, pud, addr) \
pgtable_free_tlb(tlb, pud, PUD_INDEX_SIZE)
-#endif /* CONFIG_PPC_64K_PAGES */
-
-#define check_pgt_cache() do { } while (0)
-
#endif /* _ASM_POWERPC_PGALLOC_64_H */
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index e77ed9761632..b9f66cf15c31 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -10,10 +10,6 @@
#include <asm/barrier.h>
#include <asm/asm-const.h>
-#ifdef CONFIG_PPC_64K_PAGES
-#error "Page size not supported"
-#endif
-
#define FIRST_USER_ADDRESS 0UL
/*
@@ -23,11 +19,7 @@
PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT)
#define PGTABLE_RANGE (ASM_CONST(1) << PGTABLE_EADDR_SIZE)
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define PMD_CACHE_INDEX (PMD_INDEX_SIZE + 1)
-#else
#define PMD_CACHE_INDEX PMD_INDEX_SIZE
-#endif
#define PUD_CACHE_INDEX PUD_INDEX_SIZE
/*
@@ -73,7 +65,6 @@
#define VMALLOC_REGION_ID (REGION_ID(VMALLOC_START))
#define KERNEL_REGION_ID (REGION_ID(PAGE_OFFSET))
-#define VMEMMAP_REGION_ID (0xfUL) /* Server only */
#define USER_REGION_ID (0UL)
/*
@@ -205,7 +196,8 @@ static inline void pgd_set(pgd_t *pgdp, unsigned long val)
(((pte_t *) pmd_page_vaddr(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
#define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr))
-#define pte_unmap(pte) do { } while(0)
+
+static inline void pte_unmap(pte_t *pte) { }
/* to find an entry in a kernel page-table-directory */
/* This now only contains the vmalloc pages */
diff --git a/arch/powerpc/include/asm/nohash/64/slice.h b/arch/powerpc/include/asm/nohash/64/slice.h
deleted file mode 100644
index ad0d6e3cc1c5..000000000000
--- a/arch/powerpc/include/asm/nohash/64/slice.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_NOHASH_64_SLICE_H
-#define _ASM_POWERPC_NOHASH_64_SLICE_H
-
-#ifdef CONFIG_PPC_64K_PAGES
-#define get_slice_psize(mm, addr) MMU_PAGE_64K
-#else /* CONFIG_PPC_64K_PAGES */
-#define get_slice_psize(mm, addr) MMU_PAGE_4K
-#endif /* !CONFIG_PPC_64K_PAGES */
-#define slice_set_user_psize(mm, psize) do { BUG(); } while (0)
-
-#endif /* _ASM_POWERPC_NOHASH_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/nohash/hugetlb-book3e.h b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h
new file mode 100644
index 000000000000..ecd8694cb229
--- /dev/null
+++ b/arch/powerpc/include/asm/nohash/hugetlb-book3e.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H
+#define _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+ if (WARN_ON(!hugepd_ok(hpd)))
+ return NULL;
+
+ return (pte_t *)((hpd_val(hpd) & ~HUGEPD_SHIFT_MASK) | PD_HUGE);
+}
+
+static inline unsigned int hugepd_shift(hugepd_t hpd)
+{
+ return hpd_val(hpd) & HUGEPD_SHIFT_MASK;
+}
+
+static inline pte_t *hugepte_offset(hugepd_t hpd, unsigned long addr,
+ unsigned int pdshift)
+{
+ /*
+ * On FSL BookE, we have multiple higher-level table entries that
+ * point to the same hugepte. Just use the first one since they're all
+ * identical. So for that case, idx=0.
+ */
+ return hugepd_page(hpd);
+}
+
+void flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+
+static inline void hugepd_populate(hugepd_t *hpdp, pte_t *new, unsigned int pshift)
+{
+ /* We use the old format for PPC_FSL_BOOK3E */
+ *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
+}
+
+static inline int check_and_get_huge_psize(int shift)
+{
+ if (shift & 1) /* Not a power of 4 */
+ return -EINVAL;
+
+ return shift_to_mmu_psize(shift);
+}
+
+#endif /* _ASM_POWERPC_NOHASH_HUGETLB_BOOK3E_H */
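As context for the new header: hugepd_populate() packs the huge page-table pointer and its page shift into a single word (the shift lives in the low HUGEPD_SHIFT_MASK bits), and hugepd_shift()/hugepd_page() unpack it again; check_and_get_huge_psize() rejects odd shifts because the supported FSL Book3E huge pages come in power-of-4 sizes (4M = shift 22, 16M = shift 24, ...). A small sketch of the encoding, with an illustrative mask value rather than the real macro:

	/* Illustration only: assume the shift field occupies the low 6 bits. */
	#define DEMO_SHIFT_MASK	0x3fUL

	static unsigned long demo_pack(void *hugepte_table, unsigned int shift)
	{
		/* the table pointer is assumed aligned well beyond DEMO_SHIFT_MASK */
		return ((unsigned long)hugepte_table & ~DEMO_SHIFT_MASK) | shift;
	}

	static int demo_check_shift(int shift)
	{
		/* shift 22 (4M) is accepted; an odd shift such as 23 is not */
		return (shift & 1) ? -EINVAL : 0;
	}
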
diff --git a/arch/powerpc/include/asm/nohash/mmu-book3e.h b/arch/powerpc/include/asm/nohash/mmu-book3e.h
index e20072972e35..4c9777d256fb 100644
--- a/arch/powerpc/include/asm/nohash/mmu-book3e.h
+++ b/arch/powerpc/include/asm/nohash/mmu-book3e.h
@@ -306,6 +306,8 @@ extern int book3e_htw_mode;
#define mmu_cleanup_all NULL
+#define MAX_PHYSMEM_BITS 44
+
#endif
#endif /* !__ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/nohash/mmu.h b/arch/powerpc/include/asm/nohash/mmu.h
index a037cb1efb57..edc793e5f08f 100644
--- a/arch/powerpc/include/asm/nohash/mmu.h
+++ b/arch/powerpc/include/asm/nohash/mmu.h
@@ -2,10 +2,18 @@
#ifndef _ASM_POWERPC_NOHASH_MMU_H_
#define _ASM_POWERPC_NOHASH_MMU_H_
-#ifdef CONFIG_PPC64
-#include <asm/nohash/64/mmu.h>
-#else
-#include <asm/nohash/32/mmu.h>
+#if defined(CONFIG_40x)
+/* 40x-style software loaded TLB */
+#include <asm/nohash/32/mmu-40x.h>
+#elif defined(CONFIG_44x)
+/* 44x-style software loaded TLB */
+#include <asm/nohash/32/mmu-44x.h>
+#elif defined(CONFIG_PPC_BOOK3E_MMU)
+/* Freescale Book-E software loaded TLB or Book-3e (ISA 2.06+) MMU */
+#include <asm/nohash/mmu-book3e.h>
+#elif defined (CONFIG_PPC_8xx)
+/* Motorola/Freescale 8xx software loaded TLB */
+#include <asm/nohash/32/mmu-8xx.h>
#endif
#endif /* _ASM_POWERPC_NOHASH_MMU_H_ */
diff --git a/arch/powerpc/include/asm/nohash/pgalloc.h b/arch/powerpc/include/asm/nohash/pgalloc.h
index 0634f2949438..332b13b4ecdb 100644
--- a/arch/powerpc/include/asm/nohash/pgalloc.h
+++ b/arch/powerpc/include/asm/nohash/pgalloc.h
@@ -3,6 +3,7 @@
#define _ASM_POWERPC_NOHASH_PGALLOC_H
#include <linux/mm.h>
+#include <linux/slab.h>
extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
#ifdef CONFIG_PPC64
@@ -16,9 +17,64 @@ static inline void tlb_flush_pgtable(struct mmu_gather *tlb,
}
#endif /* !CONFIG_PPC_BOOK3E */
+static inline pgd_t *pgd_alloc(struct mm_struct *mm)
+{
+ return kmem_cache_alloc(PGT_CACHE(PGD_INDEX_SIZE),
+ pgtable_gfp_flags(mm, GFP_KERNEL));
+}
+
+static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
+{
+ kmem_cache_free(PGT_CACHE(PGD_INDEX_SIZE), pgd);
+}
+
#ifdef CONFIG_PPC64
#include <asm/nohash/64/pgalloc.h>
#else
#include <asm/nohash/32/pgalloc.h>
#endif
+
+static inline void pgtable_free(void *table, int shift)
+{
+ if (!shift) {
+ pte_fragment_free((unsigned long *)table, 0);
+ } else {
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ kmem_cache_free(PGT_CACHE(shift), table);
+ }
+}
+
+#define get_hugepd_cache_index(x) (x)
+
+#ifdef CONFIG_SMP
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ unsigned long pgf = (unsigned long)table;
+
+ BUG_ON(shift > MAX_PGTABLE_INDEX_SIZE);
+ pgf |= shift;
+ tlb_remove_table(tlb, (void *)pgf);
+}
+
+static inline void __tlb_remove_table(void *_table)
+{
+ void *table = (void *)((unsigned long)_table & ~MAX_PGTABLE_INDEX_SIZE);
+ unsigned shift = (unsigned long)_table & MAX_PGTABLE_INDEX_SIZE;
+
+ pgtable_free(table, shift);
+}
+
+#else
+static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
+{
+ pgtable_free(table, shift);
+}
+#endif
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
+ unsigned long address)
+{
+ tlb_flush_pgtable(tlb, address);
+ pgtable_free_tlb(tlb, table, 0);
+}
#endif /* _ASM_POWERPC_NOHASH_PGALLOC_H */
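The pgtable_free_tlb()/__tlb_remove_table() pair moved here defers the free through tlb_remove_table() by smuggling the cache index into the low bits of the (at least 16-byte aligned) table pointer and masking it back out on the other side. A short worked example of that round trip; the address is illustrative:

	/* e.g. a table at 0xc000000001a3c000 with index size 9 */
	unsigned long packed = 0xc000000001a3c000UL | 9;	/* -> 0xc000000001a3c009 */

	void *table = (void *)(packed & ~0xfUL);		/* back to ...c000 */
	int shift   = packed & 0xf;				/* 9: free via PGT_CACHE(9) */
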
diff --git a/arch/powerpc/include/asm/nohash/pte-book3e.h b/arch/powerpc/include/asm/nohash/pte-book3e.h
index dd40d200f274..813918f40765 100644
--- a/arch/powerpc/include/asm/nohash/pte-book3e.h
+++ b/arch/powerpc/include/asm/nohash/pte-book3e.h
@@ -60,13 +60,8 @@
#define _PAGE_SPECIAL _PAGE_SW0
/* Base page size */
-#ifdef CONFIG_PPC_64K_PAGES
-#define _PAGE_PSIZE _PAGE_PSIZE_64K
-#define PTE_RPN_SHIFT (28)
-#else
#define _PAGE_PSIZE _PAGE_PSIZE_4K
#define PTE_RPN_SHIFT (24)
-#endif
#define PTE_WIMGE_SHIFT (19)
#define PTE_BAP_SHIFT (2)
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h
index 870fb7b239ea..e1577cfa7186 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -186,8 +186,8 @@
#define OPAL_XIVE_FREE_IRQ 140
#define OPAL_XIVE_SYNC 141
#define OPAL_XIVE_DUMP 142
-#define OPAL_XIVE_RESERVED3 143
-#define OPAL_XIVE_RESERVED4 144
+#define OPAL_XIVE_GET_QUEUE_STATE 143
+#define OPAL_XIVE_SET_QUEUE_STATE 144
#define OPAL_SIGNAL_SYSTEM_RESET 145
#define OPAL_NPU_INIT_CONTEXT 146
#define OPAL_NPU_DESTROY_CONTEXT 147
@@ -209,8 +209,10 @@
#define OPAL_SENSOR_GROUP_ENABLE 163
#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR 164
#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR 165
+#define OPAL_HANDLE_HMI2 166
#define OPAL_NX_COPROC_INIT 167
-#define OPAL_LAST 167
+#define OPAL_XIVE_GET_VP_STATE 170
+#define OPAL_LAST 170
#define QUIESCE_HOLD 1 /* Spin all calls at entry */
#define QUIESCE_REJECT 2 /* Fail all calls with OPAL_BUSY */
@@ -634,6 +636,15 @@ struct OpalHMIEvent {
} u;
};
+/* OPAL_HANDLE_HMI2 out_flags */
+enum {
+ OPAL_HMI_FLAGS_TB_RESYNC = (1ull << 0), /* Timebase has been resynced */
+ OPAL_HMI_FLAGS_DEC_LOST = (1ull << 1), /* DEC lost, needs to be reprogrammed */
+ OPAL_HMI_FLAGS_HDEC_LOST = (1ull << 2), /* HDEC lost, needs to be reprogrammed */
+ OPAL_HMI_FLAGS_TOD_TB_FAIL = (1ull << 3), /* TOD/TB recovery failed. */
+ OPAL_HMI_FLAGS_NEW_EVENT = (1ull << 63), /* An event has been created */
+};
+
enum {
OPAL_P7IOC_DIAG_TYPE_NONE = 0,
OPAL_P7IOC_DIAG_TYPE_RGC = 1,
@@ -1118,6 +1129,7 @@ enum {
enum {
OPAL_IMC_COUNTERS_NEST = 1,
OPAL_IMC_COUNTERS_CORE = 2,
+ OPAL_IMC_COUNTERS_TRACE = 3,
};
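OPAL_HANDLE_HMI2 hands back a big-endian flag word, and the new enum names its bits. A hedged sketch of how a caller might test them (error handling and the surrounding HMI path are elided):

	__be64 out_flags = 0;

	if (opal_handle_hmi2(&out_flags) == OPAL_SUCCESS) {
		u64 flags = be64_to_cpu(out_flags);

		if (flags & OPAL_HMI_FLAGS_TB_RESYNC)
			;	/* timebase was resynchronized by firmware */
		if (flags & OPAL_HMI_FLAGS_DEC_LOST)
			;	/* decrementer must be reprogrammed */
	}
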
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index a55b01c90bb1..4cc37e708bc7 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -203,6 +203,7 @@ int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer,
int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
int64_t opal_sensor_read_u64(u32 sensor_hndl, int token, __be64 *sensor_data);
int64_t opal_handle_hmi(void);
+int64_t opal_handle_hmi2(__be64 *out_flags);
int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
int64_t opal_unregister_dump_region(uint32_t id);
int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
@@ -279,6 +280,13 @@ int64_t opal_xive_allocate_irq(uint32_t chip_id);
int64_t opal_xive_free_irq(uint32_t girq);
int64_t opal_xive_sync(uint32_t type, uint32_t id);
int64_t opal_xive_dump(uint32_t type, uint32_t id);
+int64_t opal_xive_get_queue_state(uint64_t vp, uint32_t prio,
+ __be32 *out_qtoggle,
+ __be32 *out_qindex);
+int64_t opal_xive_set_queue_state(uint64_t vp, uint32_t prio,
+ uint32_t qtoggle,
+ uint32_t qindex);
+int64_t opal_xive_get_vp_state(uint64_t vp, __be64 *out_w01);
int64_t opal_pci_set_p2p(uint64_t phb_init, uint64_t phb_target,
uint64_t desc, uint16_t pe_number);
@@ -352,6 +360,7 @@ int opal_power_control_init(void);
extern int opal_machine_check(struct pt_regs *regs);
extern bool opal_mce_check_early_recovery(struct pt_regs *regs);
extern int opal_hmi_exception_early(struct pt_regs *regs);
+extern int opal_hmi_exception_early2(struct pt_regs *regs);
extern int opal_handle_hmi_exception(struct pt_regs *regs);
extern void opal_shutdown(void);
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index e843bc5d1a0f..62f27e0aef7c 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -34,6 +34,8 @@
#include <asm/cpuidle.h>
#include <asm/atomic.h>
+#include <asm-generic/mmiowb_types.h>
+
register struct paca_struct *local_paca asm("r13");
#if defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_SMP)
@@ -171,9 +173,7 @@ struct paca_struct {
u16 trap_save; /* Used when bad stack is encountered */
u8 irq_soft_mask; /* mask for irq soft masking */
u8 irq_happened; /* irq happened while soft-disabled */
- u8 io_sync; /* writel() needs spin_unlock sync */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
- u8 nap_state_lost; /* NV GPR values lost in power7_idle */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
u8 pmcregs_in_use; /* pseries puts this in lppaca */
#endif
@@ -183,23 +183,28 @@ struct paca_struct {
#endif
#ifdef CONFIG_PPC_POWERNV
- /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
- u32 *core_idle_state_ptr;
- u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */
- /* Mask to indicate thread id in core */
- u8 thread_mask;
- /* Mask to denote subcore sibling threads */
- u8 subcore_sibling_mask;
- /* Flag to request this thread not to stop */
- atomic_t dont_stop;
- /* The PSSCR value that the kernel requested before going to stop */
- u64 requested_psscr;
-
- /*
- * Save area for additional SPRs that need to be
- * saved/restored during cpuidle stop.
- */
- struct stop_sprs stop_sprs;
+ /* PowerNV idle fields */
+ /* PNV_CORE_IDLE_* bits, all siblings work on thread 0 paca */
+ unsigned long idle_state;
+ union {
+ /* P7/P8 specific fields */
+ struct {
+ /* PNV_THREAD_RUNNING/NAP/SLEEP */
+ u8 thread_idle_state;
+ /* Mask to denote subcore sibling threads */
+ u8 subcore_sibling_mask;
+ };
+
+ /* P9 specific fields */
+ struct {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* The PSSCR value that the kernel requested before going to stop */
+ u64 requested_psscr;
+ /* Flag to request this thread not to stop */
+ atomic_t dont_stop;
+#endif
+ };
+ };
#endif
#ifdef CONFIG_PPC_BOOK3S_64
@@ -264,6 +269,9 @@ struct paca_struct {
#ifdef CONFIG_STACKPROTECTOR
unsigned long canary;
#endif
+#ifdef CONFIG_MMIOWB
+ struct mmiowb_state mmiowb_state;
+#endif
} ____cacheline_aligned;
extern void copy_mm_to_paca(struct mm_struct *mm);
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index ed870468ef6f..dbc8c0679480 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -28,11 +28,15 @@
#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT)
#ifndef __ASSEMBLY__
-#ifdef CONFIG_HUGETLB_PAGE
-extern bool hugetlb_disabled;
-extern unsigned int HPAGE_SHIFT;
-#else
+#ifndef CONFIG_HUGETLB_PAGE
#define HPAGE_SHIFT PAGE_SHIFT
+#elif defined(CONFIG_PPC_BOOK3S_64)
+extern unsigned int hpage_shift;
+#define HPAGE_SHIFT hpage_shift
+#elif defined(CONFIG_PPC_8xx)
+#define HPAGE_SHIFT 19 /* 512k pages */
+#elif defined(CONFIG_PPC_FSL_BOOK3E)
+#define HPAGE_SHIFT 22 /* 4M pages */
#endif
#define HPAGE_SIZE ((1UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
@@ -132,18 +136,7 @@ static inline bool pfn_valid(unsigned long pfn)
#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
-#ifdef CONFIG_PPC_BOOK3S_64
-/*
- * On hash the vmalloc and other regions alias to the kernel region when passed
- * through __pa(), which virt_to_pfn() uses. That means virt_addr_valid() can
- * return true for some vmalloc addresses, which is incorrect. So explicitly
- * check that the address is in the kernel region.
- */
-#define virt_addr_valid(kaddr) (REGION_ID(kaddr) == KERNEL_REGION_ID && \
- pfn_valid(virt_to_pfn(kaddr)))
-#else
#define virt_addr_valid(kaddr) pfn_valid(virt_to_pfn(kaddr))
-#endif
/*
* On Book-E parts we need __va to parse the device tree and we can't
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index e11f03007b57..2b2c60a1a66d 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -20,10 +20,61 @@ static inline gfp_t pgtable_gfp_flags(struct mm_struct *mm, gfp_t gfp)
#define PGALLOC_GFP (GFP_KERNEL | __GFP_ZERO)
+pte_t *pte_fragment_alloc(struct mm_struct *mm, int kernel);
+
+static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
+{
+ return (pte_t *)pte_fragment_alloc(mm, 1);
+}
+
+static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
+{
+ return (pgtable_t)pte_fragment_alloc(mm, 0);
+}
+
+void pte_frag_destroy(void *pte_frag);
+void pte_fragment_free(unsigned long *table, int kernel);
+
+static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
+{
+ pte_fragment_free((unsigned long *)pte, 1);
+}
+
+static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
+{
+ pte_fragment_free((unsigned long *)ptepage, 0);
+}
+
+/*
+ * Functions that deal with pagetables that could be at any level of
+ * the table need to be passed an "index_size" so they know how to
+ * handle allocation. For PTE pages, the allocation size will be
+ * (2^index_size * sizeof(pointer)) and allocations are drawn from
+ * the kmem_cache in PGT_CACHE(index_size).
+ *
+ * The maximum index size needs to be big enough to allow any
+ * pagetable sizes we need, but small enough to fit in the low bits of
+ * any page table pointer. In other words all pagetables, even tiny
+ * ones, must be aligned to allow at least enough low 0 bits to
+ * contain this value. This value is also used as a mask, so it must
+ * be one less than a power of two.
+ */
+#define MAX_PGTABLE_INDEX_SIZE 0xf
+
+extern struct kmem_cache *pgtable_cache[];
+#define PGT_CACHE(shift) pgtable_cache[shift]
+
+static inline void check_pgt_cache(void) { }
+
#ifdef CONFIG_PPC_BOOK3S
#include <asm/book3s/pgalloc.h>
#else
#include <asm/nohash/pgalloc.h>
#endif
+static inline pgtable_t pmd_pgtable(pmd_t pmd)
+{
+ return (pgtable_t)pmd_page_vaddr(pmd);
+}
+
#endif /* _ASM_POWERPC_PGALLOC_H */
diff --git a/arch/powerpc/include/asm/pgtable-be-types.h b/arch/powerpc/include/asm/pgtable-be-types.h
index a89c67b62680..b169bbf95fcb 100644
--- a/arch/powerpc/include/asm/pgtable-be-types.h
+++ b/arch/powerpc/include/asm/pgtable-be-types.h
@@ -33,11 +33,7 @@ static inline __be64 pmd_raw(pmd_t x)
return x.pmd;
}
-/*
- * 64 bit hash always use 4 level table. Everybody else use 4 level
- * only for 4K page size.
- */
-#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+/* 64 bit always use 4 level table. */
typedef struct { __be64 pud; } pud_t;
#define __pud(x) ((pud_t) { cpu_to_be64(x) })
#define __pud_raw(x) ((pud_t) { (x) })
@@ -51,7 +47,6 @@ static inline __be64 pud_raw(pud_t x)
return x.pud;
}
-#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
#endif /* CONFIG_PPC64 */
/* PGD level */
@@ -77,7 +72,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
*/
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
+#ifdef CONFIG_PPC_64K_PAGES
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
#else
typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable-types.h b/arch/powerpc/include/asm/pgtable-types.h
index 3b0edf041b2e..d11b4c61d686 100644
--- a/arch/powerpc/include/asm/pgtable-types.h
+++ b/arch/powerpc/include/asm/pgtable-types.h
@@ -23,18 +23,13 @@ static inline unsigned long pmd_val(pmd_t x)
return x.pmd;
}
-/*
- * 64 bit hash always use 4 level table. Everybody else use 4 level
- * only for 4K page size.
- */
-#if defined(CONFIG_PPC_BOOK3S_64) || !defined(CONFIG_PPC_64K_PAGES)
+/* 64 bit always use 4 level table. */
typedef struct { unsigned long pud; } pud_t;
#define __pud(x) ((pud_t) { (x) })
static inline unsigned long pud_val(pud_t x)
{
return x.pud;
}
-#endif /* CONFIG_PPC_BOOK3S_64 || !CONFIG_PPC_64K_PAGES */
#endif /* CONFIG_PPC64 */
/* PGD level */
@@ -54,7 +49,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
* With hash config 64k pages additionally define a bigger "real PTE" type that
* gathers the "second half" part of the PTE for pseudo 64k pages
*/
-#if defined(CONFIG_PPC_64K_PAGES) && defined(CONFIG_PPC_BOOK3S_64)
+#ifdef CONFIG_PPC_64K_PAGES
typedef struct { pte_t pte; unsigned long hidx; } real_pte_t;
#else
typedef struct { pte_t pte; } real_pte_t;
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 505550fb2935..3f53be60fb01 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -89,9 +89,6 @@ extern void paging_init(void);
*/
extern void update_mmu_cache(struct vm_area_struct *, unsigned long, pte_t *);
-extern int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write,
- struct page **pages, int *nr);
#ifndef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_large(pmd) 0
#endif
@@ -108,6 +105,12 @@ void mark_initmem_nx(void);
static inline void mark_initmem_nx(void) { }
#endif
+#ifdef CONFIG_PPC_DEBUG_WX
+void ptdump_check_wx(void);
+#else
+static inline void ptdump_check_wx(void) { }
+#endif
+
/*
* When used, PTE_FRAG_NR is defined in subarch pgtable.h
* so we are sure it is included when arriving here.
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 3351bcf42f2d..706ac5df546f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -164,6 +164,9 @@ struct thread_struct {
unsigned long rtas_sp; /* stack pointer for when in RTAS */
#endif
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ unsigned long kuap; /* opened segments for user access */
+#endif
/* Debug Registers */
struct debug_reg debug;
struct thread_fp_state fp_state;
@@ -411,14 +414,17 @@ static inline unsigned long get_clean_sp(unsigned long sp, int is_32)
}
#endif
+/* asm stubs */
+extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
+extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
+extern unsigned long isa206_idle_insn_mayloss(unsigned long type);
+
extern unsigned long cpuidle_disable;
enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
-extern unsigned long power7_idle_insn(unsigned long type); /* PNV_THREAD_NAP/etc*/
+
extern void power7_idle_type(unsigned long type);
-extern unsigned long power9_idle_stop(unsigned long psscr_val);
-extern unsigned long power9_offline_stop(unsigned long psscr_val);
extern void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask);
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 64271e562fed..6f047730e642 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -52,10 +52,17 @@ struct pt_regs
};
};
+ union {
+ struct {
#ifdef CONFIG_PPC64
- unsigned long ppr;
- unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */
+ unsigned long ppr;
+#endif
+#ifdef CONFIG_PPC_KUAP
+ unsigned long kuap;
#endif
+ };
+ unsigned long __pad[2]; /* Maintain 16 byte interrupt stack alignment */
+ };
};
#endif
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c5b2aff0ce8e..10caa145f98b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -168,6 +168,7 @@
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_PLS_SHIFT 60
#define PSSCR_GUEST_VIS 0xf0000000000003ffUL /* Guest-visible PSSCR fields */
#define PSSCR_FAKE_SUSPEND 0x00000400 /* Fake-suspend bit (P9 DD2.2) */
#define PSSCR_FAKE_SUSPEND_LG 10 /* Fake-suspend bit position */
@@ -758,10 +759,9 @@
#define SRR1_WAKERESET 0x00100000 /* System reset */
#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */
#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */
-#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained,
- * may not be recoverable */
-#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
-#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
+#define SRR1_WS_HVLOSS 0x00030000 /* HV resources not maintained */
+#define SRR1_WS_GPRLOSS 0x00020000 /* GPRs not maintained */
+#define SRR1_WS_NOLOSS 0x00010000 /* All resources maintained */
#define SRR1_PROGTM 0x00200000 /* TM Bad Thing */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h
index eb2a33d5df26..e382bd6ede84 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -41,7 +41,7 @@
#if defined(CONFIG_PPC_BOOK3E_64)
#define MSR_64BIT MSR_CM
-#define MSR_ (MSR_ME | MSR_CE)
+#define MSR_ (MSR_ME | MSR_RI | MSR_CE)
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
#define MSR_USER64 (MSR_USER32 | MSR_64BIT)
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
index 44816cbc4198..c6f466f4c241 100644
--- a/arch/powerpc/include/asm/slice.h
+++ b/arch/powerpc/include/asm/slice.h
@@ -4,9 +4,7 @@
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/slice.h>
-#elif defined(CONFIG_PPC64)
-#include <asm/nohash/64/slice.h>
-#elif defined(CONFIG_PPC_MMU_NOHASH)
+#elif defined(CONFIG_PPC_MMU_NOHASH_32)
#include <asm/nohash/32/slice.h>
#endif
@@ -38,6 +36,11 @@ void slice_setup_new_exec(void);
static inline void slice_init_new_context_exec(struct mm_struct *mm) {}
+static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
+{
+ return 0;
+}
+
#endif /* CONFIG_PPC_MM_SLICES */
#endif /* __ASSEMBLY__ */
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index 68da49320592..3192d454a733 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,9 +17,9 @@ extern int create_section_mapping(unsigned long start, unsigned long end, int ni
extern int remove_section_mapping(unsigned long start, unsigned long end);
#ifdef CONFIG_PPC_BOOK3S_64
-extern void resize_hpt_for_hotplug(unsigned long new_mem_size);
+extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
#else
-static inline void resize_hpt_for_hotplug(unsigned long new_mem_size) { }
+static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; }
#endif
#ifdef CONFIG_NUMA
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 685c72310f5d..15b39c407c4e 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -39,19 +39,6 @@
#define LOCK_TOKEN 1
#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SMP)
-#define CLEAR_IO_SYNC (get_paca()->io_sync = 0)
-#define SYNC_IO do { \
- if (unlikely(get_paca()->io_sync)) { \
- mb(); \
- get_paca()->io_sync = 0; \
- } \
- } while (0)
-#else
-#define CLEAR_IO_SYNC
-#define SYNC_IO
-#endif
-
#ifdef CONFIG_PPC_PSERIES
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
@@ -99,7 +86,6 @@ static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
- CLEAR_IO_SYNC;
return __arch_spin_trylock(lock) == 0;
}
@@ -130,7 +116,6 @@ extern void __rw_yield(arch_rwlock_t *lock);
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
- CLEAR_IO_SYNC;
while (1) {
if (likely(__arch_spin_trylock(lock) == 0))
break;
@@ -148,7 +133,6 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
{
unsigned long flags_dis;
- CLEAR_IO_SYNC;
while (1) {
if (likely(__arch_spin_trylock(lock) == 0))
break;
@@ -167,7 +151,6 @@ void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
- SYNC_IO;
__asm__ __volatile__("# arch_spin_unlock\n\t"
PPC_RELEASE_BARRIER: : :"memory");
lock->slock = 0;
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 1647de15a31e..9bf6dffb4090 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -4,14 +4,17 @@
#ifdef __KERNEL__
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_STRNCPY
#define __HAVE_ARCH_STRNCMP
+#define __HAVE_ARCH_MEMCHR
+#define __HAVE_ARCH_MEMCMP
+#define __HAVE_ARCH_MEMSET16
+#endif
+
#define __HAVE_ARCH_MEMSET
#define __HAVE_ARCH_MEMCPY
#define __HAVE_ARCH_MEMMOVE
-#define __HAVE_ARCH_MEMCMP
-#define __HAVE_ARCH_MEMCHR
-#define __HAVE_ARCH_MEMSET16
#define __HAVE_ARCH_MEMCPY_FLUSHCACHE
extern char * strcpy(char *,const char *);
@@ -27,7 +30,27 @@ extern int memcmp(const void *,const void *,__kernel_size_t);
extern void * memchr(const void *,int,__kernel_size_t);
extern void * memcpy_flushcache(void *,const void *,__kernel_size_t);
+void *__memset(void *s, int c, __kernel_size_t count);
+void *__memcpy(void *to, const void *from, __kernel_size_t n);
+void *__memmove(void *to, const void *from, __kernel_size_t n);
+
+#if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+/*
+ * For files that are not instrumented (e.g. mm/slub.c) we
+ * should use the non-instrumented versions of the mem* functions.
+ */
+#define memcpy(dst, src, len) __memcpy(dst, src, len)
+#define memmove(dst, src, len) __memmove(dst, src, len)
+#define memset(s, c, n) __memset(s, c, n)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
+#endif
+
#ifdef CONFIG_PPC64
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_MEMSET32
#define __HAVE_ARCH_MEMSET64
@@ -49,8 +72,11 @@ static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
{
return __memset64(p, v, n * 8);
}
+#endif
#else
+#ifndef CONFIG_KASAN
#define __HAVE_ARCH_STRLEN
+#endif
extern void *memset16(uint16_t *, uint16_t, __kernel_size_t);
#endif
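With CONFIG_KASAN, files built without instrumentation (__SANITIZE_ADDRESS__ unset, typically via a KASAN_SANITIZE_foo.o := n line in the Makefile) have memcpy/memmove/memset rewritten to the raw __mem* variants by the #defines above, so they neither bypass checks silently nor call into instrumented code they cannot safely use. A sketch of the effect in such a file; the buffer and function names are illustrative:

	#include <asm/string.h>

	static char dst[8];

	void demo(const char *src)
	{
		/* in a non-instrumented KASAN build this expands to __memcpy(dst, src, 8) */
		memcpy(dst, src, 8);
	}
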
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 1243045bad2d..a048fed0722f 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -94,9 +94,15 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->orig_gpr3 = args[0];
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
- int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+ int arch;
+
+ if (IS_ENABLED(CONFIG_PPC64) && !test_tsk_thread_flag(task, TIF_32BIT))
+ arch = AUDIT_ARCH_PPC64;
+ else
+ arch = AUDIT_ARCH_PPC;
+
#ifdef __LITTLE_ENDIAN__
arch |= __AUDIT_ARCH_LE;
#endif
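syscall_get_arch() now takes the task being inspected instead of assuming current, so audit and seccomp can classify another process: a 32-bit task on a 64-bit kernel reports AUDIT_ARCH_PPC, a 64-bit task AUDIT_ARCH_PPC64, and little-endian builds OR in __AUDIT_ARCH_LE. A brief usage sketch; the helper is hypothetical:

	static bool demo_is_compat_abi(struct task_struct *tsk)
	{
		int arch = syscall_get_arch(tsk);

		/* strip the endianness flag before comparing */
		return (arch & ~__AUDIT_ARCH_LE) == AUDIT_ARCH_PPC;
	}
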
diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h
index eab4779f6b84..c993482237ed 100644
--- a/arch/powerpc/include/asm/task_size_64.h
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -20,7 +20,7 @@
/*
* For now 512TB is only supported with book3s and 64K linux page size.
*/
-#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
+#ifdef CONFIG_PPC_64K_PAGES
/*
* Max value currently used:
*/
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 54bf7e68a7e1..57e968413d1e 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -36,6 +36,8 @@ extern unsigned long ppc_proc_freq;
extern unsigned long ppc_tb_freq;
#define DEFAULT_TB_FREQ 125000000UL
+extern bool tb_invalid;
+
struct div_result {
u64 result_high;
u64 result_low;
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index e24c67d5ba75..34fba1ce27f7 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -27,8 +27,8 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
+#define tlb_flush tlb_flush
extern void tlb_flush(struct mmu_gather *tlb);
/* Get the generic bits... */
@@ -46,22 +46,6 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
#endif
}
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
- if (!tlb->page_size)
- tlb->page_size = page_size;
- else if (tlb->page_size != page_size) {
- if (!tlb->fullmm)
- tlb_flush_mmu(tlb);
- /*
- * update the page size after flush for the new
- * mmu_gather.
- */
- tlb->page_size = page_size;
- }
-}
-
#ifdef CONFIG_SMP
static inline int mm_is_core_local(struct mm_struct *mm)
{
diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h
index 58ef8c43a89d..08cd60cd70b7 100644
--- a/arch/powerpc/include/asm/trace.h
+++ b/arch/powerpc/include/asm/trace.h
@@ -54,6 +54,22 @@ DEFINE_EVENT(ppc64_interrupt_class, timer_interrupt_exit,
TP_ARGS(regs)
);
+#ifdef CONFIG_PPC_DOORBELL
+DEFINE_EVENT(ppc64_interrupt_class, doorbell_entry,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs)
+);
+
+DEFINE_EVENT(ppc64_interrupt_class, doorbell_exit,
+
+ TP_PROTO(struct pt_regs *regs),
+
+ TP_ARGS(regs)
+);
+#endif
+
#ifdef CONFIG_PPC_PSERIES
extern int hcall_tracepoint_regfunc(void);
extern void hcall_tracepoint_unregfunc(void);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 4d6d905e9138..76f34346b642 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -6,6 +6,7 @@
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/extable.h>
+#include <asm/kup.h>
/*
* The fs value determines whether argument validity checking should be
@@ -140,6 +141,7 @@ extern long __put_user_bad(void);
#define __put_user_size(x, ptr, size, retval) \
do { \
retval = 0; \
+ allow_write_to_user(ptr, size); \
switch (size) { \
case 1: __put_user_asm(x, ptr, retval, "stb"); break; \
case 2: __put_user_asm(x, ptr, retval, "sth"); break; \
@@ -147,6 +149,7 @@ do { \
case 8: __put_user_asm2(x, ptr, retval); break; \
default: __put_user_bad(); \
} \
+ prevent_write_to_user(ptr, size); \
} while (0)
#define __put_user_nocheck(x, ptr, size) \
@@ -239,6 +242,7 @@ do { \
__chk_user_ptr(ptr); \
if (size > sizeof(x)) \
(x) = __get_user_bad(); \
+ allow_read_from_user(ptr, size); \
switch (size) { \
case 1: __get_user_asm(x, ptr, retval, "lbz"); break; \
case 2: __get_user_asm(x, ptr, retval, "lhz"); break; \
@@ -246,6 +250,7 @@ do { \
case 8: __get_user_asm2(x, ptr, retval); break; \
default: (x) = __get_user_bad(); \
} \
+ prevent_read_from_user(ptr, size); \
} while (0)
/*
@@ -305,15 +310,21 @@ extern unsigned long __copy_tofrom_user(void __user *to,
static inline unsigned long
raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
{
- return __copy_tofrom_user(to, from, n);
+ unsigned long ret;
+
+ allow_user_access(to, from, n);
+ ret = __copy_tofrom_user(to, from, n);
+ prevent_user_access(to, from, n);
+ return ret;
}
#endif /* __powerpc64__ */
static inline unsigned long raw_copy_from_user(void *to,
const void __user *from, unsigned long n)
{
+ unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- unsigned long ret = 1;
+ ret = 1;
switch (n) {
case 1:
@@ -338,14 +349,18 @@ static inline unsigned long raw_copy_from_user(void *to,
}
barrier_nospec();
- return __copy_tofrom_user((__force void __user *)to, from, n);
+ allow_read_from_user(from, n);
+ ret = __copy_tofrom_user((__force void __user *)to, from, n);
+ prevent_read_from_user(from, n);
+ return ret;
}
static inline unsigned long raw_copy_to_user(void __user *to,
const void *from, unsigned long n)
{
+ unsigned long ret;
if (__builtin_constant_p(n) && (n <= 8)) {
- unsigned long ret = 1;
+ ret = 1;
switch (n) {
case 1:
@@ -365,17 +380,24 @@ static inline unsigned long raw_copy_to_user(void __user *to,
return 0;
}
- return __copy_tofrom_user(to, (__force const void __user *)from, n);
+ allow_write_to_user(to, n);
+ ret = __copy_tofrom_user(to, (__force const void __user *)from, n);
+ prevent_write_to_user(to, n);
+ return ret;
}
extern unsigned long __clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
{
+ unsigned long ret = size;
might_fault();
- if (likely(access_ok(addr, size)))
- return __clear_user(addr, size);
- return size;
+ if (likely(access_ok(addr, size))) {
+ allow_write_to_user(addr, size);
+ ret = __clear_user(addr, size);
+ prevent_write_to_user(addr, size);
+ }
+ return ret;
}
extern long strncpy_from_user(char *dst, const char __user *src, long count);
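All of the uaccess hunks above follow the same shape: open the user window with allow_*_user() immediately before the access and close it with prevent_*_user() immediately afterwards, so with KUAP enabled the kernel can only touch user memory inside that bracket. A minimal sketch of the pattern; do_raw_user_store() is a hypothetical stand-in for the real asm accessor:

	static inline int demo_store_u32(u32 __user *p, u32 val)
	{
		int err;

		allow_write_to_user(p, sizeof(*p));	/* KUAP: open user access */
		err = do_raw_user_store(p, val);	/* hypothetical raw access */
		prevent_write_to_user(p, sizeof(*p));	/* KUAP: close it again */
		return err;
	}
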
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 3c704f5dd3ae..b579a943407b 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -109,12 +109,26 @@ extern int xive_native_configure_queue(u32 vp_id, struct xive_q *q, u8 prio,
extern void xive_native_disable_queue(u32 vp_id, struct xive_q *q, u8 prio);
extern void xive_native_sync_source(u32 hw_irq);
+extern void xive_native_sync_queue(u32 hw_irq);
extern bool is_xive_irq(struct irq_chip *chip);
extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
extern int xive_native_disable_vp(u32 vp_id);
extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 *out_chip_id);
extern bool xive_native_has_single_escalation(void);
+extern int xive_native_get_queue_info(u32 vp_id, uint32_t prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags);
+
+extern int xive_native_get_queue_state(u32 vp_id, uint32_t prio, u32 *qtoggle,
+ u32 *qindex);
+extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
+ u32 qindex);
+extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+
#else
static inline bool xive_enabled(void) { return false; }
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index cddadccf551d..0ea6c4aa3a20 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -31,6 +31,18 @@ CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
endif
+KASAN_SANITIZE_early_32.o := n
+KASAN_SANITIZE_cputable.o := n
+KASAN_SANITIZE_prom_init.o := n
+KASAN_SANITIZE_btext.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
+endif
+
obj-y := cputable.o ptrace.o syscalls.o \
irq.o align.o signal_32.o pmc.o vdso.o \
process.o systbl.o idle.o \
@@ -93,7 +105,7 @@ extra-y += vmlinux.lds
obj-$(CONFIG_RELOCATABLE) += reloc_$(BITS).o
-obj-$(CONFIG_PPC32) += entry_32.o setup_32.o
+obj-$(CONFIG_PPC32) += entry_32.o setup_32.o early_32.o
obj-$(CONFIG_PPC64) += dma-iommu.o iommu.o
obj-$(CONFIG_KGDB) += kgdb.o
obj-$(CONFIG_BOOTX_TEXT) += btext.o
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 86a61e5f8285..8e02444e9d3d 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -147,6 +147,9 @@ int main(void)
#if defined(CONFIG_KVM) && defined(CONFIG_BOOKE)
OFFSET(THREAD_KVM_VCPU, thread_struct, kvm_vcpu);
#endif
+#if defined(CONFIG_PPC_BOOK3S_32) && defined(CONFIG_PPC_KUAP)
+ OFFSET(KUAP, thread_struct, kuap);
+#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
OFFSET(PACATMSCRATCH, paca_struct, tm_scratch);
@@ -268,7 +271,6 @@ int main(void)
OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
- OFFSET(PACA_NAPSTATELOST, paca_struct, nap_state_lost);
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
#else /* CONFIG_PPC64 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
@@ -332,6 +334,10 @@ int main(void)
STACK_PT_REGS_OFFSET(_PPR, ppr);
#endif /* CONFIG_PPC64 */
+#ifdef CONFIG_PPC_KUAP
+ STACK_PT_REGS_OFFSET(STACK_REGS_KUAP, kuap);
+#endif
+
#if defined(CONFIG_PPC32)
#if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
DEFINE(EXC_LVL_SIZE, STACK_EXC_LVL_FRAME_SIZE);
@@ -766,23 +772,6 @@ int main(void)
OFFSET(VCPU_TIMING_LAST_ENTER_TBL, kvm_vcpu, arch.timing_last_enter.tv32.tbl);
#endif
-#ifdef CONFIG_PPC_POWERNV
- OFFSET(PACA_CORE_IDLE_STATE_PTR, paca_struct, core_idle_state_ptr);
- OFFSET(PACA_THREAD_IDLE_STATE, paca_struct, thread_idle_state);
- OFFSET(PACA_THREAD_MASK, paca_struct, thread_mask);
- OFFSET(PACA_SUBCORE_SIBLING_MASK, paca_struct, subcore_sibling_mask);
- OFFSET(PACA_REQ_PSSCR, paca_struct, requested_psscr);
- OFFSET(PACA_DONT_STOP, paca_struct, dont_stop);
-#define STOP_SPR(x, f) OFFSET(x, paca_struct, stop_sprs.f)
- STOP_SPR(STOP_PID, pid);
- STOP_SPR(STOP_LDBAR, ldbar);
- STOP_SPR(STOP_FSCR, fscr);
- STOP_SPR(STOP_HFSCR, hfscr);
- STOP_SPR(STOP_MMCR1, mmcr1);
- STOP_SPR(STOP_MMCR2, mmcr2);
- STOP_SPR(STOP_MMCRA, mmcra);
-#endif
-
DEFINE(PPC_DBELL_SERVER, PPC_DBELL_SERVER);
DEFINE(PPC_DBELL_MSGTYPE, PPC_DBELL_MSGTYPE);
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 53102764fd2f..f2ed3ef4b129 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -759,23 +759,22 @@ static void cacheinfo_create_index_dir(struct cache *cache, int index,
index_dir = kzalloc(sizeof(*index_dir), GFP_KERNEL);
if (!index_dir)
- goto err;
+ return;
index_dir->cache = cache;
rc = kobject_init_and_add(&index_dir->kobj, &cache_index_type,
cache_dir->kobj, "index%d", index);
- if (rc)
- goto err;
+ if (rc) {
+ kobject_put(&index_dir->kobj);
+ kfree(index_dir);
+ return;
+ }
index_dir->next = cache_dir->index;
cache_dir->index = index_dir;
cacheinfo_create_index_opt_attrs(index_dir);
-
- return;
-err:
- kfree(index_dir);
}
static void cacheinfo_sysfs_populate(unsigned int cpu_id,
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index 1eab54bc6ee9..cd12f362b61f 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -2147,7 +2147,11 @@ void __init set_cur_cpu_spec(struct cpu_spec *s)
struct cpu_spec *t = &the_cpu_spec;
t = PTRRELOC(t);
- *t = *s;
+ /*
+ * use memcpy() instead of *t = *s so that GCC replaces it
+ * by __memcpy() when KASAN is active
+ */
+ memcpy(t, s, sizeof(*t));
*PTRRELOC(&cur_cpu_spec) = &the_cpu_spec;
}
@@ -2161,8 +2165,11 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset,
t = PTRRELOC(t);
old = *t;
- /* Copy everything, then do fixups */
- *t = *s;
+ /*
+ * Copy everything, then do fixups. Use memcpy() instead of *t = *s
+ * so that GCC replaces it by __memcpy() when KASAN is active
+ */
+ memcpy(t, s, sizeof(*t));
/*
* If we are overriding a previous value derived from the real
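The cputable.c hunks swap structure assignment for an explicit memcpy() so that, in this KASAN-excluded file, <asm/string.h> can redirect the call to __memcpy(); a plain *t = *s may be lowered by GCC to a call to the instrumented builtin memcpy, which this early-boot path cannot safely use. A sketch of the distinction, with an illustrative struct:

	struct demo_spec { unsigned long features; char name[16]; };

	void demo_copy(struct demo_spec *dst, const struct demo_spec *src)
	{
		/* *dst = *src; -- may be emitted as a call to the instrumented builtin memcpy */

		/* explicit call: rewritten to __memcpy() in non-instrumented KASAN files */
		memcpy(dst, src, sizeof(*dst));
	}
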
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index b6fe883b1016..5ec3b3835925 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -18,6 +18,7 @@
#include <asm/dbell.h>
#include <asm/irq_regs.h>
#include <asm/kvm_ppc.h>
+#include <asm/trace.h>
#ifdef CONFIG_SMP
@@ -81,6 +82,7 @@ void doorbell_exception(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
+ trace_doorbell_entry(regs);
ppc_msgsync();
@@ -91,6 +93,7 @@ void doorbell_exception(struct pt_regs *regs)
smp_ipi_demux_relaxed(); /* already performed the barrier */
+ trace_doorbell_exit(regs);
irq_exit();
set_irq_regs(old_regs);
}
diff --git a/arch/powerpc/kernel/early_32.c b/arch/powerpc/kernel/early_32.c
new file mode 100644
index 000000000000..3482118ffe76
--- /dev/null
+++ b/arch/powerpc/kernel/early_32.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Early init before relocation
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/asm-prototypes.h>
+
+/*
+ * We're called here very early in the boot.
+ *
+ * Note that the kernel may be running at an address which is different
+ * from the address that it was linked at, so we must use RELOC/PTRRELOC
+ * to access static data (including strings). -- paulus
+ */
+notrace unsigned long __init early_init(unsigned long dt_ptr)
+{
+ unsigned long offset = reloc_offset();
+
+ /* First zero the BSS */
+ memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+
+ /*
+ * Identify the CPU type and fix up code sections
+ * that depend on which cpu we have.
+ */
+ identify_cpu(offset, mfspr(SPRN_PVR));
+
+ apply_feature_fixups();
+
+ return KERNELBASE + offset;
+}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index b61cfd29c76f..c18f3490a77e 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -36,15 +36,10 @@
#include <asm/asm-405.h>
#include <asm/feature-fixups.h>
#include <asm/barrier.h>
+#include <asm/kup.h>
+#include <asm/bug.h>
-/*
- * MSR_KERNEL is > 0x10000 on 4xx/Book-E since it include MSR_CE.
- */
-#if MSR_KERNEL >= 0x10000
-#define LOAD_MSR_KERNEL(r, x) lis r,(x)@h; ori r,r,(x)@l
-#else
-#define LOAD_MSR_KERNEL(r, x) li r,(x)
-#endif
+#include "head_32.h"
/*
 * Align to 4k in order to ensure that all functions modifying srr0/srr1
@@ -150,8 +145,8 @@ transfer_to_handler:
stw r12,_CTR(r11)
stw r2,_XER(r11)
mfspr r12,SPRN_SPRG_THREAD
- addi r2,r12,-THREAD
beq 2f /* if from user, fix up THREAD.regs */
+ addi r2, r12, -THREAD
addi r11,r1,STACK_FRAME_OVERHEAD
stw r11,PT_REGS(r12)
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
@@ -161,6 +156,9 @@ transfer_to_handler:
andis. r12,r12,DBCR0_IDM@h
#endif
ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_lock r11, r12
+#endif
#if defined(CONFIG_40x) || defined(CONFIG_BOOKE)
beq+ 3f
/* From user and task is ptraced - load up global dbcr0 */
@@ -186,6 +184,8 @@ transfer_to_handler:
2: /* if from kernel, check interrupted DOZE/NAP mode and
* check for stack overflow
*/
+ kuap_save_and_lock r11, r12, r9, r2, r0
+ addi r2, r12, -THREAD
lwz r9,KSP_LIMIT(r12)
cmplw r1,r9 /* if r1 <= ksp_limit */
ble- stack_ovf /* then the kernel stack overflowed */
@@ -207,26 +207,43 @@ transfer_to_handler_cont:
mtspr SPRN_NRI, r0
#endif
#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * When tracing IRQ state (lockdep) we enable the MMU before we call
+ * the IRQ tracing functions as they might access vmalloc space or
+ * perform IOs for console output.
+ *
+ * To speed up the syscall path where interrupts stay on, let's check
+ * first if we are changing the MSR value at all.
+ */
+ tophys(r12, r1)
+ lwz r12,_MSR(r12)
+ andi. r12,r12,MSR_EE
+ bne 1f
+
+ /* MSR isn't changing, just transition directly */
+#endif
+ mtspr SPRN_SRR0,r11
+ mtspr SPRN_SRR1,r10
+ mtlr r9
+ SYNC
+ RFI /* jump to handler, enable MMU */
+
+#ifdef CONFIG_TRACE_IRQFLAGS
+1: /* MSR is changing, re-enable MMU so we can notify lockdep. We need to
+ * keep interrupts disabled at this point otherwise we might risk
+ * taking an interrupt before we tell lockdep they are enabled.
+ */
lis r12,reenable_mmu@h
ori r12,r12,reenable_mmu@l
+ LOAD_MSR_KERNEL(r0, MSR_KERNEL)
mtspr SPRN_SRR0,r12
- mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR1,r0
SYNC
RFI
-reenable_mmu: /* re-enable mmu so we can */
- mfmsr r10
- lwz r12,_MSR(r1)
- xor r10,r10,r12
- andi. r10,r10,MSR_EE /* Did EE change? */
- beq 1f
+reenable_mmu:
/*
- * The trace_hardirqs_off will use CALLER_ADDR0 and CALLER_ADDR1.
- * If from user mode there is only one stack frame on the stack, and
- * accessing CALLER_ADDR1 will cause oops. So we need create a dummy
- * stack frame to make trace_hardirqs_off happy.
- *
- * This is handy because we also need to save a bunch of GPRs,
+ * We save a bunch of GPRs,
* r3 can be different from GPR3(r1) at this point, r9 and r11
 * contain the old MSR and handler address respectively,
* r4 & r5 can contain page fault arguments that need to be passed
@@ -234,14 +251,19 @@ reenable_mmu: /* re-enable mmu so we can */
* they aren't useful past this point (aren't syscall arguments),
* the rest is restored from the exception frame.
*/
+
stwu r1,-32(r1)
stw r9,8(r1)
stw r11,12(r1)
stw r3,16(r1)
stw r4,20(r1)
stw r5,24(r1)
- bl trace_hardirqs_off
- lwz r5,24(r1)
+
+ /* If we are disabling interrupts (normal case), simply log it with
+ * lockdep
+ */
+1: bl trace_hardirqs_off
+2: lwz r5,24(r1)
lwz r4,20(r1)
lwz r3,16(r1)
lwz r11,12(r1)
@@ -251,15 +273,9 @@ reenable_mmu: /* re-enable mmu so we can */
lwz r6,GPR6(r1)
lwz r7,GPR7(r1)
lwz r8,GPR8(r1)
-1: mtctr r11
+ mtctr r11
mtlr r9
bctr /* jump to handler */
-#else /* CONFIG_TRACE_IRQFLAGS */
- mtspr SPRN_SRR0,r11
- mtspr SPRN_SRR1,r10
- mtlr r9
- SYNC
- RFI /* jump to handler, enable MMU */
#endif /* CONFIG_TRACE_IRQFLAGS */
#if defined (CONFIG_PPC_BOOK3S_32) || defined(CONFIG_E500)
@@ -272,6 +288,7 @@ reenable_mmu: /* re-enable mmu so we can */
lwz r9,_MSR(r11) /* if sleeping, clear MSR.EE */
rlwinm r9,r9,0,~MSR_EE
lwz r12,_LINK(r11) /* and return to address in LR */
+ kuap_restore r11, r2, r3, r4, r5
b fast_exception_return
#endif
@@ -301,6 +318,33 @@ stack_ovf:
SYNC
RFI
+#ifdef CONFIG_TRACE_IRQFLAGS
+trace_syscall_entry_irq_off:
+ /*
+ * Syscalls shouldn't happen while interrupts are disabled,
+ * so let's do a warning here.
+ */
+0: trap
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
+ bl trace_hardirqs_on
+
+ /* Now enable for real */
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+ mtmsr r10
+
+ REST_GPR(0, r1)
+ REST_4GPRS(3, r1)
+ REST_2GPRS(7, r1)
+ b DoSyscall
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
+ .globl transfer_to_syscall
+transfer_to_syscall:
+#ifdef CONFIG_TRACE_IRQFLAGS
+ andi. r12,r9,MSR_EE
+ beq- trace_syscall_entry_irq_off
+#endif /* CONFIG_TRACE_IRQFLAGS */
+
/*
* Handle a system call.
*/
@@ -312,33 +356,14 @@ _GLOBAL(DoSyscall)
stw r3,ORIG_GPR3(r1)
li r12,0
stw r12,RESULT(r1)
- lwz r11,_CCR(r1) /* Clear SO bit in CR */
- rlwinm r11,r11,0,4,2
- stw r11,_CCR(r1)
#ifdef CONFIG_TRACE_IRQFLAGS
- /* Return from syscalls can (and generally will) hard enable
- * interrupts. You aren't supposed to call a syscall with
- * interrupts disabled in the first place. However, to ensure
- * that we get it right vs. lockdep if it happens, we force
- * that hard enable here with appropriate tracing if we see
- * that we have been called with interrupts off
- */
+ /* Make sure interrupts are enabled */
mfmsr r11
andi. r12,r11,MSR_EE
- bne+ 1f
- /* We came in with interrupts disabled, we enable them now */
- bl trace_hardirqs_on
- mfmsr r11
- lwz r0,GPR0(r1)
- lwz r3,GPR3(r1)
- lwz r4,GPR4(r1)
- ori r11,r11,MSR_EE
- lwz r5,GPR5(r1)
- lwz r6,GPR6(r1)
- lwz r7,GPR7(r1)
- lwz r8,GPR8(r1)
- mtmsr r11
-1:
+ /* We came in with interrupts disabled; WARN and mark them enabled
+ * for lockdep now */
+0: tweqi r12, 0
+ EMIT_BUG_ENTRY 0b,__FILE__,__LINE__, BUGFLAG_WARNING
#endif /* CONFIG_TRACE_IRQFLAGS */
lwz r11,TI_FLAGS(r2)
andi. r11,r11,_TIF_SYSCALL_DOTRACE
@@ -392,8 +417,7 @@ syscall_exit_cont:
lwz r8,_MSR(r1)
#ifdef CONFIG_TRACE_IRQFLAGS
/* If we are going to return from the syscall with interrupts
- * off, we trace that here. It shouldn't happen though but we
- * want to catch the bugger if it does right ?
+ * off, we trace that here. It shouldn't normally happen.
*/
andi. r10,r8,MSR_EE
bne+ 1f
@@ -422,12 +446,11 @@ BEGIN_FTR_SECTION
lwarx r7,0,r1
END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
stwcx. r0,0,r1 /* to clear the reservation */
-#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
- andi. r4,r8,MSR_PR
- beq 3f
ACCOUNT_CPU_USER_EXIT(r2, r5, r7)
-3:
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_unlock r5, r7
#endif
+ kuap_check r2, r4
lwz r4,_LINK(r1)
lwz r5,_CCR(r1)
mtlr r4
@@ -678,6 +701,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_SPE)
stw r10,_CCR(r1)
stw r1,KSP(r3) /* Set old stack pointer */
+ kuap_check r2, r4
#ifdef CONFIG_SMP
/* We need a sync somewhere here to make sure that if the
* previous task gets rescheduled on another CPU, it sees all
@@ -820,6 +844,9 @@ restore_user:
bnel- load_dbcr0
#endif
ACCOUNT_CPU_USER_EXIT(r2, r10, r11)
+#ifdef CONFIG_PPC_BOOK3S_32
+ kuep_unlock r10, r11
+#endif
b restore
@@ -866,12 +893,12 @@ resume_kernel:
/* check current_thread_info->preempt_count */
lwz r0,TI_PREEMPT(r2)
cmpwi 0,r0,0 /* if non-zero, just restore regs and return */
- bne restore
+ bne restore_kuap
andi. r8,r8,_TIF_NEED_RESCHED
- beq+ restore
+ beq+ restore_kuap
lwz r3,_MSR(r1)
andi. r0,r3,MSR_EE /* interrupts off? */
- beq restore /* don't schedule if so */
+ beq restore_kuap /* don't schedule if so */
#ifdef CONFIG_TRACE_IRQFLAGS
/* Lockdep thinks irqs are enabled, we need to call
* preempt_schedule_irq with IRQs off, so we inform lockdep
@@ -879,10 +906,7 @@ resume_kernel:
*/
bl trace_hardirqs_off
#endif
-1: bl preempt_schedule_irq
- lwz r3,TI_FLAGS(r2)
- andi. r0,r3,_TIF_NEED_RESCHED
- bne- 1b
+ bl preempt_schedule_irq
#ifdef CONFIG_TRACE_IRQFLAGS
/* And now, to properly rebalance the above, we tell lockdep they
* are being turned back on, which will happen when we return
@@ -890,6 +914,8 @@ resume_kernel:
bl trace_hardirqs_on
#endif
#endif /* CONFIG_PREEMPT */
+restore_kuap:
+ kuap_restore r1, r2, r9, r10, r0
/* interrupts are hard-disabled at this point */
restore:
@@ -913,28 +939,14 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_47x)
* off in this assembly code while peeking at TI_FLAGS() and such. However
* we need to inform it if the exception turned interrupts off, and we
 * are about to turn them back on.
- *
- * The problem here sadly is that we don't know whether the exceptions was
- * one that turned interrupts off or not. So we always tell lockdep about
- * turning them on here when we go back to wherever we came from with EE
- * on, even if that may meen some redudant calls being tracked. Maybe later
- * we could encode what the exception did somewhere or test the exception
- * type in the pt_regs but that sounds overkill
*/
andi. r10,r9,MSR_EE
beq 1f
- /*
- * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
- * which is the stack frame here, we need to force a stack frame
- * in case we came from user space.
- */
stwu r1,-32(r1)
mflr r0
stw r0,4(r1)
- stwu r1,-32(r1)
bl trace_hardirqs_on
- lwz r1,0(r1)
- lwz r1,0(r1)
+ addi r1, r1, 32
lwz r9,_MSR(r1)
1:
#endif /* CONFIG_TRACE_IRQFLAGS */
@@ -1197,6 +1209,7 @@ load_dbcr0:
.section .bss
.align 4
+ .global global_dbcr0
global_dbcr0:
.space 8*NR_CPUS
.previous
@@ -1207,9 +1220,10 @@ do_work: /* r10 contains MSR_KERNEL here */
beq do_user_signal
do_resched: /* r10 contains MSR_KERNEL here */
- /* Note: We don't need to inform lockdep that we are enabling
- * interrupts here. As far as it knows, they are already enabled
- */
+#ifdef CONFIG_TRACE_IRQFLAGS
+ bl trace_hardirqs_on
+ mfmsr r10
+#endif
ori r10,r10,MSR_EE
SYNC
MTMSRD(r10) /* hard-enable interrupts */
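
A rough C-level analogue of the interrupt-state check that DoSyscall now performs (illustrative only; check_syscall_irq_state() is a made-up helper, not something this patch adds): the tweqi/EMIT_BUG_ENTRY pair behaves like a WARN_ON() that fires when a syscall arrives with external interrupts disabled.

	/*
	 * Illustrative sketch only, not part of the patch: approximates the
	 * tweqi + EMIT_BUG_ENTRY pair added to DoSyscall above.
	 */
	#include <linux/kconfig.h>
	#include <linux/bug.h>
	#include <asm/reg.h>

	static inline void check_syscall_irq_state(void)
	{
		if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS))
			/* syscalls must arrive with external interrupts enabled */
			WARN_ON(!(mfmsr() & MSR_EE));
	}
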
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 15c67d2c0534..d978af78bf2a 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -46,6 +46,7 @@
#include <asm/exception-64e.h>
#endif
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
/*
* System calls.
@@ -120,6 +121,9 @@ END_BTB_FLUSH_SECTION
addi r9,r1,STACK_FRAME_OVERHEAD
ld r11,exception_marker@toc(r2)
std r11,-16(r9) /* "regshere" marker */
+
+ kuap_check_amr r10, r11
+
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC_SPLPAR)
BEGIN_FW_FTR_SECTION
beq 33f
@@ -275,6 +279,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
andi. r6,r8,MSR_PR
ld r4,_LINK(r1)
+ kuap_check_amr r10, r11
+
#ifdef CONFIG_PPC_BOOK3S
/*
* Clear MSR_RI, MSR_EE is already and remains disabled. We could do
@@ -296,6 +302,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r8, PACATMSCRATCH(r13)
#endif
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
ld r13,GPR13(r1) /* only restore r13 if returning to usermode */
ld r2,GPR2(r1)
ld r1,GPR1(r1)
@@ -306,8 +316,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
RFI_TO_USER
b . /* prevent speculative execution */
- /* exit to kernel */
-1: ld r2,GPR2(r1)
+1: /* exit to kernel */
+ kuap_restore_amr r2
+
+ ld r2,GPR2(r1)
ld r1,GPR1(r1)
mtlr r4
mtcr r5
@@ -594,6 +606,8 @@ _GLOBAL(_switch)
std r23,_CCR(r1)
std r1,KSP(r3) /* Set old stack pointer */
+ kuap_check_amr r9, r10
+
FLUSH_COUNT_CACHE
/*
@@ -851,13 +865,7 @@ resume_kernel:
* sure we are soft-disabled first and reconcile irq state.
*/
RECONCILE_IRQ_STATE(r3,r4)
-1: bl preempt_schedule_irq
-
- /* Re-test flags and eventually loop */
- ld r9, PACA_THREAD_INFO(r13)
- ld r4,TI_FLAGS(r9)
- andi. r0,r4,_TIF_NEED_RESCHED
- bne 1b
+ bl preempt_schedule_irq
/*
* arch_local_irq_restore() from preempt_schedule_irq above may
@@ -942,6 +950,8 @@ fast_exception_return:
ld r4,_XER(r1)
mtspr SPRN_XER,r4
+ kuap_check_amr r5, r6
+
REST_8GPRS(5, r1)
andi. r0,r3,MSR_RI
@@ -974,6 +984,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
+ /*
+ * We don't need to restore AMR on the way back to userspace for KUAP.
+ * The value of AMR only matters while we're in the kernel.
+ */
mtspr SPRN_SRR1,r3
ld r2,_CCR(r1)
@@ -1006,6 +1020,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ld r0,GPR0(r1)
ld r2,GPR2(r1)
ld r3,GPR3(r1)
+
+ kuap_restore_amr r4
+
ld r4,GPR4(r1)
ld r1,GPR1(r1)
RFI_TO_KERNEL
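
Both entry_32.S and entry_64.S drop the retry loop that used to re-test _TIF_NEED_RESCHED around the call to preempt_schedule_irq. That is safe because the scheduler-side function already loops until need_resched() is clear. A toy standalone model of that behaviour (the stubs are made up; only the loop shape mirrors kernel/sched/core.c):

	#include <stdbool.h>
	#include <stdio.h>

	static int pending = 2;				/* pretend two reschedules are queued */
	static bool need_resched(void) { return pending > 0; }
	static void __schedule(void)   { pending--; printf("scheduled\n"); }

	/* Models the loop inside preempt_schedule_irq(): it keeps scheduling
	 * until need_resched() is clear, so the assembly caller no longer has
	 * to reload TI_FLAGS and branch back itself. */
	static void preempt_schedule_irq_model(void)
	{
		do {
			__schedule();
		} while (need_resched());
	}

	int main(void)
	{
		preempt_schedule_irq_model();		/* prints "scheduled" twice */
		return 0;
	}
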
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 9481a117e242..6b86055e5251 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -19,6 +19,7 @@
#include <asm/cpuidle.h>
#include <asm/head-64.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
/*
* There are a few constraints to be concerned with.
@@ -120,7 +121,9 @@ EXC_VIRT_NONE(0x4000, 0x100)
mfspr r10,SPRN_SRR1 ; \
rlwinm. r10,r10,47-31,30,31 ; \
beq- 1f ; \
- cmpwi cr3,r10,2 ; \
+ cmpwi cr1,r10,2 ; \
+ mfspr r3,SPRN_SRR1 ; \
+ bltlr cr1 ; /* no state loss, return to idle caller */ \
BRANCH_TO_C000(r10, system_reset_idle_common) ; \
1: \
KVMTEST_PR(n) ; \
@@ -144,8 +147,11 @@ TRAMP_KVM(PACA_EXNMI, 0x100)
#ifdef CONFIG_PPC_P7_NAP
EXC_COMMON_BEGIN(system_reset_idle_common)
- mfspr r12,SPRN_SRR1
- b pnv_powersave_wakeup
+ /*
+ * This must be a direct branch (without linker branch stub) because
+	 * we cannot use the TOC at this point as r2 may not be restored yet.
+ */
+ b idle_return_gpr_loss
#endif
/*
@@ -309,6 +315,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
mfspr r11,SPRN_DSISR /* Save DSISR */
std r11,_DSISR(r1)
std r9,_CCR(r1) /* Save CR in stackframe */
+ kuap_save_amr_and_lock r9, r10, cr1
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr r11 /* get MSR value */
@@ -427,17 +434,17 @@ EXC_COMMON_BEGIN(machine_check_idle_common)
* Then decrement MCE nesting after finishing with the stack.
*/
ld r3,_MSR(r1)
+ ld r4,_LINK(r1)
lhz r11,PACA_IN_MCE(r13)
subi r11,r11,1
sth r11,PACA_IN_MCE(r13)
- /* Turn off the RI bit because SRR1 is used by idle wakeup code. */
- /* Recoverability could be improved by reducing the use of SRR1. */
- li r11,0
- mtmsrd r11,1
-
- b pnv_powersave_wakeup_mce
+ mtlr r4
+ rlwinm r10,r3,47-31,30,31
+ cmpwi cr1,r10,2
+ bltlr cr1 /* no state loss, return to idle caller */
+ b idle_return_gpr_loss
#endif
/*
* Handle machine check early in real mode. We come here with
@@ -1109,6 +1116,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
mfspr r11,SPRN_HSRR0 /* Save HSRR0 */
mfspr r12,SPRN_HSRR1 /* Save HSRR1 */
EXCEPTION_PROLOG_COMMON_1()
+ /* We don't touch AMR here, we never go to virtual mode */
EXCEPTION_PROLOG_COMMON_2(PACA_EXGEN)
EXCEPTION_PROLOG_COMMON_3(0xe60)
addi r3,r1,STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 45a8d0be1c96..25f063f56ec5 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -36,6 +36,7 @@
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cma.h>
+#include <linux/hugetlb.h>
#include <asm/debugfs.h>
#include <asm/page.h>
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index 529dcc21c3f9..cecd57e1d046 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -63,6 +63,7 @@ _GLOBAL(load_fp_state)
REST_32FPVSRS(0, R4, R3)
blr
EXPORT_SYMBOL(load_fp_state)
+_ASM_NOKPROBE_SYMBOL(load_fp_state); /* used by restore_math */
/*
* Store FP state into memory, including FPSCR
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index e25b615e9f9e..755fab9641d6 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -37,6 +37,8 @@
#include <asm/export.h>
#include <asm/feature-fixups.h>
+#include "head_32.h"
+
/* 601 only have IBAT; cr0.eq is set on 601 when using this macro */
#define LOAD_BAT(n, reg, RA, RB) \
/* see the comment for clear_bats() -- Cort */ \
@@ -160,6 +162,10 @@ __after_mmu_off:
bl flush_tlbs
bl initial_bats
+ bl load_segment_registers
+#ifdef CONFIG_KASAN
+ bl early_hash_table
+#endif
#if defined(CONFIG_BOOTX_TEXT)
bl setup_disp_bat
#endif
@@ -205,7 +211,7 @@ __after_mmu_off:
*/
turn_on_mmu:
mfmsr r0
- ori r0,r0,MSR_DR|MSR_IR
+ ori r0,r0,MSR_DR|MSR_IR|MSR_RI
mtspr SPRN_SRR1,r0
lis r0,start_here@h
ori r0,r0,start_here@l
@@ -242,103 +248,6 @@ __secondary_hold_spinloop:
__secondary_hold_acknowledge:
.long -1
-/*
- * Exception entry code. This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; \
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mfcr r10; \
- EXCEPTION_PROLOG_1; \
- EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1 \
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
- andi. r11,r11,MSR_PR; \
- tophys(r11,r1); /* use tophys(r1) if kernel */ \
- beq 1f; \
- mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,TASK_STACK-THREAD(r11); \
- addi r11,r11,THREAD_SIZE; \
- tophys(r11,r11); \
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
-
-
-#define EXCEPTION_PROLOG_2 \
- stw r10,_CCR(r11); /* save registers */ \
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_SRR0; \
- mfspr r9,SPRN_SRR1; \
- stw r1,GPR1(r11); \
- stw r1,0(r11); \
- tovirt(r1,r11); /* set new kernel sp */ \
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- MTMSRD(r10); /* (except for mach check in rtas) */ \
- stw r0,GPR0(r11); \
- lis r10,STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
- addi r10,r10,STACK_FRAME_REGS_MARKER@l; \
- stw r10,8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
-
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer) \
- . = n; \
- DO_KVM n; \
-label: \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- li r10,MSR_KERNEL; \
- copyee(r10, r9); \
- bl tfer; \
-i##n: \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
/* System reset */
/* core99 pmac starts the secondary here by changing the vector, and
putting it back to what it was (unknown_exception) when done. */
@@ -387,7 +296,11 @@ DataAccess:
EXCEPTION_PROLOG
mfspr r10,SPRN_DSISR
stw r10,_DSISR(r11)
+#ifdef CONFIG_PPC_KUAP
+ andis. r0,r10,(DSISR_BAD_FAULT_32S | DSISR_DABRMATCH | DSISR_PROTFAULT)@h
+#else
andis. r0,r10,(DSISR_BAD_FAULT_32S|DSISR_DABRMATCH)@h
+#endif
bne 1f /* if not, try to put a PTE */
mfspr r4,SPRN_DAR /* into the hash table */
rlwinm r3,r10,32-15,21,21 /* DSISR_STORE -> _PAGE_RW */
@@ -428,7 +341,7 @@ Alignment:
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -449,24 +362,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_FPU_UNAVAILABLE)
bl load_up_fpu /* if from user, just load it up */
b fast_exception_return
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+ EXC_XFER_LITE(0x800, kernel_fp_unavailable_exception)
/* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
/* System call */
. = 0xc00
DO_KVM 0xc00
SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
/*
* The Altivec unavailable trap is at 0x0f20. Foo.
@@ -522,9 +434,9 @@ InstructionTLBMiss:
andc. r1,r1,r0 /* check access & ~permission */
bne- InstructionAddressInvalid /* return if access not permitted */
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- ori r1, r1, 0xe05 /* clear out reserved bits */
- andc r1, r0, r1 /* PP = user? 2 : 0 */
+ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ ori r1, r1, 0xe06 /* clear out reserved bits */
+ andc r1, r0, r1 /* PP = user? 1 : 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -590,11 +502,11 @@ DataLoadTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwinm r1,r0,32-10,31,31 /* _PAGE_RW -> PP lsb */
+ rlwinm r1,r0,32-9,30,30 /* _PAGE_RW -> PP msb */
rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r0,r0,32-1,31,31 /* _PAGE_USER -> PP lsb */
ori r1,r1,0xe04 /* clear out reserved bits */
- andc r1,r0,r1 /* PP = user? rw? 2: 3: 0 */
+ andc r1,r0,r1 /* PP = user? rw? 1: 3: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -670,9 +582,9 @@ DataStoreTLBMiss:
* we would need to update the pte atomically with lwarx/stwcx.
*/
/* Convert linux-style PTE to low word of PPC-style PTE */
- rlwimi r0,r0,32-1,30,30 /* _PAGE_USER -> PP msb */
- li r1,0xe05 /* clear out reserved bits & PP lsb */
- andc r1,r0,r1 /* PP = user? 2: 0 */
+ rlwimi r0,r0,32-2,31,31 /* _PAGE_USER -> PP lsb */
+ li r1,0xe06 /* clear out reserved bits & PP msb */
+ andc r1,r0,r1 /* PP = user? 1: 0 */
BEGIN_FTR_SECTION
rlwinm r1,r1,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
@@ -698,35 +610,35 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU)
#define altivec_assist_exception unknown_exception
#endif
- EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_EE)
+ EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
+ EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, altivec_assist_exception, EXC_XFER_STD)
EXCEPTION(0x1700, Trap_17, TAUException, EXC_XFER_STD)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_EE)
- EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1c00, Trap_1c, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2000, RunMode, RunModeException, EXC_XFER_STD)
+ EXCEPTION(0x2100, Trap_21, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2200, Trap_22, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2300, Trap_23, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2400, Trap_24, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2500, Trap_25, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2600, Trap_26, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2700, Trap_27, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2800, Trap_28, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2900, Trap_29, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2a00, Trap_2a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2b00, Trap_2b, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2c00, Trap_2c, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2d00, Trap_2d, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2e00, Trap_2e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x2f00, Trap_2f, unknown_exception, EXC_XFER_STD)
. = 0x3000
@@ -738,7 +650,7 @@ AltiVecUnavailable:
b fast_exception_return
#endif /* CONFIG_ALTIVEC */
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0xf20, altivec_unavailable_exception)
+ EXC_XFER_LITE(0xf20, altivec_unavailable_exception)
PerformanceMonitor:
EXCEPTION_PROLOG
@@ -880,11 +792,24 @@ _ENTRY(__restore_cpu_setup)
blr
#endif /* !defined(CONFIG_PPC_BOOK3S_32) */
-
/*
* Load stuff into the MMU. Intended to be called with
* IR=0 and DR=0.
*/
+#ifdef CONFIG_KASAN
+early_hash_table:
+ sync /* Force all PTE updates to finish */
+ isync
+ tlbia /* Clear all TLB entries */
+ sync /* wait for tlbia/tlbie to finish */
+ TLBSYNC /* ... on all CPUs */
+ /* Load the SDR1 register (hash table base & size) */
+ lis r6, early_hash - PAGE_OFFSET@h
+ ori r6, r6, 3 /* 256kB table */
+ mtspr SPRN_SDR1, r6
+ blr
+#endif
+
load_up_mmu:
sync /* Force all PTE updates to finish */
isync
@@ -896,14 +821,6 @@ load_up_mmu:
tophys(r6,r6)
lwz r6,_SDR1@l(r6)
mtspr SPRN_SDR1,r6
- li r0,16 /* load up segment register values */
- mtctr r0 /* for context 0 */
- lis r3,0x2000 /* Ku = 1, VSID = 0 */
- li r4,0
-3: mtsrin r3,r4
- addi r3,r3,0x111 /* increment VSID */
- addis r4,r4,0x1000 /* address of next segment */
- bdnz 3b
/* Load the BAT registers with the values set up by MMU_init.
MMU_init takes care of whether we're on a 601 or not. */
@@ -925,6 +842,32 @@ BEGIN_MMU_FTR_SECTION
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blr
+load_segment_registers:
+ li r0, NUM_USER_SEGMENTS /* load up user segment register values */
+ mtctr r0 /* for context 0 */
+ li r3, 0 /* Kp = 0, Ks = 0, VSID = 0 */
+#ifdef CONFIG_PPC_KUEP
+ oris r3, r3, SR_NX@h /* Set Nx */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ oris r3, r3, SR_KS@h /* Set Ks */
+#endif
+ li r4, 0
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+ li r0, 16 - NUM_USER_SEGMENTS /* load up kernel segment registers */
+ mtctr r0 /* for context 0 */
+ rlwinm r3, r3, 0, ~SR_NX /* Nx = 0 */
+ rlwinm r3, r3, 0, ~SR_KS /* Ks = 0 */
+ oris r3, r3, SR_KP@h /* Kp = 1 */
+3: mtsrin r3, r4
+ addi r3, r3, 0x111 /* increment VSID */
+ addis r4, r4, 0x1000 /* address of next segment */
+ bdnz 3b
+ blr
+
/*
* This is where the main kernel code starts.
*/
@@ -950,11 +893,17 @@ start_here:
* Do early platform-specific initialization,
* and set up the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
bl __save_cpu_setup
bl MMU_init
+BEGIN_MMU_FTR_SECTION
+ bl MMU_init_hw_patch
+END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE)
/*
* Go back to running unmapped so we can load up new values
@@ -1006,7 +955,12 @@ _ENTRY(switch_mmu_context)
blt- 4f
mulli r3,r3,897 /* multiply context by skew factor */
rlwinm r3,r3,4,8,27 /* VSID = (context & 0xfffff) << 4 */
- addis r3,r3,0x6000 /* Set Ks, Ku bits */
+#ifdef CONFIG_PPC_KUEP
+ oris r3, r3, SR_NX@h /* Set Nx */
+#endif
+#ifdef CONFIG_PPC_KUAP
+ oris r3, r3, SR_KS@h /* Set Ks */
+#endif
li r0,NUM_USER_SEGMENTS
mtctr r0
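
The KUEP/KUAP changes to load_segment_registers and switch_mmu_context come down to two bits in each user segment register. A minimal standalone sketch of how such a value is composed; the SR_KS/SR_KP/SR_NX numeric values below follow the classic 32-bit segment-register layout and are stated here as assumptions (only the names appear in the diff):

	#include <stdint.h>
	#include <stdio.h>

	#define SR_KS	0x40000000u	/* supervisor-state protection key */
	#define SR_KP	0x20000000u	/* user-state protection key */
	#define SR_NX	0x10000000u	/* no execute */

	static uint32_t user_segment(uint32_t vsid, int kuep, int kuap)
	{
		uint32_t sr = vsid & 0x00ffffffu;	/* VSID lives in the low 24 bits */

		if (kuep)
			sr |= SR_NX;	/* no ifetch from user segments while in the kernel;
					 * kuep_unlock clears this again on return to user */
		if (kuap)
			sr |= SR_KS;	/* supervisor accesses honour the PP bits, so stray
					 * kernel accesses to user memory fault */
		return sr;
	}

	int main(void)
	{
		printf("%08x\n", user_segment(0x111, 1, 1));	/* prints 50000111 */
		return 0;
	}
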
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
new file mode 100644
index 000000000000..4a692553651f
--- /dev/null
+++ b/arch/powerpc/kernel/head_32.h
@@ -0,0 +1,203 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __HEAD_32_H__
+#define __HEAD_32_H__
+
+#include <asm/ptrace.h> /* for STACK_FRAME_REGS_MARKER */
+
+/*
+ * MSR_KERNEL is > 0x8000 on 4xx/Book-E since it includes MSR_CE.
+ */
+.macro __LOAD_MSR_KERNEL r, x
+.if \x >= 0x8000
+ lis \r, (\x)@h
+ ori \r, \r, (\x)@l
+.else
+ li \r, (\x)
+.endif
+.endm
+#define LOAD_MSR_KERNEL(r, x) __LOAD_MSR_KERNEL r, x
+
+/*
+ * Exception entry code. This code runs with address translation
+ * turned off, i.e. using physical addresses.
+ * We assume sprg3 has the physical address of the current
+ * task's thread_struct.
+ */
+
+.macro EXCEPTION_PROLOG
+ mtspr SPRN_SPRG_SCRATCH0,r10
+ mtspr SPRN_SPRG_SCRATCH1,r11
+ mfcr r10
+ EXCEPTION_PROLOG_1
+ EXCEPTION_PROLOG_2
+.endm
+
+.macro EXCEPTION_PROLOG_1
+ mfspr r11,SPRN_SRR1 /* check whether user or kernel */
+ andi. r11,r11,MSR_PR
+ tophys(r11,r1) /* use tophys(r1) if kernel */
+ beq 1f
+ mfspr r11,SPRN_SPRG_THREAD
+ lwz r11,TASK_STACK-THREAD(r11)
+ addi r11,r11,THREAD_SIZE
+ tophys(r11,r11)
+1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
+.endm
+
+.macro EXCEPTION_PROLOG_2
+ stw r10,_CCR(r11) /* save registers */
+ stw r12,GPR12(r11)
+ stw r9,GPR9(r11)
+ mfspr r10,SPRN_SPRG_SCRATCH0
+ stw r10,GPR10(r11)
+ mfspr r12,SPRN_SPRG_SCRATCH1
+ stw r12,GPR11(r11)
+ mflr r10
+ stw r10,_LINK(r11)
+ mfspr r12,SPRN_SRR0
+ mfspr r9,SPRN_SRR1
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1,r11) /* set new kernel sp */
+#ifdef CONFIG_40x
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#else
+ li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR) /* can take exceptions */
+ MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ stw r0,GPR0(r11)
+ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l
+ stw r10,8(r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+.endm
+
+.macro SYSCALL_ENTRY trapno
+ mfspr r12,SPRN_SPRG_THREAD
+ mfcr r10
+ lwz r11,TASK_STACK-THREAD(r12)
+ mflr r9
+ addi r11,r11,THREAD_SIZE - INT_FRAME_SIZE
+ rlwinm r10,r10,0,4,2 /* Clear SO bit in CR */
+ tophys(r11,r11)
+ stw r10,_CCR(r11) /* save registers */
+ mfspr r10,SPRN_SRR0
+ stw r9,_LINK(r11)
+ mfspr r9,SPRN_SRR1
+ stw r1,GPR1(r11)
+ stw r1,0(r11)
+ tovirt(r1,r11) /* set new kernel sp */
+ stw r10,_NIP(r11)
+#ifdef CONFIG_40x
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+#else
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL & ~(MSR_IR|MSR_DR)) /* can take exceptions */
+ MTMSRD(r10) /* (except for mach check in rtas) */
+#endif
+ lis r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ stw r2,GPR2(r11)
+ addi r10,r10,STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r11)
+ li r2, \trapno + 1
+ stw r10,8(r11)
+ stw r2,_TRAP(r11)
+ SAVE_GPR(0, r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ addi r2,r12,-THREAD
+ stw r11,PT_REGS(r12)
+#if defined(CONFIG_40x)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r12)
+ andis. r12,r12,DBCR0_IDM@h
+#endif
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+#if defined(CONFIG_40x)
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+#endif
+
+3:
+ tovirt(r2, r2) /* set r2 to current */
+ lis r11, transfer_to_syscall@h
+ ori r11, r11, transfer_to_syscall@l
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If MSR is changing we need to keep interrupts disabled at this point
+ * otherwise we might risk taking an interrupt before we tell lockdep
+ * they are enabled.
+ */
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL)
+ rlwimi r10, r9, 0, MSR_EE
+#else
+ LOAD_MSR_KERNEL(r10, MSR_KERNEL | MSR_EE)
+#endif
+#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PERF_EVENTS)
+ mtspr SPRN_NRI, r0
+#endif
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ SYNC
+ RFI /* jump to handler, enable MMU */
+.endm
+
+/*
+ * Note: code which follows this uses cr0.eq (set if from kernel),
+ * r11, r12 (SRR0), and r9 (SRR1).
+ *
+ * Note2: once we have set r1 we are in a position to take exceptions
+ * again, and we could thus set MSR:RI at that point.
+ */
+
+/*
+ * Exception vectors.
+ */
+#ifdef CONFIG_PPC_BOOK3S
+#define START_EXCEPTION(n, label) \
+ . = n; \
+ DO_KVM n; \
+label:
+
+#else
+#define START_EXCEPTION(n, label) \
+ . = n; \
+label:
+
+#endif
+
+#define EXCEPTION(n, label, hdlr, xfer) \
+ START_EXCEPTION(n, label) \
+ EXCEPTION_PROLOG; \
+ addi r3,r1,STACK_FRAME_OVERHEAD; \
+ xfer(n, hdlr)
+
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
+ li r10,trap; \
+ stw r10,_TRAP(r11); \
+ LOAD_MSR_KERNEL(r10, msr); \
+ bl tfer; \
+ .long hdlr; \
+ .long ret
+
+#define EXC_XFER_STD(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
+ ret_from_except_full)
+
+#define EXC_XFER_LITE(n, hdlr) \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
+ ret_from_except)
+
+#endif /* __HEAD_32_H__ */
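
The 0x8000 cut-off in LOAD_MSR_KERNEL exists because li sign-extends its 16-bit immediate: any value with bit 15 set, or wider than 16 bits, must be built with the lis/ori pair. A small standalone sketch (the sample value is illustrative, not a real MSR_KERNEL definition):

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t emulate_li(int16_t simm)	/* li  rD,SIMM */
	{
		return (uint32_t)(int32_t)simm;		/* the immediate is sign-extended */
	}

	static uint32_t emulate_lis_ori(uint32_t x)	/* lis rD,x@h ; ori rD,rD,x@l */
	{
		return (x & 0xffff0000u) | (x & 0x0000ffffu);
	}

	int main(void)
	{
		uint32_t msr = 0x00029032;	/* illustrative value with bits above the low 15 set */

		printf("li  : %08x\n", emulate_li((int16_t)msr));	/* ffff9032 -- wrong */
		printf("lis : %08x\n", emulate_lis_ori(msr));		/* 00029032 -- right */
		return 0;
	}
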
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index a9c934f2319b..cf54b784100d 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -44,6 +44,8 @@
#include <asm/export.h>
#include <asm/asm-405.h>
+#include "head_32.h"
+
/* As with the other PowerPC ports, it is expected that when code
* execution begins here, the following registers contain valid, yet
* optional, information:
@@ -99,46 +101,6 @@ _ENTRY(saved_ksp_limit)
.space 4
/*
- * Exception vector entry code. This code runs with address translation
- * turned off (i.e. using physical addresses). We assume SPRG_THREAD has
- * the physical address of the current task thread_struct.
- * Note that we have to have decremented r1 before we write to any fields
- * of the exception frame, since a critical interrupt could occur at any
- * time, and it will write to the area immediately below the current r1.
- */
-#define NORMAL_EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0,r10; /* save two registers to work with */\
- mtspr SPRN_SPRG_SCRATCH1,r11; \
- mtspr SPRN_SPRG_SCRATCH2,r1; \
- mfcr r10; /* save CR in r10 for now */\
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */\
- andi. r11,r11,MSR_PR; \
- beq 1f; \
- mfspr r1,SPRN_SPRG_THREAD; /* if from user, start at top of */\
- lwz r1,TASK_STACK-THREAD(r1); /* this thread's kernel stack */\
- addi r1,r1,THREAD_SIZE; \
-1: subi r1,r1,INT_FRAME_SIZE; /* Allocate an exception frame */\
- tophys(r11,r1); \
- stw r10,_CCR(r11); /* save various registers */\
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH2; \
- mfspr r12,SPRN_SRR0; \
- stw r10,GPR1(r11); \
- mfspr r9,SPRN_SRR1; \
- stw r10,0(r11); \
- rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
- stw r0,GPR0(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
* Exception prolog for critical exceptions. This is a little different
* from the normal exception prolog above since a critical exception
* can potentially occur at any point during normal exception processing.
@@ -177,6 +139,9 @@ _ENTRY(saved_ksp_limit)
tovirt(r1,r11); \
rlwinm r9,r9,0,14,12; /* clear MSR_WE (necessary?) */\
stw r0,GPR0(r11); \
+ lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */\
+ addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
+ stw r10, 8(r11); \
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
@@ -196,53 +161,12 @@ _ENTRY(saved_ksp_limit)
/*
* Exception vectors.
*/
-#define START_EXCEPTION(n, label) \
- . = n; \
-label:
-
-#define EXCEPTION(n, label, hdlr, xfer) \
- START_EXCEPTION(n, label); \
- NORMAL_EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
#define CRITICAL_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(n, label); \
CRITICAL_EXCEPTION_PROLOG; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
-
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- lis r10,msr@h; \
- ori r10,r10,msr@l; \
- copyee(r10, r9); \
- bl tfer; \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
+ crit_transfer_to_handler, ret_from_crit_exc)
/*
* 0x0100 - Critical Interrupt Exception
@@ -393,7 +317,7 @@ label:
* This is caused by a fetch from non-execute or guarded pages.
*/
START_EXCEPTION(0x0400, InstructionAccess)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mr r4,r12 /* Pass SRR0 as arg2 */
li r5,0 /* Pass zero as arg3 */
EXC_XFER_LITE(0x400, handle_page_fault)
@@ -403,33 +327,32 @@ label:
/* 0x0600 - Alignment Exception */
START_EXCEPTION(0x0600, Alignment)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r4,SPRN_DEAR /* Grab the DEAR and save it */
stw r4,_DEAR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* 0x0700 - Program Exception */
START_EXCEPTION(0x0700, ProgramCheck)
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r4,SPRN_ESR /* Grab the ESR and save it */
stw r4,_ESR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_STD(0x700, program_check_exception)
- EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0800, Trap_08, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0900, Trap_09, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0A00, Trap_0A, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0B00, Trap_0B, unknown_exception, EXC_XFER_STD)
/* 0x0C00 - System Call Exception */
START_EXCEPTION(0x0C00, SystemCall)
- NORMAL_EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
- EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x0D00, Trap_0D, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0E00, Trap_0E, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x0F00, Trap_0F, unknown_exception, EXC_XFER_STD)
/* 0x1000 - Programmable Interval Timer (PIT) Exception */
. = 0x1000
@@ -646,25 +569,25 @@ label:
mfspr r10, SPRN_SPRG_SCRATCH0
b InstructionAccess
- EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1300, Trap_13, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1400, Trap_14, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
#ifdef CONFIG_IBM405_ERR51
/* 405GP errata 51 */
START_EXCEPTION(0x1700, Trap_17)
b DTLBMiss
#else
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
#endif
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1A00, Trap_1A, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1B00, Trap_1B, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1C00, Trap_1C, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1D00, Trap_1D, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1E00, Trap_1E, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1F00, Trap_1F, unknown_exception, EXC_XFER_STD)
/* Check for a single step debug exception while in an exception
* handler before state has been saved. This is to catch the case
@@ -726,11 +649,11 @@ label:
addi r3,r1,STACK_FRAME_OVERHEAD
EXC_XFER_TEMPLATE(DebugException, 0x2002, \
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
/* Programmable Interval Timer (PIT) Exception. (from 0x1000) */
Decrementer:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
lis r0,TSR_PIS@h
mtspr SPRN_TSR,r0 /* Clear the PIT exception */
addi r3,r1,STACK_FRAME_OVERHEAD
@@ -738,9 +661,9 @@ Decrementer:
/* Fixed Interval Timer (FIT) Exception. (from 0x1010) */
FITException:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
addi r3,r1,STACK_FRAME_OVERHEAD;
- EXC_XFER_EE(0x1010, unknown_exception)
+ EXC_XFER_STD(0x1010, unknown_exception)
/* Watchdog Timer (WDT) Exception. (from 0x1020) */
WDTException:
@@ -748,15 +671,14 @@ WDTException:
addi r3,r1,STACK_FRAME_OVERHEAD;
EXC_XFER_TEMPLATE(WatchdogException, 0x1020+2,
(MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)),
- NOCOPY, crit_transfer_to_handler,
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
/*
* The other Data TLB exceptions bail out to this point
* if they can't resolve the lightweight TLB fault.
*/
DataAccess:
- NORMAL_EXCEPTION_PROLOG
+ EXCEPTION_PROLOG
mfspr r5,SPRN_ESR /* Grab the ESR, save it, pass arg3 */
stw r5,_ESR(r11)
mfspr r4,SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */
@@ -848,6 +770,9 @@ start_here:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
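
The shared EXC_XFER_LITE macro still folds a +1 into the trap number while EXC_XFER_STD does not. On 32-bit the low bit of pt_regs::trap conventionally marks frames where the non-volatile GPRs were not saved (the FULL_REGS() convention); a minimal sketch of that test, assuming that convention holds here:

	#include <linux/types.h>
	#include <asm/ptrace.h>

	/* Minimal sketch, assuming the 32-bit convention that a set low bit in
	 * pt_regs::trap means the "lite" entry path was used and the
	 * non-volatile GPRs were not saved. */
	static inline bool frame_has_full_regs(struct pt_regs *regs)
	{
		return (regs->trap & 1) == 0;
	}
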
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 37117ab11584..f15fba58c744 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -203,6 +203,9 @@ _ENTRY(_start);
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
@@ -278,16 +281,15 @@ interrupt_base:
FP_UNAVAILABLE_EXCEPTION
#else
EXCEPTION(0x2010, BOOKE_INTERRUPT_FP_UNAVAIL, \
- FloatingPointUnavailable, unknown_exception, EXC_XFER_EE)
+ FloatingPointUnavailable, unknown_exception, EXC_XFER_STD)
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(BOOKE_INTERRUPT_SYSCALL)
- EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 BOOKE_INTERRUPT_SYSCALL
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2020, BOOKE_INTERRUPT_AP_UNAVAIL, \
- AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_EE)
+ AuxillaryProcessorUnavailable, unknown_exception, EXC_XFER_STD)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
@@ -295,7 +297,7 @@ interrupt_base:
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
EXCEPTION(0x1010, BOOKE_INTERRUPT_FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Watchdog Timer Interrupt */
/* TODO: Add watchdog support */
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 3fad8d499767..5321a11c2835 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -968,7 +968,9 @@ start_here_multiplatform:
/* Restore parameters passed from prom_init/kexec */
mr r3,r31
- bl early_setup /* also sets r13 and SPRG_PACA */
+ LOAD_REG_ADDR(r12, DOTSYM(early_setup))
+ mtctr r12
+ bctrl /* also sets r13 and SPRG_PACA */
LOAD_REG_ADDR(r3, start_here_common)
ld r4,PACAKMSR(r13)
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 03c73b4c6435..885be7f3d29a 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -33,6 +33,8 @@
#include <asm/export.h>
#include <asm/code-patching-asm.h>
+#include "head_32.h"
+
#if CONFIG_TASK_SIZE <= 0x80000000 && CONFIG_PAGE_OFFSET >= 0x80000000
/* By simply checking Address >= 0x80000000, we know if its a kernel address */
#define SIMPLE_KERNEL_ADDRESS 1
@@ -123,102 +125,6 @@ instruction_counter:
.space 4
#endif
-/*
- * Exception entry code. This code runs with address translation
- * turned off, i.e. using physical addresses.
- * We assume sprg3 has the physical address of the current
- * task's thread_struct.
- */
-#define EXCEPTION_PROLOG \
- mtspr SPRN_SPRG_SCRATCH0, r10; \
- mtspr SPRN_SPRG_SCRATCH1, r11; \
- mfcr r10; \
- EXCEPTION_PROLOG_1; \
- EXCEPTION_PROLOG_2
-
-#define EXCEPTION_PROLOG_1 \
- mfspr r11,SPRN_SRR1; /* check whether user or kernel */ \
- andi. r11,r11,MSR_PR; \
- tophys(r11,r1); /* use tophys(r1) if kernel */ \
- beq 1f; \
- mfspr r11,SPRN_SPRG_THREAD; \
- lwz r11,TASK_STACK-THREAD(r11); \
- addi r11,r11,THREAD_SIZE; \
- tophys(r11,r11); \
-1: subi r11,r11,INT_FRAME_SIZE /* alloc exc. frame */
-
-
-#define EXCEPTION_PROLOG_2 \
- stw r10,_CCR(r11); /* save registers */ \
- stw r12,GPR12(r11); \
- stw r9,GPR9(r11); \
- mfspr r10,SPRN_SPRG_SCRATCH0; \
- stw r10,GPR10(r11); \
- mfspr r12,SPRN_SPRG_SCRATCH1; \
- stw r12,GPR11(r11); \
- mflr r10; \
- stw r10,_LINK(r11); \
- mfspr r12,SPRN_SRR0; \
- mfspr r9,SPRN_SRR1; \
- stw r1,GPR1(r11); \
- stw r1,0(r11); \
- tovirt(r1,r11); /* set new kernel sp */ \
- li r10,MSR_KERNEL & ~(MSR_IR|MSR_DR); /* can take exceptions */ \
- mtmsr r10; \
- stw r0,GPR0(r11); \
- lis r10, STACK_FRAME_REGS_MARKER@ha; /* exception frame marker */ \
- addi r10, r10, STACK_FRAME_REGS_MARKER@l; \
- stw r10, 8(r11); \
- SAVE_4GPRS(3, r11); \
- SAVE_2GPRS(7, r11)
-
-/*
- * Note: code which follows this uses cr0.eq (set if from kernel),
- * r11, r12 (SRR0), and r9 (SRR1).
- *
- * Note2: once we have set r1 we are in a position to take exceptions
- * again, and we could thus set MSR:RI at that point.
- */
-
-/*
- * Exception vectors.
- */
-#define EXCEPTION(n, label, hdlr, xfer) \
- . = n; \
-label: \
- EXCEPTION_PROLOG; \
- addi r3,r1,STACK_FRAME_OVERHEAD; \
- xfer(n, hdlr)
-
-#define EXC_XFER_TEMPLATE(n, hdlr, trap, copyee, tfer, ret) \
- li r10,trap; \
- stw r10,_TRAP(r11); \
- li r10,MSR_KERNEL; \
- copyee(r10, r9); \
- bl tfer; \
-i##n: \
- .long hdlr; \
- .long ret
-
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
-#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, NOCOPY, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(n, hdlr, n+1, COPY_EE, transfer_to_handler, \
- ret_from_except)
-
/* System reset */
EXCEPTION(0x100, Reset, system_reset_exception, EXC_XFER_STD)
@@ -261,7 +167,7 @@ Alignment:
mfspr r5,SPRN_DSISR
stw r5,_DSISR(r11)
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE(0x600, alignment_exception)
+ EXC_XFER_STD(0x600, alignment_exception)
/* Program check exception */
EXCEPTION(0x700, ProgramCheck, program_check_exception, EXC_XFER_STD)
@@ -273,19 +179,18 @@ Alignment:
/* Decrementer */
EXCEPTION(0x900, Decrementer, timer_interrupt, EXC_XFER_LITE)
- EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xa00, Trap_0a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xb00, Trap_0b, unknown_exception, EXC_XFER_STD)
/* System call */
. = 0xc00
SystemCall:
- EXCEPTION_PROLOG
- EXC_XFER_EE_LITE(0xc00, DoSyscall)
+ SYSCALL_ENTRY 0xc00
/* Single step - not used on 601 */
EXCEPTION(0xd00, SingleStep, single_step_exception, EXC_XFER_STD)
- EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0xe00, Trap_0e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0xf00, Trap_0f, unknown_exception, EXC_XFER_STD)
/* On the MPC8xx, this is a software emulation interrupt. It occurs
* for all unimplemented and illegal instructions.
@@ -615,13 +520,13 @@ DARFixed:/* Return from dcbx instruction bug workaround */
/* 0x300 is DataAccess exception, needed by bad_page_fault() */
EXC_XFER_LITE(0x300, handle_page_fault)
- EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1600, Trap_16, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1700, Trap_17, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1800, Trap_18, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1900, Trap_19, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1a00, Trap_1a, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1b00, Trap_1b, unknown_exception, EXC_XFER_STD)
/* On the MPC8xx, these next four traps are used for development
* support of breakpoints and such. Someday I will get around to
@@ -643,7 +548,7 @@ DataBreakpoint:
mfspr r4,SPRN_BAR
stw r4,_DAR(r11)
mfspr r5,SPRN_DSISR
- EXC_XFER_EE(0x1c00, do_break)
+ EXC_XFER_STD(0x1c00, do_break)
11:
mtcr r10
mfspr r10, SPRN_SPRG_SCRATCH0
@@ -663,10 +568,10 @@ InstructionBreakpoint:
mfspr r10, SPRN_SPRG_SCRATCH0
rfi
#else
- EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1d00, Trap_1d, unknown_exception, EXC_XFER_STD)
#endif
- EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_EE)
- EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0x1e00, Trap_1e, unknown_exception, EXC_XFER_STD)
+ EXCEPTION(0x1f00, Trap_1f, unknown_exception, EXC_XFER_STD)
. = 0x2000
@@ -853,6 +758,9 @@ start_here:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
li r3,0
mr r4,r31
bl machine_init
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 1b22a8dea399..bfeb469e8106 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -6,6 +6,8 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_booke_hv_asm.h>
+#ifdef __ASSEMBLY__
+
/*
* Macros used for common Book-e exception handling
*/
@@ -81,6 +83,101 @@ END_BTB_FLUSH_SECTION
SAVE_4GPRS(3, r11); \
SAVE_2GPRS(7, r11)
+.macro SYSCALL_ENTRY trapno intno
+ mfspr r10, SPRN_SPRG_THREAD
+#ifdef CONFIG_KVM_BOOKE_HV
+BEGIN_FTR_SECTION
+ mtspr SPRN_SPRG_WSCRATCH0, r10
+ stw r11, THREAD_NORMSAVE(0)(r10)
+ stw r13, THREAD_NORMSAVE(2)(r10)
+ mfcr r13 /* save CR in r13 for now */
+ mfspr r11, SPRN_SRR1
+ mtocrf 0x80, r11 /* check MSR[GS] without clobbering reg */
+ bf 3, 1975f
+ b kvmppc_handler_BOOKE_INTERRUPT_\intno\()_SPRN_SRR1
+1975:
+ mr r12, r13
+ lwz r13, THREAD_NORMSAVE(2)(r10)
+FTR_SECTION_ELSE
+#endif
+ mfcr r12
+#ifdef CONFIG_KVM_BOOKE_HV
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_EMB_HV)
+#endif
+ BOOKE_CLEAR_BTB(r11)
+ lwz r11, TASK_STACK - THREAD(r10)
+ rlwinm r12,r12,0,4,2 /* Clear SO bit in CR */
+ ALLOC_STACK_FRAME(r11, THREAD_SIZE - INT_FRAME_SIZE)
+ stw r12, _CCR(r11) /* save various registers */
+ mflr r12
+ stw r12,_LINK(r11)
+ mfspr r12,SPRN_SRR0
+ stw r1, GPR1(r11)
+ mfspr r9,SPRN_SRR1
+ stw r1, 0(r11)
+ mr r1, r11
+ stw r12,_NIP(r11)
+ rlwinm r9,r9,0,14,12 /* clear MSR_WE (necessary?) */
+ lis r12, STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
+ stw r2,GPR2(r11)
+ addi r12, r12, STACK_FRAME_REGS_MARKER@l
+ stw r9,_MSR(r11)
+ li r2, \trapno + 1
+ stw r12, 8(r11)
+ stw r2,_TRAP(r11)
+ SAVE_GPR(0, r11)
+ SAVE_4GPRS(3, r11)
+ SAVE_2GPRS(7, r11)
+
+ addi r11,r1,STACK_FRAME_OVERHEAD
+ addi r2,r10,-THREAD
+ stw r11,PT_REGS(r10)
+ /* Check to see if the dbcr0 register is set up to debug. Use the
+ internal debug mode bit to do this. */
+ lwz r12,THREAD_DBCR0(r10)
+ andis. r12,r12,DBCR0_IDM@h
+ ACCOUNT_CPU_USER_ENTRY(r2, r11, r12)
+ beq+ 3f
+ /* From user and task is ptraced - load up global dbcr0 */
+ li r12,-1 /* clear all pending debug events */
+ mtspr SPRN_DBSR,r12
+ lis r11,global_dbcr0@ha
+ tophys(r11,r11)
+ addi r11,r11,global_dbcr0@l
+#ifdef CONFIG_SMP
+ lwz r9,TASK_CPU(r2)
+ slwi r9,r9,3
+ add r11,r11,r9
+#endif
+ lwz r12,0(r11)
+ mtspr SPRN_DBCR0,r12
+ lwz r12,4(r11)
+ addi r12,r12,-1
+ stw r12,4(r11)
+
+3:
+ tovirt(r2, r2) /* set r2 to current */
+ lis r11, transfer_to_syscall@h
+ ori r11, r11, transfer_to_syscall@l
+#ifdef CONFIG_TRACE_IRQFLAGS
+ /*
+ * If MSR is changing we need to keep interrupts disabled at this point
+ * otherwise we might risk taking an interrupt before we tell lockdep
+ * they are enabled.
+ */
+ lis r10, MSR_KERNEL@h
+ ori r10, r10, MSR_KERNEL@l
+ rlwimi r10, r9, 0, MSR_EE
+#else
+ lis r10, (MSR_KERNEL | MSR_EE)@h
+ ori r10, r10, (MSR_KERNEL | MSR_EE)@l
+#endif
+ mtspr SPRN_SRR1,r10
+ mtspr SPRN_SRR0,r11
+ SYNC
+ RFI /* jump to handler, enable MMU */
+.endm
+
/* To handle the additional exception priority levels on 40x and Book-E
* processors we allocate a stack per additional priority level.
*
@@ -217,8 +314,7 @@ label:
CRITICAL_EXCEPTION_PROLOG(intno); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+2, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, crit_transfer_to_handler, \
- ret_from_crit_exc)
+ crit_transfer_to_handler, ret_from_crit_exc)
#define MCHECK_EXCEPTION(n, label, hdlr) \
START_EXCEPTION(label); \
@@ -227,36 +323,23 @@ label:
stw r5,_ESR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
EXC_XFER_TEMPLATE(hdlr, n+4, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), \
- NOCOPY, mcheck_transfer_to_handler, \
- ret_from_mcheck_exc)
+ mcheck_transfer_to_handler, ret_from_mcheck_exc)
-#define EXC_XFER_TEMPLATE(hdlr, trap, msr, copyee, tfer, ret) \
+#define EXC_XFER_TEMPLATE(hdlr, trap, msr, tfer, ret) \
li r10,trap; \
stw r10,_TRAP(r11); \
lis r10,msr@h; \
ori r10,r10,msr@l; \
- copyee(r10, r9); \
bl tfer; \
.long hdlr; \
.long ret
-#define COPY_EE(d, s) rlwimi d,s,0,16,16
-#define NOCOPY(d, s)
-
#define EXC_XFER_STD(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, NOCOPY, transfer_to_handler_full, \
+ EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, transfer_to_handler_full, \
ret_from_except_full)
#define EXC_XFER_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, NOCOPY, transfer_to_handler, \
- ret_from_except)
-
-#define EXC_XFER_EE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n, MSR_KERNEL, COPY_EE, transfer_to_handler_full, \
- ret_from_except_full)
-
-#define EXC_XFER_EE_LITE(n, hdlr) \
- EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, COPY_EE, transfer_to_handler, \
+ EXC_XFER_TEMPLATE(hdlr, n+1, MSR_KERNEL, transfer_to_handler, \
ret_from_except)
/* Check for a single step debug exception while in an exception
@@ -323,7 +406,7 @@ label:
/* continue normal handling for a debug exception... */ \
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, debug_transfer_to_handler, ret_from_debug_exc)
+ EXC_XFER_TEMPLATE(DebugException, 0x2008, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), debug_transfer_to_handler, ret_from_debug_exc)
#define DEBUG_CRIT_EXCEPTION \
START_EXCEPTION(DebugCrit); \
@@ -376,7 +459,7 @@ label:
/* continue normal handling for a critical exception... */ \
2: mfspr r4,SPRN_DBSR; \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), NOCOPY, crit_transfer_to_handler, ret_from_crit_exc)
+ EXC_XFER_TEMPLATE(DebugException, 0x2002, (MSR_KERNEL & ~(MSR_ME|MSR_DE|MSR_CE)), crit_transfer_to_handler, ret_from_crit_exc)
#define DATA_STORAGE_EXCEPTION \
START_EXCEPTION(DataStorage) \
@@ -401,7 +484,7 @@ label:
mfspr r4,SPRN_DEAR; /* Grab the DEAR and save it */ \
stw r4,_DEAR(r11); \
addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_EE(0x0600, alignment_exception)
+ EXC_XFER_STD(0x0600, alignment_exception)
#define PROGRAM_EXCEPTION \
START_EXCEPTION(Program) \
@@ -426,9 +509,9 @@ label:
bl load_up_fpu; /* if from user, just load it up */ \
b fast_exception_return; \
1: addi r3,r1,STACK_FRAME_OVERHEAD; \
- EXC_XFER_EE_LITE(0x800, kernel_fp_unavailable_exception)
+ EXC_XFER_STD(0x800, kernel_fp_unavailable_exception)
-#ifndef __ASSEMBLY__
+#else /* __ASSEMBLY__ */
struct exception_regs {
unsigned long mas0;
unsigned long mas1;
diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S
index 32332e24e421..6621f230cc37 100644
--- a/arch/powerpc/kernel/head_fsl_booke.S
+++ b/arch/powerpc/kernel/head_fsl_booke.S
@@ -268,6 +268,9 @@ set_ivor:
/*
* Decide what sort of machine this is and initialize the MMU.
*/
+#ifdef CONFIG_KASAN
+ bl kasan_early_init
+#endif
mr r3,r30
mr r4,r31
bl machine_init
@@ -380,7 +383,7 @@ interrupt_base:
EXC_XFER_LITE(0x0300, handle_page_fault)
1:
addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x0300, CacheLockingException)
+ EXC_XFER_LITE(0x0300, CacheLockingException)
/* Instruction Storage Interrupt */
INSTRUCTION_STORAGE_EXCEPTION
@@ -401,21 +404,20 @@ interrupt_base:
#ifdef CONFIG_E200
/* E200 treats 'normal' floating point instructions as FP Unavail exception */
EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- program_check_exception, EXC_XFER_EE)
+ program_check_exception, EXC_XFER_STD)
#else
EXCEPTION(0x0800, FP_UNAVAIL, FloatingPointUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif
#endif
/* System Call Interrupt */
START_EXCEPTION(SystemCall)
- NORMAL_EXCEPTION_PROLOG(SYSCALL)
- EXC_XFER_EE_LITE(0x0c00, DoSyscall)
+ SYSCALL_ENTRY 0xc00 SYSCALL
/* Auxiliary Processor Unavailable Interrupt */
EXCEPTION(0x2900, AP_UNAVAIL, AuxillaryProcessorUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Decrementer Interrupt */
DECREMENTER_EXCEPTION
@@ -423,7 +425,7 @@ interrupt_base:
/* Fixed Internal Timer Interrupt */
/* TODO: Add FIT support */
EXCEPTION(0x3100, FIT, FixedIntervalTimer, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
/* Watchdog Timer Interrupt */
#ifdef CONFIG_BOOKE_WDT
@@ -633,25 +635,25 @@ END_BTB_FLUSH_SECTION
bl load_up_spe
b fast_exception_return
1: addi r3,r1,STACK_FRAME_OVERHEAD
- EXC_XFER_EE_LITE(0x2010, KernelSPE)
+ EXC_XFER_LITE(0x2010, KernelSPE)
#elif defined(CONFIG_SPE_POSSIBLE)
EXCEPTION(0x2020, SPE_UNAVAIL, SPEUnavailable, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif /* CONFIG_SPE_POSSIBLE */
/* SPE Floating Point Data */
#ifdef CONFIG_SPE
EXCEPTION(0x2030, SPE_FP_DATA, SPEFloatingPointData,
- SPEFloatingPointException, EXC_XFER_EE)
+ SPEFloatingPointException, EXC_XFER_STD)
/* SPE Floating Point Round */
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- SPEFloatingPointRoundException, EXC_XFER_EE)
+ SPEFloatingPointRoundException, EXC_XFER_STD)
#elif defined(CONFIG_SPE_POSSIBLE)
EXCEPTION(0x2040, SPE_FP_DATA, SPEFloatingPointData,
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
EXCEPTION(0x2050, SPE_FP_ROUND, SPEFloatingPointRound, \
- unknown_exception, EXC_XFER_EE)
+ unknown_exception, EXC_XFER_STD)
#endif /* CONFIG_SPE_POSSIBLE */
@@ -674,10 +676,10 @@ END_BTB_FLUSH_SECTION
unknown_exception)
/* Hypercall */
- EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0, HV_SYSCALL, Hypercall, unknown_exception, EXC_XFER_STD)
/* Embedded Hypervisor Privilege */
- EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_EE)
+ EXCEPTION(0, HV_PRIV, Ehvpriv, unknown_exception, EXC_XFER_STD)
interrupt_end:
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index fec8a6773119..da307dd93ee3 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -29,11 +29,15 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/smp.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
#include <asm/hw_breakpoint.h>
#include <asm/processor.h>
#include <asm/sstep.h>
#include <asm/debug.h>
+#include <asm/debugfs.h>
+#include <asm/hvcall.h>
#include <linux/uaccess.h>
/*
@@ -174,7 +178,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
if (!ppc_breakpoint_available())
return -ENODEV;
length_max = 8; /* DABR */
- if (cpu_has_feature(CPU_FTR_DAWR)) {
+ if (dawr_enabled()) {
length_max = 512 ; /* 64 doublewords */
/* DAWR region can't cross 512 boundary */
if ((attr->bp_addr >> 9) !=
@@ -376,3 +380,59 @@ void hw_breakpoint_pmu_read(struct perf_event *bp)
{
/* TODO */
}
+
+bool dawr_force_enable;
+EXPORT_SYMBOL_GPL(dawr_force_enable);
+
+static ssize_t dawr_write_file_bool(struct file *file,
+ const char __user *user_buf,
+ size_t count, loff_t *ppos)
+{
+ struct arch_hw_breakpoint null_brk = {0, 0, 0};
+ size_t rc;
+
+ /* Send error to user if the hypervisor won't allow us to write DAWR */
+ if ((!dawr_force_enable) &&
+ (firmware_has_feature(FW_FEATURE_LPAR)) &&
+ (set_dawr(&null_brk) != H_SUCCESS))
+ return -1;
+
+ rc = debugfs_write_file_bool(file, user_buf, count, ppos);
+ if (rc)
+ return rc;
+
+ /* If we are clearing, make sure all CPUs have the DAWR cleared */
+ if (!dawr_force_enable)
+ smp_call_function((smp_call_func_t)set_dawr, &null_brk, 0);
+
+ return rc;
+}
+
+static const struct file_operations dawr_enable_fops = {
+ .read = debugfs_read_file_bool,
+ .write = dawr_write_file_bool,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
+static int __init dawr_force_setup(void)
+{
+ dawr_force_enable = false;
+
+ if (cpu_has_feature(CPU_FTR_DAWR)) {
+ /* Don't setup sysfs file for user control on P8 */
+ dawr_force_enable = true;
+ return 0;
+ }
+
+ if (PVR_VER(mfspr(SPRN_PVR)) == PVR_POWER9) {
+ /* Turn DAWR off by default, but allow admin to turn it on */
+ dawr_force_enable = false;
+ debugfs_create_file_unsafe("dawr_enable_dangerous", 0600,
+ powerpc_debugfs_root,
+ &dawr_force_enable,
+ &dawr_enable_fops);
+ }
+ return 0;
+}
+arch_initcall(dawr_force_setup);
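Note: the dawr_enable_dangerous file added above follows the usual debugfs boolean-with-side-effects pattern — reads go straight through debugfs_read_file_bool(), while the custom write wraps debugfs_write_file_bool() so extra work (here, clearing the DAWR on all CPUs) can run after the value changes. A minimal, self-contained sketch of the same pattern, using hypothetical names that are not part of this patch:

	#include <linux/debugfs.h>
	#include <linux/fs.h>
	#include <linux/init.h>
	#include <linux/printk.h>

	static bool example_flag;

	static ssize_t example_write_bool(struct file *file, const char __user *buf,
					  size_t count, loff_t *ppos)
	{
		ssize_t rc = debugfs_write_file_bool(file, buf, count, ppos);

		/* hypothetical side effect that runs once the flag is cleared */
		if (rc > 0 && !example_flag)
			pr_info("example_flag cleared\n");
		return rc;
	}

	static const struct file_operations example_flag_fops = {
		.read	= debugfs_read_file_bool,
		.write	= example_write_bool,
		.open	= simple_open,
		.llseek	= default_llseek,
	};

	static int __init example_flag_setup(void)
	{
		debugfs_create_file_unsafe("example_flag", 0600, NULL,
					   &example_flag, &example_flag_fops);
		return 0;
	}
	late_initcall(example_flag_setup);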
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index 7f5ac2e8581b..2dfbd5d5b932 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -1,956 +1,188 @@
/*
- * This file contains idle entry/exit functions for POWER7,
- * POWER8 and POWER9 CPUs.
+ * Copyright 2018, IBM Corporation.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
+ *
+ * This file contains general idle entry/exit functions to save
+ * and restore stack and NVGPRs which allows C code to call idle
+ * states that lose GPRs, and it will return transparently with
+ * SRR1 wakeup reason return value.
+ *
+ * The platform / CPU caller must ensure SPRs and any other non-GPR
+ * state is saved and restored correctly, handle KVM, interrupts, etc.
*/
-#include <linux/threads.h>
-#include <asm/processor.h>
-#include <asm/page.h>
-#include <asm/cputable.h>
-#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ppc-opcode.h>
-#include <asm/hw_irq.h>
-#include <asm/kvm_book3s_asm.h>
-#include <asm/opal.h>
#include <asm/cpuidle.h>
-#include <asm/exception-64s.h>
-#include <asm/book3s/64/mmu-hash.h>
-#include <asm/mmu.h>
-#include <asm/asm-compat.h>
-#include <asm/feature-fixups.h>
-
-#undef DEBUG
-
-/*
- * Use unused space in the interrupt stack to save and restore
- * registers for winkle support.
- */
-#define _MMCR0 GPR0
-#define _SDR1 GPR3
-#define _PTCR GPR3
-#define _RPR GPR4
-#define _SPURR GPR5
-#define _PURR GPR6
-#define _TSCR GPR7
-#define _DSCR GPR8
-#define _AMOR GPR9
-#define _WORT GPR10
-#define _WORC GPR11
-#define _LPCR GPR12
-
-#define PSSCR_EC_ESL_MASK_SHIFTED (PSSCR_EC | PSSCR_ESL) >> 16
- .text
-
-/*
- * Used by threads before entering deep idle states. Saves SPRs
- * in interrupt stack frame
- */
-save_sprs_to_stack:
- /*
- * Note all register i.e per-core, per-subcore or per-thread is saved
- * here since any thread in the core might wake up first
- */
-BEGIN_FTR_SECTION
- /*
- * Note - SDR1 is dropped in Power ISA v3. Hence not restoring
- * SDR1 here
- */
- mfspr r3,SPRN_PTCR
- std r3,_PTCR(r1)
- mfspr r3,SPRN_LPCR
- std r3,_LPCR(r1)
-FTR_SECTION_ELSE
- mfspr r3,SPRN_SDR1
- std r3,_SDR1(r1)
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
- mfspr r3,SPRN_RPR
- std r3,_RPR(r1)
- mfspr r3,SPRN_SPURR
- std r3,_SPURR(r1)
- mfspr r3,SPRN_PURR
- std r3,_PURR(r1)
- mfspr r3,SPRN_TSCR
- std r3,_TSCR(r1)
- mfspr r3,SPRN_DSCR
- std r3,_DSCR(r1)
- mfspr r3,SPRN_AMOR
- std r3,_AMOR(r1)
- mfspr r3,SPRN_WORT
- std r3,_WORT(r1)
- mfspr r3,SPRN_WORC
- std r3,_WORC(r1)
/*
- * On POWER9, there are idle states such as stop4, invoked via cpuidle,
- * that lose hypervisor resources. In such cases, we need to save
- * additional SPRs before entering those idle states so that they can
- * be restored to their older values on wakeup from the idle state.
+ * Desired PSSCR in r3
*
- * On POWER8, the only such deep idle state is winkle which is used
- * only in the context of CPU-Hotplug, where these additional SPRs are
- * reinitiazed to a sane value. Hence there is no need to save/restore
- * these SPRs.
+ * No state will be lost regardless of wakeup mechanism (interrupt or NIA).
+ *
+ * An EC=0 type wakeup will return with a value of 0. SRESET wakeup (which can
+ * happen with xscom SRESET and possibly MCE) may clobber volatiles except LR,
+ * and must blr, to return to caller with r3 set according to caller's expected
+ * return code (for Book3S/64 that is SRR1).
*/
-BEGIN_FTR_SECTION
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
-power9_save_additional_sprs:
- mfspr r3, SPRN_PID
- mfspr r4, SPRN_LDBAR
- std r3, STOP_PID(r13)
- std r4, STOP_LDBAR(r13)
-
- mfspr r3, SPRN_FSCR
- mfspr r4, SPRN_HFSCR
- std r3, STOP_FSCR(r13)
- std r4, STOP_HFSCR(r13)
-
- mfspr r3, SPRN_MMCRA
- mfspr r4, SPRN_MMCR0
- std r3, STOP_MMCRA(r13)
- std r4, _MMCR0(r1)
-
- mfspr r3, SPRN_MMCR1
- mfspr r4, SPRN_MMCR2
- std r3, STOP_MMCR1(r13)
- std r4, STOP_MMCR2(r13)
- blr
-
-power9_restore_additional_sprs:
- ld r3,_LPCR(r1)
- ld r4, STOP_PID(r13)
- mtspr SPRN_LPCR,r3
- mtspr SPRN_PID, r4
-
- ld r3, STOP_LDBAR(r13)
- ld r4, STOP_FSCR(r13)
- mtspr SPRN_LDBAR, r3
- mtspr SPRN_FSCR, r4
-
- ld r3, STOP_HFSCR(r13)
- ld r4, STOP_MMCRA(r13)
- mtspr SPRN_HFSCR, r3
- mtspr SPRN_MMCRA, r4
-
- ld r3, _MMCR0(r1)
- ld r4, STOP_MMCR1(r13)
- mtspr SPRN_MMCR0, r3
- mtspr SPRN_MMCR1, r4
-
- ld r3, STOP_MMCR2(r13)
- ld r4, PACA_SPRG_VDSO(r13)
- mtspr SPRN_MMCR2, r3
- mtspr SPRN_SPRG3, r4
+_GLOBAL(isa300_idle_stop_noloss)
+ mtspr SPRN_PSSCR,r3
+ PPC_STOP
+ li r3,0
blr
/*
- * Used by threads when the lock bit of core_idle_state is set.
- * Threads will spin in HMT_LOW until the lock bit is cleared.
- * r14 - pointer to core_idle_state
- * r15 - used to load contents of core_idle_state
- * r9 - used as a temporary variable
+ * Desired PSSCR in r3
+ *
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
+ *
+ * A wakeup without GPR loss may alternatively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
+ *
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
*/
-
-core_idle_lock_held:
- HMT_LOW
-3: lwz r15,0(r14)
- andis. r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bne 3b
- HMT_MEDIUM
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bne- core_idle_lock_held
- blr
+_GLOBAL(isa300_idle_stop_mayloss)
+ mtspr SPRN_PSSCR,r3
+ std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /* use stack red zone rather than a new frame for saving regs */
+ std r2,-8*0(r1)
+ std r14,-8*1(r1)
+ std r15,-8*2(r1)
+ std r16,-8*3(r1)
+ std r17,-8*4(r1)
+ std r18,-8*5(r1)
+ std r19,-8*6(r1)
+ std r20,-8*7(r1)
+ std r21,-8*8(r1)
+ std r22,-8*9(r1)
+ std r23,-8*10(r1)
+ std r24,-8*11(r1)
+ std r25,-8*12(r1)
+ std r26,-8*13(r1)
+ std r27,-8*14(r1)
+ std r28,-8*15(r1)
+ std r29,-8*16(r1)
+ std r30,-8*17(r1)
+ std r31,-8*18(r1)
+ std r4,-8*19(r1)
+ std r5,-8*20(r1)
+ /* 168 bytes */
+ PPC_STOP
+ b . /* catch bugs */
/*
- * Pass requested state in r3:
- * r3 - PNV_THREAD_NAP/SLEEP/WINKLE in POWER8
- * - Requested PSSCR value in POWER9
+ * Desired return value in r3
+ *
+ * The idle wakeup SRESET interrupt can call this after calling
+ * to return to the idle sleep function caller with r3 as the return code.
*
- * Address of idle handler to branch to in realmode in r4
+ * This must not be used if idle was entered via a _noloss function (use
+ * a simple blr instead).
*/
-pnv_powersave_common:
- /* Use r3 to pass state nap/sleep/winkle */
- /* NAP is a state loss, we create a regs frame on the
- * stack, fill it up with the state we care about and
- * stick a pointer to it in PACAR1. We really only
- * need to save PC, some CR bits and the NV GPRs,
- * but for now an interrupt frame will do.
- */
- mtctr r4
-
- mflr r0
- std r0,16(r1)
- stdu r1,-INT_FRAME_SIZE(r1)
- std r0,_LINK(r1)
- std r0,_NIP(r1)
-
- /* We haven't lost state ... yet */
- li r0,0
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* Continue saving state */
- SAVE_GPR(2, r1)
- SAVE_NVGPRS(r1)
- mfcr r5
- std r5,_CCR(r1)
- std r1,PACAR1(r13)
-
-BEGIN_FTR_SECTION
- /*
- * POWER9 does not require real mode to stop, and presently does not
- * set hwthread_state for KVM (threads don't share MMU context), so
- * we can remain in virtual mode for this.
- */
- bctr
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- /*
- * POWER8
- * Go to real mode to do the nap, as required by the architecture.
- * Also, we need to be in real mode before setting hwthread_state,
- * because as soon as we do that, another thread can switch
- * the MMU context to the guest.
- */
- LOAD_REG_IMMEDIATE(r7, MSR_IDLE)
- mtmsrd r7,0
- bctr
+_GLOBAL(idle_return_gpr_loss)
+ ld r1,PACAR1(r13)
+ ld r4,-8*19(r1)
+ ld r5,-8*20(r1)
+ mtlr r4
+ mtcr r5
+ /*
+ * KVM nap requires r2 to be saved, rather than just restoring it
+ * from PACATOC. This could be avoided for that less common case
+ * if KVM saved its r2.
+ */
+ ld r2,-8*0(r1)
+ ld r14,-8*1(r1)
+ ld r15,-8*2(r1)
+ ld r16,-8*3(r1)
+ ld r17,-8*4(r1)
+ ld r18,-8*5(r1)
+ ld r19,-8*6(r1)
+ ld r20,-8*7(r1)
+ ld r21,-8*8(r1)
+ ld r22,-8*9(r1)
+ ld r23,-8*10(r1)
+ ld r24,-8*11(r1)
+ ld r25,-8*12(r1)
+ ld r26,-8*13(r1)
+ ld r27,-8*14(r1)
+ ld r28,-8*15(r1)
+ ld r29,-8*16(r1)
+ ld r30,-8*17(r1)
+ ld r31,-8*18(r1)
+ blr
/*
* This is the sequence required to execute idle instructions, as
* specified in ISA v2.07 (and earlier). MSR[IR] and MSR[DR] must be 0.
+ *
+ * The 0(r1) slot is used to save r2 in isa206, so use that here.
*/
#define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \
/* Magic NAP/SLEEP/WINKLE mode enter sequence */ \
- std r0,0(r1); \
+ std r2,0(r1); \
ptesync; \
- ld r0,0(r1); \
-236: cmpd cr0,r0,r0; \
+ ld r2,0(r1); \
+236: cmpd cr0,r2,r2; \
bne 236b; \
- IDLE_INST;
-
-
- .globl pnv_enter_arch207_idle_mode
-pnv_enter_arch207_idle_mode:
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
- li r4,KVM_HWTHREAD_IN_IDLE
- /******************************************************/
- /* N O T E W E L L ! ! ! N O T E W E L L */
- /* The following store to HSTATE_HWTHREAD_STATE(r13) */
- /* MUST occur in real mode, i.e. with the MMU off, */
- /* and the MMU must stay off until we clear this flag */
- /* and test HSTATE_HWTHREAD_REQ(r13) in */
- /* pnv_powersave_wakeup in this file. */
- /* The reason is that another thread can switch the */
- /* MMU to a guest context whenever this flag is set */
- /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
- /* that would potentially cause this thread to start */
- /* executing instructions from guest memory in */
- /* hypervisor mode, leading to a host crash or data */
- /* corruption, or worse. */
- /******************************************************/
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
- stb r3,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr3,r3,PNV_THREAD_SLEEP
- bge cr3,2f
- IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
- /* No return */
-2:
- /* Sleep or winkle */
- lbz r7,PACA_THREAD_MASK(r13)
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
- li r5,0
- beq cr3,3f
- lis r5,PNV_CORE_IDLE_WINKLE_COUNT@h
-3:
-lwarx_loop1:
- lwarx r15,0,r14
-
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
-
- add r15,r15,r5 /* Add if winkle */
- andc r15,r15,r7 /* Clear thread bit */
-
- andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
-
-/*
- * If cr0 = 0, then current thread is the last thread of the core entering
- * sleep. Last thread needs to execute the hardware bug workaround code if
- * required by the platform.
- * Make the workaround call unconditionally here. The below branch call is
- * patched out when the idle states are discovered if the platform does not
- * require it.
- */
-.global pnv_fastsleep_workaround_at_entry
-pnv_fastsleep_workaround_at_entry:
- beq fastsleep_workaround_at_entry
-
- stwcx. r15,0,r14
- bne- lwarx_loop1
- isync
-
-common_enter: /* common code for all the threads entering sleep or winkle */
- bgt cr3,enter_winkle
- IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
-
-fastsleep_workaround_at_entry:
- oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- stwcx. r15,0,r14
- bne- lwarx_loop1
- isync
-
- /* Fast sleep workaround */
- li r3,1
- li r4,1
- bl opal_config_cpu_idle_state
-
- /* Unlock */
- xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- lwsync
- stw r15,0(r14)
- b common_enter
-
-enter_winkle:
- bl save_sprs_to_stack
-
- IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-
-/*
- * r3 - PSSCR value corresponding to the requested stop state.
- */
-power_enter_stop:
-/*
- * Check if we are executing the lite variant with ESL=EC=0
- */
- andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED
- clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */
- bne .Lhandle_esl_ec_set
- PPC_STOP
- li r3,0 /* Since we didn't lose state, return 0 */
- std r3, PACA_REQ_PSSCR(r13)
-
- /*
- * pnv_wakeup_noloss() expects r12 to contain the SRR1 value so
- * it can determine if the wakeup reason is an HMI in
- * CHECK_HMI_INTERRUPT.
- *
- * However, when we wakeup with ESL=0, SRR1 will not contain the wakeup
- * reason, so there is no point setting r12 to SRR1.
- *
- * Further, we clear r12 here, so that we don't accidentally enter the
- * HMI in pnv_wakeup_noloss() if the value of r12[42:45] == WAKE_HMI.
- */
- li r12, 0
- b pnv_wakeup_noloss
-
-.Lhandle_esl_ec_set:
-BEGIN_FTR_SECTION
- /*
- * POWER9 DD2.0 or earlier can incorrectly set PMAO when waking up after
- * a state-loss idle. Saving and restoring MMCR0 over idle is a
- * workaround.
- */
- mfspr r4,SPRN_MMCR0
- std r4,_MMCR0(r1)
-END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
+ IDLE_INST; \
+ b . /* catch bugs */
/*
- * Check if the requested state is a deep idle state.
- */
- LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
- ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
- cmpd r3,r4
- bge .Lhandle_deep_stop
- PPC_STOP /* Does not return (system reset interrupt) */
-
-.Lhandle_deep_stop:
-/*
- * Entering deep idle state.
- * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to
- * stack and enter stop
- */
- lbz r7,PACA_THREAD_MASK(r13)
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
-
-lwarx_loop_stop:
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
- andc r15,r15,r7 /* Clear thread bit */
-
- stwcx. r15,0,r14
- bne- lwarx_loop_stop
- isync
-
- bl save_sprs_to_stack
-
- PPC_STOP /* Does not return (system reset interrupt) */
-
-/*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired idle state (PNV_THREAD_NAP/SLEEP/WINKLE).
- */
-_GLOBAL(power7_idle_insn)
- /* Now check if user or arch enabled NAP mode */
- LOAD_REG_ADDR(r4, pnv_enter_arch207_idle_mode)
- b pnv_powersave_common
-
-#define CHECK_HMI_INTERRUPT \
-BEGIN_FTR_SECTION_NESTED(66); \
- rlwinm r0,r12,45-31,0xf; /* extract wake reason field (P8) */ \
-FTR_SECTION_ELSE_NESTED(66); \
- rlwinm r0,r12,45-31,0xe; /* P7 wake reason field is 3 bits */ \
-ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
- cmpwi r0,0xa; /* Hypervisor maintenance ? */ \
- bne+ 20f; \
- /* Invoke opal call to handle hmi */ \
- ld r2,PACATOC(r13); \
- ld r1,PACAR1(r13); \
- std r3,ORIG_GPR3(r1); /* Save original r3 */ \
- li r3,0; /* NULL argument */ \
- bl hmi_exception_realmode; \
- nop; \
- ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
-20: nop;
-
-/*
- * Entered with MSR[EE]=0 and no soft-masked interrupts pending.
- * r3 contains desired PSSCR register value.
+ * Desired instruction type in r3
*
- * Offline (CPU unplug) case also must notify KVM that the CPU is
- * idle.
- */
-_GLOBAL(power9_offline_stop)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- */
- li r4,KVM_HWTHREAD_IN_IDLE
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
- /* fall through */
-
-_GLOBAL(power9_idle_stop)
- std r3, PACA_REQ_PSSCR(r13)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-BEGIN_FTR_SECTION
- sync
- lwz r5, PACA_DONT_STOP(r13)
- cmpwi r5, 0
- bne 1f
-END_FTR_SECTION_IFSET(CPU_FTR_P9_TM_XER_SO_BUG)
-#endif
- mtspr SPRN_PSSCR,r3
- LOAD_REG_ADDR(r4,power_enter_stop)
- b pnv_powersave_common
- /* No return */
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-1:
- /*
- * We get here when TM / thread reconfiguration bug workaround
- * code wants to get the CPU into SMT4 mode, and therefore
- * we are being asked not to stop.
- */
- li r3, 0
- std r3, PACA_REQ_PSSCR(r13)
- blr /* return 0 for wakeup cause / SRR1 value */
-#endif
-
-/*
- * Called from machine check handler for powersave wakeups.
- * Low level machine check processing has already been done. Now just
- * go through the wake up path to get everything in order.
+ * GPRs may be lost, so they are saved here. Wakeup is by interrupt only.
+ * The SRESET wakeup returns to this function's caller by calling
+ * idle_return_gpr_loss with r3 set to desired return value.
*
- * r3 - The original SRR1 value.
- * Original SRR[01] have been clobbered.
- * MSR_RI is clear.
- */
-.global pnv_powersave_wakeup_mce
-pnv_powersave_wakeup_mce:
- /* Set cr3 for pnv_powersave_wakeup */
- rlwinm r11,r3,47-31,30,31
- cmpwi cr3,r11,2
-
- /*
- * Now put the original SRR1 with SRR1_WAKEMCE_RESVD as the wake
- * reason into r12, which allows reuse of the system reset wakeup
- * code without being mistaken for another type of wakeup.
- */
- oris r12,r3,SRR1_WAKEMCE_RESVD@h
-
- b pnv_powersave_wakeup
-
-/*
- * Called from reset vector for powersave wakeups.
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
- * r12 - SRR1
- */
-.global pnv_powersave_wakeup
-pnv_powersave_wakeup:
- ld r2, PACATOC(r13)
-
-BEGIN_FTR_SECTION
- bl pnv_restore_hyp_resource_arch300
-FTR_SECTION_ELSE
- bl pnv_restore_hyp_resource_arch207
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
-
- li r0,PNV_THREAD_RUNNING
- stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
-
- mr r3,r12
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- lbz r0,HSTATE_HWTHREAD_STATE(r13)
- cmpwi r0,KVM_HWTHREAD_IN_KERNEL
- beq 0f
- li r0,KVM_HWTHREAD_IN_KERNEL
- stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* Order setting hwthread_state vs. testing hwthread_req */
- sync
-0: lbz r0,HSTATE_HWTHREAD_REQ(r13)
- cmpwi r0,0
- beq 1f
- b kvm_start_guest
-1:
-#endif
-
- /* Return SRR1 from power7_nap() */
- blt cr3,pnv_wakeup_noloss
- b pnv_wakeup_loss
-
-/*
- * Check whether we have woken up with hypervisor state loss.
- * If yes, restore hypervisor state and return back to link.
+ * A wakeup without GPR loss may alternatively be handled as in
+ * isa300_idle_stop_noloss and blr directly, as an optimisation.
*
- * cr3 - set to gt if waking up with partial/complete hypervisor state loss
- */
-pnv_restore_hyp_resource_arch300:
- /*
- * Workaround for POWER9, if we lost resources, the ERAT
- * might have been mixed up and needs flushing. We also need
- * to reload MMCR0 (see comment above). We also need to set
- * then clear bit 60 in MMCRA to ensure the PMU starts running.
- */
- blt cr3,1f
-BEGIN_FTR_SECTION
- PPC_INVALIDATE_ERAT
- ld r1,PACAR1(r13)
- ld r4,_MMCR0(r1)
- mtspr SPRN_MMCR0,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_POWER9_DD2_1)
- mfspr r4,SPRN_MMCRA
- ori r4,r4,(1 << (63-60))
- mtspr SPRN_MMCRA,r4
- xori r4,r4,(1 << (63-60))
- mtspr SPRN_MMCRA,r4
-1:
- /*
- * POWER ISA 3. Use PSSCR to determine if we
- * are waking up from deep idle state
- */
- LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state)
- ld r4,ADDROFF(pnv_first_deep_stop_state)(r5)
-
- /*
- * 0-3 bits correspond to Power-Saving Level Status
- * which indicates the idle state we are waking up from
- */
- mfspr r5, SPRN_PSSCR
- rldicl r5,r5,4,60
- li r0, 0 /* clear requested_psscr to say we're awake */
- std r0, PACA_REQ_PSSCR(r13)
- cmpd cr4,r5,r4
- bge cr4,pnv_wakeup_tb_loss /* returns to caller */
-
- blr /* Waking up without hypervisor state loss. */
-
-/* Same calling convention as arch300 */
-pnv_restore_hyp_resource_arch207:
- /*
- * POWER ISA 2.07 or less.
- * Check if we slept with sleep or winkle.
- */
- lbz r4,PACA_THREAD_IDLE_STATE(r13)
- cmpwi cr2,r4,PNV_THREAD_NAP
- bgt cr2,pnv_wakeup_tb_loss /* Either sleep or Winkle */
-
- /*
- * We fall through here if PACA_THREAD_IDLE_STATE shows we are waking
- * up from nap. At this stage CR3 shouldn't contains 'gt' since that
- * indicates we are waking with hypervisor state loss from nap.
- */
- bgt cr3,.
-
- blr /* Waking up without hypervisor state loss */
-
-/*
- * Called if waking up from idle state which can cause either partial or
- * complete hyp state loss.
- * In POWER8, called if waking up from fastsleep or winkle
- * In POWER9, called if waking up from stop state >= pnv_first_deep_stop_state
- *
- * r13 - PACA
- * cr3 - gt if waking up with partial/complete hypervisor state loss
- *
- * If ISA300:
- * cr4 - gt or eq if waking up from complete hypervisor state loss.
+ * The caller is responsible for saving/restoring SPRs, MSR, timebase,
+ * etc.
*
- * If ISA207:
- * r4 - PACA_THREAD_IDLE_STATE
+ * This must be called in real-mode (MSR_IDLE).
*/
-pnv_wakeup_tb_loss:
- ld r1,PACAR1(r13)
- /*
- * Before entering any idle state, the NVGPRs are saved in the stack.
- * If there was a state loss, or PACA_NAPSTATELOST was set, then the
- * NVGPRs are restored. If we are here, it is likely that state is lost,
- * but not guaranteed -- neither ISA207 nor ISA300 tests to reach
- * here are the same as the test to restore NVGPRS:
- * PACA_THREAD_IDLE_STATE test for ISA207, PSSCR test for ISA300,
- * and SRR1 test for restoring NVGPRs.
- *
- * We are about to clobber NVGPRs now, so set NAPSTATELOST to
- * guarantee they will always be restored. This might be tightened
- * with careful reading of specs (particularly for ISA300) but this
- * is already a slow wakeup path and it's simpler to be safe.
- */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-
- /*
- *
- * Save SRR1 and LR in NVGPRs as they might be clobbered in
- * opal_call() (called in CHECK_HMI_INTERRUPT). SRR1 is required
- * to determine the wakeup reason if we branch to kvm_start_guest. LR
- * is required to return back to reset vector after hypervisor state
- * restore is complete.
- */
- mr r19,r12
- mr r18,r4
- mflr r17
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-
- ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
- lbz r7,PACA_THREAD_MASK(r13)
-
- /*
- * Take the core lock to synchronize against other threads.
- *
- * Lock bit is set in one of the 2 cases-
- * a. In the sleep/winkle enter path, the last thread is executing
- * fastsleep workaround code.
- * b. In the wake up path, another thread is executing fastsleep
- * workaround undo code or resyncing timebase or restoring context
- * In either case loop until the lock bit is cleared.
- */
-1:
- lwarx r15,0,r14
- andis. r9,r15,PNV_CORE_IDLE_LOCK_BIT@h
- bnel- core_idle_lock_held
- oris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- stwcx. r15,0,r14
- bne- 1b
- isync
-
- andi. r9,r15,PNV_CORE_IDLE_THREAD_BITS
- cmpwi cr2,r9,0
-
- /*
- * At this stage
- * cr2 - eq if first thread to wakeup in core
- * cr3- gt if waking up with partial/complete hypervisor state loss
- * ISA300:
- * cr4 - gt or eq if waking up from complete hypervisor state loss.
- */
-
-BEGIN_FTR_SECTION
- /*
- * Were we in winkle?
- * If yes, check if all threads were in winkle, decrement our
- * winkle count, set all thread winkle bits if all were in winkle.
- * Check if our thread has a winkle bit set, and set cr4 accordingly
- * (to match ISA300, above). Pseudo-code for core idle state
- * transitions for ISA207 is as follows (everything happens atomically
- * due to store conditional and/or lock bit):
- *
- * nap_idle() { }
- * nap_wake() { }
- *
- * sleep_idle()
- * {
- * core_idle_state &= ~thread_in_core
- * }
- *
- * sleep_wake()
- * {
- * bool first_in_core, first_in_subcore;
- *
- * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
- * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
- *
- * core_idle_state |= thread_in_core;
- * }
- *
- * winkle_idle()
- * {
- * core_idle_state &= ~thread_in_core;
- * core_idle_state += 1 << WINKLE_COUNT_SHIFT;
- * }
- *
- * winkle_wake()
- * {
- * bool first_in_core, first_in_subcore, winkle_state_lost;
- *
- * first_in_core = (core_idle_state & IDLE_THREAD_BITS) == 0;
- * first_in_subcore = (core_idle_state & SUBCORE_SIBLING_MASK) == 0;
- *
- * core_idle_state |= thread_in_core;
- *
- * if ((core_idle_state & WINKLE_MASK) == (8 << WINKLE_COUNT_SIHFT))
- * core_idle_state |= THREAD_WINKLE_BITS;
- * core_idle_state -= 1 << WINKLE_COUNT_SHIFT;
- *
- * winkle_state_lost = core_idle_state &
- * (thread_in_core << WINKLE_THREAD_SHIFT);
- * core_idle_state &= ~(thread_in_core << WINKLE_THREAD_SHIFT);
- * }
- *
- */
- cmpwi r18,PNV_THREAD_WINKLE
+_GLOBAL(isa206_idle_insn_mayloss)
+ std r1,PACAR1(r13)
+ mflr r4
+ mfcr r5
+ /* use stack red zone rather than a new frame for saving regs */
+ std r2,-8*0(r1)
+ std r14,-8*1(r1)
+ std r15,-8*2(r1)
+ std r16,-8*3(r1)
+ std r17,-8*4(r1)
+ std r18,-8*5(r1)
+ std r19,-8*6(r1)
+ std r20,-8*7(r1)
+ std r21,-8*8(r1)
+ std r22,-8*9(r1)
+ std r23,-8*10(r1)
+ std r24,-8*11(r1)
+ std r25,-8*12(r1)
+ std r26,-8*13(r1)
+ std r27,-8*14(r1)
+ std r28,-8*15(r1)
+ std r29,-8*16(r1)
+ std r30,-8*17(r1)
+ std r31,-8*18(r1)
+ std r4,-8*19(r1)
+ std r5,-8*20(r1)
+ cmpwi r3,PNV_THREAD_NAP
+ bne 1f
+ IDLE_STATE_ENTER_SEQ_NORET(PPC_NAP)
+1: cmpwi r3,PNV_THREAD_SLEEP
bne 2f
- andis. r9,r15,PNV_CORE_IDLE_WINKLE_COUNT_ALL_BIT@h
- subis r15,r15,PNV_CORE_IDLE_WINKLE_COUNT@h
- beq 2f
- ori r15,r15,PNV_CORE_IDLE_THREAD_WINKLE_BITS /* all were winkle */
-2:
- /* Shift thread bit to winkle mask, then test if this thread is set,
- * and remove it from the winkle bits */
- slwi r8,r7,8
- and r8,r8,r15
- andc r15,r15,r8
- cmpwi cr4,r8,1 /* cr4 will be gt if our bit is set, lt if not */
-
- lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
- and r4,r4,r15
- cmpwi r4,0 /* Check if first in subcore */
-
- or r15,r15,r7 /* Set thread bit */
- beq first_thread_in_subcore
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
-
- or r15,r15,r7 /* Set thread bit */
- beq cr2,first_thread_in_core
-
- /* Not first thread in core or subcore to wake up */
- b clear_lock
-
-first_thread_in_subcore:
- /*
- * If waking up from sleep, subcore state is not lost. Hence
- * skip subcore state restore
- */
- blt cr4,subcore_state_restored
-
- /* Restore per-subcore state */
- ld r4,_SDR1(r1)
- mtspr SPRN_SDR1,r4
-
- ld r4,_RPR(r1)
- mtspr SPRN_RPR,r4
- ld r4,_AMOR(r1)
- mtspr SPRN_AMOR,r4
-
-subcore_state_restored:
- /*
- * Check if the thread is also the first thread in the core. If not,
- * skip to clear_lock.
- */
- bne cr2,clear_lock
-
-first_thread_in_core:
-
- /*
- * First thread in the core waking up from any state which can cause
- * partial or complete hypervisor state loss. It needs to
- * call the fastsleep workaround code if the platform requires it.
- * Call it unconditionally here. The below branch instruction will
- * be patched out if the platform does not have fastsleep or does not
- * require the workaround. Patching will be performed during the
- * discovery of idle-states.
- */
-.global pnv_fastsleep_workaround_at_exit
-pnv_fastsleep_workaround_at_exit:
- b fastsleep_workaround_at_exit
-
-timebase_resync:
- /*
- * Use cr3 which indicates that we are waking up with atleast partial
- * hypervisor state loss to determine if TIMEBASE RESYNC is needed.
- */
- ble cr3,.Ltb_resynced
- /* Time base re-sync */
- bl opal_resync_timebase;
- /*
- * If waking up from sleep (POWER8), per core state
- * is not lost, skip to clear_lock.
- */
-.Ltb_resynced:
- blt cr4,clear_lock
-
- /*
- * First thread in the core to wake up and its waking up with
- * complete hypervisor state loss. Restore per core hypervisor
- * state.
- */
-BEGIN_FTR_SECTION
- ld r4,_PTCR(r1)
- mtspr SPRN_PTCR,r4
- ld r4,_RPR(r1)
- mtspr SPRN_RPR,r4
- ld r4,_AMOR(r1)
- mtspr SPRN_AMOR,r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
- ld r4,_TSCR(r1)
- mtspr SPRN_TSCR,r4
- ld r4,_WORC(r1)
- mtspr SPRN_WORC,r4
-
-clear_lock:
- xoris r15,r15,PNV_CORE_IDLE_LOCK_BIT@h
- lwsync
- stw r15,0(r14)
-
-common_exit:
- /*
- * Common to all threads.
- *
- * If waking up from sleep, hypervisor state is not lost. Hence
- * skip hypervisor state restore.
- */
- blt cr4,hypervisor_state_restored
-
- /* Waking up from winkle */
-
-BEGIN_MMU_FTR_SECTION
- b no_segments
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
- /* Restore SLB from PACA */
- ld r8,PACA_SLBSHADOWPTR(r13)
-
- .rept SLB_NUM_BOLTED
- li r3, SLBSHADOW_SAVEAREA
- LDX_BE r5, r8, r3
- addi r3, r3, 8
- LDX_BE r6, r8, r3
- andis. r7,r5,SLB_ESID_V@h
- beq 1f
- slbmte r6,r5
-1: addi r8,r8,16
- .endr
-no_segments:
-
- /* Restore per thread state */
-
- ld r4,_SPURR(r1)
- mtspr SPRN_SPURR,r4
- ld r4,_PURR(r1)
- mtspr SPRN_PURR,r4
- ld r4,_DSCR(r1)
- mtspr SPRN_DSCR,r4
- ld r4,_WORT(r1)
- mtspr SPRN_WORT,r4
-
- /* Call cur_cpu_spec->cpu_restore() */
- LOAD_REG_ADDR(r4, cur_cpu_spec)
- ld r4,0(r4)
- ld r12,CPU_SPEC_RESTORE(r4)
-#ifdef PPC64_ELF_ABI_v1
- ld r12,0(r12)
-#endif
- mtctr r12
- bctrl
-
-/*
- * On POWER9, we can come here on wakeup from a cpuidle stop state.
- * Hence restore the additional SPRs to the saved value.
- *
- * On POWER8, we come here only on winkle. Since winkle is used
- * only in the case of CPU-Hotplug, we don't need to restore
- * the additional SPRs.
- */
-BEGIN_FTR_SECTION
- bl power9_restore_additional_sprs
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-hypervisor_state_restored:
-
- mr r12,r19
- mtlr r17
- blr /* return to pnv_powersave_wakeup */
-
-fastsleep_workaround_at_exit:
- li r3,1
- li r4,0
- bl opal_config_cpu_idle_state
- b timebase_resync
-
-/*
- * R3 here contains the value that will be returned to the caller
- * of power7_nap.
- * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
- */
-.global pnv_wakeup_loss
-pnv_wakeup_loss:
- ld r1,PACAR1(r13)
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- REST_NVGPRS(r1)
- REST_GPR(2, r1)
- ld r4,PACAKMSR(r13)
- ld r5,_LINK(r1)
- ld r6,_CCR(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtlr r5
- mtcr r6
- mtmsrd r4
- blr
+ IDLE_STATE_ENTER_SEQ_NORET(PPC_SLEEP)
+2: IDLE_STATE_ENTER_SEQ_NORET(PPC_WINKLE)
-/*
- * R3 here contains the value that will be returned to the caller
- * of power7_nap.
- * R12 contains SRR1 for CHECK_HMI_INTERRUPT.
- */
-pnv_wakeup_noloss:
- lbz r0,PACA_NAPSTATELOST(r13)
- cmpwi r0,0
- bne pnv_wakeup_loss
- ld r1,PACAR1(r13)
-BEGIN_FTR_SECTION
- CHECK_HMI_INTERRUPT
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
- ld r4,PACAKMSR(r13)
- ld r5,_NIP(r1)
- ld r6,_CCR(r1)
- addi r1,r1,INT_FRAME_SIZE
- mtlr r5
- mtcr r6
- mtmsrd r4
- blr
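Note: the rewritten idle_book3s.S above now only exports small C-callable entry points (isa300_idle_stop_noloss, isa300_idle_stop_mayloss, isa206_idle_insn_mayloss) plus idle_return_gpr_loss; the platform idle driver is expected to pick the noloss or mayloss variant depending on whether the requested PSSCR enables a state-losing stop. A hedged sketch of that choice, assuming the prototypes added elsewhere in this series — the patch's real C callers are not shown in this hunk:

	#include <asm/processor.h>	/* prototypes assumed from this series */
	#include <asm/reg.h>		/* PSSCR_EC, PSSCR_ESL */

	/* Illustration only: choose an ISA v3.0 stop entry point. */
	static unsigned long example_stop(unsigned long psscr)
	{
		if (psscr & (PSSCR_EC | PSSCR_ESL))
			/* GPRs may be lost; wakeup returns via idle_return_gpr_loss() */
			return isa300_idle_stop_mayloss(psscr);

		/* EC=ESL=0: stop behaves like a normal instruction, nothing lost */
		return isa300_idle_stop_noloss(psscr);
	}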
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 8a936723c791..ada901af4950 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -81,10 +81,7 @@
DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
EXPORT_PER_CPU_SYMBOL(irq_stat);
-int __irq_offset_value;
-
#ifdef CONFIG_PPC32
-EXPORT_SYMBOL(__irq_offset_value);
atomic_t ppc_n_lost_interrupts;
#ifdef CONFIG_TAU_INT
@@ -261,16 +258,9 @@ notrace void arch_local_irq_restore(unsigned long mask)
*/
irq_happened = get_irq_happened();
if (!irq_happened) {
- /*
- * FIXME. Here we'd like to be able to do:
- *
- * #ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
- * WARN_ON(!(mfmsr() & MSR_EE));
- * #endif
- *
- * But currently it hits in a few paths, we should fix those and
- * enable the warning.
- */
+#ifdef CONFIG_PPC_IRQ_SOFT_MASK_DEBUG
+ WARN_ON(!(mfmsr() & MSR_EE));
+#endif
return;
}
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index b5fec1f9751a..4581377cfc98 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -112,6 +112,7 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->srr1 = regs->msr;
mce->gpr3 = regs->gpr[3];
mce->in_use = 1;
+ mce->cpu = get_paca()->paca_index;
/* Mark it recovered if we have handled it and MSR(RI=1). */
if (handled && (regs->msr & MSR_RI))
@@ -121,6 +122,8 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->initiator = mce_err->initiator;
mce->severity = mce_err->severity;
+ mce->sync_error = mce_err->sync_error;
+ mce->error_class = mce_err->error_class;
/*
* Populate the mce error_type and type-specific error_type.
@@ -310,7 +313,11 @@ static void machine_check_process_queued_event(struct irq_work *work)
void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode, bool in_guest)
{
- const char *level, *sevstr, *subtype;
+ const char *level, *sevstr, *subtype, *err_type;
+ uint64_t ea = 0, pa = 0;
+ int n = 0;
+ char dar_str[50];
+ char pa_str[50];
static const char *mc_ue_types[] = {
"Indeterminate",
"Instruction fetch",
@@ -357,6 +364,13 @@ void machine_check_print_event_info(struct machine_check_event *evt,
"Store (timeout)",
"Page table walk Load/Store (timeout)",
};
+ static const char *mc_error_class[] = {
+ "Unknown",
+ "Hardware error",
+ "Probable Hardware error (some chance of software cause)",
+ "Software error",
+ "Probable Software error (some chance of hardware cause)",
+ };
/* Print things out */
if (evt->version != MCE_V1) {
@@ -371,9 +385,9 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
case MCE_SEV_WARNING:
level = KERN_WARNING;
- sevstr = "";
+ sevstr = "Warning";
break;
- case MCE_SEV_ERROR_SYNC:
+ case MCE_SEV_SEVERE:
level = KERN_ERR;
sevstr = "Severe";
break;
@@ -384,101 +398,107 @@ void machine_check_print_event_info(struct machine_check_event *evt,
break;
}
- printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
- evt->disposition == MCE_DISPOSITION_RECOVERED ?
- "Recovered" : "Not recovered");
-
- if (in_guest) {
- printk("%s Guest NIP: %016llx\n", level, evt->srr0);
- } else if (user_mode) {
- printk("%s NIP: [%016llx] PID: %d Comm: %s\n", level,
- evt->srr0, current->pid, current->comm);
- } else {
- printk("%s NIP [%016llx]: %pS\n", level, evt->srr0,
- (void *)evt->srr0);
- }
-
- printk("%s Initiator: %s\n", level,
- evt->initiator == MCE_INITIATOR_CPU ? "CPU" : "Unknown");
switch (evt->error_type) {
case MCE_ERROR_TYPE_UE:
+ err_type = "UE";
subtype = evt->u.ue_error.ue_error_type <
ARRAY_SIZE(mc_ue_types) ?
mc_ue_types[evt->u.ue_error.ue_error_type]
: "Unknown";
- printk("%s Error type: UE [%s]\n", level, subtype);
if (evt->u.ue_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ue_error.effective_address);
+ ea = evt->u.ue_error.effective_address;
if (evt->u.ue_error.physical_address_provided)
- printk("%s Physical address: %016llx\n",
- level, evt->u.ue_error.physical_address);
+ pa = evt->u.ue_error.physical_address;
break;
case MCE_ERROR_TYPE_SLB:
+ err_type = "SLB";
subtype = evt->u.slb_error.slb_error_type <
ARRAY_SIZE(mc_slb_types) ?
mc_slb_types[evt->u.slb_error.slb_error_type]
: "Unknown";
- printk("%s Error type: SLB [%s]\n", level, subtype);
if (evt->u.slb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.slb_error.effective_address);
+ ea = evt->u.slb_error.effective_address;
break;
case MCE_ERROR_TYPE_ERAT:
+ err_type = "ERAT";
subtype = evt->u.erat_error.erat_error_type <
ARRAY_SIZE(mc_erat_types) ?
mc_erat_types[evt->u.erat_error.erat_error_type]
: "Unknown";
- printk("%s Error type: ERAT [%s]\n", level, subtype);
if (evt->u.erat_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.erat_error.effective_address);
+ ea = evt->u.erat_error.effective_address;
break;
case MCE_ERROR_TYPE_TLB:
+ err_type = "TLB";
subtype = evt->u.tlb_error.tlb_error_type <
ARRAY_SIZE(mc_tlb_types) ?
mc_tlb_types[evt->u.tlb_error.tlb_error_type]
: "Unknown";
- printk("%s Error type: TLB [%s]\n", level, subtype);
if (evt->u.tlb_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.tlb_error.effective_address);
+ ea = evt->u.tlb_error.effective_address;
break;
case MCE_ERROR_TYPE_USER:
+ err_type = "User";
subtype = evt->u.user_error.user_error_type <
ARRAY_SIZE(mc_user_types) ?
mc_user_types[evt->u.user_error.user_error_type]
: "Unknown";
- printk("%s Error type: User [%s]\n", level, subtype);
if (evt->u.user_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.user_error.effective_address);
+ ea = evt->u.user_error.effective_address;
break;
case MCE_ERROR_TYPE_RA:
+ err_type = "Real address";
subtype = evt->u.ra_error.ra_error_type <
ARRAY_SIZE(mc_ra_types) ?
mc_ra_types[evt->u.ra_error.ra_error_type]
: "Unknown";
- printk("%s Error type: Real address [%s]\n", level, subtype);
if (evt->u.ra_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.ra_error.effective_address);
+ ea = evt->u.ra_error.effective_address;
break;
case MCE_ERROR_TYPE_LINK:
+ err_type = "Link";
subtype = evt->u.link_error.link_error_type <
ARRAY_SIZE(mc_link_types) ?
mc_link_types[evt->u.link_error.link_error_type]
: "Unknown";
- printk("%s Error type: Link [%s]\n", level, subtype);
if (evt->u.link_error.effective_address_provided)
- printk("%s Effective address: %016llx\n",
- level, evt->u.link_error.effective_address);
+ ea = evt->u.link_error.effective_address;
break;
default:
case MCE_ERROR_TYPE_UNKNOWN:
- printk("%s Error type: Unknown\n", level);
+ err_type = "Unknown";
+ subtype = "";
break;
}
+
+ dar_str[0] = pa_str[0] = '\0';
+ if (ea && evt->srr0 != ea) {
+ /* Load/Store address */
+ n = sprintf(dar_str, "DAR: %016llx ", ea);
+ if (pa)
+ sprintf(dar_str + n, "paddr: %016llx ", pa);
+ } else if (pa) {
+ sprintf(pa_str, " paddr: %016llx", pa);
+ }
+
+ printk("%sMCE: CPU%d: machine check (%s) %s %s %s %s[%s]\n",
+ level, evt->cpu, sevstr, in_guest ? "Guest" : "Host",
+ err_type, subtype, dar_str,
+ evt->disposition == MCE_DISPOSITION_RECOVERED ?
+ "Recovered" : "Not recovered");
+
+ if (in_guest || user_mode) {
+ printk("%sMCE: CPU%d: PID: %d Comm: %s %sNIP: [%016llx]%s\n",
+ level, evt->cpu, current->pid, current->comm,
+ in_guest ? "Guest " : "", evt->srr0, pa_str);
+ } else {
+ printk("%sMCE: CPU%d: NIP: [%016llx] %pS%s\n",
+ level, evt->cpu, evt->srr0, (void *)evt->srr0, pa_str);
+ }
+
+ subtype = evt->error_class < ARRAY_SIZE(mc_error_class) ?
+ mc_error_class[evt->error_class] : "Unknown";
+ printk("%sMCE: CPU%d: %s\n", level, evt->cpu, subtype);
}
EXPORT_SYMBOL_GPL(machine_check_print_event_info);
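Note: the rework above collects the effective and physical addresses into dar_str/pa_str first, then emits the whole event in a few consolidated printk lines. A hedged, freestanding sketch of that string-assembly logic (userspace C, so the formatting can be eyeballed outside the kernel; it mirrors the code above but is not part of the patch):

	#include <stdint.h>
	#include <stdio.h>

	static void format_addrs(char *dar_str, size_t dlen,
				 char *pa_str, size_t plen,
				 uint64_t srr0, uint64_t ea, uint64_t pa)
	{
		int n;

		dar_str[0] = pa_str[0] = '\0';
		if (ea && srr0 != ea) {
			/* load/store address differs from the faulting NIP */
			n = snprintf(dar_str, dlen, "DAR: %016llx ",
				     (unsigned long long)ea);
			if (pa && n > 0 && (size_t)n < dlen)
				snprintf(dar_str + n, dlen - n, "paddr: %016llx ",
					 (unsigned long long)pa);
		} else if (pa) {
			snprintf(pa_str, plen, " paddr: %016llx",
				 (unsigned long long)pa);
		}
	}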
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index 6b800eec31f2..b5e876efe864 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -36,7 +36,7 @@
* Convert an address related to an mm to a PFN. NOTE: we are in real
* mode, we could potentially race with page table updates.
*/
-static unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
+unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr)
{
pte_t *ptep;
unsigned long flags;
@@ -131,213 +131,232 @@ struct mce_ierror_table {
bool nip_valid; /* nip is a valid indicator of faulting address */
unsigned int error_type;
unsigned int error_subtype;
+ unsigned int error_class;
unsigned int initiator;
unsigned int severity;
+ bool sync_error;
};
static const struct mce_ierror_table mce_p7_ierror_table[] = {
{ 0x00000000001c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000100000, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000001c0000, 0x0000000000180000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000001c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
static const struct mce_ierror_table mce_p8_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000100000, true,
- MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000180000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008000000, true,
- MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008040000, true,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
static const struct mce_ierror_table mce_p9_ierror_table[] = {
{ 0x00000000081c0000, 0x0000000000040000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000000080000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000000c0000, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000100000, true,
- MCE_ERROR_TYPE_ERAT,MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000140000, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000000081c0000, 0x0000000000180000, true,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000001c0000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008000000, true,
- MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_IFETCH_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008040000, true,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_PAGE_TABLE_WALK_IFETCH_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x00000000080c0000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008100000, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000000081c0000, 0x0000000008140000, false,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
{ 0x00000000081c0000, 0x0000000008180000, false,
MCE_ERROR_TYPE_LINK,MCE_LINK_ERROR_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_FATAL, }, /* ASYNC is fatal */
-{ 0x00000000081c0000, 0x00000000081c0000, true,
+ MCE_INITIATOR_CPU, MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true, MCE_ECLASS_HARDWARE,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, 0, 0, 0, 0, 0 } };
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
struct mce_derror_table {
unsigned long dsisr_value;
bool dar_valid; /* dar is a valid indicator of faulting address */
unsigned int error_type;
unsigned int error_subtype;
+ unsigned int error_class;
unsigned int initiator;
unsigned int severity;
+ bool sync_error;
};
static const struct mce_derror_table mce_p7_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000040, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_INDETERMINATE, /* BOTH */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
static const struct mce_derror_table mce_p8_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00002000, true,
- MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00001000, true,
MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000200, true,
MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, /* SECONDARY ERAT */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
static const struct mce_derror_table mce_p9_derror_table[] = {
{ 0x00008000, false,
- MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_UE, MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00004000, true,
MCE_ERROR_TYPE_UE, MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00002000, true,
- MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_LOAD_TIMEOUT, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00001000, true,
MCE_ERROR_TYPE_LINK, MCE_LINK_ERROR_PAGE_TABLE_WALK_LOAD_STORE_TIMEOUT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000800, true,
- MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000400, true,
- MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000200, false,
- MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000080, true,
MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, /* Before PARITY */
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_SOFT_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_WARNING, true },
{ 0x00000100, true,
- MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000040, true,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000020, false,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000010, false,
MCE_ERROR_TYPE_RA, MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
+ MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
{ 0x00000008, false,
- MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN,
- MCE_INITIATOR_CPU, MCE_SEV_ERROR_SYNC, },
-{ 0, false, 0, 0, 0, 0 } };
+ MCE_ERROR_TYPE_RA, MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+ MCE_INITIATOR_CPU, MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
static int mce_find_instr_ea_and_pfn(struct pt_regs *regs, uint64_t *addr,
uint64_t *phys_addr)
@@ -404,6 +423,7 @@ static int mce_handle_ierror(struct pt_regs *regs,
/* now fill in mce_error_info */
mce_err->error_type = table[i].error_type;
+ mce_err->error_class = table[i].error_class;
switch (table[i].error_type) {
case MCE_ERROR_TYPE_UE:
mce_err->u.ue_error_type = table[i].error_subtype;
@@ -427,11 +447,12 @@ static int mce_handle_ierror(struct pt_regs *regs,
mce_err->u.link_error_type = table[i].error_subtype;
break;
}
+ mce_err->sync_error = table[i].sync_error;
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
if (table[i].nip_valid) {
*addr = regs->nip;
- if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
unsigned long pfn;
@@ -448,8 +469,10 @@ static int mce_handle_ierror(struct pt_regs *regs,
}
mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
- mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->error_class = MCE_ECLASS_UNKNOWN;
+ mce_err->severity = MCE_SEV_SEVERE;
mce_err->initiator = MCE_INITIATOR_CPU;
+ mce_err->sync_error = true;
return 0;
}
@@ -496,6 +519,7 @@ static int mce_handle_derror(struct pt_regs *regs,
/* now fill in mce_error_info */
mce_err->error_type = table[i].error_type;
+ mce_err->error_class = table[i].error_class;
switch (table[i].error_type) {
case MCE_ERROR_TYPE_UE:
mce_err->u.ue_error_type = table[i].error_subtype;
@@ -519,11 +543,12 @@ static int mce_handle_derror(struct pt_regs *regs,
mce_err->u.link_error_type = table[i].error_subtype;
break;
}
+ mce_err->sync_error = table[i].sync_error;
mce_err->severity = table[i].severity;
mce_err->initiator = table[i].initiator;
if (table[i].dar_valid)
*addr = regs->dar;
- else if (mce_err->severity == MCE_SEV_ERROR_SYNC &&
+ else if (mce_err->sync_error &&
table[i].error_type == MCE_ERROR_TYPE_UE) {
/*
* We do a maximum of 4 nested MCE calls, see
@@ -539,8 +564,10 @@ static int mce_handle_derror(struct pt_regs *regs,
return handled;
mce_err->error_type = MCE_ERROR_TYPE_UNKNOWN;
- mce_err->severity = MCE_SEV_ERROR_SYNC;
+ mce_err->error_class = MCE_ECLASS_UNKNOWN;
+ mce_err->severity = MCE_SEV_SEVERE;
mce_err->initiator = MCE_INITIATOR_CPU;
+ mce_err->sync_error = true;
return 0;
}
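
The hunks above split the old MCE_SEV_ERROR_SYNC severity into a plain severity level plus a sync_error flag, and add an error_class field to every table entry. A rough sketch of the entry layout implied by the initializers (kernel context assumed; the struct and field names here are guesses for illustration only, the real definition lives in mce_power.c and is not part of this excerpt):

struct mce_derror_table_entry {			/* hypothetical name */
	unsigned long	dsisr_value;	/* bit tested in DSISR (SRR1 for the ierror table) */
	bool		dar_valid;	/* nip_valid in the instruction-error table */
	unsigned int	error_type;	/* MCE_ERROR_TYPE_* */
	unsigned int	error_subtype;	/* e.g. MCE_SLB_ERROR_PARITY */
	unsigned int	error_class;	/* new: MCE_ECLASS_* */
	unsigned int	initiator;	/* MCE_INITIATOR_* */
	unsigned int	severity;	/* now MCE_SEV_WARNING / MCE_SEV_SEVERE */
	bool		sync_error;	/* new: replaces the old MCE_SEV_ERROR_SYNC */
};

The terminator entry grows an extra zero accordingly ({ 0, false, 0, 0, 0, 0, 0 }), and the handlers copy error_class and sync_error into mce_err alongside the existing fields.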
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index e7382abee868..9cc91d03ab62 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -267,12 +267,12 @@ void copy_mm_to_paca(struct mm_struct *mm)
get_paca()->mm_ctx_id = context->id;
#ifdef CONFIG_PPC_MM_SLICES
- VM_BUG_ON(!mm->context.slb_addr_limit);
- get_paca()->mm_ctx_slb_addr_limit = mm->context.slb_addr_limit;
- memcpy(&get_paca()->mm_ctx_low_slices_psize,
- &context->low_slices_psize, sizeof(context->low_slices_psize));
- memcpy(&get_paca()->mm_ctx_high_slices_psize,
- &context->high_slices_psize, TASK_SLICE_ARRAY_SZ(mm));
+ VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
+ get_paca()->mm_ctx_slb_addr_limit = mm_ctx_slb_addr_limit(context);
+ memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
+ LOW_SLICE_ARRAY_SZ);
+ memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
+ TASK_SLICE_ARRAY_SZ(context));
#else /* CONFIG_PPC_MM_SLICES */
get_paca()->mm_ctx_user_psize = context->user_psize;
get_paca()->mm_ctx_sllp = context->sllp;
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dd9e0d5386ee..87da40129927 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -67,6 +67,7 @@
#include <asm/cpu_has_feature.h>
#include <asm/asm-prototypes.h>
#include <asm/stacktrace.h>
+#include <asm/hw_breakpoint.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -133,7 +134,8 @@ static int __init enable_strict_msr_control(char *str)
}
early_param("ppc_strict_facility_enable", enable_strict_msr_control);
-unsigned long msr_check_and_set(unsigned long bits)
+/* notrace because it's called by restore_math */
+unsigned long notrace msr_check_and_set(unsigned long bits)
{
unsigned long oldmsr = mfmsr();
unsigned long newmsr;
@@ -152,7 +154,8 @@ unsigned long msr_check_and_set(unsigned long bits)
}
EXPORT_SYMBOL_GPL(msr_check_and_set);
-void __msr_check_and_clear(unsigned long bits)
+/* notrace because it's called by restore_math */
+void notrace __msr_check_and_clear(unsigned long bits)
{
unsigned long oldmsr = mfmsr();
unsigned long newmsr;
@@ -525,7 +528,17 @@ void giveup_all(struct task_struct *tsk)
}
EXPORT_SYMBOL(giveup_all);
-void restore_math(struct pt_regs *regs)
+/*
+ * The exception exit path calls restore_math() with interrupts hard disabled
+ * but the soft irq state not "reconciled". ftrace code that calls
+ * local_irq_save/restore causes warnings.
+ *
+ * Rather than complicate the exit path, just don't trace restore_math. An
+ * alternative would be to have the ftrace entry code check for this
+ * un-reconciled condition, where MSR[EE]=0 and PACA_IRQ_HARD_DIS is not set,
+ * and temporarily fix it up for the duration of the ftrace call.
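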
+ */
+void notrace restore_math(struct pt_regs *regs)
{
unsigned long msr;
@@ -784,7 +797,7 @@ static inline int set_dabr(struct arch_hw_breakpoint *brk)
return __set_dabr(dabr, dabrx);
}
-static inline int set_dawr(struct arch_hw_breakpoint *brk)
+int set_dawr(struct arch_hw_breakpoint *brk)
{
unsigned long dawr, dawrx, mrd;
@@ -816,7 +829,7 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
{
memcpy(this_cpu_ptr(&current_brk), brk, sizeof(*brk));
- if (cpu_has_feature(CPU_FTR_DAWR))
+ if (dawr_enabled())
// Power8 or later
set_dawr(brk);
else if (!cpu_has_feature(CPU_FTR_ARCH_207S))
@@ -830,8 +843,8 @@ void __set_breakpoint(struct arch_hw_breakpoint *brk)
/* Check if we have DAWR or DABR hardware */
bool ppc_breakpoint_available(void)
{
- if (cpu_has_feature(CPU_FTR_DAWR))
- return true; /* POWER8 DAWR */
+ if (dawr_enabled())
+ return true; /* POWER8 DAWR or POWER9 forced DAWR */
if (cpu_has_feature(CPU_FTR_ARCH_207S))
return false; /* POWER9 with DAWR disabled */
/* DABR: Everything but POWER8 and POWER9 */
@@ -1151,11 +1164,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
thread_pkey_regs_restore(new_thread, old_thread);
}
-#ifdef CONFIG_PPC_BOOK3S_64
-#define CP_SIZE 128
-static const u8 dummy_copy_buffer[CP_SIZE] __attribute__((aligned(CP_SIZE)));
-#endif
-
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *new)
{
@@ -1729,7 +1737,8 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
#ifdef CONFIG_PPC_BOOK3S_64
- preload_new_slb_context(start, sp);
+ if (!radix_enabled())
+ preload_new_slb_context(start, sp);
#endif
#endif
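
Several hunks in this file (and in ptrace.c and the KVM code further down) replace cpu_has_feature(CPU_FTR_DAWR) with dawr_enabled(). The helper itself is not part of this excerpt; a plausible minimal shape, assuming it simply reads the dawr_force_enable byte that the KVM assembly below loads with lbz, would be:

/* sketch only -- the real definition is expected in asm/hw_breakpoint.h */
extern bool dawr_force_enable;

static inline bool dawr_enabled(void)
{
	/* set for POWER8, or when DAWR is force-enabled on POWER9 */
	return dawr_force_enable;
}

This keeps the C and assembly sides keyed off a single flag rather than a CPU feature bit, which is why the FTR_SECTION patching around the DAWR writes can be dropped.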
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index f33ff4163a51..523bb99d7676 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -154,10 +154,8 @@ static struct prom_t __prombss prom;
static unsigned long __prombss prom_entry;
-#define PROM_SCRATCH_SIZE 256
-
static char __prombss of_stdout_device[256];
-static char __prombss prom_scratch[PROM_SCRATCH_SIZE];
+static char __prombss prom_scratch[256];
static unsigned long __prombss dt_header_start;
static unsigned long __prombss dt_struct_start, dt_struct_end;
@@ -224,6 +222,135 @@ static bool __prombss rtas_has_query_cpu_stopped;
#define PHANDLE_VALID(p) ((p) != 0 && (p) != PROM_ERROR)
#define IHANDLE_VALID(i) ((i) != 0 && (i) != PROM_ERROR)
+/* Copied from lib/string.c and lib/kstrtox.c */
+
+static int __init prom_strcmp(const char *cs, const char *ct)
+{
+ unsigned char c1, c2;
+
+ while (1) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ }
+ return 0;
+}
+
+static char __init *prom_strcpy(char *dest, const char *src)
+{
+ char *tmp = dest;
+
+ while ((*dest++ = *src++) != '\0')
+ /* nothing */;
+ return tmp;
+}
+
+static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
+{
+ unsigned char c1, c2;
+
+ while (count) {
+ c1 = *cs++;
+ c2 = *ct++;
+ if (c1 != c2)
+ return c1 < c2 ? -1 : 1;
+ if (!c1)
+ break;
+ count--;
+ }
+ return 0;
+}
+
+static size_t __init prom_strlen(const char *s)
+{
+ const char *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
+
+static int __init prom_memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
+ if ((res = *su1 - *su2) != 0)
+ break;
+ return res;
+}
+
+static char __init *prom_strstr(const char *s1, const char *s2)
+{
+ size_t l1, l2;
+
+ l2 = prom_strlen(s2);
+ if (!l2)
+ return (char *)s1;
+ l1 = prom_strlen(s1);
+ while (l1 >= l2) {
+ l1--;
+ if (!prom_memcmp(s1, s2, l2))
+ return (char *)s1;
+ s1++;
+ }
+ return NULL;
+}
+
+static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
+{
+ size_t ret = prom_strlen(src);
+
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+ memcpy(dest, src, len);
+ dest[len] = '\0';
+ }
+ return ret;
+}
+
+#ifdef CONFIG_PPC_PSERIES
+static int __init prom_strtobool(const char *s, bool *res)
+{
+ if (!s)
+ return -EINVAL;
+
+ switch (s[0]) {
+ case 'y':
+ case 'Y':
+ case '1':
+ *res = true;
+ return 0;
+ case 'n':
+ case 'N':
+ case '0':
+ *res = false;
+ return 0;
+ case 'o':
+ case 'O':
+ switch (s[1]) {
+ case 'n':
+ case 'N':
+ *res = true;
+ return 0;
+ case 'f':
+ case 'F':
+ *res = false;
+ return 0;
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+
+ return -EINVAL;
+}
+#endif
/* This is the one and *ONLY* place where we actually call open
* firmware.
@@ -555,7 +682,7 @@ static int __init prom_setprop(phandle node, const char *nodename,
add_string(&p, tohex((u32)(unsigned long) value));
add_string(&p, tohex(valuelen));
add_string(&p, tohex(ADDR(pname)));
- add_string(&p, tohex(strlen(pname)));
+ add_string(&p, tohex(prom_strlen(pname)));
add_string(&p, "property");
*p = 0;
return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
@@ -631,33 +758,30 @@ static void __init early_cmdline_parse(void)
const char *opt;
char *p;
- int l __maybe_unused = 0;
+ int l = 0;
prom_cmd_line[0] = 0;
p = prom_cmd_line;
if ((long)prom.chosen > 0)
l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
-#ifdef CONFIG_CMDLINE
- if (l <= 0 || p[0] == '\0') /* dbl check */
- strlcpy(prom_cmd_line,
- CONFIG_CMDLINE, sizeof(prom_cmd_line));
-#endif /* CONFIG_CMDLINE */
+ if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */
+ prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line));
prom_printf("command line: %s\n", prom_cmd_line);
#ifdef CONFIG_PPC64
- opt = strstr(prom_cmd_line, "iommu=");
+ opt = prom_strstr(prom_cmd_line, "iommu=");
if (opt) {
prom_printf("iommu opt is: %s\n", opt);
opt += 6;
while (*opt && *opt == ' ')
opt++;
- if (!strncmp(opt, "off", 3))
+ if (!prom_strncmp(opt, "off", 3))
prom_iommu_off = 1;
- else if (!strncmp(opt, "force", 5))
+ else if (!prom_strncmp(opt, "force", 5))
prom_iommu_force_on = 1;
}
#endif
- opt = strstr(prom_cmd_line, "mem=");
+ opt = prom_strstr(prom_cmd_line, "mem=");
if (opt) {
opt += 4;
prom_memory_limit = prom_memparse(opt, (const char **)&opt);
@@ -669,13 +793,13 @@ static void __init early_cmdline_parse(void)
#ifdef CONFIG_PPC_PSERIES
prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
- opt = strstr(prom_cmd_line, "disable_radix");
+ opt = prom_strstr(prom_cmd_line, "disable_radix");
if (opt) {
opt += 13;
if (*opt && *opt == '=') {
bool val;
- if (kstrtobool(++opt, &val))
+ if (prom_strtobool(++opt, &val))
prom_radix_disable = false;
else
prom_radix_disable = val;
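
prom_strtobool() above accepts the same spellings as the generic kstrtobool() (y/Y/1, n/N/0, on/off). A minimal host-side sketch of how the disable_radix= parsing consumes such a helper; the command line string and the default value are made up for illustration:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Same accepted spellings as prom_strtobool() above: y/Y/1, n/N/0, on/off. */
static int parse_bool(const char *s, bool *res)
{
	if (!s)
		return -1;
	switch (s[0]) {
	case 'y': case 'Y': case '1':
		*res = true;
		return 0;
	case 'n': case 'N': case '0':
		*res = false;
		return 0;
	case 'o': case 'O':
		if (s[1] == 'n' || s[1] == 'N') { *res = true;  return 0; }
		if (s[1] == 'f' || s[1] == 'F') { *res = false; return 0; }
		return -1;
	default:
		return -1;
	}
}

int main(void)
{
	const char *cmdline = "root=/dev/sda2 disable_radix=on";	/* made-up command line */
	const char *opt = strstr(cmdline, "disable_radix");
	bool disable_radix = false;					/* made-up default */

	/* Mirrors the hunk above: "disable_radix=<bool>" overrides the default. */
	if (opt && opt[13] == '=' && parse_bool(opt + 14, &disable_radix))
		disable_radix = false;	/* unparseable value: fall back to the default */

	printf("disable_radix = %d\n", disable_radix);
	return 0;
}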
@@ -1028,7 +1152,7 @@ static int __init prom_count_smt_threads(void)
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu"))
+ if (prom_strcmp(type, "cpu"))
continue;
/*
* There is an entry for each smt thread, each entry being
@@ -1138,8 +1262,14 @@ static void __init prom_check_platform_support(void)
int prop_len = prom_getproplen(prom.chosen,
"ibm,arch-vec-5-platform-support");
- /* First copy the architecture vec template */
- ibm_architecture_vec = ibm_architecture_vec_template;
+ /*
+ * First copy the architecture vec template
+ *
+ * Use memcpy() instead of *vec = *vec_template so that GCC replaces it
+ * with __memcpy() when KASAN is active.
+ */
+ memcpy(&ibm_architecture_vec, &ibm_architecture_vec_template,
+ sizeof(ibm_architecture_vec));
if (prop_len > 1) {
int i;
@@ -1475,7 +1605,7 @@ static void __init prom_init_mem(void)
*/
prom_getprop(node, "name", type, sizeof(type));
}
- if (strcmp(type, "memory"))
+ if (prom_strcmp(type, "memory"))
continue;
plen = prom_getprop(node, "reg", regbuf, sizeof(regbuf));
@@ -1487,8 +1617,8 @@ static void __init prom_init_mem(void)
endp = p + (plen / sizeof(cell_t));
#ifdef DEBUG_PROM
- memset(path, 0, PROM_SCRATCH_SIZE);
- call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+ memset(path, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
prom_debug(" node %s :\n", path);
#endif /* DEBUG_PROM */
@@ -1756,19 +1886,19 @@ static void __init prom_initialize_tce_table(void)
prom_getprop(node, "device_type", type, sizeof(type));
prom_getprop(node, "model", model, sizeof(model));
- if ((type[0] == 0) || (strstr(type, "pci") == NULL))
+ if ((type[0] == 0) || (prom_strstr(type, "pci") == NULL))
continue;
/* Keep the old logic intact to avoid regression. */
if (compatible[0] != 0) {
- if ((strstr(compatible, "python") == NULL) &&
- (strstr(compatible, "Speedwagon") == NULL) &&
- (strstr(compatible, "Winnipeg") == NULL))
+ if ((prom_strstr(compatible, "python") == NULL) &&
+ (prom_strstr(compatible, "Speedwagon") == NULL) &&
+ (prom_strstr(compatible, "Winnipeg") == NULL))
continue;
} else if (model[0] != 0) {
- if ((strstr(model, "ython") == NULL) &&
- (strstr(model, "peedwagon") == NULL) &&
- (strstr(model, "innipeg") == NULL))
+ if ((prom_strstr(model, "ython") == NULL) &&
+ (prom_strstr(model, "peedwagon") == NULL) &&
+ (prom_strstr(model, "innipeg") == NULL))
continue;
}
@@ -1796,10 +1926,10 @@ static void __init prom_initialize_tce_table(void)
local_alloc_bottom = base;
/* It seems OF doesn't null-terminate the path :-( */
- memset(path, 0, PROM_SCRATCH_SIZE);
+ memset(path, 0, sizeof(prom_scratch));
/* Call OF to setup the TCE hardware */
if (call_prom("package-to-path", 3, 1, node,
- path, PROM_SCRATCH_SIZE-1) == PROM_ERROR) {
+ path, sizeof(prom_scratch) - 1) == PROM_ERROR) {
prom_printf("package-to-path failed\n");
}
@@ -1917,12 +2047,12 @@ static void __init prom_hold_cpus(void)
type[0] = 0;
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "cpu") != 0)
+ if (prom_strcmp(type, "cpu") != 0)
continue;
/* Skip non-configured cpus. */
if (prom_getprop(node, "status", type, sizeof(type)) > 0)
- if (strcmp(type, "okay") != 0)
+ if (prom_strcmp(type, "okay") != 0)
continue;
reg = cpu_to_be32(-1); /* make sparse happy */
@@ -1998,9 +2128,9 @@ static void __init prom_find_mmu(void)
return;
version[sizeof(version) - 1] = 0;
/* XXX might need to add other versions here */
- if (strcmp(version, "Open Firmware, 1.0.5") == 0)
+ if (prom_strcmp(version, "Open Firmware, 1.0.5") == 0)
of_workarounds = OF_WA_CLAIM;
- else if (strncmp(version, "FirmWorks,3.", 12) == 0) {
+ else if (prom_strncmp(version, "FirmWorks,3.", 12) == 0) {
of_workarounds = OF_WA_CLAIM | OF_WA_LONGTRAIL;
call_prom("interpret", 1, 1, "dev /memory 0 to allow-reclaim");
} else
@@ -2033,7 +2163,7 @@ static void __init prom_init_stdout(void)
call_prom("instance-to-path", 3, 1, prom.stdout, path, 255);
prom_printf("OF stdout device is: %s\n", of_stdout_device);
prom_setprop(prom.chosen, "/chosen", "linux,stdout-path",
- path, strlen(path) + 1);
+ path, prom_strlen(path) + 1);
/* instance-to-package fails on PA-Semi */
stdout_node = call_prom("instance-to-package", 1, 1, prom.stdout);
@@ -2043,7 +2173,7 @@ static void __init prom_init_stdout(void)
/* If it's a display, note it */
memset(type, 0, sizeof(type));
prom_getprop(stdout_node, "device_type", type, sizeof(type));
- if (strcmp(type, "display") == 0)
+ if (prom_strcmp(type, "display") == 0)
prom_setprop(stdout_node, path, "linux,boot-display", NULL, 0);
}
}
@@ -2064,19 +2194,19 @@ static int __init prom_find_machine_type(void)
compat[len] = 0;
while (i < len) {
char *p = &compat[i];
- int sl = strlen(p);
+ int sl = prom_strlen(p);
if (sl == 0)
break;
- if (strstr(p, "Power Macintosh") ||
- strstr(p, "MacRISC"))
+ if (prom_strstr(p, "Power Macintosh") ||
+ prom_strstr(p, "MacRISC"))
return PLATFORM_POWERMAC;
#ifdef CONFIG_PPC64
/* We must make sure we don't detect the IBM Cell
* blades as pSeries due to some firmware issues,
* so we do it here.
*/
- if (strstr(p, "IBM,CBEA") ||
- strstr(p, "IBM,CPBW-1.0"))
+ if (prom_strstr(p, "IBM,CBEA") ||
+ prom_strstr(p, "IBM,CPBW-1.0"))
return PLATFORM_GENERIC;
#endif /* CONFIG_PPC64 */
i += sl + 1;
@@ -2093,7 +2223,7 @@ static int __init prom_find_machine_type(void)
compat, sizeof(compat)-1);
if (len <= 0)
return PLATFORM_GENERIC;
- if (strcmp(compat, "chrp"))
+ if (prom_strcmp(compat, "chrp"))
return PLATFORM_GENERIC;
/* Default to pSeries. We need to know if we are running LPAR */
@@ -2155,19 +2285,19 @@ static void __init prom_check_displays(void)
for (node = 0; prom_next_node(&node); ) {
memset(type, 0, sizeof(type));
prom_getprop(node, "device_type", type, sizeof(type));
- if (strcmp(type, "display") != 0)
+ if (prom_strcmp(type, "display") != 0)
continue;
/* It seems OF doesn't null-terminate the path :-( */
path = prom_scratch;
- memset(path, 0, PROM_SCRATCH_SIZE);
+ memset(path, 0, sizeof(prom_scratch));
/*
* leave some room at the end of the path for appending extra
* arguments
*/
if (call_prom("package-to-path", 3, 1, node, path,
- PROM_SCRATCH_SIZE-10) == PROM_ERROR)
+ sizeof(prom_scratch) - 10) == PROM_ERROR)
continue;
prom_printf("found display : %s, opening... ", path);
@@ -2259,9 +2389,9 @@ static unsigned long __init dt_find_string(char *str)
s = os = (char *)dt_string_start;
s += 4;
while (s < (char *)dt_string_end) {
- if (strcmp(s, str) == 0)
+ if (prom_strcmp(s, str) == 0)
return s - os;
- s += strlen(s) + 1;
+ s += prom_strlen(s) + 1;
}
return 0;
}
@@ -2294,7 +2424,7 @@ static void __init scan_dt_build_strings(phandle node,
}
/* skip "name" */
- if (strcmp(namep, "name") == 0) {
+ if (prom_strcmp(namep, "name") == 0) {
*mem_start = (unsigned long)namep;
prev_name = "name";
continue;
@@ -2306,7 +2436,7 @@ static void __init scan_dt_build_strings(phandle node,
namep = sstart + soff;
} else {
/* Trim off some if we can */
- *mem_start = (unsigned long)namep + strlen(namep) + 1;
+ *mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
dt_string_end = *mem_start;
}
prev_name = namep;
@@ -2363,8 +2493,8 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
/* get it again for debugging */
path = prom_scratch;
- memset(path, 0, PROM_SCRATCH_SIZE);
- call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1);
+ memset(path, 0, sizeof(prom_scratch));
+ call_prom("package-to-path", 3, 1, node, path, sizeof(prom_scratch) - 1);
/* get and store all properties */
prev_name = "";
@@ -2375,7 +2505,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
break;
/* skip "name" */
- if (strcmp(pname, "name") == 0) {
+ if (prom_strcmp(pname, "name") == 0) {
prev_name = "name";
continue;
}
@@ -2406,7 +2536,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start,
call_prom("getprop", 4, 1, node, pname, valp, l);
*mem_start = _ALIGN(*mem_start, 4);
- if (!strcmp(pname, "phandle"))
+ if (!prom_strcmp(pname, "phandle"))
has_phandle = 1;
}
@@ -2476,8 +2606,8 @@ static void __init flatten_device_tree(void)
/* Add "phandle" in there, we'll need it */
namep = make_room(&mem_start, &mem_end, 16, 1);
- strcpy(namep, "phandle");
- mem_start = (unsigned long)namep + strlen(namep) + 1;
+ prom_strcpy(namep, "phandle");
+ mem_start = (unsigned long)namep + prom_strlen(namep) + 1;
/* Build string array */
prom_printf("Building dt strings...\n");
@@ -2799,7 +2929,7 @@ static void __init fixup_device_tree_efika(void)
rv = prom_getprop(node, "model", prop, sizeof(prop));
if (rv == PROM_ERROR)
return;
- if (strcmp(prop, "EFIKA5K2"))
+ if (prom_strcmp(prop, "EFIKA5K2"))
return;
prom_printf("Applying EFIKA device tree fixups\n");
@@ -2807,13 +2937,13 @@ static void __init fixup_device_tree_efika(void)
/* Claiming to be 'chrp' is death */
node = call_prom("finddevice", 1, 1, ADDR("/"));
rv = prom_getprop(node, "device_type", prop, sizeof(prop));
- if (rv != PROM_ERROR && (strcmp(prop, "chrp") == 0))
+ if (rv != PROM_ERROR && (prom_strcmp(prop, "chrp") == 0))
prom_setprop(node, "/", "device_type", "efika", sizeof("efika"));
/* CODEGEN,description is exposed in /proc/cpuinfo so
fix that too */
rv = prom_getprop(node, "CODEGEN,description", prop, sizeof(prop));
- if (rv != PROM_ERROR && (strstr(prop, "CHRP")))
+ if (rv != PROM_ERROR && (prom_strstr(prop, "CHRP")))
prom_setprop(node, "/", "CODEGEN,description",
"Efika 5200B PowerPC System",
sizeof("Efika 5200B PowerPC System"));
diff --git a/arch/powerpc/kernel/prom_init_check.sh b/arch/powerpc/kernel/prom_init_check.sh
index 667df97d2595..4cac45cb5de5 100644
--- a/arch/powerpc/kernel/prom_init_check.sh
+++ b/arch/powerpc/kernel/prom_init_check.sh
@@ -16,10 +16,18 @@
# If you really need to reference something from prom_init.o add
# it to the list below:
+grep "^CONFIG_KASAN=y$" .config >/dev/null
+if [ $? -eq 0 ]
+then
+ MEM_FUNCS="__memcpy __memset"
+else
+ MEM_FUNCS="memcpy memset"
+fi
+
WHITELIST="add_reloc_offset __bss_start __bss_stop copy_and_flush
-_end enter_prom memcpy memset reloc_offset __secondary_hold
+_end enter_prom $MEM_FUNCS reloc_offset __secondary_hold
__secondary_hold_acknowledge __secondary_hold_spinloop __start
-strcmp strcpy strlcpy strlen strncmp strstr kstrtobool logo_linux_clut224
+logo_linux_clut224
reloc_got2 kernstart_addr memstart_addr linux_banner _stext
__prom_init_toc_start __prom_init_toc_end btext_setup_display TOC."
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index d9ac7d94656e..684b0b315c32 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -43,6 +43,7 @@
#include <asm/tm.h>
#include <asm/asm-prototypes.h>
#include <asm/debug.h>
+#include <asm/hw_breakpoint.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -3088,7 +3089,7 @@ long arch_ptrace(struct task_struct *child, long request,
dbginfo.sizeof_condition = 0;
#ifdef CONFIG_HAVE_HW_BREAKPOINT
dbginfo.features = PPC_DEBUG_FEATURE_DATA_BP_RANGE;
- if (cpu_has_feature(CPU_FTR_DAWR))
+ if (dawr_enabled())
dbginfo.features |= PPC_DEBUG_FEATURE_DATA_BP_DAWR;
#else
dbginfo.features = 0;
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index b33bafb8fcea..e1c9cf079503 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -57,7 +57,7 @@ void setup_barrier_nospec(void)
enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) &&
security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR);
- if (!no_nospec)
+ if (!no_nospec && !cpu_mitigations_off())
enable_barrier_nospec(enable);
}
@@ -104,6 +104,14 @@ static __init int barrier_nospec_debugfs_init(void)
return 0;
}
device_initcall(barrier_nospec_debugfs_init);
+
+static __init int security_feature_debugfs_init(void)
+{
+ debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
+ (u64 *)&powerpc_security_features);
+ return 0;
+}
+device_initcall(security_feature_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
#ifdef CONFIG_PPC_FSL_BOOK3E
@@ -116,7 +124,7 @@ static int __init handle_nospectre_v2(char *p)
early_param("nospectre_v2", handle_nospectre_v2);
void setup_spectre_v2(void)
{
- if (no_spectrev2)
+ if (no_spectrev2 || cpu_mitigations_off())
do_btb_flush_fixups();
else
btb_flush_enabled = true;
@@ -300,7 +308,7 @@ void setup_stf_barrier(void)
stf_enabled_flush_types = type;
- if (!no_stf_barrier)
+ if (!no_stf_barrier && !cpu_mitigations_off())
stf_barrier_enable(enable);
}
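
These hunks gate each mitigation on the generic cpu_mitigations_off() switch (the kernel-wide mitigations= command line option) in addition to the existing per-mitigation knobs. A stand-alone sketch of the resulting decision shape, with made-up variable names standing in for the kernel state:

#include <stdbool.h>
#include <stdio.h>

static bool no_stf_barrier;	/* per-mitigation opt-out, e.g. "no_stf_barrier" */
static bool mitigations_off;	/* stand-in for cpu_mitigations_off() */

static void stf_barrier_enable(bool enable)
{
	printf("stf barrier %s\n", enable ? "on" : "off");
}

int main(void)
{
	bool enable = true;	/* what the security feature flags asked for */

	/* Same shape as setup_stf_barrier() above: either knob skips the enable. */
	if (!no_stf_barrier && !mitigations_off)
		stf_barrier_enable(enable);

	return 0;
}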
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 2e5dfb6e0823..aad9f5df6ab6 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -67,6 +67,7 @@
#include <asm/livepatch.h>
#include <asm/mmu_context.h>
#include <asm/cpu_has_feature.h>
+#include <asm/kasan.h>
#include "setup.h"
@@ -133,13 +134,11 @@ int crashing_cpu = -1;
/* also used by kexec */
void machine_shutdown(void)
{
-#ifdef CONFIG_FA_DUMP
/*
* if fadump is active, clean up the fadump registration before we
* shut down.
*/
fadump_cleanup();
-#endif
if (ppc_md.machine_shutdown)
ppc_md.machine_shutdown();
@@ -200,14 +199,15 @@ static void show_cpuinfo_summary(struct seq_file *m)
{
struct device_node *root;
const char *model = NULL;
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC32)
unsigned long bogosum = 0;
int i;
- for_each_online_cpu(i)
- bogosum += loops_per_jiffy;
- seq_printf(m, "total bogomips\t: %lu.%02lu\n",
- bogosum/(500000/HZ), bogosum/(5000/HZ) % 100);
-#endif /* CONFIG_SMP && CONFIG_PPC32 */
+
+ if (IS_ENABLED(CONFIG_SMP) && IS_ENABLED(CONFIG_PPC32)) {
+ for_each_online_cpu(i)
+ bogosum += loops_per_jiffy;
+ seq_printf(m, "total bogomips\t: %lu.%02lu\n",
+ bogosum / (500000 / HZ), bogosum / (5000 / HZ) % 100);
+ }
seq_printf(m, "timebase\t: %lu\n", ppc_tb_freq);
if (ppc_md.name)
seq_printf(m, "platform\t: %s\n", ppc_md.name);
@@ -221,11 +221,10 @@ static void show_cpuinfo_summary(struct seq_file *m)
if (ppc_md.show_cpuinfo != NULL)
ppc_md.show_cpuinfo(m);
-#ifdef CONFIG_PPC32
/* Display the amount of memory */
- seq_printf(m, "Memory\t\t: %d MB\n",
- (unsigned int)(total_memory / (1024 * 1024)));
-#endif
+ if (IS_ENABLED(CONFIG_PPC32))
+ seq_printf(m, "Memory\t\t: %d MB\n",
+ (unsigned int)(total_memory / (1024 * 1024)));
}
static int show_cpuinfo(struct seq_file *m, void *v)
@@ -252,26 +251,24 @@ static int show_cpuinfo(struct seq_file *m, void *v)
else
seq_printf(m, "unknown (%08x)", pvr);
-#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC))
seq_printf(m, ", altivec supported");
-#endif /* CONFIG_ALTIVEC */
seq_printf(m, "\n");
#ifdef CONFIG_TAU
- if (cur_cpu_spec->cpu_features & CPU_FTR_TAU) {
-#ifdef CONFIG_TAU_AVERAGE
- /* more straightforward, but potentially misleading */
- seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
- cpu_temp(cpu_id));
-#else
- /* show the actual temp sensor range */
- u32 temp;
- temp = cpu_temp_both(cpu_id);
- seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
- temp & 0xff, temp >> 16);
-#endif
+ if (cpu_has_feature(CPU_FTR_TAU)) {
+ if (IS_ENABLED(CONFIG_TAU_AVERAGE)) {
+ /* more straightforward, but potentially misleading */
+ seq_printf(m, "temperature \t: %u C (uncalibrated)\n",
+ cpu_temp(cpu_id));
+ } else {
+ /* show the actual temp sensor range */
+ u32 temp;
+ temp = cpu_temp_both(cpu_id);
+ seq_printf(m, "temperature \t: %u-%u C (uncalibrated)\n",
+ temp & 0xff, temp >> 16);
+ }
}
#endif /* CONFIG_TAU */
@@ -335,11 +332,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
seq_printf(m, "revision\t: %hd.%hd (pvr %04x %04x)\n",
maj, min, PVR_VER(pvr), PVR_REV(pvr));
-#ifdef CONFIG_PPC32
- seq_printf(m, "bogomips\t: %lu.%02lu\n",
- loops_per_jiffy / (500000/HZ),
- (loops_per_jiffy / (5000/HZ)) % 100);
-#endif
+ if (IS_ENABLED(CONFIG_PPC32))
+ seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy / (500000 / HZ),
+ (loops_per_jiffy / (5000 / HZ)) % 100);
+
seq_printf(m, "\n");
/* If this is the last cpu, print the summary */
@@ -401,8 +397,8 @@ void __init check_for_initrd(void)
#ifdef CONFIG_SMP
-int threads_per_core, threads_per_subcore, threads_shift;
-cpumask_t threads_core_mask;
+int threads_per_core, threads_per_subcore, threads_shift __read_mostly;
+cpumask_t threads_core_mask __read_mostly;
EXPORT_SYMBOL_GPL(threads_per_core);
EXPORT_SYMBOL_GPL(threads_per_subcore);
EXPORT_SYMBOL_GPL(threads_shift);
@@ -740,23 +736,19 @@ void __init setup_panic(void)
* BUG() in that case.
*/
-#ifdef CONFIG_NOT_COHERENT_CACHE
-#define KERNEL_COHERENCY 0
-#else
-#define KERNEL_COHERENCY 1
-#endif
+#define KERNEL_COHERENCY (!IS_ENABLED(CONFIG_NOT_COHERENT_CACHE))
static int __init check_cache_coherency(void)
{
struct device_node *np;
const void *prop;
- int devtree_coherency;
+ bool devtree_coherency;
np = of_find_node_by_path("/");
prop = of_get_property(np, "coherency-off", NULL);
of_node_put(np);
- devtree_coherency = prop ? 0 : 1;
+ devtree_coherency = prop ? false : true;
if (devtree_coherency != KERNEL_COHERENCY) {
printk(KERN_ERR
@@ -799,12 +791,6 @@ void arch_setup_pdev_archdata(struct platform_device *pdev)
static __init void print_system_info(void)
{
pr_info("-----------------------------------------------------\n");
-#ifdef CONFIG_PPC_BOOK3S_64
- pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
-#endif
-#ifdef CONFIG_PPC_BOOK3S_32
- pr_info("Hash_size = 0x%lx\n", Hash_size);
-#endif
pr_info("phys_mem_size = 0x%llx\n",
(unsigned long long)memblock_phys_mem_size());
@@ -826,18 +812,7 @@ static __init void print_system_info(void)
pr_info("firmware_features = 0x%016lx\n", powerpc_firmware_features);
#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- if (htab_address)
- pr_info("htab_address = 0x%p\n", htab_address);
- if (htab_hash_mask)
- pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
-#endif
-#ifdef CONFIG_PPC_BOOK3S_32
- if (Hash)
- pr_info("Hash = 0x%p\n", Hash);
- if (Hash_mask)
- pr_info("Hash_mask = 0x%lx\n", Hash_mask);
-#endif
+ print_system_hash_info();
if (PHYSICAL_START > 0)
pr_info("physical_start = 0x%llx\n",
@@ -868,6 +843,8 @@ static void smp_setup_pacas(void)
*/
void __init setup_arch(char **cmdline_p)
{
+ kasan_init();
+
*cmdline_p = boot_command_line;
/* Set a half-reasonable default so udelay does something sensible */
@@ -947,20 +924,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.end_data = (unsigned long) _edata;
init_mm.brk = klimit;
-#ifdef CONFIG_PPC_MM_SLICES
-#ifdef CONFIG_PPC64
- if (!radix_enabled())
- init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-#elif defined(CONFIG_PPC_8xx)
- init_mm.context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-#else
-#error "context.addr_limit not initialized."
-#endif
-#endif
-
-#ifdef CONFIG_SPAPR_TCE_IOMMU
mm_iommu_init(&init_mm);
-#endif
irqstack_early_init();
exc_lvl_early_init();
emergency_stack_init();
@@ -969,9 +933,9 @@ void __init setup_arch(char **cmdline_p)
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
-#ifdef CONFIG_DUMMY_CONSOLE
- conswitchp = &dummy_con;
-#endif
+ if (IS_ENABLED(CONFIG_DUMMY_CONSOLE))
+ conswitchp = &dummy_con;
+
if (ppc_md.setup_arch)
ppc_md.setup_arch();
@@ -983,10 +947,8 @@ void __init setup_arch(char **cmdline_p)
/* Initialize the MMU context management stuff. */
mmu_context_init();
-#ifdef CONFIG_PPC64
/* Interrupt code needs to be 64K-aligned. */
- if ((unsigned long)_stext & 0xffff)
+ if (IS_ENABLED(CONFIG_PPC64) && (unsigned long)_stext & 0xffff)
panic("Kernelbase not 64K-aligned (0x%lx)!\n",
(unsigned long)_stext);
-#endif
}
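
Much of this file trades #ifdef blocks for IS_ENABLED() checks, so both branches stay visible to the compiler and the disabled one is simply discarded as dead code. A reduced, self-contained sketch of the idea, using a stand-in IS_ENABLED() (the real macro in linux/kconfig.h also copes with undefined options):

#include <stdio.h>

/* Stand-in: pretend the option resolves to 0 or 1. */
#define CONFIG_PPC32		1
#define IS_ENABLED(option)	(option)
#define HZ			250

static unsigned long loops_per_jiffy = 4996000;	/* made-up value */

int main(void)
{
	/*
	 * Both branches are parsed and type-checked; the disabled one is
	 * eliminated as dead code, unlike the body of an #ifdef block.
	 */
	if (IS_ENABLED(CONFIG_PPC32))
		printf("bogomips\t: %lu.%02lu\n",
		       loops_per_jiffy / (500000 / HZ),
		       (loops_per_jiffy / (5000 / HZ)) % 100);
	return 0;
}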
diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
index 4a65e08a6042..3fb9f64f88fd 100644
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -64,34 +64,6 @@ EXPORT_SYMBOL(DMA_MODE_READ);
EXPORT_SYMBOL(DMA_MODE_WRITE);
/*
- * We're called here very early in the boot.
- *
- * Note that the kernel may be running at an address which is different
- * from the address that it was linked at, so we must use RELOC/PTRRELOC
- * to access static data (including strings). -- paulus
- */
-notrace unsigned long __init early_init(unsigned long dt_ptr)
-{
- unsigned long offset = reloc_offset();
-
- /* First zero the BSS -- use memset_io, some platforms don't have
- * caches on yet */
- memset_io((void __iomem *)PTRRELOC(&__bss_start), 0,
- __bss_stop - __bss_start);
-
- /*
- * Identify the CPU type and fix up code sections
- * that depend on which cpu we have.
- */
- identify_cpu(offset, mfspr(SPRN_PVR));
-
- apply_feature_fixups();
-
- return KERNELBASE + offset;
-}
-
-
-/*
* This is run before start_kernel(), the kernel has been relocated
* and we are running with enough of the MMU enabled to have our
* proper kernel virtual addresses
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index ba404dd9ce1d..a400854a5036 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -68,6 +68,7 @@
#include <asm/cputhreads.h>
#include <asm/hw_irq.h>
#include <asm/feature-fixups.h>
+#include <asm/kup.h>
#include "setup.h"
@@ -331,6 +332,12 @@ void __init early_setup(unsigned long dt_ptr)
*/
configure_exceptions();
+ /*
+ * Configure Kernel Userspace Protection. This needs to happen before
+ * feature fixups for platforms that implement this using features.
+ */
+ setup_kup();
+
/* Apply all the dynamic patching */
apply_feature_fixups();
setup_feature_keys();
@@ -383,6 +390,9 @@ void early_setup_secondary(void)
/* Initialize the hash table or TLB handling */
early_init_mmu_secondary();
+ /* Perform any KUP setup that is per-cpu */
+ setup_kup();
+
/*
* At this point, we can let interrupts switch to virtual mode
* (the MMU has been setup), so adjust the MSR in the PACA to
@@ -932,7 +942,7 @@ void setup_rfi_flush(enum l1d_flush_type types, bool enable)
enabled_flush_types = types;
- if (!no_rfi_flush)
+ if (!no_rfi_flush && !cpu_mitigations_off())
rfi_flush_enable(enable);
}
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 6794466f6420..06c299ef6132 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -565,7 +565,7 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
preempt_disable();
/* pull in MSR TS bits from user context */
- regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+ regs->msr |= msr & MSR_TS_MASK;
/*
* Ensure that TM is enabled in regs->msr before we leave the signal
@@ -745,6 +745,31 @@ SYSCALL_DEFINE0(rt_sigreturn)
if (MSR_TM_SUSPENDED(mfmsr()))
tm_reclaim_current(0);
+ /*
+ * Also clear the MSR[TS] bits, so that an exception in the code
+ * below (such as a page fault in copy_ckvsx_to_user()) does not
+ * recheckpoint this task if a context switch happens inside the
+ * exception.
+ *
+ * A major page fault can indirectly call schedule(). A reschedule
+ * in the middle of an exception can have a side effect (changing
+ * the CPU MSR[TS] state), since schedule() is called with the CPU
+ * MSR[TS] disabled and returns with MSR[TS]=Suspended (switch_to()
+ * calls tm_recheckpoint() for the 'new' process). In this case the
+ * same process keeps running on the CPU, but the CPU MSR[TS] state
+ * has changed underneath it.
+ *
+ * This can cause a TM Bad Thing, since the MSR saved on the stack
+ * will have MSR[TS]=0, and that is what will be used for the RFID.
+ *
+ * Clearing the MSR[TS] state here avoids a recheckpoint if the task
+ * is rescheduled in kernel space. The MSR[TS] state does not need
+ * to be saved either, since it will be replaced later with the
+ * MSR[TS] value that came from the user context, in
+ * restore_tm_sigcontexts().
+ */
+ regs->msr &= ~MSR_TS_MASK;
+
if (__get_user(msr, &uc->uc_mcontext.gp_regs[PT_MSR]))
goto badframe;
if (MSR_TM_ACTIVE(msr)) {
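
Taken together with the earlier hunk that changed the restore to regs->msr |= msr & MSR_TS_MASK, the ordering is: clear the live TS bits before the user-memory copies, then OR back only the value taken from the user frame. A small stand-alone sketch of that bit manipulation (the MSR values are invented; only the masking pattern matters):

#include <stdint.h>
#include <stdio.h>

#define MSR_TS_S	(1ULL << 33)		/* Suspended */
#define MSR_TS_T	(1ULL << 34)		/* Transactional */
#define MSR_TS_MASK	(MSR_TS_S | MSR_TS_T)

int main(void)
{
	uint64_t regs_msr = 0x9000000000009033ULL | MSR_TS_S;	/* invented live MSR */
	uint64_t user_msr = MSR_TS_S;				/* from the signal frame */

	/* 1. Drop TS so a reschedule during the copies cannot recheckpoint us. */
	regs_msr &= ~MSR_TS_MASK;

	/* 2. Later, pull the TS bits back in from the user context only. */
	regs_msr |= user_msr & MSR_TS_MASK;

	printf("final MSR = 0x%016llx\n", (unsigned long long)regs_msr);
	return 0;
}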
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index bc0503ef9c9c..325d60633dfa 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -43,7 +43,6 @@
#include <linux/timex.h>
#include <linux/kernel_stat.h>
#include <linux/time.h>
-#include <linux/clockchips.h>
#include <linux/init.h>
#include <linux/profile.h>
#include <linux/cpu.h>
@@ -151,6 +150,8 @@ EXPORT_SYMBOL_GPL(ppc_proc_freq);
unsigned long ppc_tb_freq;
EXPORT_SYMBOL_GPL(ppc_tb_freq);
+bool tb_invalid;
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
* Factor for converting from cputime_t (timebase ticks) to
@@ -460,6 +461,13 @@ void __delay(unsigned long loops)
diff += 1000000000;
spin_cpu_relax();
} while (diff < loops);
+ } else if (tb_invalid) {
+ /*
+ * The TB is in an error state and isn't ticking anymore.
+ * The HMI handler was unable to recover from the TB error.
+ * Return immediately, so that the kernel won't get stuck here.
+ */
+ spin_cpu_relax();
} else {
start = get_tbl();
while (get_tbl() - start < loops)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 1fd45a8650e1..665f294725cb 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2088,6 +2088,10 @@ void SPEFloatingPointException(struct pt_regs *regs)
int code = FPE_FLTUNK;
int err;
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
flush_spe_to_thread(current);
spefscr = current->thread.spefscr;
@@ -2133,6 +2137,10 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
extern int speround_handler(struct pt_regs *regs);
int err;
+ /* We restore the interrupt state now */
+ if (!arch_irq_disabled_regs(regs))
+ local_irq_enable();
+
preempt_disable();
if (regs->msr & MSR_SPE)
giveup_spe(current);
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index ce199f6e4256..06f54d947057 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -26,9 +26,8 @@ GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
+ -Wl,-soname=linux-vdso32.so.1 -Wl,--hash-style=both
asflags-y := -D__VDSO32__ -s
obj-y += vdso32_wrapper.o
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 28e7d112aa2f..32ebb3522ea1 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -12,9 +12,8 @@ GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
-ccflags-y := -shared -fno-common -fno-builtin
-ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ccflags-y := -shared -fno-common -fno-builtin -nostdlib \
+ -Wl,-soname=linux-vdso64.so.1 -Wl,--hash-style=both
asflags-y := -D__VDSO64__ -s
obj-y += vdso64_wrapper.o
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 21165da0052d..8eb867dbad5f 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -21,6 +21,7 @@ _GLOBAL(load_vr_state)
REST_32VRS(0,r4,r3)
blr
EXPORT_SYMBOL(load_vr_state)
+_ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
/*
* Store VMX state into memory, including VSCR.
diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c
index 3c6ab22a0c4e..af3c15a1d41e 100644
--- a/arch/powerpc/kernel/watchdog.c
+++ b/arch/powerpc/kernel/watchdog.c
@@ -77,7 +77,7 @@ static u64 wd_smp_panic_timeout_tb __read_mostly; /* panic other CPUs */
static u64 wd_timer_period_ms __read_mostly; /* interval between heartbeat */
-static DEFINE_PER_CPU(struct timer_list, wd_timer);
+static DEFINE_PER_CPU(struct hrtimer, wd_hrtimer);
static DEFINE_PER_CPU(u64, wd_timer_tb);
/* SMP checker bits */
@@ -293,21 +293,21 @@ out:
nmi_exit();
}
-static void wd_timer_reset(unsigned int cpu, struct timer_list *t)
-{
- t->expires = jiffies + msecs_to_jiffies(wd_timer_period_ms);
- if (wd_timer_period_ms > 1000)
- t->expires = __round_jiffies_up(t->expires, cpu);
- add_timer_on(t, cpu);
-}
-
-static void wd_timer_fn(struct timer_list *t)
+static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
{
int cpu = smp_processor_id();
+ if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
+ return HRTIMER_NORESTART;
+
+ if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
+ return HRTIMER_NORESTART;
+
watchdog_timer_interrupt(cpu);
- wd_timer_reset(cpu, t);
+ hrtimer_forward_now(hrtimer, ms_to_ktime(wd_timer_period_ms));
+
+ return HRTIMER_RESTART;
}
void arch_touch_nmi_watchdog(void)
@@ -323,37 +323,22 @@ void arch_touch_nmi_watchdog(void)
}
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
-static void start_watchdog_timer_on(unsigned int cpu)
-{
- struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
- per_cpu(wd_timer_tb, cpu) = get_tb();
-
- timer_setup(t, wd_timer_fn, TIMER_PINNED);
- wd_timer_reset(cpu, t);
-}
-
-static void stop_watchdog_timer_on(unsigned int cpu)
-{
- struct timer_list *t = per_cpu_ptr(&wd_timer, cpu);
-
- del_timer_sync(t);
-}
-
-static int start_wd_on_cpu(unsigned int cpu)
+static void start_watchdog(void *arg)
{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
unsigned long flags;
if (cpumask_test_cpu(cpu, &wd_cpus_enabled)) {
WARN_ON(1);
- return 0;
+ return;
}
if (!(watchdog_enabled & NMI_WATCHDOG_ENABLED))
- return 0;
+ return;
if (!cpumask_test_cpu(cpu, &watchdog_cpumask))
- return 0;
+ return;
wd_smp_lock(&flags);
cpumask_set_cpu(cpu, &wd_cpus_enabled);
@@ -363,27 +348,40 @@ static int start_wd_on_cpu(unsigned int cpu)
}
wd_smp_unlock(&flags);
- start_watchdog_timer_on(cpu);
+ *this_cpu_ptr(&wd_timer_tb) = get_tb();
- return 0;
+ hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer->function = watchdog_timer_fn;
+ hrtimer_start(hrtimer, ms_to_ktime(wd_timer_period_ms),
+ HRTIMER_MODE_REL_PINNED);
}
-static int stop_wd_on_cpu(unsigned int cpu)
+static int start_watchdog_on_cpu(unsigned int cpu)
{
+ return smp_call_function_single(cpu, start_watchdog, NULL, true);
+}
+
+static void stop_watchdog(void *arg)
+{
+ struct hrtimer *hrtimer = this_cpu_ptr(&wd_hrtimer);
+ int cpu = smp_processor_id();
unsigned long flags;
if (!cpumask_test_cpu(cpu, &wd_cpus_enabled))
- return 0; /* Can happen in CPU unplug case */
+ return; /* Can happen in CPU unplug case */
- stop_watchdog_timer_on(cpu);
+ hrtimer_cancel(hrtimer);
wd_smp_lock(&flags);
cpumask_clear_cpu(cpu, &wd_cpus_enabled);
wd_smp_unlock(&flags);
wd_smp_clear_cpu_pending(cpu, get_tb());
+}
- return 0;
+static int stop_watchdog_on_cpu(unsigned int cpu)
+{
+ return smp_call_function_single(cpu, stop_watchdog, NULL, true);
}
static void watchdog_calc_timeouts(void)
@@ -402,7 +400,7 @@ void watchdog_nmi_stop(void)
int cpu;
for_each_cpu(cpu, &wd_cpus_enabled)
- stop_wd_on_cpu(cpu);
+ stop_watchdog_on_cpu(cpu);
}
void watchdog_nmi_start(void)
@@ -411,7 +409,7 @@ void watchdog_nmi_start(void)
watchdog_calc_timeouts();
for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask)
- start_wd_on_cpu(cpu);
+ start_watchdog_on_cpu(cpu);
}
/*
@@ -423,7 +421,8 @@ int __init watchdog_nmi_probe(void)
err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
"powerpc/watchdog:online",
- start_wd_on_cpu, stop_wd_on_cpu);
+ start_watchdog_on_cpu,
+ stop_watchdog_on_cpu);
if (err < 0) {
pr_warn("could not be initialized");
return err;
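
The watchdog now uses a pinned per-CPU hrtimer armed from the target CPU via smp_call_function_single(), rather than a TIMER_PINNED timer_list. A condensed sketch of that pattern (kernel context assumed; the demo_ names and the 1000 ms period are made up):

#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/percpu.h>
#include <linux/smp.h>

static DEFINE_PER_CPU(struct hrtimer, demo_hrtimer);
static unsigned long demo_period_ms = 1000;	/* assumption */

static enum hrtimer_restart demo_timer_fn(struct hrtimer *hrtimer)
{
	/* do the per-CPU work here, then re-arm relative to now */
	hrtimer_forward_now(hrtimer, ms_to_ktime(demo_period_ms));
	return HRTIMER_RESTART;
}

/*
 * Runs on the target CPU via smp_call_function_single(), so the timer is
 * initialised and started locally and stays pinned to that CPU.
 */
static void demo_start(void *arg)
{
	struct hrtimer *hrtimer = this_cpu_ptr(&demo_hrtimer);

	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	hrtimer->function = demo_timer_fn;
	hrtimer_start(hrtimer, ms_to_ktime(demo_period_ms),
		      HRTIMER_MODE_REL_PINNED);
}

static int demo_start_on_cpu(unsigned int cpu)
{
	return smp_call_function_single(cpu, demo_start, NULL, true);
}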
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index bfdde04e4905..f53997a8ca62 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,7 +20,6 @@ if VIRTUALIZATION
config KVM
bool
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_EVENTFD
select HAVE_KVM_VCPU_ASYNC_IOCTL
select SRCU
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index f02b04973710..f100e331e69b 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -543,14 +543,14 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+
ret = kvmppc_tce_validate(stt, tce);
if (ret != H_SUCCESS)
- return ret;
+ goto unlock_exit;
dir = iommu_tce_direction(tce);
- idx = srcu_read_lock(&vcpu->kvm->srcu);
-
if ((dir != DMA_NONE) && kvmppc_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) {
ret = H_PARAMETER;
goto unlock_exit;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 06964350b97a..7bdcd4d7a9f0 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -74,6 +74,7 @@
#include <asm/opal.h>
#include <asm/xics.h>
#include <asm/xive.h>
+#include <asm/hw_breakpoint.h>
#include "book3s.h"
@@ -3374,7 +3375,7 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_PURR, vcpu->arch.purr);
mtspr(SPRN_SPURR, vcpu->arch.spurr);
- if (cpu_has_feature(CPU_FTR_DAWR)) {
+ if (dawr_enabled()) {
mtspr(SPRN_DAWR, vcpu->arch.dawr);
mtspr(SPRN_DAWRX, vcpu->arch.dawrx);
}
@@ -3423,7 +3424,9 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
- mtspr(SPRN_PSSCR, host_psscr);
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_psscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
mtspr(SPRN_HFSCR, host_hfscr);
mtspr(SPRN_CIABR, host_ciabr);
mtspr(SPRN_DAWR, host_dawr);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3b9662a4207e..085509148d95 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -822,7 +822,7 @@ static inline void this_cpu_inc_rm(unsigned int __percpu *addr)
raddr = per_cpu_ptr(addr, cpu);
l = (unsigned long)raddr;
- if (REGION_ID(l) == VMALLOC_REGION_ID) {
+ if (get_region_id(l) == VMALLOC_REGION_ID) {
l = vmalloc_to_phys(raddr);
raddr = (unsigned int *)l;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 3a5e719ef032..dd014308f065 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -35,6 +35,7 @@
#include <asm/thread_info.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
+#include <asm/cpuidle.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
@@ -45,6 +46,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
+#define NAPPING_UNSPLIT 3
/* Stack frame offsets for kvmppc_hv_entry */
#define SFS 208
@@ -290,17 +292,19 @@ kvm_novcpu_exit:
b kvmhv_switch_to_host
/*
- * We come in here when wakened from nap mode.
- * Relocation is off and most register values are lost.
- * r13 points to the PACA.
+ * We come in here when woken from the Linux offline idle code.
+ * Relocation is off.
* r3 contains the SRR1 wakeup value, SRR1 is trashed.
*/
- .globl kvm_start_guest
-kvm_start_guest:
- /* Set runlatch bit the minute you wake up from nap */
- mfspr r0, SPRN_CTRLF
- ori r0, r0, 1
- mtspr SPRN_CTRLT, r0
+_GLOBAL(idle_kvm_start_guest)
+ ld r4,PACAEMERGSP(r13)
+ mfcr r5
+ mflr r0
+ std r1,0(r4)
+ std r5,8(r4)
+ std r0,16(r4)
+ subi r1,r4,STACK_FRAME_OVERHEAD
+ SAVE_NVGPRS(r1)
/*
* Could avoid this and pass it through in r3. For now,
@@ -308,27 +312,23 @@ kvm_start_guest:
*/
mtspr SPRN_SRR1,r3
- ld r2,PACATOC(r13)
-
li r0,0
stb r0,PACA_FTRACE_ENABLED(r13)
li r0,KVM_HWTHREAD_IN_KVM
stb r0,HSTATE_HWTHREAD_STATE(r13)
- /* NV GPR values from power7_idle() will no longer be valid */
- li r0,1
- stb r0,PACA_NAPSTATELOST(r13)
-
- /* were we napping due to cede? */
+ /* kvm cede / napping does not come through here */
lbz r0,HSTATE_NAPPING(r13)
- cmpwi r0,NAPPING_CEDE
- beq kvm_end_cede
- cmpwi r0,NAPPING_NOVCPU
- beq kvm_novcpu_wakeup
+ twnei r0,0
- ld r1,PACAEMERGSP(r13)
- subi r1,r1,STACK_FRAME_OVERHEAD
+ b 1f
+
+kvm_unsplit_wakeup:
+ li r0, 0
+ stb r0, HSTATE_NAPPING(r13)
+
+1:
/*
* We weren't napping due to cede, so this must be a secondary
@@ -437,19 +437,25 @@ kvm_no_guest:
lbz r3, HSTATE_HWTHREAD_REQ(r13)
cmpwi r3, 0
bne 54f
-/*
- * We jump to pnv_wakeup_loss, which will return to the caller
- * of power7_nap in the powernv cpu offline loop. The value we
- * put in r3 becomes the return value for power7_nap. pnv_wakeup_loss
- * requires SRR1 in r12.
- */
+
+ /*
+ * Jump to idle_return_gpr_loss, which returns to the
+ * idle_kvm_start_guest caller.
+ */
li r3, LPCR_PECE0
mfspr r4, SPRN_LPCR
rlwimi r4, r3, 0, LPCR_PECE0 | LPCR_PECE1
mtspr SPRN_LPCR, r4
- li r3, 0
- mfspr r12,SPRN_SRR1
- b pnv_wakeup_loss
+ /* set up r3 for return */
+ mfspr r3,SPRN_SRR1
+ REST_NVGPRS(r1)
+ addi r1, r1, STACK_FRAME_OVERHEAD
+ ld r0, 16(r1)
+ ld r5, 8(r1)
+ ld r1, 0(r1)
+ mtlr r0
+ mtcr r5
+ blr
53: HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
@@ -534,6 +540,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
lbz r0, KVM_SPLIT_DO_NAP(r3)
cmpwi r0, 0
beq 57f
+ li r3, NAPPING_UNSPLIT
+ stb r3, HSTATE_NAPPING(r13)
li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
mfspr r5, SPRN_LPCR
rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
@@ -822,18 +830,21 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_IAMR, r5
mtspr SPRN_PSPB, r6
mtspr SPRN_FSCR, r7
- ld r5, VCPU_DAWR(r4)
- ld r6, VCPU_DAWRX(r4)
- ld r7, VCPU_CIABR(r4)
- ld r8, VCPU_TAR(r4)
/*
* Handle broken DAWR case by not writing it. This means we
* can still store the DAWR register for migration.
*/
-BEGIN_FTR_SECTION
+ LOAD_REG_ADDR(r5, dawr_force_enable)
+ lbz r5, 0(r5)
+ cmpdi r5, 0
+ beq 1f
+ ld r5, VCPU_DAWR(r4)
+ ld r6, VCPU_DAWRX(r4)
mtspr SPRN_DAWR, r5
mtspr SPRN_DAWRX, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR)
+1:
+ ld r7, VCPU_CIABR(r4)
+ ld r8, VCPU_TAR(r4)
mtspr SPRN_CIABR, r7
mtspr SPRN_TAR, r8
ld r5, VCPU_IC(r4)
@@ -2513,11 +2524,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
blr
2:
-BEGIN_FTR_SECTION
- /* POWER9 with disabled DAWR */
+ LOAD_REG_ADDR(r11, dawr_force_enable)
+ lbz r11, 0(r11)
+ cmpdi r11, 0
li r3, H_HARDWARE
- blr
-END_FTR_SECTION_IFCLR(CPU_FTR_DAWR)
+ beqlr
/* Emulate H_SET_DABR/X on P8 for the sake of compat mode guests */
rlwimi r5, r4, 5, DAWRX_DR | DAWRX_DW
rlwimi r5, r4, 2, DAWRX_WT
@@ -2654,6 +2665,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lis r3, LPCR_PECEDP@h /* Do wake on privileged doorbell */
+ /* Go back to host stack */
+ ld r1, HSTATE_HOST_R1(r13)
+
/*
* Take a nap until a decrementer, external or doorbell interrupt
* occurs, with PECE1 and PECE0 set in LPCR.
@@ -2682,26 +2696,42 @@ BEGIN_FTR_SECTION
* requested level = 0 (just stop dispatching)
*/
lis r3, (PSSCR_EC | PSSCR_ESL)@h
- mtspr SPRN_PSSCR, r3
/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
li r4, LPCR_PECE_HVEE@higher
sldi r4, r4, 32
or r5, r5, r4
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+FTR_SECTION_ELSE
+ li r3, PNV_THREAD_NAP
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
- li r0, 0
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
-1: cmpd r0, r0
- bne 1b
+
BEGIN_FTR_SECTION
- nap
+ bl isa300_idle_stop_mayloss
FTR_SECTION_ELSE
- PPC_STOP
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
- b .
+ bl isa206_idle_insn_mayloss
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
+
+ mfspr r0, SPRN_CTRLF
+ ori r0, r0, 1
+ mtspr SPRN_CTRLT, r0
+
+ mtspr SPRN_SRR1, r3
+
+ li r0, 0
+ stb r0, PACA_FTRACE_ENABLED(r13)
+
+ li r0, KVM_HWTHREAD_IN_KVM
+ stb r0, HSTATE_HWTHREAD_STATE(r13)
+
+ lbz r0, HSTATE_NAPPING(r13)
+ cmpwi r0, NAPPING_CEDE
+ beq kvm_end_cede
+ cmpwi r0, NAPPING_NOVCPU
+ beq kvm_novcpu_wakeup
+ cmpwi r0, NAPPING_UNSPLIT
+ beq kvm_unsplit_wakeup
+ twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
li r3, 0
@@ -2709,12 +2739,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
b 34f
kvm_end_cede:
+ /* Woken by external or decrementer interrupt */
+
/* get vcpu pointer */
ld r4, HSTATE_KVM_VCPU(r13)
- /* Woken by external or decrementer interrupt */
- ld r1, HSTATE_HOST_R1(r13)
-
#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
addi r3, r4, VCPU_TB_RMINTR
bl kvmhv_accumulate_time
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 79396e184bca..c55f9c27bf79 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -8,9 +8,22 @@ ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
-obj-y += string.o alloc.o code-patching.o feature-fixups.o
+KASAN_SANITIZE_code-patching.o := n
+KASAN_SANITIZE_feature-fixups.o := n
-obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o
+ifdef CONFIG_KASAN
+CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_feature-fixups.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-y += alloc.o code-patching.o feature-fixups.o
+
+ifndef CONFIG_KASAN
+obj-y += string.o memcmp_$(BITS).o
+obj-$(CONFIG_PPC32) += strlen_32.o
+endif
+
+obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
@@ -34,7 +47,7 @@ obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o \
test_emulate_step_exec_instr.o
obj-y += checksum_$(BITS).o checksum_wrappers.o \
- string_$(BITS).o memcmp_$(BITS).o
+ string_$(BITS).o
obj-y += sstep.o ldstfp.o quad.o
obj64-y += quad.o
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
index 890d4ddd91d6..bb9307ce2440 100644
--- a/arch/powerpc/lib/checksum_wrappers.c
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -29,6 +29,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
unsigned int csum;
might_sleep();
+ allow_read_from_user(src, len);
*err_ptr = 0;
@@ -60,6 +61,7 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
}
out:
+ prevent_read_from_user(src, len);
return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_from_user);
@@ -70,6 +72,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
unsigned int csum;
might_sleep();
+ allow_write_to_user(dst, len);
*err_ptr = 0;
@@ -97,6 +100,7 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
}
out:
+ prevent_write_to_user(dst, len);
return (__force __wsum)csum;
}
EXPORT_SYMBOL(csum_and_copy_to_user);
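
The added allow_*/prevent_* calls bracket the actual user accesses so that Kernel Userspace Access Protection stays locked the rest of the time. A minimal sketch of that bracketing pattern (kernel context assumed; the wrapper function is hypothetical, only the helper names match the ones used above):

#include <linux/uaccess.h>
#include <linux/errno.h>

static int demo_copy_from_user_kuap(void *dst, const void __user *src,
				    unsigned long len)
{
	unsigned long left;

	allow_read_from_user(src, len);		/* open the user-access window */
	left = __copy_from_user_inatomic(dst, src, len);
	prevent_read_from_user(src, len);	/* close it again, even on failure */

	return left ? -EFAULT : 0;
}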
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 506413a2c25e..90c9d4a1e36f 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -15,7 +15,6 @@
#include <linux/cpuhotplug.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
-#include <linux/kprobes.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
@@ -26,9 +25,9 @@
static int __patch_instruction(unsigned int *exec_addr, unsigned int instr,
unsigned int *patch_addr)
{
- int err;
+ int err = 0;
- __put_user_size(instr, patch_addr, 4, err);
+ __put_user_asm(instr, patch_addr, err, "stw");
if (err)
return err;
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index ba66846fe973..d5642481fb98 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -14,6 +14,7 @@
#include <asm/ppc_asm.h>
#include <asm/export.h>
#include <asm/code-patching-asm.h>
+#include <asm/kasan.h>
#define COPY_16_BYTES \
lwz r7,4(r4); \
@@ -68,6 +69,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
+#ifndef CONFIG_KASAN
_GLOBAL(memset16)
rlwinm. r0 ,r5, 31, 1, 31
addi r6, r3, -4
@@ -81,6 +83,7 @@ _GLOBAL(memset16)
sth r4, 4(r6)
blr
EXPORT_SYMBOL(memset16)
+#endif
/*
* Use dcbz on the complete cache lines in the destination
@@ -91,7 +94,7 @@ EXPORT_SYMBOL(memset16)
* We therefore skip the optimised block that uses dcbz. This jump is
* replaced by a nop once cache is active. This is done in machine_init()
*/
-_GLOBAL(memset)
+_GLOBAL_KASAN(memset)
cmplwi 0,r5,4
blt 7f
@@ -151,6 +154,7 @@ _GLOBAL(memset)
bdnz 9b
blr
EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
/*
* This version uses dcbz on the complete cache lines in the
@@ -163,12 +167,12 @@ EXPORT_SYMBOL(memset)
* We therefore jump to generic_memcpy which doesn't use dcbz. This jump is
* replaced by a nop once cache is active. This is done in machine_init()
*/
-_GLOBAL(memmove)
+_GLOBAL_KASAN(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
/* fall through */
-_GLOBAL(memcpy)
+_GLOBAL_KASAN(memcpy)
1: b generic_memcpy
patch_site 1b, patch__memcpy_nocache
@@ -244,6 +248,8 @@ _GLOBAL(memcpy)
65: blr
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memcpy)
+EXPORT_SYMBOL_KASAN(memmove)
generic_memcpy:
srwi. r7,r5,3
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 3c3be02f33b7..7f6bd031c306 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -12,7 +12,9 @@
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>
+#include <asm/kasan.h>
+#ifndef CONFIG_KASAN
_GLOBAL(__memset16)
rlwimi r4,r4,16,0,15
/* fall through */
@@ -29,8 +31,9 @@ _GLOBAL(__memset64)
EXPORT_SYMBOL(__memset16)
EXPORT_SYMBOL(__memset32)
EXPORT_SYMBOL(__memset64)
+#endif
-_GLOBAL(memset)
+_GLOBAL_KASAN(memset)
neg r0,r3
rlwimi r4,r4,8,16,23
andi. r0,r0,7 /* # bytes to be 8-byte aligned */
@@ -96,8 +99,9 @@ _GLOBAL(memset)
stb r4,0(r6)
blr
EXPORT_SYMBOL(memset)
+EXPORT_SYMBOL_KASAN(memset)
-_GLOBAL_TOC(memmove)
+_GLOBAL_TOC_KASAN(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy
b memcpy
@@ -139,3 +143,4 @@ _GLOBAL(backwards_memcpy)
mtctr r7
b 1b
EXPORT_SYMBOL(memmove)
+EXPORT_SYMBOL_KASAN(memmove)
diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S
index 273ea67e60a1..25c3772c1dfb 100644
--- a/arch/powerpc/lib/memcpy_64.S
+++ b/arch/powerpc/lib/memcpy_64.S
@@ -11,6 +11,7 @@
#include <asm/export.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
+#include <asm/kasan.h>
#ifndef SELFTEST_CASE
/* For big-endian, 0 == most CPUs, 1 == POWER6, 2 == Cell */
@@ -18,7 +19,7 @@
#endif
.align 7
-_GLOBAL_TOC(memcpy)
+_GLOBAL_TOC_KASAN(memcpy)
BEGIN_FTR_SECTION
#ifdef __LITTLE_ENDIAN__
cmpdi cr7,r5,0
@@ -230,3 +231,4 @@ END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
blr
#endif
EXPORT_SYMBOL(memcpy)
+EXPORT_SYMBOL_KASAN(memcpy)
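
_GLOBAL_KASAN() and EXPORT_SYMBOL_KASAN() are defined elsewhere in this series (asm/kasan.h) and are not shown in this diff. A plausible expansion, assuming that under KASAN the hand-written assembly keeps a double-underscore name while the instrumented C versions own the plain names:

/* sketch only -- the real definitions are not part of this excerpt */
#ifdef CONFIG_KASAN
#define _GLOBAL_KASAN(fn)	_GLOBAL(__##fn)
#define _GLOBAL_TOC_KASAN(fn)	_GLOBAL_TOC(__##fn)
#define EXPORT_SYMBOL_KASAN(fn)	EXPORT_SYMBOL(__##fn)
#else
#define _GLOBAL_KASAN(fn)	_GLOBAL(fn)
#define _GLOBAL_TOC_KASAN(fn)	_GLOBAL_TOC(fn)
#define EXPORT_SYMBOL_KASAN(fn)
#endif

That would explain why memset16/__memset16 and friends are compiled out here when KASAN is enabled: the generic instrumented versions take over the plain symbol names.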
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 3c1bd9fa23cd..0f499db315d6 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,53 +5,18 @@
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
-CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
+ pgtable-frag.o \
init-common.o mmu_context.o drmem.o
-obj-$(CONFIG_PPC_MMU_NOHASH) += mmu_context_nohash.o tlb_nohash.o \
- tlb_nohash_low.o
-obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
-hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
-obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
-obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o \
- $(hash64-y) mmu_context_book3s64.o \
- pgtable-book3s64.o pgtable-frag.o
-obj-$(CONFIG_PPC32) += pgtable-frag.o
-obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
-obj-$(CONFIG_PPC_BOOK3S_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
-obj-$(CONFIG_PPC_BOOK3S) += tlb_hash$(BITS).o
-ifdef CONFIG_PPC_BOOK3S_64
-obj-$(CONFIG_PPC_4K_PAGES) += hash64_4k.o
-obj-$(CONFIG_PPC_64K_PAGES) += hash64_64k.o
-endif
-obj-$(CONFIG_40x) += 40x_mmu.o
-obj-$(CONFIG_44x) += 44x_mmu.o
-obj-$(CONFIG_PPC_8xx) += 8xx_mmu.o
-obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke_mmu.o
+obj-$(CONFIG_PPC_MMU_NOHASH) += nohash/
+obj-$(CONFIG_PPC_BOOK3S_32) += book3s32/
+obj-$(CONFIG_PPC_BOOK3S_64) += book3s64/
obj-$(CONFIG_NEED_MULTIPLE_NODES) += numa.o
-obj-$(CONFIG_PPC_SPLPAR) += vphn.o
obj-$(CONFIG_PPC_MM_SLICES) += slice.o
-obj-y += hugetlbpage.o
-ifdef CONFIG_HUGETLB_PAGE
-obj-$(CONFIG_PPC_BOOK3S_64) += hugetlbpage-hash64.o
-obj-$(CONFIG_PPC_RADIX_MMU) += hugetlbpage-radix.o
-obj-$(CONFIG_PPC_BOOK3E_MMU) += hugetlbpage-book3e.o
-endif
-obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hugepage-hash64.o
-obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage-prot.o
+obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
obj-$(CONFIG_HIGHMEM) += highmem.o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
-obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
obj-$(CONFIG_PPC_PTDUMP) += ptdump/
-obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
-
-# Disable kcov instrumentation on sensitive code
-# This is necessary for booting with kcov enabled on book3e machines
-KCOV_INSTRUMENT_tlb_nohash.o := n
-KCOV_INSTRUMENT_fsl_booke_mmu.o := n
-
-# Instrumenting the SLB fault path can lead to duplicate SLB entries
-KCOV_INSTRUMENT_slb.o := n
+obj-$(CONFIG_KASAN) += kasan/
diff --git a/arch/powerpc/mm/book3s32/Makefile b/arch/powerpc/mm/book3s32/Makefile
new file mode 100644
index 000000000000..1732eaa740a9
--- /dev/null
+++ b/arch/powerpc/mm/book3s32/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE_mmu.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_mmu.o += -DDISABLE_BRANCH_PROFILING
+endif
+
+obj-y += mmu.o hash_low.o mmu_context.o tlb.o
diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/book3s32/hash_low.S
index a6c491f18a04..e27792d0b744 100644
--- a/arch/powerpc/mm/hash_low_32.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -309,13 +309,13 @@ Hash_msk = (((1 << Hash_bits) - 1) * 64)
_GLOBAL(create_hpte)
/* Convert linux-style PTE (r5) to low word of PPC-style PTE (r8) */
- rlwinm r8,r5,32-10,31,31 /* _PAGE_RW -> PP lsb */
- rlwinm r0,r5,32-7,31,31 /* _PAGE_DIRTY -> PP lsb */
+ rlwinm r8,r5,32-9,30,30 /* _PAGE_RW -> PP msb */
+ rlwinm r0,r5,32-6,30,30 /* _PAGE_DIRTY -> PP msb */
and r8,r8,r0 /* writable if _RW & _DIRTY */
rlwimi r5,r5,32-1,30,30 /* _PAGE_USER -> PP msb */
rlwimi r5,r5,32-2,31,31 /* _PAGE_USER -> PP lsb */
ori r8,r8,0xe04 /* clear out reserved bits */
- andc r8,r5,r8 /* PP = user? (rw&dirty? 2: 3): 0 */
+ andc r8,r5,r8 /* PP = user? (rw&dirty? 1: 3): 0 */
BEGIN_FTR_SECTION
rlwinm r8,r8,0,~_PAGE_COHERENT /* clear M (coherence not required) */
END_FTR_SECTION_IFCLR(CPU_FTR_NEED_COHERENT)
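The rlwinm/andc sequence now yields the PP values described in the updated comment: 0 for kernel mappings, 3 for user read-only pages, and 1 for user pages that are both writable and dirty. A C restatement of that decision, as a sketch only:

/* Illustrative only: the protection value create_hpte derives for a hash PTE. */
static unsigned int hpte_pp(bool user, bool rw, bool dirty)
{
	if (!user)
		return 0;			/* kernel mapping: no user access */
	return (rw && dirty) ? 1 : 3;		/* user RW once dirty, else read-only */
}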
diff --git a/arch/powerpc/mm/ppc_mmu_32.c b/arch/powerpc/mm/book3s32/mmu.c
index f29d2f118b44..fc073cb2c517 100644
--- a/arch/powerpc/mm/ppc_mmu_32.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -34,11 +34,12 @@
#include <asm/code-patching.h>
#include <asm/sections.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
-struct hash_pte *Hash, *Hash_end;
-unsigned long Hash_size, Hash_mask;
+struct hash_pte *Hash;
+static unsigned long Hash_size, Hash_mask;
unsigned long _SDR1;
+static unsigned int hash_mb, hash_mb2;
struct ppc_bat BATS[8][2]; /* 8 pairs of IBAT, DBAT */
@@ -98,10 +99,20 @@ static int find_free_bat(void)
return -1;
}
+/*
+ * This function calculates the size of the larger block usable to map the
+ * beginning of an area based on the start address and size of that area:
 + * - max block size is 8M on 601 and 256M on other 6xx.
+ * - base address must be aligned to the block size. So the maximum block size
+ * is identified by the lowest bit set to 1 in the base address (for instance
+ * if base is 0x16000000, max size is 0x02000000).
+ * - block size has to be a power of two. This is calculated by finding the
+ * highest bit set to 1.
+ */
static unsigned int block_size(unsigned long base, unsigned long top)
{
unsigned int max_size = (cpu_has_feature(CPU_FTR_601) ? 8 : 256) << 20;
- unsigned int base_shift = (fls(base) - 1) & 31;
+ unsigned int base_shift = (ffs(base) - 1) & 31;
unsigned int block_shift = (fls(top - base) - 1) & 31;
return min3(max_size, 1U << base_shift, 1U << block_shift);
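Per the new comment, the ffs() change makes the alignment limit follow the lowest set bit of the base (for base 0x16000000 that is 0x02000000, i.e. 32M), while fls() caps the block at the largest power of two not exceeding the area size. A plain C restatement of the visible computation, assuming a non-601 CPU:

/* Sketch of block_size() as patched above; ffs() limits by base alignment,
 * fls() by area size, min3() applies the 256M cap. */
static unsigned int block_size_sketch(unsigned long base, unsigned long top)
{
	unsigned int max_size = 256 << 20;
	unsigned int base_shift = (ffs(base) - 1) & 31;		/* lowest set bit */
	unsigned int block_shift = (fls(top - base) - 1) & 31;	/* highest set bit */

	return min3(max_size, 1U << base_shift, 1U << block_shift);
}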
@@ -157,7 +168,7 @@ static unsigned long __init __mmu_mapin_ram(unsigned long base, unsigned long to
unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
{
- int done;
+ unsigned long done;
unsigned long border = (unsigned long)__init_begin - PAGE_OFFSET;
if (__map_without_bats) {
@@ -169,10 +180,10 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
return __mmu_mapin_ram(base, top);
done = __mmu_mapin_ram(base, border);
- if (done != border - base)
+ if (done != border)
return done;
- return done + __mmu_mapin_ram(border, top);
+ return __mmu_mapin_ram(border, top);
}
void mmu_mark_initmem_nx(void)
@@ -308,7 +319,6 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
*/
void __init MMU_init_hw(void)
{
- unsigned int hmask, mb, mb2;
unsigned int n_hpteg, lg_n_hpteg;
if (!mmu_has_feature(MMU_FTR_HPTE_TABLE))
@@ -345,26 +355,34 @@ void __init MMU_init_hw(void)
__func__, Hash_size, Hash_size);
_SDR1 = __pa(Hash) | SDR1_LOW_BITS;
- Hash_end = (struct hash_pte *) ((unsigned long)Hash + Hash_size);
+ pr_info("Total memory = %lldMB; using %ldkB for hash table\n",
+ (unsigned long long)(total_memory >> 20), Hash_size >> 10);
- printk("Total memory = %lldMB; using %ldkB for hash table (at %p)\n",
- (unsigned long long)(total_memory >> 20), Hash_size >> 10, Hash);
+ Hash_mask = n_hpteg - 1;
+ hash_mb2 = hash_mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
+ if (lg_n_hpteg > 16)
+ hash_mb2 = 16 - LG_HPTEG_SIZE;
+}
+
+void __init MMU_init_hw_patch(void)
+{
+ unsigned int hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
+
+ if (ppc_md.progress)
+ ppc_md.progress("hash:patch", 0x345);
+ if (ppc_md.progress)
+ ppc_md.progress("hash:done", 0x205);
+
+ /* WARNING: Make sure nothing can trigger a KASAN check past this point */
/*
* Patch up the instructions in hashtable.S:create_hpte
*/
- if ( ppc_md.progress ) ppc_md.progress("hash:patch", 0x345);
- Hash_mask = n_hpteg - 1;
- hmask = Hash_mask >> (16 - LG_HPTEG_SIZE);
- mb2 = mb = 32 - LG_HPTEG_SIZE - lg_n_hpteg;
- if (lg_n_hpteg > 16)
- mb2 = 16 - LG_HPTEG_SIZE;
-
modify_instruction_site(&patch__hash_page_A0, 0xffff,
((unsigned int)Hash - PAGE_OFFSET) >> 16);
- modify_instruction_site(&patch__hash_page_A1, 0x7c0, mb << 6);
- modify_instruction_site(&patch__hash_page_A2, 0x7c0, mb2 << 6);
+ modify_instruction_site(&patch__hash_page_A1, 0x7c0, hash_mb << 6);
+ modify_instruction_site(&patch__hash_page_A2, 0x7c0, hash_mb2 << 6);
modify_instruction_site(&patch__hash_page_B, 0xffff, hmask);
modify_instruction_site(&patch__hash_page_C, 0xffff, hmask);
@@ -373,11 +391,9 @@ void __init MMU_init_hw(void)
*/
modify_instruction_site(&patch__flush_hash_A0, 0xffff,
((unsigned int)Hash - PAGE_OFFSET) >> 16);
- modify_instruction_site(&patch__flush_hash_A1, 0x7c0, mb << 6);
- modify_instruction_site(&patch__flush_hash_A2, 0x7c0, mb2 << 6);
+ modify_instruction_site(&patch__flush_hash_A1, 0x7c0, hash_mb << 6);
+ modify_instruction_site(&patch__flush_hash_A2, 0x7c0, hash_mb2 << 6);
modify_instruction_site(&patch__flush_hash_B, 0xffff, hmask);
-
- if ( ppc_md.progress ) ppc_md.progress("hash:done", 0x205);
}
void setup_initial_memory_limit(phys_addr_t first_memblock_base,
@@ -394,3 +410,33 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
else /* Anything else has 256M mapped */
memblock_set_current_limit(min_t(u64, first_memblock_size, 0x10000000));
}
+
+void __init print_system_hash_info(void)
+{
+ pr_info("Hash_size = 0x%lx\n", Hash_size);
+ if (Hash_mask)
+ pr_info("Hash_mask = 0x%lx\n", Hash_mask);
+}
+
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ if (cpu_has_feature(CPU_FTR_601))
+ pr_warn("KUEP is not working on powerpc 601 (No NX bit in Seg Regs)\n");
+
+ if (disabled)
+ pr_warn("KUEP cannot be disabled yet on 6xx when compiled in\n");
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void __init setup_kuap(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ if (disabled)
+ pr_warn("KUAP cannot be disabled yet on 6xx when compiled in\n");
+}
+#endif
diff --git a/arch/powerpc/mm/mmu_context_hash32.c b/arch/powerpc/mm/book3s32/mmu_context.c
index 921c1e33e941..921c1e33e941 100644
--- a/arch/powerpc/mm/mmu_context_hash32.c
+++ b/arch/powerpc/mm/book3s32/mmu_context.c
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/book3s32/tlb.c
index cf8472cf3d59..8d56f0417f87 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/book3s32/tlb.c
@@ -32,7 +32,7 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/*
* Called when unmapping pages to flush entries from the TLB/hash table.
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
new file mode 100644
index 000000000000..974b4fc19f4f
--- /dev/null
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-y := $(NO_MINIMAL_TOC)
+
+CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
+
+obj-y += hash_pgtable.o hash_utils.o slb.o \
+ mmu_context.o pgtable.o hash_tlb.o
+obj-$(CONFIG_PPC_NATIVE) += hash_native.o
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_pgtable.o radix_tlb.o
+obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
+obj-$(CONFIG_PPC_64K_PAGES) += hash_64k.o
+obj-$(CONFIG_PPC_SPLPAR) += vphn.o
+obj-$(CONFIG_HUGETLB_PAGE) += hash_hugetlbpage.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_RADIX_MMU) += radix_hugetlbpage.o
+endif
+obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += hash_hugepage.o
+obj-$(CONFIG_PPC_SUBPAGE_PROT) += subpage_prot.o
+obj-$(CONFIG_SPAPR_TCE_IOMMU) += iommu_api.o
+obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
+
+# Instrumenting the SLB fault path can lead to duplicate SLB entries
+KCOV_INSTRUMENT_slb.o := n
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 6fa6765a10eb..22e787123cdf 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -1,6 +1,6 @@
/*
* Copyright IBM Corporation, 2015
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 3afa253d7f52..7084ce2951e6 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -1,6 +1,6 @@
/*
* Copyright IBM Corporation, 2015
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugepage.c
index dfbc3b32f09b..440823797de7 100644
--- a/arch/powerpc/mm/hugepage-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_hugepage.c
@@ -1,6 +1,6 @@
/*
* Copyright IBM Corporation, 2013
- * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of version 2.1 of the GNU Lesser General Public License
diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
index b0d9209d9a86..eefa89c6117b 100644
--- a/arch/powerpc/mm/hugetlbpage-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c
@@ -15,6 +15,9 @@
#include <asm/cacheflush.h>
#include <asm/machdep.h>
+unsigned int hpage_shift;
+EXPORT_SYMBOL(hpage_shift);
+
extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn,
unsigned long pa, unsigned long rlags,
unsigned long vflags, int psize, int ssize);
@@ -34,7 +37,8 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
/* Search the Linux page table for a match with va */
vpn = hpt_vpn(ea, vsid, ssize);
- /* At this point, we have a pte (old_pte) which can be used to build
+ /*
+ * At this point, we have a pte (old_pte) which can be used to build
* or update an HPTE. There are 2 cases:
*
* 1. There is a valid (present) pte with no associated HPTE (this is
@@ -55,8 +59,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
if (unlikely(!check_pte_access(access, old_pte)))
return 1;
- /* Try to lock the PTE, add ACCESSED and DIRTY if it was
- * a write access */
+ /*
+ * Try to lock the PTE, add ACCESSED and DIRTY if it was
+ * a write access
+ */
new_pte = old_pte | H_PAGE_BUSY | _PAGE_ACCESSED;
if (access & _PAGE_WRITE)
new_pte |= _PAGE_DIRTY;
@@ -74,8 +80,10 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid,
rpte = __real_pte(__pte(old_pte), ptep, offset);
if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
- /* No CPU has hugepages but lacks no execute, so we
- * don't need to worry about that case */
+ /*
+ * No CPU has hugepages but lacks no execute, so we
+ * don't need to worry about that case
+ */
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
/* Check if pte already has an hpte (case 2) */
@@ -145,3 +153,16 @@ void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr
old_pte, pte);
set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
}
+
+void hugetlbpage_init_default(void)
+{
+ /* Set default large page size. Currently, we pick 16M or 1M
+ * depending on what is available
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_16M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_1M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_1M].shift;
+ else if (mmu_psize_defs[MMU_PAGE_2M].shift)
+ hpage_shift = mmu_psize_defs[MMU_PAGE_2M].shift;
+}
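hpage_shift and hugetlbpage_init_default() make the default huge page size a runtime choice rather than a compile-time constant. Presumably the generic HPAGE_* macros are then derived from the exported variable elsewhere in the headers, roughly like this (a sketch, not the exact header text):

/* Hypothetical derivation, assuming CONFIG_HUGETLB_PAGE_SIZE_VARIABLE. */
extern unsigned int hpage_shift;
#define HPAGE_SHIFT	hpage_shift
#define HPAGE_SIZE	(1UL << HPAGE_SHIFT)
#define HPAGE_MASK	(~(HPAGE_SIZE - 1))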
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/book3s64/hash_native.c
index aaa28fd918fe..aaa28fd918fe 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/book3s64/hash_native.c
diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/book3s64/hash_pgtable.c
index c08d49046a96..1fd025dba4a3 100644
--- a/arch/powerpc/mm/pgtable-hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_pgtable.c
@@ -19,7 +19,7 @@
#include <asm/mmu.h>
#include <asm/tlb.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#define CREATE_TRACE_POINTS
#include <trace/events/thp.h>
@@ -112,9 +112,16 @@ int __meminit hash__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys)
{
- int rc = htab_bolt_mapping(start, start + page_size, phys,
- pgprot_val(PAGE_KERNEL),
- mmu_vmemmap_psize, mmu_kernel_ssize);
+ int rc;
+
+ if ((start + page_size) >= H_VMEMMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ rc = htab_bolt_mapping(start, start + page_size, phys,
+ pgprot_val(PAGE_KERNEL),
+ mmu_vmemmap_psize, mmu_kernel_ssize);
if (rc < 0) {
int rc2 = htab_remove_mapping(start, start + page_size,
mmu_vmemmap_psize,
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/book3s64/hash_tlb.c
index 87d71dd25441..d4f0101447b1 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/book3s64/hash_tlb.c
@@ -55,7 +55,8 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
i = batch->index;
- /* Get page size (maybe move back to caller).
+ /*
+ * Get page size (maybe move back to caller).
*
* NOTE: when using special 64K mappings in 4K environment like
* for SPEs, we obtain the page size from the slice, which thus
@@ -77,10 +78,12 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
#endif
} else {
psize = pte_pagesize_index(mm, addr, pte);
- /* Mask the address for the standard page size. If we
+ /*
+ * Mask the address for the standard page size. If we
* have a 64k page kernel, but the hardware does not
* support 64k pages, this might be different from the
- * hardware page size encoded in the slice table. */
+ * hardware page size encoded in the slice table.
+ */
addr &= PAGE_MASK;
offset = PTRS_PER_PTE;
}
@@ -161,7 +164,8 @@ void hash__tlb_flush(struct mmu_gather *tlb)
{
struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
- /* If there's a TLB batch pending, then we must flush it because the
+ /*
+ * If there's a TLB batch pending, then we must flush it because the
* pages are going to be freed and we really don't want to have a CPU
* access a freed page because it has a stale TLB
*/
@@ -201,7 +205,8 @@ void __flush_hash_table_range(struct mm_struct *mm, unsigned long start,
BUG_ON(!mm->pgd);
- /* Note: Normally, we should only ever use a batch within a
+ /*
+ * Note: Normally, we should only ever use a batch within a
* PTE locked section. This violates the rule, but will work
* since we don't actually modify the PTEs, we just flush the
* hash while leaving the PTEs intact (including their reference
@@ -238,7 +243,8 @@ void flush_tlb_pmd_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
unsigned long flags;
addr = _ALIGN_DOWN(addr, PMD_SIZE);
- /* Note: Normally, we should only ever use a batch within a
+ /*
+ * Note: Normally, we should only ever use a batch within a
* PTE locked section. This violates the rule, but will work
* since we don't actually modify the PTEs, we just flush the
* hash while leaving the PTEs intact (including their reference
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 0a4f939a8161..919a861a8ec0 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -37,6 +37,7 @@
#include <linux/context_tracking.h>
#include <linux/libfdt.h>
#include <linux/pkeys.h>
+#include <linux/hugetlb.h>
#include <asm/debugfs.h>
#include <asm/processor.h>
@@ -65,6 +66,8 @@
#include <asm/pte-walk.h>
#include <asm/asm-prototypes.h>
+#include <mm/mmu_decl.h>
+
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#else
@@ -128,7 +131,8 @@ static DEFINE_SPINLOCK(linear_map_hash_lock);
struct mmu_hash_ops mmu_hash_ops;
EXPORT_SYMBOL(mmu_hash_ops);
-/* There are definitions of page sizes arrays to be used when none
+/*
+ * These are definitions of page sizes arrays to be used when none
* is provided by the firmware.
*/
@@ -145,7 +149,8 @@ static struct mmu_psize_def mmu_psize_defaults[] = {
},
};
-/* POWER4, GPUL, POWER5
+/*
+ * POWER4, GPUL, POWER5
*
* Support for 16Mb large pages
*/
@@ -479,7 +484,8 @@ static int __init htab_dt_scan_page_sizes(unsigned long node,
}
#ifdef CONFIG_HUGETLB_PAGE
-/* Scan for 16G memory blocks that have been set aside for huge pages
+/*
+ * Scan for 16G memory blocks that have been set aside for huge pages
* and reserve those blocks for 16G huge pages.
*/
static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
@@ -496,8 +502,10 @@ static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
if (type == NULL || strcmp(type, "memory") != 0)
return 0;
- /* This property is the log base 2 of the number of virtual pages that
- * will represent this memory block. */
+ /*
+ * This property is the log base 2 of the number of virtual pages that
+ * will represent this memory block.
+ */
page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
if (page_count_prop == NULL)
return 0;
@@ -673,7 +681,8 @@ static void __init htab_init_page_sizes(void)
#endif /* CONFIG_PPC_64K_PAGES */
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- /* We try to use 16M pages for vmemmap if that is supported
+ /*
+ * We try to use 16M pages for vmemmap if that is supported
* and we have at least 1G of RAM at boot
*/
if (mmu_psize_defs[MMU_PAGE_16M].shift &&
@@ -742,7 +751,8 @@ unsigned htab_shift_for_mem_size(unsigned long mem_size)
static unsigned long __init htab_get_table_size(void)
{
- /* If hash size isn't already provided by the platform, we try to
+ /*
+ * If hash size isn't already provided by the platform, we try to
* retrieve it from the device-tree. If it's not there neither, we
* calculate it now based on the total RAM size
*/
@@ -755,12 +765,12 @@ static unsigned long __init htab_get_table_size(void)
}
#ifdef CONFIG_MEMORY_HOTPLUG
-void resize_hpt_for_hotplug(unsigned long new_mem_size)
+int resize_hpt_for_hotplug(unsigned long new_mem_size)
{
unsigned target_hpt_shift;
if (!mmu_hash_ops.resize_hpt)
- return;
+ return 0;
target_hpt_shift = htab_shift_for_mem_size(new_mem_size);
@@ -772,23 +782,25 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size)
* reduce unless the target shift is at least 2 below the
* current shift
*/
- if ((target_hpt_shift > ppc64_pft_size)
- || (target_hpt_shift < (ppc64_pft_size - 1))) {
- int rc;
-
- rc = mmu_hash_ops.resize_hpt(target_hpt_shift);
- if (rc && (rc != -ENODEV))
- printk(KERN_WARNING
- "Unable to resize hash page table to target order %d: %d\n",
- target_hpt_shift, rc);
- }
+ if (target_hpt_shift > ppc64_pft_size ||
+ target_hpt_shift < ppc64_pft_size - 1)
+ return mmu_hash_ops.resize_hpt(target_hpt_shift);
+
+ return 0;
}
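With resize_hpt_for_hotplug() returning an int instead of printing, reporting a failed resize becomes the caller's job. A hedged sketch of how a memory hot-add path might consume the new return value (error handling here is illustrative only):

static int hash_memory_add_sketch(void)
{
	int rc = resize_hpt_for_hotplug(memblock_phys_mem_size());

	if (rc && rc != -ENODEV)
		pr_warn("Unable to resize hash page table to target order: %d\n", rc);

	return rc == -ENODEV ? 0 : rc;
}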
int hash__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
- int rc = htab_bolt_mapping(start, end, __pa(start),
- pgprot_val(PAGE_KERNEL), mmu_linear_psize,
- mmu_kernel_ssize);
+ int rc;
+
+ if (end >= H_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
+ rc = htab_bolt_mapping(start, end, __pa(start),
+ pgprot_val(PAGE_KERNEL), mmu_linear_psize,
+ mmu_kernel_ssize);
if (rc < 0) {
int rc2 = htab_remove_mapping(start, end, mmu_linear_psize,
@@ -929,6 +941,11 @@ static void __init htab_initialize(void)
DBG("creating mapping for region: %lx..%lx (prot: %lx)\n",
base, size, prot);
+ if ((base + size) >= H_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ continue;
+ }
+
BUG_ON(htab_bolt_mapping(base, base + size, __pa(base),
prot, mmu_linear_psize, mmu_kernel_ssize));
}
@@ -968,6 +985,7 @@ void __init hash__early_init_devtree(void)
htab_scan_page_sizes();
}
+struct hash_mm_context init_hash_mm_context;
void __init hash__early_init_mmu(void)
{
#ifndef CONFIG_PPC_64K_PAGES
@@ -1013,11 +1031,11 @@ void __init hash__early_init_mmu(void)
__pgd_val_bits = HASH_PGD_VAL_BITS;
__kernel_virt_start = H_KERN_VIRT_START;
- __kernel_virt_size = H_KERN_VIRT_SIZE;
__vmalloc_start = H_VMALLOC_START;
__vmalloc_end = H_VMALLOC_END;
__kernel_io_start = H_KERN_IO_START;
- vmemmap = (struct page *)H_VMEMMAP_BASE;
+ __kernel_io_end = H_KERN_IO_END;
+ vmemmap = (struct page *)H_VMEMMAP_START;
ioremap_bot = IOREMAP_BASE;
#ifdef CONFIG_PCI
@@ -1035,12 +1053,16 @@ void __init hash__early_init_mmu(void)
if (!mmu_hash_ops.hpte_insert)
panic("hash__early_init_mmu: No MMU hash ops defined!\n");
- /* Initialize the MMU Hash table and create the linear mapping
+ /*
+ * Initialize the MMU Hash table and create the linear mapping
* of memory. Has to be done before SLB initialization as this is
* currently where the page size encoding is obtained.
*/
htab_initialize();
+ init_mm.context.hash_context = &init_hash_mm_context;
+ mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
+
pr_info("Initializing hash mmu with SLB\n");
/* Initialize SLB management */
slb_initialize();
@@ -1147,10 +1169,13 @@ void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
*/
static int subpage_protection(struct mm_struct *mm, unsigned long ea)
{
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
u32 spp = 0;
u32 **sbpm, *sbpp;
+ if (!spt)
+ return 0;
+
if (ea >= spt->maxaddr)
return 0;
if (ea < 0x100000000UL) {
@@ -1214,7 +1239,8 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm,
}
}
-/* Result code is:
+/*
+ * Result code is:
* 0 - handled
* 1 - normal page fault
* -1 - critical hash insertion error
@@ -1238,7 +1264,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
trace_hash_fault(ea, access, trap);
/* Get region & vsid */
- switch (REGION_ID(ea)) {
+ switch (get_region_id(ea)) {
case USER_REGION_ID:
user_region = 1;
if (! mm) {
@@ -1252,15 +1278,19 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
break;
case VMALLOC_REGION_ID:
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
- if (ea < VMALLOC_END)
- psize = mmu_vmalloc_psize;
- else
- psize = mmu_io_psize;
+ psize = mmu_vmalloc_psize;
+ ssize = mmu_kernel_ssize;
+ break;
+
+ case IO_REGION_ID:
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ psize = mmu_io_psize;
ssize = mmu_kernel_ssize;
break;
default:
- /* Not a valid range
- * Send the problem up to do_page_fault
+ /*
+ * Not a valid range
+ * Send the problem up to do_page_fault()
*/
rc = 1;
goto bail;
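The switch above now relies on get_region_id() to classify the effective address, with the IO range split out of the old VMALLOC case instead of being separated by a VMALLOC_END comparison. A simplified sketch of such a classifier, using the boundary names introduced by this series (the real helper lives in the book3s64 headers and also handles the vmemmap region):

static int region_id_sketch(unsigned long ea)
{
	if ((ea >> 60) == 0)
		return USER_REGION_ID;
	if ((ea >> 60) != 0xc)
		return -1;			/* not a valid kernel address */
	if (ea < H_KERN_VIRT_START)
		return LINEAR_MAP_REGION_ID;
	if (ea < H_VMALLOC_END)
		return VMALLOC_REGION_ID;
	return IO_REGION_ID;
}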
@@ -1285,7 +1315,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
flags |= HPTE_LOCAL_UPDATE;
#ifndef CONFIG_PPC_64K_PAGES
- /* If we use 4K pages and our psize is not 4K, then we might
+ /*
+ * If we use 4K pages and our psize is not 4K, then we might
* be hitting a special driver mapping, and need to align the
* address before we fetch the PTE.
*
@@ -1307,7 +1338,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
/* Add _PAGE_PRESENT to the required access perm */
access |= _PAGE_PRESENT;
- /* Pre-check access permissions (will be re-checked atomically
+ /*
+ * Pre-check access permissions (will be re-checked atomically
* in __hash_page_XX but this pre-check is a fast path
*/
if (!check_pte_access(access, pte_val(*ptep))) {
@@ -1354,7 +1386,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
psize = MMU_PAGE_4K;
}
- /* If this PTE is non-cacheable and we have restrictions on
+ /*
+ * If this PTE is non-cacheable and we have restrictions on
* using non cacheable large pages, then we switch to 4k
*/
if (mmu_ci_restrictions && psize == MMU_PAGE_64K && pte_ci(*ptep)) {
@@ -1395,7 +1428,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea,
flags, ssize, spp);
}
- /* Dump some info in case of hash insertion failure, they should
+ /*
+ * Dump some info in case of hash insertion failure, they should
* never happen so it is really useful to know if/when they do
*/
if (rc == -1)
@@ -1421,7 +1455,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap,
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
- if (REGION_ID(ea) == VMALLOC_REGION_ID)
+ if ((get_region_id(ea) == VMALLOC_REGION_ID) ||
+ (get_region_id(ea) == IO_REGION_ID))
mm = &init_mm;
if (dsisr & DSISR_NOHPTE)
@@ -1437,8 +1472,9 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
unsigned long access = _PAGE_PRESENT | _PAGE_READ;
unsigned long flags = 0;
struct mm_struct *mm = current->mm;
+ unsigned int region_id = get_region_id(ea);
- if (REGION_ID(ea) == VMALLOC_REGION_ID)
+ if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID))
mm = &init_mm;
if (dsisr & DSISR_NOHPTE)
@@ -1455,7 +1491,7 @@ int __hash_page(unsigned long ea, unsigned long msr, unsigned long trap,
* 2) user space access kernel space.
*/
access |= _PAGE_PRIVILEGED;
- if ((msr & MSR_PR) || (REGION_ID(ea) == USER_REGION_ID))
+ if ((msr & MSR_PR) || (region_id == USER_REGION_ID))
access &= ~_PAGE_PRIVILEGED;
if (trap == 0x400)
@@ -1470,7 +1506,7 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
int psize = get_slice_psize(mm, ea);
/* We only prefault standard pages for now */
- if (unlikely(psize != mm->context.user_psize))
+ if (unlikely(psize != mm_ctx_user_psize(&mm->context)))
return false;
/*
@@ -1499,7 +1535,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
int rc, ssize, update_flags = 0;
unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
- BUG_ON(REGION_ID(ea) != USER_REGION_ID);
+ BUG_ON(get_region_id(ea) != USER_REGION_ID);
if (!should_hash_preload(mm, ea))
return;
@@ -1549,7 +1585,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
/* Hash it in */
#ifdef CONFIG_PPC_64K_PAGES
- if (mm->context.user_psize == MMU_PAGE_64K)
+ if (mm_ctx_user_psize(&mm->context) == MMU_PAGE_64K)
rc = __hash_page_64K(ea, access, vsid, ptep, trap,
update_flags, ssize);
else
@@ -1562,8 +1598,8 @@ void hash_preload(struct mm_struct *mm, unsigned long ea,
*/
if (rc == -1)
hash_failure_debug(ea, access, vsid, trap, ssize,
- mm->context.user_psize,
- mm->context.user_psize,
+ mm_ctx_user_psize(&mm->context),
+ mm_ctx_user_psize(&mm->context),
pte_val(*ptep));
out_exit:
local_irq_restore(flags);
@@ -1634,7 +1670,8 @@ unsigned long pte_get_hash_gslot(unsigned long vpn, unsigned long shift,
return gslot;
}
-/* WARNING: This is called from hash_low_64.S, if you change this prototype,
+/*
+ * WARNING: This is called from hash_low_64.S, if you change this prototype,
* do not forget to update the assembly call site !
*/
void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
@@ -1855,7 +1892,8 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
void hash__setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
- /* We don't currently support the first MEMBLOCK not mapping 0
+ /*
+ * We don't currently support the first MEMBLOCK not mapping 0
* physical on those processors
*/
BUG_ON(first_memblock_base != 0);
@@ -1909,3 +1947,14 @@ static int __init hash64_debugfs(void)
}
machine_device_initcall(pseries, hash64_debugfs);
#endif /* CONFIG_DEBUG_FS */
+
+void __init print_system_hash_info(void)
+{
+ pr_info("ppc64_pft_size = 0x%llx\n", ppc64_pft_size);
+
+ if (htab_hash_mask)
+ pr_info("htab_hash_mask = 0x%lx\n", htab_hash_mask);
+ pr_info("kernel vmalloc start = 0x%lx\n", KERN_VIRT_START);
+ pr_info("kernel IO start = 0x%lx\n", KERN_IO_START);
+ pr_info("kernel vmemmap start = 0x%lx\n", (unsigned long)vmemmap);
+}
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/book3s64/iommu_api.c
index 8330f135294f..8330f135294f 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/book3s64/iommu_api.c
diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/book3s64/mmu_context.c
index f720c5cc0b5e..cb2b08635508 100644
--- a/arch/powerpc/mm/mmu_context_book3s64.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -63,6 +63,13 @@ static int hash__init_new_context(struct mm_struct *mm)
if (index < 0)
return index;
+ mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
+ GFP_KERNEL);
+ if (!mm->context.hash_context) {
+ ida_free(&mmu_context_ida, index);
+ return -ENOMEM;
+ }
+
/*
* The old code would re-promote on fork, we don't do that when using
* slices as it could cause problem promoting slices that have been
@@ -77,10 +84,26 @@ static int hash__init_new_context(struct mm_struct *mm)
* We should not be calling init_new_context() on init_mm. Hence a
* check against 0 is OK.
*/
- if (mm->context.id == 0)
+ if (mm->context.id == 0) {
+ memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
slice_init_new_context_exec(mm);
+ } else {
+ /* This is fork. Copy hash_context details from current->mm */
+ memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
+#ifdef CONFIG_PPC_SUBPAGE_PROT
 + /* inherit subpage prot details if we have one. */
+ if (current->mm->context.hash_context->spt) {
+ mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
+ GFP_KERNEL);
+ if (!mm->context.hash_context->spt) {
+ ida_free(&mmu_context_ida, index);
+ kfree(mm->context.hash_context);
+ return -ENOMEM;
+ }
+ }
+#endif
- subpage_prot_init_new_context(mm);
+ }
pkey_mm_init(mm);
return index;
@@ -118,6 +141,7 @@ static int radix__init_new_context(struct mm_struct *mm)
asm volatile("ptesync;isync" : : : "memory");
mm->context.npu_context = NULL;
+ mm->context.hash_context = NULL;
return index;
}
@@ -162,6 +186,7 @@ static void destroy_contexts(mm_context_t *ctx)
if (context_id)
ida_free(&mmu_context_ida, context_id);
}
+ kfree(ctx->hash_context);
}
static void pmd_frag_destroy(void *pmd_frag)
diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/book3s64/pgtable.c
index a4341aba0af4..16bda049187a 100644
--- a/arch/powerpc/mm/pgtable-book3s64.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -17,7 +17,7 @@
#include <asm/trace.h>
#include <asm/powernv.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#include <trace/events/thp.h>
unsigned long __pmd_frag_nr;
diff --git a/arch/powerpc/mm/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index 587807763737..ae7fca40e5b3 100644
--- a/arch/powerpc/mm/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -7,6 +7,7 @@
#include <asm/mman.h>
#include <asm/mmu_context.h>
+#include <asm/mmu.h>
#include <asm/setup.h>
#include <linux/pkeys.h>
#include <linux/of_device.h>
diff --git a/arch/powerpc/mm/hugetlbpage-radix.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index cab06331c0c0..cab06331c0c0 100644
--- a/arch/powerpc/mm/hugetlbpage-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 154472a28c77..c9bcf428dd2b 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -29,6 +29,7 @@
#include <asm/powernv.h>
#include <asm/sections.h>
#include <asm/trace.h>
+#include <asm/uaccess.h>
#include <trace/events/thp.h>
@@ -135,6 +136,10 @@ static int __map_kernel_page(unsigned long ea, unsigned long pa,
*/
BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE);
+#ifdef CONFIG_PPC_64K_PAGES
+ BUILD_BUG_ON(RADIX_KERN_MAP_SIZE != (1UL << MAX_EA_BITS_PER_CONTEXT));
+#endif
+
if (unlikely(!slab_is_available()))
return early_map_kernel_page(ea, pa, flags, map_page_size,
nid, region_start, region_end);
@@ -334,6 +339,12 @@ void __init radix_init_pgtable(void)
* page tables will be allocated within the range. No
* need or a node (which we don't have yet).
*/
+
+ if ((reg->base + reg->size) >= RADIX_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ continue;
+ }
+
WARN_ON(create_physical_mapping(reg->base,
reg->base + reg->size,
-1));
@@ -531,8 +542,15 @@ static void radix_init_amor(void)
mtspr(SPRN_AMOR, (3ul << 62));
}
-static void radix_init_iamr(void)
+#ifdef CONFIG_PPC_KUEP
+void setup_kuep(bool disabled)
{
+ if (disabled || !early_radix_enabled())
+ return;
+
+ if (smp_processor_id() == boot_cpuid)
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
/*
* Radix always uses key0 of the IAMR to determine if an access is
* allowed. We set bit 0 (IBM bit 1) of key0, to prevent instruction
@@ -540,6 +558,25 @@ static void radix_init_iamr(void)
*/
mtspr(SPRN_IAMR, (1ul << 62));
}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void setup_kuap(bool disabled)
+{
+ if (disabled || !early_radix_enabled())
+ return;
+
+ if (smp_processor_id() == boot_cpuid) {
+ pr_info("Activating Kernel Userspace Access Prevention\n");
+ cur_cpu_spec->mmu_features |= MMU_FTR_RADIX_KUAP;
+ }
+
+ /* Make sure userspace can't change the AMR */
+ mtspr(SPRN_UAMOR, 0);
+ mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
+ isync();
+}
+#endif
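setup_kuap() above only arms the facility at boot: UAMOR is locked to 0 so userspace cannot touch the AMR, and the AMR starts in the blocked state. The uaccess paths are then expected to open and close the window by rewriting the AMR around each user access, roughly as in this sketch (the real helpers are in the KUP headers added by this series):

static inline void set_kuap_sketch(unsigned long value)
{
	if (!mmu_has_feature(MMU_FTR_RADIX_KUAP))
		return;
	isync();
	mtspr(SPRN_AMR, value);		/* 0 = allow, AMR_KUAP_BLOCKED = prevent */
	isync();
}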
void __init radix__early_init_mmu(void)
{
@@ -574,11 +611,11 @@ void __init radix__early_init_mmu(void)
__pgd_val_bits = RADIX_PGD_VAL_BITS;
__kernel_virt_start = RADIX_KERN_VIRT_START;
- __kernel_virt_size = RADIX_KERN_VIRT_SIZE;
__vmalloc_start = RADIX_VMALLOC_START;
__vmalloc_end = RADIX_VMALLOC_END;
__kernel_io_start = RADIX_KERN_IO_START;
- vmemmap = (struct page *)RADIX_VMEMMAP_BASE;
+ __kernel_io_end = RADIX_KERN_IO_END;
+ vmemmap = (struct page *)RADIX_VMEMMAP_START;
ioremap_bot = IOREMAP_BASE;
#ifdef CONFIG_PCI
@@ -601,7 +638,6 @@ void __init radix__early_init_mmu(void)
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- radix_init_iamr();
radix_init_pgtable();
/* Switch to the guard PID before turning on MMU */
radix__switch_mmu_context(NULL, &init_mm);
@@ -623,7 +659,6 @@ void radix__early_init_mmu_secondary(void)
__pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
radix_init_amor();
}
- radix_init_iamr();
radix__switch_mmu_context(NULL, &init_mm);
if (cpu_has_feature(CPU_FTR_HVMODE))
@@ -646,7 +681,8 @@ void radix__mmu_cleanup_all(void)
void radix__setup_initial_memory_limit(phys_addr_t first_memblock_base,
phys_addr_t first_memblock_size)
{
- /* We don't currently support the first MEMBLOCK not mapping 0
+ /*
+ * We don't currently support the first MEMBLOCK not mapping 0
* physical on those processors
*/
BUG_ON(first_memblock_base != 0);
@@ -866,6 +902,11 @@ static void __meminit remove_pagetable(unsigned long start, unsigned long end)
int __meminit radix__create_section_mapping(unsigned long start, unsigned long end, int nid)
{
+ if (end >= RADIX_VMALLOC_START) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
return create_physical_mapping(start, end, nid);
}
@@ -893,6 +934,11 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start,
int nid = early_pfn_to_nid(phys >> PAGE_SHIFT);
int ret;
+ if ((start + page_size) >= RADIX_VMEMMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return -1;
+ }
+
ret = __map_kernel_page_nid(start, phys, __pgprot(flags), page_size, nid);
BUG_ON(ret);
@@ -958,45 +1004,44 @@ pmd_t radix__pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long addre
void radix__pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
pgtable_t pgtable)
{
- struct list_head *lh = (struct list_head *) pgtable;
+ struct list_head *lh = (struct list_head *) pgtable;
- assert_spin_locked(pmd_lockptr(mm, pmdp));
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
- /* FIFO */
- if (!pmd_huge_pte(mm, pmdp))
- INIT_LIST_HEAD(lh);
- else
- list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
- pmd_huge_pte(mm, pmdp) = pgtable;
+ /* FIFO */
+ if (!pmd_huge_pte(mm, pmdp))
+ INIT_LIST_HEAD(lh);
+ else
+ list_add(lh, (struct list_head *) pmd_huge_pte(mm, pmdp));
+ pmd_huge_pte(mm, pmdp) = pgtable;
}
pgtable_t radix__pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp)
{
- pte_t *ptep;
- pgtable_t pgtable;
- struct list_head *lh;
-
- assert_spin_locked(pmd_lockptr(mm, pmdp));
-
- /* FIFO */
- pgtable = pmd_huge_pte(mm, pmdp);
- lh = (struct list_head *) pgtable;
- if (list_empty(lh))
- pmd_huge_pte(mm, pmdp) = NULL;
- else {
- pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
- list_del(lh);
- }
- ptep = (pte_t *) pgtable;
- *ptep = __pte(0);
- ptep++;
- *ptep = __pte(0);
- return pgtable;
-}
+ pte_t *ptep;
+ pgtable_t pgtable;
+ struct list_head *lh;
+ assert_spin_locked(pmd_lockptr(mm, pmdp));
+
+ /* FIFO */
+ pgtable = pmd_huge_pte(mm, pmdp);
+ lh = (struct list_head *) pgtable;
+ if (list_empty(lh))
+ pmd_huge_pte(mm, pmdp) = NULL;
+ else {
+ pmd_huge_pte(mm, pmdp) = (pgtable_t) lh->next;
+ list_del(lh);
+ }
+ ptep = (pte_t *) pgtable;
+ *ptep = __pte(0);
+ ptep++;
+ *ptep = __pte(0);
+ return pgtable;
+}
pmd_t radix__pmdp_huge_get_and_clear(struct mm_struct *mm,
- unsigned long addr, pmd_t *pmdp)
+ unsigned long addr, pmd_t *pmdp)
{
pmd_t old_pmd;
unsigned long old;
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 6a23b9ebd2a1..6a23b9ebd2a1 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/book3s64/slb.c
index 5986df48359b..c22742218bd3 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -554,7 +554,8 @@ void slb_initialize(void)
asm volatile("isync; slbia; isync":::"memory");
create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
- /* For the boot cpu, we're running on the stack in init_thread_union,
+ /*
+ * For the boot cpu, we're running on the stack in init_thread_union,
* which is in the first segment of the linear mapping, and also
* get_paca()->kstack hasn't been initialized yet.
* For secondary cpus, we need to bolt the kernel stack entry now.
@@ -691,10 +692,10 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
unsigned long flags;
int ssize;
- if (id == KERNEL_REGION_ID) {
+ if (id == LINEAR_MAP_REGION_ID) {
/* We only support upto MAX_PHYSMEM_BITS */
- if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
+ if ((ea & EA_MASK) > (1UL << MAX_PHYSMEM_BITS))
return -EFAULT;
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
@@ -702,20 +703,25 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
#ifdef CONFIG_SPARSEMEM_VMEMMAP
} else if (id == VMEMMAP_REGION_ID) {
- if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+ if (ea >= H_VMEMMAP_END)
return -EFAULT;
flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
#endif
} else if (id == VMALLOC_REGION_ID) {
- if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+ if (ea >= H_VMALLOC_END)
return -EFAULT;
- if (ea < H_VMALLOC_END)
- flags = local_paca->vmalloc_sllp;
- else
- flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+ flags = local_paca->vmalloc_sllp;
+
+ } else if (id == IO_REGION_ID) {
+
+ if (ea >= H_KERN_IO_END)
+ return -EFAULT;
+
+ flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+
} else {
return -EFAULT;
}
@@ -725,6 +731,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
ssize = MMU_SEGSIZE_256M;
context = get_kernel_context(ea);
+
return slb_insert_entry(ea, context, flags, ssize, true);
}
@@ -739,7 +746,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
* consider this as bad access if we take a SLB miss
* on an address above addr limit.
*/
- if (ea >= mm->context.slb_addr_limit)
+ if (ea >= mm_ctx_slb_addr_limit(&mm->context))
return -EFAULT;
context = get_user_context(&mm->context, ea);
@@ -761,7 +768,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
long do_slb_fault(struct pt_regs *regs, unsigned long ea)
{
- unsigned long id = REGION_ID(ea);
+ unsigned long id = get_region_id(ea);
/* IRQs are not reconciled here, so can't check irqs_disabled */
VM_WARN_ON(mfmsr() & MSR_EE);
@@ -784,7 +791,7 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea)
* first class kernel code. But for performance it's probably nicer
* if they go via fast_exception_return too.
*/
- if (id >= KERNEL_REGION_ID) {
+ if (id >= LINEAR_MAP_REGION_ID) {
long err;
#ifdef CONFIG_DEBUG_VM
/* Catch recursive kernel SLB faults. */
diff --git a/arch/powerpc/mm/subpage-prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 5e4178790dee..473dd430e306 100644
--- a/arch/powerpc/mm/subpage-prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -25,10 +25,13 @@
*/
void subpage_prot_free(struct mm_struct *mm)
{
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt = mm_ctx_subpage_prot(&mm->context);
unsigned long i, j, addr;
u32 **p;
+ if (!spt)
+ return;
+
for (i = 0; i < 4; ++i) {
if (spt->low_prot[i]) {
free_page((unsigned long)spt->low_prot[i]);
@@ -48,13 +51,7 @@ void subpage_prot_free(struct mm_struct *mm)
free_page((unsigned long)p);
}
spt->maxaddr = 0;
-}
-
-void subpage_prot_init_new_context(struct mm_struct *mm)
-{
- struct subpage_prot_table *spt = &mm->context.spt;
-
- memset(spt, 0, sizeof(*spt));
+ kfree(spt);
}
static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
@@ -93,13 +90,18 @@ static void hpte_flush_range(struct mm_struct *mm, unsigned long addr,
static void subpage_prot_clear(unsigned long addr, unsigned long len)
{
struct mm_struct *mm = current->mm;
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt;
u32 **spm, *spp;
unsigned long i;
size_t nw;
unsigned long next, limit;
down_write(&mm->mmap_sem);
+
+ spt = mm_ctx_subpage_prot(&mm->context);
+ if (!spt)
+ goto err_out;
+
limit = addr + len;
if (limit > spt->maxaddr)
limit = spt->maxaddr;
@@ -127,6 +129,8 @@ static void subpage_prot_clear(unsigned long addr, unsigned long len)
/* now flush any existing HPTEs for the range */
hpte_flush_range(mm, addr, nw);
}
+
+err_out:
up_write(&mm->mmap_sem);
}
@@ -189,7 +193,7 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
unsigned long, len, u32 __user *, map)
{
struct mm_struct *mm = current->mm;
- struct subpage_prot_table *spt = &mm->context.spt;
+ struct subpage_prot_table *spt;
u32 **spm, *spp;
unsigned long i;
size_t nw;
@@ -218,6 +222,21 @@ SYSCALL_DEFINE3(subpage_prot, unsigned long, addr,
return -EFAULT;
down_write(&mm->mmap_sem);
+
+ spt = mm_ctx_subpage_prot(&mm->context);
+ if (!spt) {
+ /*
+ * Allocate subpage prot table if not already done.
+ * Do this with mmap_sem held
+ */
+ spt = kzalloc(sizeof(struct subpage_prot_table), GFP_KERNEL);
+ if (!spt) {
+ err = -ENOMEM;
+ goto out;
+ }
+ mm->context.hash_context->spt = spt;
+ }
+
subpage_mark_vma_nohuge(mm, addr, len);
for (limit = addr + len; addr < limit; addr = next) {
next = pmd_addr_end(addr, limit);
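Because the table is now allocated lazily on the first subpage_prot() call, processes that never use the interface no longer pay for it at context creation. A hedged userspace sketch of the call that triggers the allocation (the map argument keeps its existing meaning, two protection bits per 4k subpage of each 64k page):

/* Userspace sketch; __NR_subpage_prot is powerpc-specific. */
#include <unistd.h>
#include <sys/syscall.h>
#include <stdint.h>

static long set_subpage_prot(void *addr, unsigned long len, uint32_t *map)
{
	return syscall(__NR_subpage_prot, addr, len, map);
}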
diff --git a/arch/powerpc/mm/vphn.c b/arch/powerpc/mm/book3s64/vphn.c
index f83044faac23..0ee7734afb50 100644
--- a/arch/powerpc/mm/vphn.c
+++ b/arch/powerpc/mm/book3s64/vphn.c
@@ -42,7 +42,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
u16 new = be16_to_cpup(field++);
if (is_32bit) {
- /* Let's concatenate the 16 bits of this field to the
+ /*
+ * Let's concatenate the 16 bits of this field to the
* 15 lower bits of the previous field
*/
unpacked[++nr_assoc_doms] =
@@ -56,7 +57,8 @@ int vphn_unpack_associativity(const long *packed, __be32 *unpacked)
unpacked[++nr_assoc_doms] =
cpu_to_be32(new & VPHN_FIELD_MASK);
} else {
- /* Data is in the lower 15 bits of this field
+ /*
+ * Data is in the lower 15 bits of this field
* concatenated with the next 16 bit field
*/
last = new;
diff --git a/arch/powerpc/mm/vphn.h b/arch/powerpc/mm/book3s64/vphn.h
index f9ffdb3942fc..f0b93c2dd578 100644
--- a/arch/powerpc/mm/vphn.h
+++ b/arch/powerpc/mm/book3s64/vphn.h
@@ -2,8 +2,7 @@
#ifndef _ARCH_POWERPC_MM_VPHN_H_
#define _ARCH_POWERPC_MM_VPHN_H_
-/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers.
- */
+/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */
#define VPHN_REGISTER_COUNT 6
/*
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index c8da352e8686..f137286740cb 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -105,7 +105,7 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
u64 vsid, vsidkey;
int psize, ssize;
- switch (REGION_ID(ea)) {
+ switch (get_region_id(ea)) {
case USER_REGION_ID:
pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
if (mm == NULL)
@@ -117,16 +117,20 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
break;
case VMALLOC_REGION_ID:
pr_devel("%s: 0x%llx -- VMALLOC_REGION_ID\n", __func__, ea);
- if (ea < VMALLOC_END)
- psize = mmu_vmalloc_psize;
- else
- psize = mmu_io_psize;
+ psize = mmu_vmalloc_psize;
ssize = mmu_kernel_ssize;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
vsidkey = SLB_VSID_KERNEL;
break;
- case KERNEL_REGION_ID:
- pr_devel("%s: 0x%llx -- KERNEL_REGION_ID\n", __func__, ea);
+ case IO_REGION_ID:
+ pr_devel("%s: 0x%llx -- IO_REGION_ID\n", __func__, ea);
+ psize = mmu_io_psize;
+ ssize = mmu_kernel_ssize;
+ vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
+ vsidkey = SLB_VSID_KERNEL;
+ break;
+ case LINEAR_MAP_REGION_ID:
+ pr_devel("%s: 0x%llx -- LINEAR_MAP_REGION_ID\n", __func__, ea);
psize = mmu_linear_psize;
ssize = mmu_kernel_ssize;
vsid = get_kernel_vsid(ea, mmu_kernel_ssize);
diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c
index b5d2658c26af..2f6154b76328 100644
--- a/arch/powerpc/mm/dma-noncoherent.c
+++ b/arch/powerpc/mm/dma-noncoherent.c
@@ -36,7 +36,7 @@
#include <asm/tlbflush.h>
#include <asm/dma.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/*
* This address range defaults to a value that is safe for all
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 3f1803672c9b..641891df2046 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -366,8 +366,10 @@ static void __init init_drmem_v1_lmbs(const __be32 *prop)
if (!drmem_info->lmbs)
return;
- for_each_drmem_lmb(lmb)
+ for_each_drmem_lmb(lmb) {
read_drconf_v1_cell(lmb, &prop);
+ lmb_set_nid(lmb);
+ }
}
static void __init init_drmem_v2_lmbs(const __be32 *prop)
@@ -412,6 +414,8 @@ static void __init init_drmem_v2_lmbs(const __be32 *prop)
lmb->aa_index = dr_cell.aa_index;
lmb->flags = dr_cell.flags;
+
+ lmb_set_nid(lmb);
}
}
}
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 887f11bcf330..b5d3578d9f65 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -44,6 +44,7 @@
#include <asm/mmu_context.h>
#include <asm/siginfo.h>
#include <asm/debug.h>
+#include <asm/kup.h>
static inline bool notify_page_fault(struct pt_regs *regs)
{
@@ -223,19 +224,46 @@ static int mm_fault_error(struct pt_regs *regs, unsigned long addr,
}
/* Is this a bad kernel fault ? */
-static bool bad_kernel_fault(bool is_exec, unsigned long error_code,
- unsigned long address)
+static bool bad_kernel_fault(struct pt_regs *regs, unsigned long error_code,
+ unsigned long address, bool is_write)
{
+ int is_exec = TRAP(regs) == 0x400;
+
/* NX faults set DSISR_PROTFAULT on the 8xx, DSISR_NOEXEC_OR_G on others */
if (is_exec && (error_code & (DSISR_NOEXEC_OR_G | DSISR_KEYFAULT |
DSISR_PROTFAULT))) {
- printk_ratelimited(KERN_CRIT "kernel tried to execute"
- " exec-protected page (%lx) -"
- "exploit attempt? (uid: %d)\n",
- address, from_kuid(&init_user_ns,
- current_uid()));
+ pr_crit_ratelimited("kernel tried to execute %s page (%lx) - exploit attempt? (uid: %d)\n",
+ address >= TASK_SIZE ? "exec-protected" : "user",
+ address,
+ from_kuid(&init_user_ns, current_uid()));
+
+ // Kernel exec fault is always bad
+ return true;
}
- return is_exec || (address >= TASK_SIZE);
+
+ if (!is_exec && address < TASK_SIZE && (error_code & DSISR_PROTFAULT) &&
+ !search_exception_tables(regs->nip)) {
+ pr_crit_ratelimited("Kernel attempted to access user page (%lx) - exploit attempt? (uid: %d)\n",
+ address,
+ from_kuid(&init_user_ns, current_uid()));
+ }
+
+ // Kernel fault on kernel address is bad
+ if (address >= TASK_SIZE)
+ return true;
+
+ // Fault on user outside of certain regions (eg. copy_tofrom_user()) is bad
+ if (!search_exception_tables(regs->nip))
+ return true;
+
+ // Read/write fault in a valid region (the exception table search passed
+ // above), but blocked by KUAP is bad, it can never succeed.
+ if (bad_kuap_fault(regs, is_write))
+ return true;
+
+ // What's left? Kernel fault on user in well defined regions (extable
+ // matched), and allowed by KUAP in the faulting context.
+ return false;
}
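bad_kernel_fault() defers to bad_kuap_fault() to tell a legitimate uaccess (window opened through the KUP helpers) from an access the AMR will never allow. A hedged sketch of what that check amounts to on radix, assuming the regs->kuap value saved on exception entry by this series:

static inline bool bad_kuap_fault_sketch(struct pt_regs *regs, bool is_write)
{
	if (!mmu_has_feature(MMU_FTR_RADIX_KUAP))
		return false;
	/* If the saved AMR still blocks this direction, retrying cannot succeed. */
	return regs->kuap & (is_write ? AMR_KUAP_BLOCK_WRITE : AMR_KUAP_BLOCK_READ);
}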
static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
@@ -455,9 +483,10 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
/*
* The kernel should never take an execute fault nor should it
- * take a page fault to a kernel address.
+ * take a page fault to a kernel address or a page fault to a user
+ * address outside of dedicated places
*/
- if (unlikely(!is_user && bad_kernel_fault(is_exec, error_code, address)))
+ if (unlikely(!is_user && bad_kernel_fault(regs, error_code, address, is_write)))
return SIGSEGV;
/*
diff --git a/arch/powerpc/mm/highmem.c b/arch/powerpc/mm/highmem.c
index 82a0e37557a5..320c1672b2ae 100644
--- a/arch/powerpc/mm/highmem.c
+++ b/arch/powerpc/mm/highmem.c
@@ -43,9 +43,7 @@ void *kmap_atomic_prot(struct page *page, pgprot_t prot)
type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
-#ifdef CONFIG_DEBUG_HIGHMEM
- BUG_ON(!pte_none(*(kmap_pte-idx)));
-#endif
+ WARN_ON(IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !pte_none(*(kmap_pte - idx)));
__set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot), 1);
local_flush_tlb_page(NULL, vaddr);
@@ -56,7 +54,6 @@ EXPORT_SYMBOL(kmap_atomic_prot);
void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- int type __maybe_unused;
if (vaddr < __fix_to_virt(FIX_KMAP_END)) {
pagefault_enable();
@@ -64,14 +61,12 @@ void __kunmap_atomic(void *kvaddr)
return;
}
- type = kmap_atomic_idx();
-
-#ifdef CONFIG_DEBUG_HIGHMEM
- {
+ if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM)) {
+ int type = kmap_atomic_idx();
unsigned int idx;
idx = type + KM_TYPE_NR * smp_processor_id();
- BUG_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+ WARN_ON(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
/*
* force other mappings to Oops if they'll try to access
@@ -80,7 +75,6 @@ void __kunmap_atomic(void *kvaddr)
pte_clear(&init_mm, vaddr, kmap_pte-idx);
local_flush_tlb_page(NULL, vaddr);
}
-#endif
kmap_atomic_idx_pop();
pagefault_enable();
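The BUG_ON() to WARN_ON() and IS_ENABLED() conversions above do not change how the highmem API is used; callers still pair the map and unmap as before, for example:

/* Generic usage of the atomic kmap API touched above (not part of this patch). */
static void copy_into_highmem_page(struct page *page, const void *buf)
{
	void *dst = kmap_atomic(page);

	memcpy(dst, buf, PAGE_SIZE);
	kunmap_atomic(dst);
}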
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 9e732bb2c84a..c5c9ff2d7afc 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -26,20 +26,8 @@
#include <asm/hugetlb.h>
#include <asm/pte-walk.h>
-
-#ifdef CONFIG_HUGETLB_PAGE
-
-#define PAGE_SHIFT_64K 16
-#define PAGE_SHIFT_512K 19
-#define PAGE_SHIFT_8M 23
-#define PAGE_SHIFT_16M 24
-#define PAGE_SHIFT_16G 34
-
bool hugetlb_disabled = false;
-unsigned int HPAGE_SHIFT;
-EXPORT_SYMBOL(HPAGE_SHIFT);
-
#define hugepd_none(hpd) (hpd_val(hpd) == 0)
#define PTE_T_ORDER (__builtin_ffs(sizeof(pte_t)) - __builtin_ffs(sizeof(void *)))
@@ -98,19 +86,7 @@ static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
for (i = 0; i < num_hugepd; i++, hpdp++) {
if (unlikely(!hugepd_none(*hpdp)))
break;
- else {
-#ifdef CONFIG_PPC_BOOK3S_64
- *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS |
- (shift_to_mmu_psize(pshift) << 2));
-#elif defined(CONFIG_PPC_8xx)
- *hpdp = __hugepd(__pa(new) | _PMD_USER |
- (pshift == PAGE_SHIFT_8M ? _PMD_PAGE_8M :
- _PMD_PAGE_512K) | _PMD_PRESENT);
-#else
- /* We use the old format for PPC_FSL_BOOK3E */
- *hpdp = __hugepd(((unsigned long)new & ~PD_HUGE) | pshift);
-#endif
- }
+ hugepd_populate(hpdp, new, pshift);
}
/* If we bailed from the for loop early, an error occurred, clean up */
if (i < num_hugepd) {
@@ -250,7 +226,7 @@ int __init alloc_bootmem_huge_page(struct hstate *h)
return __alloc_bootmem_huge_page(h);
}
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
+#ifndef CONFIG_PPC_BOOK3S_64
#define HUGEPD_FREELIST_SIZE \
((PAGE_SIZE - sizeof(struct hugepd_freelist)) / sizeof(pte_t))
@@ -542,23 +518,6 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
return (__boundary - 1 < end - 1) ? __boundary : end;
}
-int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned pdshift,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- pte_t *ptep;
- unsigned long sz = 1UL << hugepd_shift(hugepd);
- unsigned long next;
-
- ptep = hugepte_offset(hugepd, addr, pdshift);
- do {
- next = hugepte_addr_end(addr, end, sz);
- if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
- return 0;
- } while (ptep++, addr = next, addr != end);
-
- return 1;
-}
-
#ifdef CONFIG_PPC_MM_SLICES
unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
@@ -578,24 +537,15 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
{
-#ifdef CONFIG_PPC_MM_SLICES
/* With radix we don't use slice, so derive it from vma*/
- if (!radix_enabled()) {
+ if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
return 1UL << mmu_psize_to_shift(psize);
}
-#endif
return vma_kernel_pagesize(vma);
}
-static inline bool is_power_of_4(unsigned long x)
-{
- if (is_power_of_2(x))
- return (__ilog2(x) % 2) ? false : true;
- return false;
-}
-
static int __init add_huge_page_size(unsigned long long size)
{
int shift = __ffs(size);
@@ -603,37 +553,13 @@ static int __init add_huge_page_size(unsigned long long size)
/* Check that it is a page size supported by the hardware and
* that it fits within pagetable and slice limits. */
- if (size <= PAGE_SIZE)
- return -EINVAL;
-#if defined(CONFIG_PPC_FSL_BOOK3E)
- if (!is_power_of_4(size))
+ if (size <= PAGE_SIZE || !is_power_of_2(size))
return -EINVAL;
-#elif !defined(CONFIG_PPC_8xx)
- if (!is_power_of_2(size) || (shift > SLICE_HIGH_SHIFT))
- return -EINVAL;
-#endif
- if ((mmu_psize = shift_to_mmu_psize(shift)) < 0)
+ mmu_psize = check_and_get_huge_psize(size);
+ if (mmu_psize < 0)
return -EINVAL;
-#ifdef CONFIG_PPC_BOOK3S_64
- /*
- * We need to make sure that for different page sizes reported by
- * firmware we only add hugetlb support for page sizes that can be
- * supported by linux page table layout.
- * For now we have
- * Radix: 2M and 1G
- * Hash: 16M and 16G
- */
- if (radix_enabled()) {
- if (mmu_psize != MMU_PAGE_2M && mmu_psize != MMU_PAGE_1G)
- return -EINVAL;
- } else {
- if (mmu_psize != MMU_PAGE_16M && mmu_psize != MMU_PAGE_16G)
- return -EINVAL;
- }
-#endif
-
BUG_ON(mmu_psize_defs[mmu_psize].shift != shift);
/* Return if huge page size has already been setup */
@@ -669,10 +595,10 @@ static int __init hugetlbpage_init(void)
return 0;
}
-#if !defined(CONFIG_PPC_FSL_BOOK3E) && !defined(CONFIG_PPC_8xx)
- if (!radix_enabled() && !mmu_has_feature(MMU_FTR_16M_PAGE))
+ if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled() &&
+ !mmu_has_feature(MMU_FTR_16M_PAGE))
return -ENODEV;
-#endif
+
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
unsigned shift;
unsigned pdshift;
@@ -710,29 +636,13 @@ static int __init hugetlbpage_init(void)
pgtable_cache_add(PTE_INDEX_SIZE);
else if (pdshift > shift)
pgtable_cache_add(pdshift - shift);
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
- else
+ else if (IS_ENABLED(CONFIG_PPC_FSL_BOOK3E) || IS_ENABLED(CONFIG_PPC_8xx))
pgtable_cache_add(PTE_T_ORDER);
-#endif
}
-#if defined(CONFIG_PPC_FSL_BOOK3E) || defined(CONFIG_PPC_8xx)
- /* Default hpage size = 4M on FSL_BOOK3E and 512k on 8xx */
- if (mmu_psize_defs[MMU_PAGE_4M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_4M].shift;
- else if (mmu_psize_defs[MMU_PAGE_512K].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_512K].shift;
-#else
- /* Set default large page size. Currently, we pick 16M or 1M
- * depending on what is available
- */
- if (mmu_psize_defs[MMU_PAGE_16M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
- else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
- else if (mmu_psize_defs[MMU_PAGE_2M].shift)
- HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_2M].shift;
-#endif
+ if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
+ hugetlbpage_init_default();
+
return 0;
}
@@ -756,113 +666,8 @@ void flush_dcache_icache_hugepage(struct page *page)
}
}
-#endif /* CONFIG_HUGETLB_PAGE */
-
-/*
- * We have 4 cases for pgds and pmds:
- * (1) invalid (all zeroes)
- * (2) pointer to next table, as normal; bottom 6 bits == 0
- * (3) leaf pte for huge page _PAGE_PTE set
- * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
- *
- * So long as we atomically load page table pointers we are safe against teardown,
- * we can follow the address down to the the page and take a ref on it.
- * This function need to be called with interrupts disabled. We use this variant
- * when we have MSR[EE] = 0 but the paca->irq_soft_mask = IRQS_ENABLED
- */
-pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
- bool *is_thp, unsigned *hpage_shift)
-{
- pgd_t pgd, *pgdp;
- pud_t pud, *pudp;
- pmd_t pmd, *pmdp;
- pte_t *ret_pte;
- hugepd_t *hpdp = NULL;
- unsigned pdshift = PGDIR_SHIFT;
-
- if (hpage_shift)
- *hpage_shift = 0;
-
- if (is_thp)
- *is_thp = false;
-
- pgdp = pgdir + pgd_index(ea);
- pgd = READ_ONCE(*pgdp);
- /*
- * Always operate on the local stack value. This make sure the
- * value don't get updated by a parallel THP split/collapse,
- * page fault or a page unmap. The return pte_t * is still not
- * stable. So should be checked there for above conditions.
- */
- if (pgd_none(pgd))
- return NULL;
- else if (pgd_huge(pgd)) {
- ret_pte = (pte_t *) pgdp;
- goto out;
- } else if (is_hugepd(__hugepd(pgd_val(pgd))))
- hpdp = (hugepd_t *)&pgd;
- else {
- /*
- * Even if we end up with an unmap, the pgtable will not
- * be freed, because we do an rcu free and here we are
- * irq disabled
- */
- pdshift = PUD_SHIFT;
- pudp = pud_offset(&pgd, ea);
- pud = READ_ONCE(*pudp);
-
- if (pud_none(pud))
- return NULL;
- else if (pud_huge(pud)) {
- ret_pte = (pte_t *) pudp;
- goto out;
- } else if (is_hugepd(__hugepd(pud_val(pud))))
- hpdp = (hugepd_t *)&pud;
- else {
- pdshift = PMD_SHIFT;
- pmdp = pmd_offset(&pud, ea);
- pmd = READ_ONCE(*pmdp);
- /*
- * A hugepage collapse is captured by pmd_none, because
- * it mark the pmd none and do a hpte invalidate.
- */
- if (pmd_none(pmd))
- return NULL;
-
- if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
- if (is_thp)
- *is_thp = true;
- ret_pte = (pte_t *) pmdp;
- goto out;
- }
- /*
- * pmd_large check below will handle the swap pmd pte
- * we need to do both the check because they are config
- * dependent.
- */
- if (pmd_huge(pmd) || pmd_large(pmd)) {
- ret_pte = (pte_t *) pmdp;
- goto out;
- } else if (is_hugepd(__hugepd(pmd_val(pmd))))
- hpdp = (hugepd_t *)&pmd;
- else
- return pte_offset_kernel(&pmd, ea);
- }
- }
- if (!hpdp)
- return NULL;
-
- ret_pte = hugepte_offset(*hpdp, ea, pdshift);
- pdshift = hugepd_shift(*hpdp);
-out:
- if (hpage_shift)
- *hpage_shift = pdshift;
- return ret_pte;
-}
-EXPORT_SYMBOL_GPL(__find_linux_pte);
-
-int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
+static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+ unsigned long end, int write, struct page **pages, int *nr)
{
unsigned long pte_end;
struct page *head, *page;
@@ -908,3 +713,20 @@ int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
return 1;
}
+
+int gup_huge_pd(hugepd_t hugepd, unsigned long addr, unsigned int pdshift,
+ unsigned long end, int write, struct page **pages, int *nr)
+{
+ pte_t *ptep;
+ unsigned long sz = 1UL << hugepd_shift(hugepd);
+ unsigned long next;
+
+ ptep = hugepte_offset(hugepd, addr, pdshift);
+ do {
+ next = hugepte_addr_end(addr, end, sz);
+ if (!gup_hugepte(ptep, sz, addr, end, write, pages, nr))
+ return 0;
+ } while (ptep++, addr = next, addr != end);
+
+ return 1;
+}
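
For reference, the gup_huge_pd() hunk above walks a huge page directory one huge PTE at a time, clamping every step to the end of the requested range with hugepte_addr_end(). A minimal userspace sketch of just that address-stepping logic follows; the 16M page size and the start/end addresses are made-up values for illustration.

#include <stdio.h>

/* Clamp the next step to either the next sz-aligned boundary or 'end'. */
static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
				      unsigned long sz)
{
	unsigned long boundary = (addr + sz) & ~(sz - 1);

	return (boundary - 1 < end - 1) ? boundary : end;
}

int main(void)
{
	unsigned long sz = 1UL << 24;		/* assume a 16M huge page size */
	unsigned long addr = 0x10000000UL;	/* sz-aligned start, for illustration */
	unsigned long end = addr + 3 * sz + 0x1000;
	unsigned long next;
	int idx = 0;

	do {
		next = hugepte_addr_end(addr, end, sz);
		printf("hugepte[%d]: 0x%lx - 0x%lx\n", idx++, addr, next);
	} while (addr = next, addr != end);

	return 0;
}
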
diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c
index 1e6910eb70ed..3bcae9e5e954 100644
--- a/arch/powerpc/mm/init-common.c
+++ b/arch/powerpc/mm/init-common.c
@@ -24,6 +24,32 @@
#include <linux/string.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
+#include <asm/kup.h>
+
+static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
+static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
+
+static int __init parse_nosmep(char *p)
+{
+ disable_kuep = true;
+ pr_warn("Disabling Kernel Userspace Execution Prevention\n");
+ return 0;
+}
+early_param("nosmep", parse_nosmep);
+
+static int __init parse_nosmap(char *p)
+{
+ disable_kuap = true;
+ pr_warn("Disabling Kernel Userspace Access Protection\n");
+ return 0;
+}
+early_param("nosmap", parse_nosmap);
+
+void __ref setup_kup(void)
+{
+ setup_kuep(disable_kuep);
+ setup_kuap(disable_kuap);
+}
#define CTOR(shift) static void ctor_##shift(void *addr) \
{ \
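
The init-common.c hunk above introduces the nosmep/nosmap kill switches: each flag defaults to "disabled unless the feature was built in", an early boot parameter can force it off, and setup_kup() simply forwards the result to the platform hooks. A standalone sketch of that pattern, with a fake command line and CONFIG value standing in for the real early_param() machinery:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Assumed build-time setting, for illustration only. */
#define CONFIG_PPC_KUAP 1

/* Default: feature is only considered if it was built in. */
static bool disable_kuap = !CONFIG_PPC_KUAP;

/* Stand-in for the early_param("nosmap", ...) hook. */
static void parse_cmdline(const char *cmdline)
{
	if (strstr(cmdline, "nosmap")) {
		disable_kuap = true;
		printf("Disabling Kernel Userspace Access Protection\n");
	}
}

/* Stand-in for the platform setup_kuap() hook. */
static void setup_kuap(bool disabled)
{
	if (disabled)
		return;
	printf("Activating Kernel Userspace Access Protection\n");
}

int main(void)
{
	parse_cmdline("root=/dev/sda1 nosmap");
	setup_kuap(disable_kuap);
	return 0;
}
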
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 41a3513cadc9..c3121b6c8cbd 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -45,8 +45,10 @@
#include <asm/tlb.h>
#include <asm/sections.h>
#include <asm/hugetlb.h>
+#include <asm/kup.h>
+#include <asm/kasan.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#if defined(CONFIG_KERNEL_START_BOOL) || defined(CONFIG_LOWMEM_SIZE_BOOL)
/* The amount of lowmem must be within 0xF0000000 - KERNELBASE. */
@@ -178,6 +180,10 @@ void __init MMU_init(void)
btext_unmap();
#endif
+ kasan_mmu_init();
+
+ setup_kup();
+
/* Shortly after that, the entire linear mapping will be available */
memblock_set_current_limit(lowmem_end_addr);
}
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a4c155af1597..45b02fa11cd8 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -66,7 +66,7 @@
#include <asm/iommu.h>
#include <asm/vdso.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
phys_addr_t memstart_addr = ~0;
EXPORT_SYMBOL_GPL(memstart_addr);
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
new file mode 100644
index 000000000000..6577897673dd
--- /dev/null
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE := n
+
+obj-$(CONFIG_PPC32) += kasan_init_32.o
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
new file mode 100644
index 000000000000..0d62be3cba47
--- /dev/null
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/kasan.h>
+#include <linux/printk.h>
+#include <linux/memblock.h>
+#include <linux/sched/task.h>
+#include <linux/vmalloc.h>
+#include <asm/pgalloc.h>
+#include <asm/code-patching.h>
+#include <mm/mmu_decl.h>
+
+static void kasan_populate_pte(pte_t *ptep, pgprot_t prot)
+{
+ unsigned long va = (unsigned long)kasan_early_shadow_page;
+ phys_addr_t pa = __pa(kasan_early_shadow_page);
+ int i;
+
+ for (i = 0; i < PTRS_PER_PTE; i++, ptep++)
+ __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0);
+}
+
+static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end)
+{
+ pmd_t *pmd;
+ unsigned long k_cur, k_next;
+
+ pmd = pmd_offset(pud_offset(pgd_offset_k(k_start), k_start), k_start);
+
+ for (k_cur = k_start; k_cur != k_end; k_cur = k_next, pmd++) {
+ pte_t *new;
+
+ k_next = pgd_addr_end(k_cur, k_end);
+ if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte)
+ continue;
+
+ new = pte_alloc_one_kernel(&init_mm);
+
+ if (!new)
+ return -ENOMEM;
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ kasan_populate_pte(new, PAGE_READONLY);
+ else
+ kasan_populate_pte(new, PAGE_KERNEL_RO);
+ pmd_populate_kernel(&init_mm, pmd, new);
+ }
+ return 0;
+}
+
+static void __ref *kasan_get_one_page(void)
+{
+ if (slab_is_available())
+ return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+
+ return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static int __ref kasan_init_region(void *start, size_t size)
+{
+ unsigned long k_start = (unsigned long)kasan_mem_to_shadow(start);
+ unsigned long k_end = (unsigned long)kasan_mem_to_shadow(start + size);
+ unsigned long k_cur;
+ int ret;
+ void *block = NULL;
+
+ ret = kasan_init_shadow_page_tables(k_start, k_end);
+ if (ret)
+ return ret;
+
+ if (!slab_is_available())
+ block = memblock_alloc(k_end - k_start, PAGE_SIZE);
+
+ for (k_cur = k_start; k_cur < k_end; k_cur += PAGE_SIZE) {
+ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(k_cur), k_cur), k_cur);
+ void *va = block ? block + k_cur - k_start : kasan_get_one_page();
+ pte_t pte = pfn_pte(PHYS_PFN(__pa(va)), PAGE_KERNEL);
+
+ if (!va)
+ return -ENOMEM;
+
+ __set_pte_at(&init_mm, k_cur, pte_offset_kernel(pmd, k_cur), pte, 0);
+ }
+ flush_tlb_kernel_range(k_start, k_end);
+ return 0;
+}
+
+static void __init kasan_remap_early_shadow_ro(void)
+{
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ kasan_populate_pte(kasan_early_shadow_pte, PAGE_READONLY);
+ else
+ kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL_RO);
+
+ flush_tlb_kernel_range(KASAN_SHADOW_START, KASAN_SHADOW_END);
+}
+
+void __init kasan_mmu_init(void)
+{
+ int ret;
+ struct memblock_region *reg;
+
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE)) {
+ ret = kasan_init_shadow_page_tables(KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ if (ret)
+ panic("kasan: kasan_init_shadow_page_tables() failed");
+ }
+
+ for_each_memblock(memory, reg) {
+ phys_addr_t base = reg->base;
+ phys_addr_t top = min(base + reg->size, total_lowmem);
+
+ if (base >= top)
+ continue;
+
+ ret = kasan_init_region(__va(base), top - base);
+ if (ret)
+ panic("kasan: kasan_init_region() failed");
+ }
+}
+
+void __init kasan_init(void)
+{
+ kasan_remap_early_shadow_ro();
+
+ clear_page(kasan_early_shadow_page);
+
+ /* At this point kasan is fully initialized. Enable error messages */
+ init_task.kasan_depth = 0;
+ pr_info("KASAN init done\n");
+}
+
+#ifdef CONFIG_MODULES
+void *module_alloc(unsigned long size)
+{
+ void *base = vmalloc_exec(size);
+
+ if (!base)
+ return NULL;
+
+ if (!kasan_init_region(base, size))
+ return base;
+
+ vfree(base);
+
+ return NULL;
+}
+#endif
+
+#ifdef CONFIG_PPC_BOOK3S_32
+u8 __initdata early_hash[256 << 10] __aligned(256 << 10) = {0};
+
+static void __init kasan_early_hash_table(void)
+{
+ modify_instruction_site(&patch__hash_page_A0, 0xffff, __pa(early_hash) >> 16);
+ modify_instruction_site(&patch__flush_hash_A0, 0xffff, __pa(early_hash) >> 16);
+
+ Hash = (struct hash_pte *)early_hash;
+}
+#else
+static void __init kasan_early_hash_table(void) {}
+#endif
+
+void __init kasan_early_init(void)
+{
+ unsigned long addr = KASAN_SHADOW_START;
+ unsigned long end = KASAN_SHADOW_END;
+ unsigned long next;
+ pmd_t *pmd = pmd_offset(pud_offset(pgd_offset_k(addr), addr), addr);
+
+ BUILD_BUG_ON(KASAN_SHADOW_START & ~PGDIR_MASK);
+
+ kasan_populate_pte(kasan_early_shadow_pte, PAGE_KERNEL);
+
+ do {
+ next = pgd_addr_end(addr, end);
+ pmd_populate_kernel(&init_mm, pmd, kasan_early_shadow_pte);
+ } while (pmd++, addr = next, addr != end);
+
+ if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+ kasan_early_hash_table();
+}
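
The new kasan_init_32.c maps one byte of shadow memory for every eight bytes of lowmem: kasan_init_region() converts a region's virtual range into its shadow range and backs it with freshly allocated pages. A rough sketch of that address arithmetic; the shadow offset and PAGE_OFFSET values used here are assumptions for illustration, the real constants come from asm/kasan.h and the platform layout.

#include <stdio.h>

/* Assumed values, for illustration only. */
#define KASAN_SHADOW_SCALE_SHIFT	3
#define KASAN_SHADOW_OFFSET		0xe0000000UL

static unsigned long kasan_mem_to_shadow(unsigned long addr)
{
	return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

int main(void)
{
	unsigned long start = 0xc0000000UL;	/* assume PAGE_OFFSET */
	unsigned long size = 256UL << 20;	/* 256MB of lowmem */

	printf("region : 0x%08lx - 0x%08lx\n", start, start + size);
	printf("shadow : 0x%08lx - 0x%08lx\n",
	       kasan_mem_to_shadow(start),
	       kasan_mem_to_shadow(start + size));
	printf("ratio  : 1 shadow byte per %d bytes\n",
	       1 << KASAN_SHADOW_SCALE_SHIFT);
	return 0;
}
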
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f6787f90e158..cd525d709072 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -54,7 +54,7 @@
#include <asm/swiotlb.h>
#include <asm/rtas.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#ifndef CPU_FTR_COHERENT_ICACHE
#define CPU_FTR_COHERENT_ICACHE 0 /* XXX for now */
@@ -109,8 +109,8 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
return -ENODEV;
}
-int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
- bool want_memblock)
+int __ref arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *altmap,
+ bool want_memblock)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -131,8 +131,8 @@ int __meminit arch_add_memory(int nid, u64 start, u64 size, struct vmem_altmap *
}
#ifdef CONFIG_MEMORY_HOTREMOVE
-int __meminit arch_remove_memory(int nid, u64 start, u64 size,
- struct vmem_altmap *altmap)
+int __ref arch_remove_memory(int nid, u64 start, u64 size,
+ struct vmem_altmap *altmap)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
@@ -161,7 +161,8 @@ int __meminit arch_remove_memory(int nid, u64 start, u64 size,
*/
vm_unmap_aliases();
- resize_hpt_for_hotplug(memblock_phys_mem_size());
+ if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
+ pr_warn("Hash collision while resizing HPT\n");
return ret;
}
@@ -309,6 +310,10 @@ void __init mem_init(void)
mem_init_print_info(NULL);
#ifdef CONFIG_PPC32
pr_info("Kernel virtual memory layout:\n");
+#ifdef CONFIG_KASAN
+ pr_info(" * 0x%08lx..0x%08lx : kasan shadow mem\n",
+ KASAN_SHADOW_START, KASAN_SHADOW_END);
+#endif
pr_info(" * 0x%08lx..0x%08lx : fixmap\n", FIXADDR_START, FIXADDR_TOP);
#ifdef CONFIG_HIGHMEM
pr_info(" * 0x%08lx..0x%08lx : highmem PTEs\n",
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index bb52320b7369..6b049d82b98a 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -98,7 +98,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
switch_mmu_context(prev, next, tsk);
}
-#ifdef CONFIG_PPC32
+#ifndef CONFIG_PPC_BOOK3S_64
void arch_exit_mmap(struct mm_struct *mm)
{
void *frag = pte_frag_get(&mm->context);
diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h
index 74ff61dabcb1..7bac0aa2026a 100644
--- a/arch/powerpc/mm/mmu_decl.h
+++ b/arch/powerpc/mm/mmu_decl.h
@@ -83,6 +83,8 @@ static inline void _tlbivax_bcast(unsigned long address, unsigned int pid,
}
#endif
+static inline void print_system_hash_info(void) {}
+
#else /* CONFIG_PPC_MMU_NOHASH */
extern void hash_preload(struct mm_struct *mm, unsigned long ea,
@@ -92,6 +94,8 @@ extern void hash_preload(struct mm_struct *mm, unsigned long ea,
extern void _tlbie(unsigned long address);
extern void _tlbia(void);
+void print_system_hash_info(void);
+
#endif /* CONFIG_PPC_MMU_NOHASH */
#ifdef CONFIG_PPC32
@@ -104,8 +108,8 @@ extern int __map_without_bats;
extern unsigned int rtas_data, rtas_size;
struct hash_pte;
-extern struct hash_pte *Hash, *Hash_end;
-extern unsigned long Hash_size, Hash_mask;
+extern struct hash_pte *Hash;
+extern u8 early_hash[];
#endif /* CONFIG_PPC32 */
@@ -130,6 +134,7 @@ extern void wii_memory_fixups(void);
*/
#ifdef CONFIG_PPC32
extern void MMU_init_hw(void);
+void MMU_init_hw_patch(void);
unsigned long mmu_mapin_ram(unsigned long base, unsigned long top);
#endif
diff --git a/arch/powerpc/mm/40x_mmu.c b/arch/powerpc/mm/nohash/40x.c
index b9cf6f8764b0..460459b6f53e 100644
--- a/arch/powerpc/mm/40x_mmu.c
+++ b/arch/powerpc/mm/nohash/40x.c
@@ -49,7 +49,7 @@
#include <asm/machdep.h>
#include <asm/setup.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
extern int __map_without_ltlbs;
/*
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/nohash/44x.c
index aad127acdbaa..c07983ebc02e 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/nohash/44x.c
@@ -31,7 +31,7 @@
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/* Used by the 44x TLB replacement exception handler.
* Just needed it declared someplace.
diff --git a/arch/powerpc/mm/8xx_mmu.c b/arch/powerpc/mm/nohash/8xx.c
index fe1f6443d57f..70d55b615b62 100644
--- a/arch/powerpc/mm/8xx_mmu.c
+++ b/arch/powerpc/mm/nohash/8xx.c
@@ -17,7 +17,7 @@
#include <asm/fixmap.h>
#include <asm/code-patching.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#define IMMR_SIZE (FIX_IMMR_SIZE << PAGE_SHIFT)
@@ -213,3 +213,27 @@ void flush_instruction_cache(void)
mtspr(SPRN_IC_CST, IDC_INVALL);
isync();
}
+
+#ifdef CONFIG_PPC_KUEP
+void __init setup_kuep(bool disabled)
+{
+ if (disabled)
+ return;
+
+ pr_info("Activating Kernel Userspace Execution Prevention\n");
+
+ mtspr(SPRN_MI_AP, MI_APG_KUEP);
+}
+#endif
+
+#ifdef CONFIG_PPC_KUAP
+void __init setup_kuap(bool disabled)
+{
+ pr_info("Activating Kernel Userspace Access Protection\n");
+
+ if (disabled)
+ pr_warn("KUAP cannot be disabled yet on 8xx when compiled in\n");
+
+ mtspr(SPRN_MD_AP, MD_APG_KUAP);
+}
+#endif
diff --git a/arch/powerpc/mm/nohash/Makefile b/arch/powerpc/mm/nohash/Makefile
new file mode 100644
index 000000000000..33b6f6f29d3f
--- /dev/null
+++ b/arch/powerpc/mm/nohash/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+
+ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+
+obj-y += mmu_context.o tlb.o tlb_low.o
+obj-$(CONFIG_PPC_BOOK3E_64) += tlb_low_64e.o book3e_pgtable.o
+obj-$(CONFIG_40x) += 40x.o
+obj-$(CONFIG_44x) += 44x.o
+obj-$(CONFIG_PPC_8xx) += 8xx.o
+obj-$(CONFIG_PPC_FSL_BOOK3E) += fsl_booke.o
+ifdef CONFIG_HUGETLB_PAGE
+obj-$(CONFIG_PPC_FSL_BOOK3E) += book3e_hugetlbpage.o
+endif
+
+# Disable kcov instrumentation on sensitive code
+# This is necessary for booting with kcov enabled on book3e machines
+KCOV_INSTRUMENT_tlb.o := n
+KCOV_INSTRUMENT_fsl_booke.o := n
diff --git a/arch/powerpc/mm/hugetlbpage-book3e.c b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
index f84ec46cdb26..61915f4d3c7f 100644
--- a/arch/powerpc/mm/hugetlbpage-book3e.c
+++ b/arch/powerpc/mm/nohash/book3e_hugetlbpage.c
@@ -11,8 +11,9 @@
#include <asm/mmu.h>
-#ifdef CONFIG_PPC_FSL_BOOK3E
#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+
static inline int tlb1_next(void)
{
struct paca_struct *paca = get_paca();
@@ -29,33 +30,6 @@ static inline int tlb1_next(void)
tcd->esel_next = next;
return this;
}
-#else
-static inline int tlb1_next(void)
-{
- int index, ncams;
-
- ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
-
- index = this_cpu_read(next_tlbcam_idx);
-
- /* Just round-robin the entries and wrap when we hit the end */
- if (unlikely(index == ncams - 1))
- __this_cpu_write(next_tlbcam_idx, tlbcam_index);
- else
- __this_cpu_inc(next_tlbcam_idx);
-
- return index;
-}
-#endif /* !PPC64 */
-#endif /* FSL */
-
-static inline int mmu_get_tsize(int psize)
-{
- return mmu_psize_defs[psize].enc;
-}
-
-#if defined(CONFIG_PPC_FSL_BOOK3E) && defined(CONFIG_PPC64)
-#include <asm/paca.h>
static inline void book3e_tlb_lock(void)
{
@@ -98,6 +72,23 @@ static inline void book3e_tlb_unlock(void)
paca->tcd_ptr->lock = 0;
}
#else
+static inline int tlb1_next(void)
+{
+ int index, ncams;
+
+ ncams = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY;
+
+ index = this_cpu_read(next_tlbcam_idx);
+
+ /* Just round-robin the entries and wrap when we hit the end */
+ if (unlikely(index == ncams - 1))
+ __this_cpu_write(next_tlbcam_idx, tlbcam_index);
+ else
+ __this_cpu_inc(next_tlbcam_idx);
+
+ return index;
+}
+
static inline void book3e_tlb_lock(void)
{
}
@@ -139,10 +130,7 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
unsigned long psize, tsize, shift;
unsigned long flags;
struct mm_struct *mm;
-
-#ifdef CONFIG_PPC_FSL_BOOK3E
int index;
-#endif
if (unlikely(is_kernel_addr(ea)))
return;
@@ -166,11 +154,9 @@ void book3e_hugetlb_preload(struct vm_area_struct *vma, unsigned long ea,
return;
}
-#ifdef CONFIG_PPC_FSL_BOOK3E
/* We have to use the CAM(TLB1) on FSL parts for hugepages */
index = tlb1_next();
mtspr(SPRN_MAS0, MAS0_ESEL(index) | MAS0_TLBSEL(1));
-#endif
mas1 = MAS1_VALID | MAS1_TID(mm->context.id) | MAS1_TSIZE(tsize);
mas2 = ea & ~((1UL << shift) - 1);
diff --git a/arch/powerpc/mm/pgtable-book3e.c b/arch/powerpc/mm/nohash/book3e_pgtable.c
index 1032ef7aaf62..75e9e2c35fe2 100644
--- a/arch/powerpc/mm/pgtable-book3e.c
+++ b/arch/powerpc/mm/nohash/book3e_pgtable.c
@@ -15,7 +15,7 @@
#include <asm/tlb.h>
#include <asm/dma.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
@@ -55,7 +55,7 @@ void vmemmap_remove_mapping(unsigned long start,
#endif
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-static __ref void *early_alloc_pgtable(unsigned long size)
+static void __init *early_alloc_pgtable(unsigned long size)
{
void *ptr;
@@ -74,7 +74,7 @@ static __ref void *early_alloc_pgtable(unsigned long size)
* map_kernel_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
-int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
+int __ref map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
pgd_t *pgdp;
pud_t *pudp;
@@ -98,20 +98,17 @@ int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
#ifndef __PAGETABLE_PUD_FOLDED
if (pgd_none(*pgdp)) {
pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
- BUG_ON(pudp == NULL);
pgd_populate(&init_mm, pgdp, pudp);
}
#endif /* !__PAGETABLE_PUD_FOLDED */
pudp = pud_offset(pgdp, ea);
if (pud_none(*pudp)) {
pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- BUG_ON(pmdp == NULL);
pud_populate(&init_mm, pudp, pmdp);
}
pmdp = pmd_offset(pudp, ea);
if (!pmd_present(*pmdp)) {
ptep = early_alloc_pgtable(PAGE_SIZE);
- BUG_ON(ptep == NULL);
pmd_populate_kernel(&init_mm, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, ea);
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/nohash/fsl_booke.c
index 210cbc1faf63..71a1a36751dd 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/nohash/fsl_booke.c
@@ -54,7 +54,7 @@
#include <asm/setup.h>
#include <asm/paca.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
unsigned int tlbcam_index;
diff --git a/arch/powerpc/mm/mmu_context_nohash.c b/arch/powerpc/mm/nohash/mmu_context.c
index 1945c5f19f5e..ae4505d5b4b8 100644
--- a/arch/powerpc/mm/mmu_context_nohash.c
+++ b/arch/powerpc/mm/nohash/mmu_context.c
@@ -52,7 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/*
* The MPC8xx has only 16 contexts. We rotate through them on each task switch.
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/nohash/tlb.c
index ac23dc1c6535..24f88efb05bf 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -46,7 +46,7 @@
#include <asm/hugetlb.h>
#include <asm/paca.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
/*
* This struct lists the sw-supported page sizes. The hardware MMU may support
@@ -433,11 +433,7 @@ void tlb_flush_pgtable(struct mmu_gather *tlb, unsigned long address)
unsigned long rid = (address & rmask) | 0x1000000000000000ul;
unsigned long vpte = address & ~rmask;
-#ifdef CONFIG_PPC_64K_PAGES
- vpte = (vpte >> (PAGE_SHIFT - 4)) & ~0xfffful;
-#else
vpte = (vpte >> (PAGE_SHIFT - 3)) & ~0xffful;
-#endif
vpte |= rid;
__flush_tlb_page(tlb->mm, vpte, tsize, 0);
}
@@ -625,21 +621,12 @@ static void early_init_this_mmu(void)
case PPC_HTW_IBM:
mas4 |= MAS4_INDD;
-#ifdef CONFIG_PPC_64K_PAGES
- mas4 |= BOOK3E_PAGESZ_256M << MAS4_TSIZED_SHIFT;
- mmu_pte_psize = MMU_PAGE_256M;
-#else
mas4 |= BOOK3E_PAGESZ_1M << MAS4_TSIZED_SHIFT;
mmu_pte_psize = MMU_PAGE_1M;
-#endif
break;
case PPC_HTW_NONE:
-#ifdef CONFIG_PPC_64K_PAGES
- mas4 |= BOOK3E_PAGESZ_64K << MAS4_TSIZED_SHIFT;
-#else
mas4 |= BOOK3E_PAGESZ_4K << MAS4_TSIZED_SHIFT;
-#endif
mmu_pte_psize = mmu_virtual_psize;
break;
}
@@ -800,5 +787,9 @@ void __init early_init_mmu(void)
#ifdef CONFIG_PPC_47x
early_init_mmu_47x();
#endif
+
+#ifdef CONFIG_PPC_MM_SLICES
+ mm_ctx_set_slb_addr_limit(&init_mm.context, SLB_ADDR_LIMIT_DEFAULT);
+#endif
}
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/mm/tlb_nohash_low.S b/arch/powerpc/mm/nohash/tlb_low.S
index e066a658acac..e066a658acac 100644
--- a/arch/powerpc/mm/tlb_nohash_low.S
+++ b/arch/powerpc/mm/nohash/tlb_low.S
diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index 9ed90064f542..58959ce15415 100644
--- a/arch/powerpc/mm/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -24,11 +24,7 @@
#include <asm/kvm_booke_hv_asm.h>
#include <asm/feature-fixups.h>
-#ifdef CONFIG_PPC_64K_PAGES
-#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE+1)
-#else
#define VPTE_PMD_SHIFT (PTE_INDEX_SIZE)
-#endif
#define VPTE_PUD_SHIFT (VPTE_PMD_SHIFT + PMD_INDEX_SIZE)
#define VPTE_PGD_SHIFT (VPTE_PUD_SHIFT + PUD_INDEX_SIZE)
#define VPTE_INDEX_SIZE (VPTE_PGD_SHIFT + PGD_INDEX_SIZE)
@@ -167,13 +163,11 @@ MMU_FTR_SECTION_ELSE
ldx r14,r14,r15 /* grab pgd entry */
ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_USE_TLBRSRV)
-#ifndef CONFIG_PPC_64K_PAGES
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
clrrdi r15,r15,3
cmpdi cr0,r14,0
bge tlb_miss_fault_bolted /* Bad pgd entry or hugepage; bail */
ldx r14,r14,r15 /* grab pud entry */
-#endif /* CONFIG_PPC_64K_PAGES */
rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
clrrdi r15,r15,3
@@ -682,18 +676,7 @@ normal_tlb_miss:
* order to handle the weird page table format used by linux
*/
ori r10,r15,0x1
-#ifdef CONFIG_PPC_64K_PAGES
- /* For the top bits, 16 bytes per PTE */
- rldicl r14,r16,64-(PAGE_SHIFT-4),PAGE_SHIFT-4+4
- /* Now create the bottom bits as 0 in position 0x8000 and
- * the rest calculated for 8 bytes per PTE
- */
- rldicl r15,r16,64-(PAGE_SHIFT-3),64-15
- /* Insert the bottom bits in */
- rlwimi r14,r15,0,16,31
-#else
rldicl r14,r16,64-(PAGE_SHIFT-3),PAGE_SHIFT-3+4
-#endif
sldi r15,r10,60
clrrdi r14,r14,3
or r10,r15,r14
@@ -732,11 +715,7 @@ finish_normal_tlb_miss:
/* Check page size, if not standard, update MAS1 */
rldicl r11,r14,64-8,64-8
-#ifdef CONFIG_PPC_64K_PAGES
- cmpldi cr0,r11,BOOK3E_PAGESZ_64K
-#else
cmpldi cr0,r11,BOOK3E_PAGESZ_4K
-#endif
beq- 1f
mfspr r11,SPRN_MAS1
rlwimi r11,r14,31,21,24
@@ -857,14 +836,12 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
cmpdi cr0,r15,0
bge virt_page_table_tlb_miss_fault
-#ifndef CONFIG_PPC_64K_PAGES
/* Get to PUD entry */
rldicl r11,r16,64-VPTE_PUD_SHIFT,64-PUD_INDEX_SIZE-3
clrrdi r10,r11,3
ldx r15,r10,r15
cmpdi cr0,r15,0
bge virt_page_table_tlb_miss_fault
-#endif /* CONFIG_PPC_64K_PAGES */
/* Get to PMD entry */
rldicl r11,r16,64-VPTE_PMD_SHIFT,64-PMD_INDEX_SIZE-3
@@ -1106,14 +1083,12 @@ htw_tlb_miss:
cmpdi cr0,r15,0
bge htw_tlb_miss_fault
-#ifndef CONFIG_PPC_64K_PAGES
/* Get to PUD entry */
rldicl r11,r16,64-(PUD_SHIFT-3),64-PUD_INDEX_SIZE-3
clrrdi r10,r11,3
ldx r15,r10,r15
cmpdi cr0,r15,0
bge htw_tlb_miss_fault
-#endif /* CONFIG_PPC_64K_PAGES */
/* Get to PMD entry */
rldicl r11,r16,64-(PMD_SHIFT-3),64-PMD_INDEX_SIZE-3
@@ -1132,9 +1107,7 @@ htw_tlb_miss:
* 4K page we need to extract a bit from the virtual address and
* insert it into the "PA52" bit of the RPN.
*/
-#ifndef CONFIG_PPC_64K_PAGES
rlwimi r15,r16,32-9,20,20
-#endif
/* Now we build the MAS:
*
* MAS 0 : Fully setup with defaults in MAS4 and TLBnCFG
@@ -1144,11 +1117,7 @@ htw_tlb_miss:
* MAS 2 : Use defaults
* MAS 3+7 : Needs to be done
*/
-#ifdef CONFIG_PPC_64K_PAGES
- ori r10,r15,(BOOK3E_PAGESZ_64K << MAS3_SPSIZE_SHIFT)
-#else
ori r10,r15,(BOOK3E_PAGESZ_4K << MAS3_SPSIZE_SHIFT)
-#endif
BEGIN_MMU_FTR_SECTION
srdi r16,r10,32
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f976676004ad..57e64273cb33 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -32,7 +32,6 @@
#include <asm/sparsemem.h>
#include <asm/prom.h>
#include <asm/smp.h>
-#include <asm/cputhreads.h>
#include <asm/topology.h>
#include <asm/firmware.h>
#include <asm/paca.h>
@@ -908,16 +907,22 @@ static int __init early_numa(char *p)
}
early_param("numa", early_numa);
-static bool topology_updates_enabled = true;
+/*
+ * The platform can inform us through one of several mechanisms
+ * (post-migration device tree updates, PRRN or VPHN) that the NUMA
+ * assignment of a resource has changed. This controls whether we act
+ * on that. Disabled by default.
+ */
+static bool topology_updates_enabled;
static int __init early_topology_updates(char *p)
{
if (!p)
return 0;
- if (!strcmp(p, "off")) {
- pr_info("Disabling topology updates\n");
- topology_updates_enabled = false;
+ if (!strcmp(p, "on")) {
+ pr_warn("Caution: enabling topology updates\n");
+ topology_updates_enabled = true;
}
return 0;
@@ -1063,7 +1068,7 @@ u64 memory_hotplug_max(void)
/* Virtual Processor Home Node (VPHN) support */
#ifdef CONFIG_PPC_SPLPAR
-#include "vphn.h"
+#include "book3s64/vphn.h"
struct topology_update_data {
struct topology_update_data *next;
@@ -1498,6 +1503,9 @@ int start_topology_update(void)
{
int rc = 0;
+ if (!topology_updates_enabled)
+ return 0;
+
if (firmware_has_feature(FW_FEATURE_PRRN)) {
if (!prrn_enabled) {
prrn_enabled = 1;
@@ -1531,6 +1539,9 @@ int stop_topology_update(void)
{
int rc = 0;
+ if (!topology_updates_enabled)
+ return 0;
+
if (prrn_enabled) {
prrn_enabled = 0;
#ifdef CONFIG_SMP
@@ -1588,11 +1599,13 @@ static ssize_t topology_write(struct file *file, const char __user *buf,
kbuf[read_len] = '\0';
- if (!strncmp(kbuf, "on", 2))
+ if (!strncmp(kbuf, "on", 2)) {
+ topology_updates_enabled = true;
start_topology_update();
- else if (!strncmp(kbuf, "off", 3))
+ } else if (!strncmp(kbuf, "off", 3)) {
stop_topology_update();
- else
+ topology_updates_enabled = false;
+ } else
return -EINVAL;
return count;
@@ -1607,9 +1620,7 @@ static const struct file_operations topology_ops = {
static int topology_update_init(void)
{
- /* Do not poll for changes if disabled at boot */
- if (topology_updates_enabled)
- start_topology_update();
+ start_topology_update();
if (vphn_enabled)
topology_schedule_update();
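
The numa.c changes above make topology updates opt-in: the debugfs handler flips topology_updates_enabled before starting updates and after stopping them, and start/stop become no-ops while the flag is clear. A small sketch of that ordering, with printf() standing in for the PRRN/VPHN start and stop paths:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool topology_updates_enabled;	/* disabled by default */

static void start_topology_update(void)
{
	if (!topology_updates_enabled)
		return;
	printf("starting topology updates\n");
}

static void stop_topology_update(void)
{
	if (!topology_updates_enabled)
		return;
	printf("stopping topology updates\n");
}

/* Stand-in for the debugfs write handler. */
static int topology_write(const char *kbuf)
{
	if (!strncmp(kbuf, "on", 2)) {
		topology_updates_enabled = true;
		start_topology_update();
	} else if (!strncmp(kbuf, "off", 3)) {
		stop_topology_update();
		topology_updates_enabled = false;
	} else {
		return -1;
	}
	return 0;
}

int main(void)
{
	topology_write("on");
	topology_write("off");
	return 0;
}
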
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index d3d61d29b4f1..db4a6253df92 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -30,6 +30,7 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
+#include <asm/hugetlb.h>
static inline int is_exec_fault(void)
{
@@ -299,3 +300,116 @@ unsigned long vmalloc_to_phys(void *va)
return __pa(pfn_to_kaddr(pfn)) + offset_in_page(va);
}
EXPORT_SYMBOL_GPL(vmalloc_to_phys);
+
+/*
+ * We have 4 cases for pgds and pmds:
+ * (1) invalid (all zeroes)
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
+ * (3) leaf pte for huge page _PAGE_PTE set
+ * (4) hugepd pointer, _PAGE_PTE = 0 and bits [2..6] indicate size of table
+ *
+ * So long as we atomically load page table pointers we are safe against teardown;
+ * we can follow the address down to the page and take a ref on it.
+ * This function needs to be called with interrupts disabled. We use this variant
+ * when we have MSR[EE] = 0 but paca->irq_soft_mask = IRQS_ENABLED.
+ */
+pte_t *__find_linux_pte(pgd_t *pgdir, unsigned long ea,
+ bool *is_thp, unsigned *hpage_shift)
+{
+ pgd_t pgd, *pgdp;
+ pud_t pud, *pudp;
+ pmd_t pmd, *pmdp;
+ pte_t *ret_pte;
+ hugepd_t *hpdp = NULL;
+ unsigned pdshift = PGDIR_SHIFT;
+
+ if (hpage_shift)
+ *hpage_shift = 0;
+
+ if (is_thp)
+ *is_thp = false;
+
+ pgdp = pgdir + pgd_index(ea);
+ pgd = READ_ONCE(*pgdp);
+ /*
+ * Always operate on the local stack value. This makes sure the
+ * value doesn't get updated by a parallel THP split/collapse,
+ * page fault or a page unmap. The returned pte_t * is still not
+ * stable, so those conditions must be re-checked by the caller.
+ */
+ if (pgd_none(pgd))
+ return NULL;
+
+ if (pgd_huge(pgd)) {
+ ret_pte = (pte_t *)pgdp;
+ goto out;
+ }
+ if (is_hugepd(__hugepd(pgd_val(pgd)))) {
+ hpdp = (hugepd_t *)&pgd;
+ goto out_huge;
+ }
+
+ /*
+ * Even if we end up with an unmap, the pgtable will not
+ * be freed, because we do an rcu free and here we are
+ * irq disabled
+ */
+ pdshift = PUD_SHIFT;
+ pudp = pud_offset(&pgd, ea);
+ pud = READ_ONCE(*pudp);
+
+ if (pud_none(pud))
+ return NULL;
+
+ if (pud_huge(pud)) {
+ ret_pte = (pte_t *)pudp;
+ goto out;
+ }
+ if (is_hugepd(__hugepd(pud_val(pud)))) {
+ hpdp = (hugepd_t *)&pud;
+ goto out_huge;
+ }
+ pdshift = PMD_SHIFT;
+ pmdp = pmd_offset(&pud, ea);
+ pmd = READ_ONCE(*pmdp);
+ /*
+ * A hugepage collapse is captured by pmd_none, because
+ * it marks the pmd none and does an hpte invalidate.
+ */
+ if (pmd_none(pmd))
+ return NULL;
+
+ if (pmd_trans_huge(pmd) || pmd_devmap(pmd)) {
+ if (is_thp)
+ *is_thp = true;
+ ret_pte = (pte_t *)pmdp;
+ goto out;
+ }
+ /*
+ * The pmd_large() check below will handle the swap pmd pte;
+ * we need to do both checks because they are config
+ * dependent.
+ */
+ if (pmd_huge(pmd) || pmd_large(pmd)) {
+ ret_pte = (pte_t *)pmdp;
+ goto out;
+ }
+ if (is_hugepd(__hugepd(pmd_val(pmd)))) {
+ hpdp = (hugepd_t *)&pmd;
+ goto out_huge;
+ }
+
+ return pte_offset_kernel(&pmd, ea);
+
+out_huge:
+ if (!hpdp)
+ return NULL;
+
+ ret_pte = hugepte_offset(*hpdp, ea, pdshift);
+ pdshift = hugepd_shift(*hpdp);
+out:
+ if (hpage_shift)
+ *hpage_shift = pdshift;
+ return ret_pte;
+}
+EXPORT_SYMBOL_GPL(__find_linux_pte);
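
__find_linux_pte(), now moved into pgtable.c, depends on reading each page-table entry exactly once into a local copy and classifying that copy, so a concurrent split, collapse or unmap cannot change the value mid-test. A simplified sketch of that pattern; the entry bit encodings below are invented for illustration and do not match the real powerpc ones.

#include <stdio.h>

/* Hypothetical entry encoding, for illustration only; the real checks are
 * pgd_huge(), is_hugepd() and friends. */
#define ENTRY_LEAF	0x1UL	/* stands in for a leaf huge pte */
#define ENTRY_HUGEPD	0x2UL	/* stands in for a hugepd pointer */

/* Poor man's READ_ONCE(): force a single load from memory. */
#define READ_ONCE(x)	(*(volatile typeof(x) *)&(x))

static const char *classify(unsigned long *slot)
{
	/* Work on a local copy so a concurrent update (THP collapse,
	 * unmap, ...) cannot change what we test halfway through. */
	unsigned long entry = READ_ONCE(*slot);

	if (!entry)
		return "none";
	if (entry & ENTRY_LEAF)
		return "huge leaf pte";
	if (entry & ENTRY_HUGEPD)
		return "hugepd directory";
	return "pointer to next level";
}

int main(void)
{
	unsigned long table[] = { 0, 0x1001, 0x2002, 0x4000 };

	for (int i = 0; i < 4; i++)
		printf("slot %d: %s\n", i, classify(&table[i]));
	return 0;
}
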
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 6e56a6240bfa..16ada373b32b 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -36,26 +36,13 @@
#include <asm/setup.h>
#include <asm/sections.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
unsigned long ioremap_bot;
EXPORT_SYMBOL(ioremap_bot); /* aka VMALLOC_END */
extern char etext[], _stext[], _sinittext[], _einittext[];
-__ref pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
-{
- if (!slab_is_available())
- return memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE);
-
- return (pte_t *)pte_fragment_alloc(mm, 1);
-}
-
-pgtable_t pte_alloc_one(struct mm_struct *mm)
-{
- return (pgtable_t)pte_fragment_alloc(mm, 0);
-}
-
void __iomem *
ioremap(phys_addr_t addr, unsigned long size)
{
@@ -205,7 +192,29 @@ void iounmap(volatile void __iomem *addr)
}
EXPORT_SYMBOL(iounmap);
-int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
+static void __init *early_alloc_pgtable(unsigned long size)
+{
+ void *ptr = memblock_alloc(size, size);
+
+ if (!ptr)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, size, size);
+
+ return ptr;
+}
+
+static pte_t __init *early_pte_alloc_kernel(pmd_t *pmdp, unsigned long va)
+{
+ if (pmd_none(*pmdp)) {
+ pte_t *ptep = early_alloc_pgtable(PTE_FRAG_SIZE);
+
+ pmd_populate_kernel(&init_mm, pmdp, ptep);
+ }
+ return pte_offset_kernel(pmdp, va);
+}
+
+
+int __ref map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
{
pmd_t *pd;
pte_t *pg;
@@ -214,7 +223,10 @@ int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
/* Use upper 10 bits of VA to index the first level map */
pd = pmd_offset(pud_offset(pgd_offset_k(va), va), va);
/* Use middle 10 bits of VA to index the second-level map */
- pg = pte_alloc_kernel(pd, va);
+ if (likely(slab_is_available()))
+ pg = pte_alloc_kernel(pd, va);
+ else
+ pg = early_pte_alloc_kernel(pd, va);
if (pg != 0) {
err = 0;
/* The PTE should never be already set nor present in the
@@ -384,6 +396,9 @@ void mark_rodata_ro(void)
PFN_DOWN((unsigned long)__start_rodata);
change_page_attr(page, numpages, PAGE_KERNEL_RO);
+
+ // mark_initmem_nx() should have already run by now
+ ptdump_check_wx();
}
#endif
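
Several hunks above (kasan_get_one_page(), early_pte_alloc_kernel(), the map_kernel_page() change) follow the same rule: allocate from memblock while the table is needed before the slab allocator is up, and switch to the normal allocator afterwards. A hedged userspace analogue of that switch, using libc allocators in place of memblock and slab:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PTE_FRAG_SIZE 4096	/* assumed fragment size, for illustration */

static bool slab_is_available;	/* flips to true once the allocator is up */

/* Stand-in for memblock_alloc(): boot-time, failure is fatal. */
static void *early_alloc_pgtable(size_t size)
{
	void *ptr = aligned_alloc(size, size);

	if (!ptr) {
		fprintf(stderr, "early alloc of %zu bytes failed\n", size);
		exit(1);
	}
	memset(ptr, 0, size);
	return ptr;
}

/* Stand-in for pte_alloc_kernel(): normal allocation once slab is up. */
static void *late_alloc_pgtable(size_t size)
{
	return calloc(1, size);
}

static void *alloc_pgtable(size_t size)
{
	return slab_is_available ? late_alloc_pgtable(size)
				 : early_alloc_pgtable(size);
}

int main(void)
{
	void *early = alloc_pgtable(PTE_FRAG_SIZE);

	slab_is_available = true;
	void *late = alloc_pgtable(PTE_FRAG_SIZE);

	printf("early=%p late=%p\n", early, late);
	free(early);
	free(late);
	return 0;
}
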
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index fb1375c07e8c..d2d976ff8a0e 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -52,7 +52,7 @@
#include <asm/firmware.h>
#include <asm/dma.h>
-#include "mmu_decl.h"
+#include <mm/mmu_decl.h>
#ifdef CONFIG_PPC_BOOK3S_64
@@ -90,14 +90,13 @@ unsigned long __pgd_val_bits;
EXPORT_SYMBOL(__pgd_val_bits);
unsigned long __kernel_virt_start;
EXPORT_SYMBOL(__kernel_virt_start);
-unsigned long __kernel_virt_size;
-EXPORT_SYMBOL(__kernel_virt_size);
unsigned long __vmalloc_start;
EXPORT_SYMBOL(__vmalloc_start);
unsigned long __vmalloc_end;
EXPORT_SYMBOL(__vmalloc_end);
unsigned long __kernel_io_start;
EXPORT_SYMBOL(__kernel_io_start);
+unsigned long __kernel_io_end;
struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long __pte_frag_nr;
@@ -121,6 +120,11 @@ void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_
if (pgprot_val(prot) & H_PAGE_4K_PFN)
return NULL;
+ if ((ea + size) >= (void *)IOREMAP_END) {
+ pr_warn("Outside the supported range\n");
+ return NULL;
+ }
+
WARN_ON(pa & ~PAGE_MASK);
WARN_ON(((unsigned long)ea) & ~PAGE_MASK);
WARN_ON(size & ~PAGE_MASK);
@@ -328,6 +332,9 @@ void mark_rodata_ro(void)
radix__mark_rodata_ro();
else
hash__mark_rodata_ro();
+
+ // mark_initmem_nx() should have already run by now
+ ptdump_check_wx();
}
void mark_initmem_nx(void)
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index b430e4e08af6..b9bda0105841 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -500,7 +500,7 @@ static void populate_markers(void)
address_markers[7].start_address = IOREMAP_BASE;
address_markers[8].start_address = IOREMAP_END;
#ifdef CONFIG_PPC_BOOK3S_64
- address_markers[9].start_address = H_VMEMMAP_BASE;
+ address_markers[9].start_address = H_VMEMMAP_START;
#else
address_markers[9].start_address = VMEMMAP_BASE;
#endif
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index 37138428ab55..646876d9da64 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -31,7 +31,7 @@
#include "ptdump.h"
#ifdef CONFIG_PPC32
-#define KERN_VIRT_START 0
+#define KERN_VIRT_START PAGE_OFFSET
#endif
/*
@@ -68,6 +68,8 @@ struct pg_state {
unsigned long last_pa;
unsigned int level;
u64 current_flags;
+ bool check_wx;
+ unsigned long wx_pages;
};
struct addr_marker {
@@ -101,9 +103,25 @@ static struct addr_marker address_markers[] = {
{ 0, "Fixmap start" },
{ 0, "Fixmap end" },
#endif
+#ifdef CONFIG_KASAN
+ { 0, "kasan shadow mem start" },
+ { 0, "kasan shadow mem end" },
+#endif
{ -1, NULL },
};
+#define pt_dump_seq_printf(m, fmt, args...) \
+({ \
+ if (m) \
+ seq_printf(m, fmt, ##args); \
+})
+
+#define pt_dump_seq_putc(m, c) \
+({ \
+ if (m) \
+ seq_putc(m, c); \
+})
+
static void dump_flag_info(struct pg_state *st, const struct flag_info
*flag, u64 pte, int num)
{
@@ -121,19 +139,19 @@ static void dump_flag_info(struct pg_state *st, const struct flag_info
val = pte & flag->val;
if (flag->shift)
val = val >> flag->shift;
- seq_printf(st->seq, " %s:%llx", flag->set, val);
+ pt_dump_seq_printf(st->seq, " %s:%llx", flag->set, val);
} else {
if ((pte & flag->mask) == flag->val)
s = flag->set;
else
s = flag->clear;
if (s)
- seq_printf(st->seq, " %s", s);
+ pt_dump_seq_printf(st->seq, " %s", s);
}
st->current_flags &= ~flag->mask;
}
if (st->current_flags != 0)
- seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
+ pt_dump_seq_printf(st->seq, " unknown flags:%llx", st->current_flags);
}
static void dump_addr(struct pg_state *st, unsigned long addr)
@@ -148,12 +166,12 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
#define REG "0x%08lx"
#endif
- seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
+ pt_dump_seq_printf(st->seq, REG "-" REG " ", st->start_address, addr - 1);
if (st->start_pa == st->last_pa && st->start_address + PAGE_SIZE != addr) {
- seq_printf(st->seq, "[" REG "]", st->start_pa);
+ pt_dump_seq_printf(st->seq, "[" REG "]", st->start_pa);
delta = PAGE_SIZE >> 10;
} else {
- seq_printf(st->seq, " " REG " ", st->start_pa);
+ pt_dump_seq_printf(st->seq, " " REG " ", st->start_pa);
delta = (addr - st->start_address) >> 10;
}
/* Work out what appropriate unit to use */
@@ -161,10 +179,24 @@ static void dump_addr(struct pg_state *st, unsigned long addr)
delta >>= 10;
unit++;
}
- seq_printf(st->seq, "%9lu%c", delta, *unit);
+ pt_dump_seq_printf(st->seq, "%9lu%c", delta, *unit);
}
+static void note_prot_wx(struct pg_state *st, unsigned long addr)
+{
+ if (!st->check_wx)
+ return;
+
+ if (!((st->current_flags & pgprot_val(PAGE_KERNEL_X)) == pgprot_val(PAGE_KERNEL_X)))
+ return;
+
+ WARN_ONCE(1, "powerpc/mm: Found insecure W+X mapping at address %p/%pS\n",
+ (void *)st->start_address, (void *)st->start_address);
+
+ st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
+}
+
static void note_page(struct pg_state *st, unsigned long addr,
unsigned int level, u64 val)
{
@@ -178,7 +210,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
st->start_address = addr;
st->start_pa = pa;
st->last_pa = pa;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
/*
* Dump the section of virtual memory when:
* - the PTE flags from one entry to the next differs.
@@ -194,6 +226,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
/* Check the PTE flags */
if (st->current_flags) {
+ note_prot_wx(st, addr);
dump_addr(st, addr);
/* Dump all the flags */
@@ -202,7 +235,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
st->current_flags,
pg_level[st->level].num);
- seq_putc(st->seq, '\n');
+ pt_dump_seq_putc(st->seq, '\n');
}
/*
@@ -211,7 +244,7 @@ static void note_page(struct pg_state *st, unsigned long addr,
*/
while (addr >= st->marker[1].start_address) {
st->marker++;
- seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+ pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
}
st->start_address = addr;
st->start_pa = pa;
@@ -303,8 +336,9 @@ static void populate_markers(void)
address_markers[i++].start_address = PHB_IO_END;
address_markers[i++].start_address = IOREMAP_BASE;
address_markers[i++].start_address = IOREMAP_END;
+ /* What is the ifdef about? */
#ifdef CONFIG_PPC_BOOK3S_64
- address_markers[i++].start_address = H_VMEMMAP_BASE;
+ address_markers[i++].start_address = H_VMEMMAP_START;
#else
address_markers[i++].start_address = VMEMMAP_BASE;
#endif
@@ -322,6 +356,10 @@ static void populate_markers(void)
#endif
address_markers[i++].start_address = FIXADDR_START;
address_markers[i++].start_address = FIXADDR_TOP;
+#ifdef CONFIG_KASAN
+ address_markers[i++].start_address = KASAN_SHADOW_START;
+ address_markers[i++].start_address = KASAN_SHADOW_END;
+#endif
#endif /* CONFIG_PPC64 */
}
@@ -366,6 +404,30 @@ static void build_pgtable_complete_mask(void)
pg_level[i].mask |= pg_level[i].flag[j].mask;
}
+#ifdef CONFIG_PPC_DEBUG_WX
+void ptdump_check_wx(void)
+{
+ struct pg_state st = {
+ .seq = NULL,
+ .marker = address_markers,
+ .check_wx = true,
+ };
+
+ if (radix_enabled())
+ st.start_address = PAGE_OFFSET;
+ else
+ st.start_address = KERN_VIRT_START;
+
+ walk_pagetables(&st);
+
+ if (st.wx_pages)
+ pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n",
+ st.wx_pages);
+ else
+ pr_info("Checked W+X mappings: passed, no W+X pages found\n");
+}
+#endif
+
static int ptdump_init(void)
{
struct dentry *debugfs_file;
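
ptdump_check_wx() above reuses the page-table walker with a NULL seq_file purely to count pages whose flags make them both writable and executable. The core test is a simple mask comparison, sketched here with hypothetical protection bits (the kernel compares against pgprot_val(PAGE_KERNEL_X) instead):

#include <stdio.h>

/* Hypothetical protection bits, for illustration only. */
#define PROT_R	0x1UL
#define PROT_W	0x2UL
#define PROT_X	0x4UL

static int is_wx(unsigned long flags)
{
	/* A mapping is a problem only when it is writable and
	 * executable at the same time. */
	return (flags & (PROT_W | PROT_X)) == (PROT_W | PROT_X);
}

int main(void)
{
	unsigned long mappings[] = {
		PROT_R | PROT_X,		/* kernel text: fine */
		PROT_R | PROT_W,		/* data: fine */
		PROT_R | PROT_W | PROT_X,	/* W+X: reported */
	};
	unsigned long wx_pages = 0;

	for (int i = 0; i < 3; i++)
		if (is_wx(mappings[i]))
			wx_pages++;

	if (wx_pages)
		printf("Checked W+X mappings: FAILED, %lu W+X pages found\n", wx_pages);
	else
		printf("Checked W+X mappings: passed, no W+X pages found\n");
	return 0;
}
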
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index aec91dbcdc0b..97fbf7b54422 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -101,7 +101,7 @@ static int slice_area_is_free(struct mm_struct *mm, unsigned long addr,
{
struct vm_area_struct *vma;
- if ((mm->context.slb_addr_limit - len) < addr)
+ if ((mm_ctx_slb_addr_limit(&mm->context) - len) < addr)
return 0;
vma = find_vma(mm, addr);
return (!vma || (addr + len) <= vm_start_gap(vma));
@@ -118,13 +118,11 @@ static int slice_high_has_vma(struct mm_struct *mm, unsigned long slice)
unsigned long start = slice << SLICE_HIGH_SHIFT;
unsigned long end = start + (1ul << SLICE_HIGH_SHIFT);
-#ifdef CONFIG_PPC64
/* Hack, so that each address is controlled by exactly one
* of the high or low area bitmaps, the first high area starts
* at 4GB, not 0 */
if (start == 0)
- start = SLICE_LOW_TOP;
-#endif
+ start = (unsigned long)SLICE_LOW_TOP;
return !slice_area_is_free(mm, start, end - start);
}
@@ -150,40 +148,6 @@ static void slice_mask_for_free(struct mm_struct *mm, struct slice_mask *ret,
__set_bit(i, ret->high_slices);
}
-#ifdef CONFIG_PPC_BOOK3S_64
-static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
-{
-#ifdef CONFIG_PPC_64K_PAGES
- if (psize == MMU_PAGE_64K)
- return &mm->context.mask_64k;
-#endif
- if (psize == MMU_PAGE_4K)
- return &mm->context.mask_4k;
-#ifdef CONFIG_HUGETLB_PAGE
- if (psize == MMU_PAGE_16M)
- return &mm->context.mask_16m;
- if (psize == MMU_PAGE_16G)
- return &mm->context.mask_16g;
-#endif
- BUG();
-}
-#elif defined(CONFIG_PPC_8xx)
-static struct slice_mask *slice_mask_for_size(struct mm_struct *mm, int psize)
-{
- if (psize == mmu_virtual_psize)
- return &mm->context.mask_base_psize;
-#ifdef CONFIG_HUGETLB_PAGE
- if (psize == MMU_PAGE_512K)
- return &mm->context.mask_512k;
- if (psize == MMU_PAGE_8M)
- return &mm->context.mask_8m;
-#endif
- BUG();
-}
-#else
-#error "Must define the slice masks for page sizes supported by the platform"
-#endif
-
static bool slice_check_range_fits(struct mm_struct *mm,
const struct slice_mask *available,
unsigned long start, unsigned long len)
@@ -246,14 +210,14 @@ static void slice_convert(struct mm_struct *mm,
slice_dbg("slice_convert(mm=%p, psize=%d)\n", mm, psize);
slice_print_mask(" mask", mask);
- psize_mask = slice_mask_for_size(mm, psize);
+ psize_mask = slice_mask_for_size(&mm->context, psize);
/* We need to use a spinlock here to protect against
* concurrent 64k -> 4k demotion ...
*/
spin_lock_irqsave(&slice_convert_lock, flags);
- lpsizes = mm->context.low_slices_psize;
+ lpsizes = mm_ctx_low_slices(&mm->context);
for (i = 0; i < SLICE_NUM_LOW; i++) {
if (!(mask->low_slices & (1u << i)))
continue;
@@ -263,7 +227,7 @@ static void slice_convert(struct mm_struct *mm,
/* Update the slice_mask */
old_psize = (lpsizes[index] >> (mask_index * 4)) & 0xf;
- old_mask = slice_mask_for_size(mm, old_psize);
+ old_mask = slice_mask_for_size(&mm->context, old_psize);
old_mask->low_slices &= ~(1u << i);
psize_mask->low_slices |= 1u << i;
@@ -272,8 +236,8 @@ static void slice_convert(struct mm_struct *mm,
(((unsigned long)psize) << (mask_index * 4));
}
- hpsizes = mm->context.high_slices_psize;
- for (i = 0; i < GET_HIGH_SLICE_INDEX(mm->context.slb_addr_limit); i++) {
+ hpsizes = mm_ctx_high_slices(&mm->context);
+ for (i = 0; i < GET_HIGH_SLICE_INDEX(mm_ctx_slb_addr_limit(&mm->context)); i++) {
if (!test_bit(i, mask->high_slices))
continue;
@@ -282,7 +246,7 @@ static void slice_convert(struct mm_struct *mm,
/* Update the slice_mask */
old_psize = (hpsizes[index] >> (mask_index * 4)) & 0xf;
- old_mask = slice_mask_for_size(mm, old_psize);
+ old_mask = slice_mask_for_size(&mm->context, old_psize);
__clear_bit(i, old_mask->high_slices);
__set_bit(i, psize_mask->high_slices);
@@ -292,8 +256,8 @@ static void slice_convert(struct mm_struct *mm,
}
slice_dbg(" lsps=%lx, hsps=%lx\n",
- (unsigned long)mm->context.low_slices_psize,
- (unsigned long)mm->context.high_slices_psize);
+ (unsigned long)mm_ctx_low_slices(&mm->context),
+ (unsigned long)mm_ctx_high_slices(&mm->context));
spin_unlock_irqrestore(&slice_convert_lock, flags);
@@ -393,7 +357,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* DEFAULT_MAP_WINDOW we should apply this.
*/
if (high_limit > DEFAULT_MAP_WINDOW)
- addr += mm->context.slb_addr_limit - DEFAULT_MAP_WINDOW;
+ addr += mm_ctx_slb_addr_limit(&mm->context) - DEFAULT_MAP_WINDOW;
while (addr > min_addr) {
info.high_limit = addr;
@@ -505,20 +469,20 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
return -ENOMEM;
}
- if (high_limit > mm->context.slb_addr_limit) {
+ if (high_limit > mm_ctx_slb_addr_limit(&mm->context)) {
/*
* Increasing the slb_addr_limit does not require
* slice mask cache to be recalculated because it should
* be already initialised beyond the old address limit.
*/
- mm->context.slb_addr_limit = high_limit;
+ mm_ctx_set_slb_addr_limit(&mm->context, high_limit);
on_each_cpu(slice_flush_segments, mm, 1);
}
/* Sanity checks */
BUG_ON(mm->task_size == 0);
- BUG_ON(mm->context.slb_addr_limit == 0);
+ BUG_ON(mm_ctx_slb_addr_limit(&mm->context) == 0);
VM_BUG_ON(radix_enabled());
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
@@ -538,7 +502,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* First make up a "good" mask of slices that have the right size
* already
*/
- maskp = slice_mask_for_size(mm, psize);
+ maskp = slice_mask_for_size(&mm->context, psize);
/*
* Here "good" means slices that are already the right page size,
@@ -565,7 +529,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* a pointer to good mask for the next code to use.
*/
if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
- compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+ compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
if (fixed)
slice_or_mask(&good_mask, maskp, compat_maskp);
else
@@ -642,14 +606,13 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
newaddr = slice_find_area(mm, len, &potential_mask,
psize, topdown, high_limit);
-#ifdef CONFIG_PPC_64K_PAGES
- if (newaddr == -ENOMEM && psize == MMU_PAGE_64K) {
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && newaddr == -ENOMEM &&
+ psize == MMU_PAGE_64K) {
/* retry the search with 4k-page slices included */
slice_or_mask(&potential_mask, &potential_mask, compat_maskp);
newaddr = slice_find_area(mm, len, &potential_mask,
psize, topdown, high_limit);
}
-#endif
if (newaddr == -ENOMEM)
return -ENOMEM;
@@ -696,7 +659,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
unsigned long flags)
{
return slice_get_unmapped_area(addr, len, flags,
- current->mm->context.user_psize, 0);
+ mm_ctx_user_psize(&current->mm->context), 0);
}
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -706,7 +669,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
const unsigned long flags)
{
return slice_get_unmapped_area(addr0, len, flags,
- current->mm->context.user_psize, 1);
+ mm_ctx_user_psize(&current->mm->context), 1);
}
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
@@ -717,10 +680,10 @@ unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
VM_BUG_ON(radix_enabled());
if (slice_addr_is_low(addr)) {
- psizes = mm->context.low_slices_psize;
+ psizes = mm_ctx_low_slices(&mm->context);
index = GET_LOW_SLICE_INDEX(addr);
} else {
- psizes = mm->context.high_slices_psize;
+ psizes = mm_ctx_high_slices(&mm->context);
index = GET_HIGH_SLICE_INDEX(addr);
}
mask_index = index & 0x1;
@@ -741,27 +704,22 @@ void slice_init_new_context_exec(struct mm_struct *mm)
* case of fork it is just inherited from the mm being
* duplicated.
*/
-#ifdef CONFIG_PPC64
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW_USER64;
-#else
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
-#endif
-
- mm->context.user_psize = psize;
+ mm_ctx_set_slb_addr_limit(&mm->context, SLB_ADDR_LIMIT_DEFAULT);
+ mm_ctx_set_user_psize(&mm->context, psize);
/*
* Set all slice psizes to the default.
*/
- lpsizes = mm->context.low_slices_psize;
+ lpsizes = mm_ctx_low_slices(&mm->context);
memset(lpsizes, (psize << 4) | psize, SLICE_NUM_LOW >> 1);
- hpsizes = mm->context.high_slices_psize;
+ hpsizes = mm_ctx_high_slices(&mm->context);
memset(hpsizes, (psize << 4) | psize, SLICE_NUM_HIGH >> 1);
/*
* Slice mask cache starts zeroed, fill the default size cache.
*/
- mask = slice_mask_for_size(mm, psize);
+ mask = slice_mask_for_size(&mm->context, psize);
mask->low_slices = ~0UL;
if (SLICE_NUM_HIGH)
bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
@@ -777,7 +735,7 @@ void slice_setup_new_exec(void)
if (!is_32bit_task())
return;
- mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+ mm_ctx_set_slb_addr_limit(&mm->context, DEFAULT_MAP_WINDOW);
}
#endif
@@ -816,22 +774,21 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
unsigned long len)
{
const struct slice_mask *maskp;
- unsigned int psize = mm->context.user_psize;
+ unsigned int psize = mm_ctx_user_psize(&mm->context);
VM_BUG_ON(radix_enabled());
- maskp = slice_mask_for_size(mm, psize);
-#ifdef CONFIG_PPC_64K_PAGES
+ maskp = slice_mask_for_size(&mm->context, psize);
+
/* We need to account for 4k slices too */
- if (psize == MMU_PAGE_64K) {
+ if (IS_ENABLED(CONFIG_PPC_64K_PAGES) && psize == MMU_PAGE_64K) {
const struct slice_mask *compat_maskp;
struct slice_mask available;
- compat_maskp = slice_mask_for_size(mm, MMU_PAGE_4K);
+ compat_maskp = slice_mask_for_size(&mm->context, MMU_PAGE_4K);
slice_or_mask(&available, maskp, compat_maskp);
return !slice_check_range_fits(mm, &available, addr, len);
}
-#endif
return !slice_check_range_fits(mm, maskp, addr, len);
}
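
The slice.c conversion keeps the underlying storage format: each byte of the low/high slice arrays packs two 4-bit page-size indices, which is why the lookup masks with index & 0x1 and shifts by mask_index * 4, and why the defaults are installed with a single memset of (psize << 4) | psize. A self-contained sketch of that packing; the slice count and psize values are illustrative only:

#include <stdio.h>
#include <string.h>

/* Illustrative only: 16 slices, two 4-bit page-size indices per byte. */
#define NUM_SLICES	16

static unsigned char psizes[NUM_SLICES / 2];

static unsigned int get_psize(int slice)
{
	int mask_index = slice & 0x1;
	int index = slice >> 1;

	return (psizes[index] >> (mask_index * 4)) & 0xf;
}

static void set_psize(int slice, unsigned int psize)
{
	int mask_index = slice & 0x1;
	int index = slice >> 1;

	psizes[index] = (psizes[index] & ~(0xf << (mask_index * 4))) |
			(psize << (mask_index * 4));
}

int main(void)
{
	unsigned int def = 3;	/* pretend 3 is the default page-size index */

	/* Default every slice, two entries per memset'd byte. */
	memset(psizes, (def << 4) | def, sizeof(psizes));

	set_psize(5, 7);	/* pretend slice 5 was converted to psize 7 */

	for (int i = 0; i < NUM_SLICES; i++)
		printf("slice %2d: psize %u\n", i, get_psize(i));
	return 0;
}
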
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index ab26df5bacb9..c155dcbb8691 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -5,7 +5,8 @@ obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o
obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \
power5+-pmu.o power6-pmu.o power7-pmu.o \
- isa207-common.o power8-pmu.o power9-pmu.o
+ isa207-common.o power8-pmu.o power9-pmu.o \
+ generic-compat-pmu.o
obj32-$(CONFIG_PPC_PERF_CTRS) += mpc7450-pmu.o
obj-$(CONFIG_PPC_POWERNV) += imc-pmu.o
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index b0723002a396..a66fb9c01c9e 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -22,6 +22,10 @@
#include <asm/ptrace.h>
#include <asm/code-patching.h>
+#ifdef CONFIG_PPC64
+#include "internal.h"
+#endif
+
#define BHRB_MAX_ENTRIES 32
#define BHRB_TARGET 0x0000000000000002
#define BHRB_PREDICTION 0x0000000000000001
@@ -2294,3 +2298,27 @@ int register_power_pmu(struct power_pmu *pmu)
power_pmu_prepare_cpu, NULL);
return 0;
}
+
+#ifdef CONFIG_PPC64
+static int __init init_ppc64_pmu(void)
+{
+ /* run through all the pmu drivers one at a time */
+ if (!init_power5_pmu())
+ return 0;
+ else if (!init_power5p_pmu())
+ return 0;
+ else if (!init_power6_pmu())
+ return 0;
+ else if (!init_power7_pmu())
+ return 0;
+ else if (!init_power8_pmu())
+ return 0;
+ else if (!init_power9_pmu())
+ return 0;
+ else if (!init_ppc970_pmu())
+ return 0;
+ else
+ return init_generic_compat_pmu();
+}
+early_initcall(init_ppc64_pmu);
+#endif
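
init_ppc64_pmu() above replaces the old per-driver initcalls with a single chain: each init_*_pmu() returns 0 only if it registered a PMU for the running CPU, and the new generic compat PMU is the final fallback. A toy sketch of that first-match-wins ordering, with hard-coded return values standing in for real CPU detection:

#include <stdio.h>

/* Hypothetical initialisers: each returns 0 only if it matches the CPU. */
static int init_power9_pmu(void)		{ return -19; /* not this CPU */ }
static int init_power8_pmu(void)		{ return 0;   /* matched */ }
static int init_generic_compat_pmu(void)	{ return 0;   /* always works */ }

static int init_ppc64_pmu(void)
{
	/* Run through the PMU drivers one at a time; the first one that
	 * recognises the CPU wins, the generic compat PMU is the fallback. */
	if (!init_power9_pmu())
		return 0;
	if (!init_power8_pmu())
		return 0;
	return init_generic_compat_pmu();
}

int main(void)
{
	printf("init_ppc64_pmu() = %d\n", init_ppc64_pmu());
	return 0;
}
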
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
new file mode 100644
index 000000000000..5e5a54d5588e
--- /dev/null
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+
+#define pr_fmt(fmt) "generic-compat-pmu: " fmt
+
+#include "isa207-common.h"
+
+/*
+ * Raw event encoding:
+ *
+ * 60 56 52 48 44 40 36 32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *
+ * 28 24 20 16 12 8 4 0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ * [ pmc ] [unit ] [ ] m [ pmcxsel ]
+ * | |
+ * | *- mark
+ * |
+ * |
+ * *- combine
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit (PMCxUNIT)
+ * MMCR1[24] = pmc1combine[0]
+ * MMCR1[25] = pmc1combine[1]
+ * MMCR1[26] = pmc2combine[0]
+ * MMCR1[27] = pmc2combine[1]
+ * MMCR1[28] = pmc3combine[0]
+ * MMCR1[29] = pmc3combine[1]
+ * MMCR1[30] = pmc4combine[0]
+ * MMCR1[31] = pmc4combine[1]
+ *
+ */
+
+/*
+ * Some power9 event codes.
+ */
+#define EVENT(_name, _code) _name = _code,
+
+enum {
+EVENT(PM_CYC, 0x0001e)
+EVENT(PM_INST_CMPL, 0x00002)
+};
+
+#undef EVENT
+
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(instructions, PM_INST_CMPL);
+
+static struct attribute *generic_compat_events_attr[] = {
+ GENERIC_EVENT_PTR(PM_CYC),
+ GENERIC_EVENT_PTR(PM_INST_CMPL),
+ NULL
+};
+
+static struct attribute_group generic_compat_pmu_events_group = {
+ .name = "events",
+ .attrs = generic_compat_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-19");
+PMU_FORMAT_ATTR(pmcxsel, "config:0-7");
+PMU_FORMAT_ATTR(mark, "config:8");
+PMU_FORMAT_ATTR(combine, "config:10-11");
+PMU_FORMAT_ATTR(unit, "config:12-15");
+PMU_FORMAT_ATTR(pmc, "config:16-19");
+
+static struct attribute *generic_compat_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ &format_attr_pmcxsel.attr,
+ &format_attr_mark.attr,
+ &format_attr_combine.attr,
+ &format_attr_unit.attr,
+ &format_attr_pmc.attr,
+ NULL,
+};
+
+static struct attribute_group generic_compat_pmu_format_group = {
+ .name = "format",
+ .attrs = generic_compat_pmu_format_attr,
+};
+
+static const struct attribute_group *generic_compat_pmu_attr_groups[] = {
+ &generic_compat_pmu_format_group,
+ &generic_compat_pmu_events_group,
+ NULL,
+};
+
+static int compat_generic_events[] = {
+ [PERF_COUNT_HW_CPU_CYCLES] = PM_CYC,
+ [PERF_COUNT_HW_INSTRUCTIONS] = PM_INST_CMPL,
+};
+
+#define C(x) PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(L1I) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(LL) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0,
+ [ C(RESULT_MISS) ] = 0,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(NODE) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+#undef C
+
+static struct power_pmu generic_compat_pmu = {
+ .name = "GENERIC_COMPAT",
+ .n_counter = MAX_PMU_COUNTERS,
+ .add_fields = ISA207_ADD_FIELDS,
+ .test_adder = ISA207_TEST_ADDER,
+ .compute_mmcr = isa207_compute_mmcr,
+ .get_constraint = isa207_get_constraint,
+ .disable_pmc = isa207_disable_pmc,
+ .flags = PPMU_HAS_SIER | PPMU_ARCH_207S,
+ .n_generic = ARRAY_SIZE(compat_generic_events),
+ .generic_events = compat_generic_events,
+ .cache_events = &generic_compat_cache_events,
+ .attr_groups = generic_compat_pmu_attr_groups,
+};
+
+int init_generic_compat_pmu(void)
+{
+ int rc = 0;
+
+ rc = register_power_pmu(&generic_compat_pmu);
+ if (rc)
+ return rc;
+
+ /* Tell userspace that EBB is supported */
+ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+ return 0;
+}
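The format attributes above carve the 20-bit config word into the raw event fields described in the encoding comment at the top of the file: pmcxsel in bits 0-7, mark in bit 8, combine in bits 10-11, unit in bits 12-15 and pmc in bits 16-19. A small stand-alone sketch of that decoding, for illustration only (the struct and helper names are not kernel API):

#include <stdint.h>

struct gc_fields {
	unsigned int pmcxsel;	/* config:0-7   */
	unsigned int mark;	/* config:8     */
	unsigned int combine;	/* config:10-11 */
	unsigned int unit;	/* config:12-15 */
	unsigned int pmc;	/* config:16-19 */
};

static struct gc_fields gc_decode_config(uint64_t config)
{
	struct gc_fields f;

	f.pmcxsel = config & 0xff;		/* event selector */
	f.mark    = (config >> 8) & 0x1;	/* marked-event bit */
	f.combine = (config >> 10) & 0x3;	/* PMC combine bits */
	f.unit    = (config >> 12) & 0xf;	/* PMCxUNIT */
	f.pmc     = (config >> 16) & 0xf;	/* counter number */
	return f;
}

Only PM_CYC and PM_INST_CMPL are exported, so on an otherwise unrecognised CPU the PMU still serves the generic aliases, e.g. perf stat -e cpu-cycles,instructions -- <workload>.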
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index b1c37cc3fa98..31fa753e2eb2 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -43,12 +43,17 @@ static DEFINE_PER_CPU(u64 *, thread_imc_mem);
static struct imc_pmu *thread_imc_pmu;
static int thread_imc_mem_size;
+/* Trace IMC data structures */
+static DEFINE_PER_CPU(u64 *, trace_imc_mem);
+static struct imc_pmu_ref *trace_imc_refc;
+static int trace_imc_mem_size;
+
static struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
{
return container_of(event->pmu, struct imc_pmu, pmu);
}
-PMU_FORMAT_ATTR(event, "config:0-40");
+PMU_FORMAT_ATTR(event, "config:0-61");
PMU_FORMAT_ATTR(offset, "config:0-31");
PMU_FORMAT_ATTR(rvalue, "config:32");
PMU_FORMAT_ATTR(mode, "config:33-40");
@@ -65,6 +70,25 @@ static struct attribute_group imc_format_group = {
.attrs = imc_format_attrs,
};
+/* Format attribute for imc trace-mode */
+PMU_FORMAT_ATTR(cpmc_reserved, "config:0-19");
+PMU_FORMAT_ATTR(cpmc_event, "config:20-27");
+PMU_FORMAT_ATTR(cpmc_samplesel, "config:28-29");
+PMU_FORMAT_ATTR(cpmc_load, "config:30-61");
+static struct attribute *trace_imc_format_attrs[] = {
+ &format_attr_event.attr,
+ &format_attr_cpmc_reserved.attr,
+ &format_attr_cpmc_event.attr,
+ &format_attr_cpmc_samplesel.attr,
+ &format_attr_cpmc_load.attr,
+ NULL,
+};
+
+static struct attribute_group trace_imc_format_group = {
+.name = "format",
+.attrs = trace_imc_format_attrs,
+};
+
/* Get the cpumask printed to a buffer "buf" */
static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
struct device_attribute *attr,
@@ -487,6 +511,11 @@ static int nest_imc_event_init(struct perf_event *event)
 * Get the base memory address for this cpu.
*/
chip_id = cpu_to_chip_id(event->cpu);
+
+ /* Return if chip_id is not valid */
+ if (chip_id < 0)
+ return -ENODEV;
+
pcni = pmu->mem_info;
do {
if (pcni->id == chip_id) {
@@ -494,7 +523,7 @@ static int nest_imc_event_init(struct perf_event *event)
break;
}
pcni++;
- } while (pcni);
+ } while (pcni->vbase != 0);
if (!flag)
return -ENODEV;
@@ -788,8 +817,11 @@ static int core_imc_event_init(struct perf_event *event)
}
/*
- * Allocates a page of memory for each of the online cpus, and write the
- * physical base address of that page to the LDBAR for that cpu.
+ * Allocates a page of memory for each of the online cpus, and loads
+ * LDBAR with 0.
+ * The physical base address of the page allocated for a cpu will be
+ * written to the LDBAR for that cpu, when the thread-imc event
+ * is added.
*
* LDBAR Register Layout:
*
@@ -807,7 +839,7 @@ static int core_imc_event_init(struct perf_event *event)
*/
static int thread_imc_mem_alloc(int cpu_id, int size)
{
- u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, cpu_id);
+ u64 *local_mem = per_cpu(thread_imc_mem, cpu_id);
int nid = cpu_to_node(cpu_id);
if (!local_mem) {
@@ -824,9 +856,7 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
- ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
-
- mtspr(SPRN_LDBAR, ldbar_value);
+ mtspr(SPRN_LDBAR, 0);
return 0;
}
@@ -858,6 +888,9 @@ static int thread_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
/* Sampling not supported */
if (event->hw.sample_period)
return -EINVAL;
@@ -977,6 +1010,7 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
{
int core_id;
struct imc_pmu_ref *ref;
+ u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
if (flags & PERF_EF_START)
imc_event_start(event, flags);
@@ -985,6 +1019,9 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
return -EINVAL;
core_id = smp_processor_id() / threads_per_core;
+ ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
+ mtspr(SPRN_LDBAR, ldbar_value);
+
/*
* imc pmus are enabled only when it is used.
* See if this is triggered for the first time.
@@ -1016,11 +1053,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
int core_id;
struct imc_pmu_ref *ref;
- /*
- * Take a snapshot and calculate the delta and update
- * the event counter values.
- */
- imc_event_update(event);
+ mtspr(SPRN_LDBAR, 0);
core_id = smp_processor_id() / threads_per_core;
ref = &core_imc_refc[core_id];
@@ -1039,6 +1072,240 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
ref->refc = 0;
}
mutex_unlock(&ref->lock);
+ /*
+ * Take a snapshot and calculate the delta and update
+ * the event counter values.
+ */
+ imc_event_update(event);
+}
+
+/*
+ * Allocate a page of memory for each cpu, and load LDBAR with 0.
+ */
+static int trace_imc_mem_alloc(int cpu_id, int size)
+{
+ u64 *local_mem = per_cpu(trace_imc_mem, cpu_id);
+ int phys_id = cpu_to_node(cpu_id), rc = 0;
+ int core_id = (cpu_id / threads_per_core);
+
+ if (!local_mem) {
+ local_mem = page_address(alloc_pages_node(phys_id,
+ GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE |
+ __GFP_NOWARN, get_order(size)));
+ if (!local_mem)
+ return -ENOMEM;
+ per_cpu(trace_imc_mem, cpu_id) = local_mem;
+
+ /* Initialise the counters for trace mode */
+ rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_TRACE, __pa((void *)local_mem),
+ get_hard_smp_processor_id(cpu_id));
+ if (rc) {
+ pr_info("IMC:opal init failed for trace imc\n");
+ return rc;
+ }
+ }
+
+ /* Init the mutex, if not already */
+ trace_imc_refc[core_id].id = core_id;
+ mutex_init(&trace_imc_refc[core_id].lock);
+
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+static int ppc_trace_imc_cpu_online(unsigned int cpu)
+{
+ return trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+}
+
+static int ppc_trace_imc_cpu_offline(unsigned int cpu)
+{
+ mtspr(SPRN_LDBAR, 0);
+ return 0;
+}
+
+static int trace_imc_cpu_init(void)
+{
+ return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+ "perf/powerpc/imc_trace:online",
+ ppc_trace_imc_cpu_online,
+ ppc_trace_imc_cpu_offline);
+}
+
+static u64 get_trace_imc_event_base_addr(void)
+{
+ return (u64)per_cpu(trace_imc_mem, smp_processor_id());
+}
+
+/*
+ * Function to parse trace-imc data obtained
+ * and to prepare the perf sample.
+ */
+static int trace_imc_prepare_sample(struct trace_imc_data *mem,
+ struct perf_sample_data *data,
+ u64 *prev_tb,
+ struct perf_event_header *header,
+ struct perf_event *event)
+{
+ /* Sanity checks for a valid record */
+ if (be64_to_cpu(READ_ONCE(mem->tb1)) > *prev_tb)
+ *prev_tb = be64_to_cpu(READ_ONCE(mem->tb1));
+ else
+ return -EINVAL;
+
+ if ((be64_to_cpu(READ_ONCE(mem->tb1)) & IMC_TRACE_RECORD_TB1_MASK) !=
+ be64_to_cpu(READ_ONCE(mem->tb2)))
+ return -EINVAL;
+
+ /* Prepare perf sample */
+ data->ip = be64_to_cpu(READ_ONCE(mem->ip));
+ data->period = event->hw.last_period;
+
+ header->type = PERF_RECORD_SAMPLE;
+ header->size = sizeof(*header) + event->header_size;
+ header->misc = 0;
+
+ if (is_kernel_addr(data->ip))
+ header->misc |= PERF_RECORD_MISC_KERNEL;
+ else
+ header->misc |= PERF_RECORD_MISC_USER;
+
+ perf_event_header__init_id(header, data, event);
+
+ return 0;
+}
+
+static void dump_trace_imc_data(struct perf_event *event)
+{
+ struct trace_imc_data *mem;
+ int i, ret;
+ u64 prev_tb = 0;
+
+ mem = (struct trace_imc_data *)get_trace_imc_event_base_addr();
+ for (i = 0; i < (trace_imc_mem_size / sizeof(struct trace_imc_data));
+ i++, mem++) {
+ struct perf_sample_data data;
+ struct perf_event_header header;
+
+ ret = trace_imc_prepare_sample(mem, &data, &prev_tb, &header, event);
+ if (ret) /* Exit, if not a valid record */
+ break;
+ else {
+ /* If this is a valid record, create the sample */
+ struct perf_output_handle handle;
+
+ if (perf_output_begin(&handle, event, header.size))
+ return;
+
+ perf_output_sample(&handle, &header, &data, event);
+ perf_output_end(&handle);
+ }
+ }
+}
+
+static int trace_imc_event_add(struct perf_event *event, int flags)
+{
+ int core_id = smp_processor_id() / threads_per_core;
+ struct imc_pmu_ref *ref = NULL;
+ u64 local_mem, ldbar_value;
+
+ /* Set trace-imc bit in ldbar and load ldbar with per-thread memory address */
+ local_mem = get_trace_imc_event_base_addr();
+ ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE;
+
+ if (core_imc_refc)
+ ref = &core_imc_refc[core_id];
+ if (!ref) {
+ /* If core-imc is not enabled, use trace-imc reference count */
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
+ if (!ref)
+ return -EINVAL;
+ }
+ mtspr(SPRN_LDBAR, ldbar_value);
+ mutex_lock(&ref->lock);
+ if (ref->refc == 0) {
+ if (opal_imc_counters_start(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+ mutex_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to start the counters for core %d\n", core_id);
+ mtspr(SPRN_LDBAR, 0);
+ return -EINVAL;
+ }
+ }
+ ++ref->refc;
+ mutex_unlock(&ref->lock);
+
+ return 0;
+}
+
+static void trace_imc_event_read(struct perf_event *event)
+{
+ return;
+}
+
+static void trace_imc_event_stop(struct perf_event *event, int flags)
+{
+ u64 local_mem = get_trace_imc_event_base_addr();
+ dump_trace_imc_data(event);
+ memset((void *)local_mem, 0, sizeof(u64));
+}
+
+static void trace_imc_event_start(struct perf_event *event, int flags)
+{
+ return;
+}
+
+static void trace_imc_event_del(struct perf_event *event, int flags)
+{
+ int core_id = smp_processor_id() / threads_per_core;
+ struct imc_pmu_ref *ref = NULL;
+
+ if (core_imc_refc)
+ ref = &core_imc_refc[core_id];
+ if (!ref) {
+ /* If core-imc is not enabled, use trace-imc reference count */
+ if (trace_imc_refc)
+ ref = &trace_imc_refc[core_id];
+ if (!ref)
+ return;
+ }
+ mtspr(SPRN_LDBAR, 0);
+ mutex_lock(&ref->lock);
+ ref->refc--;
+ if (ref->refc == 0) {
+ if (opal_imc_counters_stop(OPAL_IMC_COUNTERS_TRACE,
+ get_hard_smp_processor_id(smp_processor_id()))) {
+ mutex_unlock(&ref->lock);
+ pr_err("trace-imc: Unable to stop the counters for core %d\n", core_id);
+ return;
+ }
+ } else if (ref->refc < 0) {
+ ref->refc = 0;
+ }
+ mutex_unlock(&ref->lock);
+ trace_imc_event_stop(event, flags);
+}
+
+static int trace_imc_event_init(struct perf_event *event)
+{
+ struct task_struct *target;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+
+ /* Return if this is a counting event */
+ if (event->attr.sample_period == 0)
+ return -ENOENT;
+
+ event->hw.idx = -1;
+ target = event->hw.target;
+
+ event->pmu->task_ctx_nr = perf_hw_context;
+ return 0;
}
/* update_pmu_ops : Populate the appropriate operations for "pmu" */
@@ -1071,6 +1338,14 @@ static int update_pmu_ops(struct imc_pmu *pmu)
pmu->pmu.cancel_txn = thread_imc_pmu_cancel_txn;
pmu->pmu.commit_txn = thread_imc_pmu_commit_txn;
break;
+ case IMC_DOMAIN_TRACE:
+ pmu->pmu.event_init = trace_imc_event_init;
+ pmu->pmu.add = trace_imc_event_add;
+ pmu->pmu.del = trace_imc_event_del;
+ pmu->pmu.start = trace_imc_event_start;
+ pmu->pmu.stop = trace_imc_event_stop;
+ pmu->pmu.read = trace_imc_event_read;
+ pmu->attr_groups[IMC_FORMAT_ATTR] = &trace_imc_format_group;
default:
break;
}
@@ -1163,6 +1438,18 @@ static void cleanup_all_thread_imc_memory(void)
}
}
+static void cleanup_all_trace_imc_memory(void)
+{
+ int i, order = get_order(trace_imc_mem_size);
+
+ for_each_online_cpu(i) {
+ if (per_cpu(trace_imc_mem, i))
+ free_pages((u64)per_cpu(trace_imc_mem, i), order);
+
+ }
+ kfree(trace_imc_refc);
+}
+
/* Function to free the attr_groups which are dynamically allocated */
static void imc_common_mem_free(struct imc_pmu *pmu_ptr)
{
@@ -1204,6 +1491,11 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE);
cleanup_all_thread_imc_memory();
}
+
+ if (pmu_ptr->domain == IMC_DOMAIN_TRACE) {
+ cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE);
+ cleanup_all_trace_imc_memory();
+ }
}
/*
@@ -1286,6 +1578,27 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
thread_imc_pmu = pmu_ptr;
break;
+ case IMC_DOMAIN_TRACE:
+ /* Update the pmu name */
+ pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+ if (!pmu_ptr->pmu.name)
+ return -ENOMEM;
+
+ nr_cores = DIV_ROUND_UP(num_possible_cpus(), threads_per_core);
+ trace_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+ GFP_KERNEL);
+ if (!trace_imc_refc)
+ return -ENOMEM;
+
+ trace_imc_mem_size = pmu_ptr->counter_mem_size;
+ for_each_online_cpu(cpu) {
+ res = trace_imc_mem_alloc(cpu, trace_imc_mem_size);
+ if (res) {
+ cleanup_all_trace_imc_memory();
+ goto err;
+ }
+ }
+ break;
default:
return -EINVAL;
}
@@ -1359,6 +1672,14 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
}
break;
+ case IMC_DOMAIN_TRACE:
+ ret = trace_imc_cpu_init();
+ if (ret) {
+ cleanup_all_trace_imc_memory();
+ goto err_free_mem;
+ }
+
+ break;
default:
return -EINVAL; /* Unknown domain */
}
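trace_imc_prepare_sample above only accepts a hardware record when its timebase moves forward and the masked timebase stored in tb1 matches the copy in tb2; anything else ends the walk in dump_trace_imc_data, which copes with a partially filled per-thread buffer. A compressed sketch of that acceptance test with the same semantics (IMC_TRACE_RECORD_TB1_MASK comes from the IMC headers; the helper name is invented for illustration):

static bool trace_imc_record_valid(u64 tb1, u64 tb2, u64 *prev_tb)
{
	/* The record must be newer than the last one consumed. */
	if (tb1 <= *prev_tb)
		return false;
	*prev_tb = tb1;

	/* The masked timebase in tb1 must match the copy kept in tb2. */
	return (tb1 & IMC_TRACE_RECORD_TB1_MASK) == tb2;
}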
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
new file mode 100644
index 000000000000..f755c64da137
--- /dev/null
+++ b/arch/powerpc/perf/internal.h
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2019 Madhavan Srinivasan, IBM Corporation.
+
+extern int init_ppc970_pmu(void);
+extern int init_power5_pmu(void);
+extern int init_power5p_pmu(void);
+extern int init_power6_pmu(void);
+extern int init_power7_pmu(void);
+extern int init_power8_pmu(void);
+extern int init_power9_pmu(void);
+extern int init_generic_compat_pmu(void);
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index 0526dac66007..9aa803504cb2 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -677,7 +677,7 @@ static struct power_pmu power5p_pmu = {
.cache_events = &power5p_cache_events,
};
-static int __init init_power5p_pmu(void)
+int init_power5p_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
(strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5+")
@@ -686,5 +686,3 @@ static int __init init_power5p_pmu(void)
return register_power_pmu(&power5p_pmu);
}
-
-early_initcall(init_power5p_pmu);
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index 4dc99f9f7962..30cb13d081a9 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -618,7 +618,7 @@ static struct power_pmu power5_pmu = {
.flags = PPMU_HAS_SSLOT,
};
-static int __init init_power5_pmu(void)
+int init_power5_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power5"))
@@ -626,5 +626,3 @@ static int __init init_power5_pmu(void)
return register_power_pmu(&power5_pmu);
}
-
-early_initcall(init_power5_pmu);
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 9c9d646b68a1..80ec48632cfe 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -540,7 +540,7 @@ static struct power_pmu power6_pmu = {
.cache_events = &power6_cache_events,
};
-static int __init init_power6_pmu(void)
+int init_power6_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power6"))
@@ -548,5 +548,3 @@ static int __init init_power6_pmu(void)
return register_power_pmu(&power6_pmu);
}
-
-early_initcall(init_power6_pmu);
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index 6dbae9884ec4..bb6efd5d2530 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -445,7 +445,7 @@ static struct power_pmu power7_pmu = {
.cache_events = &power7_cache_events,
};
-static int __init init_power7_pmu(void)
+int init_power7_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power7"))
@@ -456,5 +456,3 @@ static int __init init_power7_pmu(void)
return register_power_pmu(&power7_pmu);
}
-
-early_initcall(init_power7_pmu);
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index d12a2db26353..bcc3409a06de 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -379,7 +379,7 @@ static struct power_pmu power8_pmu = {
.bhrb_nr = 32,
};
-static int __init init_power8_pmu(void)
+int init_power8_pmu(void)
{
int rc;
@@ -399,4 +399,3 @@ static int __init init_power8_pmu(void)
return 0;
}
-early_initcall(init_power8_pmu);
diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h
index 063c9d9f2516..6b1dc9a83ede 100644
--- a/arch/powerpc/perf/power9-events-list.h
+++ b/arch/powerpc/perf/power9-events-list.h
@@ -63,8 +63,6 @@ EVENT(PM_RUN_CYC_ALT, 0x200f4)
/* Instruction Dispatched */
EVENT(PM_INST_DISP, 0x200f2)
EVENT(PM_INST_DISP_ALT, 0x300f2)
-/* Alternate Branch event code */
-EVENT(PM_BR_CMPL_ALT, 0x10012)
/* Branch event that are not strongly biased */
EVENT(PM_BR_2PATH, 0x20036)
/* ALternate branch event that are not strongly biased */
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 030544e35959..3a31ac6f4805 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -437,7 +437,7 @@ static struct power_pmu power9_pmu = {
.bhrb_nr = 32,
};
-static int __init init_power9_pmu(void)
+int init_power9_pmu(void)
{
int rc = 0;
unsigned int pvr = mfspr(SPRN_PVR);
@@ -467,4 +467,3 @@ static int __init init_power9_pmu(void)
return 0;
}
-early_initcall(init_power9_pmu);
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 8b6a8a36fa38..1d3370914022 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -490,7 +490,7 @@ static struct power_pmu ppc970_pmu = {
.flags = PPMU_NO_SIPR | PPMU_NO_CONT_SAMPLING,
};
-static int __init init_ppc970_pmu(void)
+int init_ppc970_pmu(void)
{
if (!cur_cpu_spec->oprofile_cpu_type ||
(strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/970")
@@ -499,5 +499,3 @@ static int __init init_ppc970_pmu(void)
return register_power_pmu(&ppc970_pmu);
}
-
-early_initcall(init_ppc970_pmu);
diff --git a/arch/powerpc/platforms/512x/clock-commonclk.c b/arch/powerpc/platforms/512x/clock-commonclk.c
index b3097fe6441b..af265ae40a61 100644
--- a/arch/powerpc/platforms/512x/clock-commonclk.c
+++ b/arch/powerpc/platforms/512x/clock-commonclk.c
@@ -239,6 +239,7 @@ static inline struct clk *mpc512x_clk_divider(
const char *name, const char *parent_name, u8 clkflags,
u32 __iomem *reg, u8 pos, u8 len, int divflags)
{
+ divflags |= CLK_DIVIDER_BIG_ENDIAN;
return clk_register_divider(NULL, name, parent_name, clkflags,
reg, pos, len, divflags, &clklock);
}
@@ -250,7 +251,7 @@ static inline struct clk *mpc512x_clk_divtable(
{
u8 divflags;
- divflags = 0;
+ divflags = CLK_DIVIDER_BIG_ENDIAN;
return clk_register_divider_table(NULL, name, parent_name, 0,
reg, pos, len, divflags,
divtab, &clklock);
@@ -261,10 +262,12 @@ static inline struct clk *mpc512x_clk_gated(
u32 __iomem *reg, u8 pos)
{
int clkflags;
+ u8 gateflags;
clkflags = CLK_SET_RATE_PARENT;
+ gateflags = CLK_GATE_BIG_ENDIAN;
return clk_register_gate(NULL, name, parent_name, clkflags,
- reg, pos, 0, &clklock);
+ reg, pos, gateflags, &clklock);
}
static inline struct clk *mpc512x_clk_muxed(const char *name,
@@ -275,7 +278,7 @@ static inline struct clk *mpc512x_clk_muxed(const char *name,
u8 muxflags;
clkflags = CLK_SET_RATE_PARENT;
- muxflags = 0;
+ muxflags = CLK_MUX_BIG_ENDIAN;
return clk_register_mux(NULL, name,
parent_names, parent_count, clkflags,
reg, pos, len, muxflags, &clklock);
diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
index 17cf249b18ee..3cb2f07ce8eb 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c
@@ -628,7 +628,7 @@ static int mpc52xx_wdt_open(struct inode *inode, struct file *file)
}
file->private_data = mpc52xx_gpt_wdt;
- return nonseekable_open(inode, file);
+ return stream_open(inode, file);
}
static int mpc52xx_wdt_release(struct inode *inode, struct file *file)
diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c
index 5c31d8292d3b..e7c2c3fb011a 100644
--- a/arch/powerpc/platforms/83xx/usb.c
+++ b/arch/powerpc/platforms/83xx/usb.c
@@ -221,8 +221,10 @@ int mpc837x_usb_cfg(void)
int ret = 0;
np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr");
- if (!np || !of_device_is_available(np))
+ if (!np || !of_device_is_available(np)) {
+ of_node_put(np);
return -ENODEV;
+ }
prop = of_get_property(np, "phy_type", NULL);
if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) {
diff --git a/arch/powerpc/platforms/8xx/pic.c b/arch/powerpc/platforms/8xx/pic.c
index 8d5a25d43ef3..e9617d35fd1f 100644
--- a/arch/powerpc/platforms/8xx/pic.c
+++ b/arch/powerpc/platforms/8xx/pic.c
@@ -153,10 +153,9 @@ int mpc8xx_pic_init(void)
if (mpc8xx_pic_host == NULL) {
printk(KERN_ERR "MPC8xx PIC: failed to allocate irq host!\n");
ret = -ENOMEM;
- goto out;
}
- return 0;
+ ret = 0;
out:
of_node_put(np);
return ret;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 50cd09b4e05d..d0e172d47574 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -25,6 +25,8 @@ config PPC_BOOK3S_32
bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
config PPC_85xx
bool "Freescale 85xx"
@@ -34,6 +36,9 @@ config PPC_8xx
bool "Freescale 8xx"
select FSL_SOC
select SYS_SUPPORTS_HUGETLBFS
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
+ select PPC_MM_SLICES if HUGETLB_PAGE
config 40x
bool "AMCC 40x"
@@ -75,6 +80,7 @@ config PPC_BOOK3S_64
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
+ select PPC_MM_SLICES
config PPC_BOOK3E_64
bool "Embedded processors"
@@ -326,6 +332,8 @@ config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64 && HUGETLB_PAGE
select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA
+ select PPC_HAVE_KUEP
+ select PPC_HAVE_KUAP
default y
help
Enable support for the Power ISA 3.0 Radix style MMU. Currently this
@@ -345,6 +353,37 @@ config PPC_RADIX_MMU_DEFAULT
If you're unsure, say Y.
+config PPC_HAVE_KUEP
+ bool
+
+config PPC_KUEP
+ bool "Kernel Userspace Execution Prevention"
+ depends on PPC_HAVE_KUEP
+ default y
+ help
+ Enable support for Kernel Userspace Execution Prevention (KUEP)
+
+ If you're unsure, say Y.
+
+config PPC_HAVE_KUAP
+ bool
+
+config PPC_KUAP
+ bool "Kernel Userspace Access Protection"
+ depends on PPC_HAVE_KUAP
+ default y
+ help
+ Enable support for Kernel Userspace Access Protection (KUAP)
+
+ If you're unsure, say Y.
+
+config PPC_KUAP_DEBUG
+ bool "Extra debugging for Kernel Userspace Access Protection"
+ depends on PPC_HAVE_KUAP && (PPC_RADIX_MMU || PPC_32)
+ help
+ Add extra debugging for Kernel Userspace Access Protection (KUAP)
+ If you're unsure, say N.
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -354,14 +393,16 @@ config PPC_MMU_NOHASH
def_bool y
depends on !PPC_BOOK3S
+config PPC_MMU_NOHASH_32
+ def_bool y
+ depends on PPC_MMU_NOHASH && PPC32
+
config PPC_BOOK3E_MMU
def_bool y
depends on FSL_BOOKE || PPC_BOOK3E
config PPC_MM_SLICES
bool
- default y if PPC_BOOK3S_64
- default y if PPC_8xx && HUGETLB_PAGE
config PPC_HAVE_PMU_SUPPORT
bool
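PPC_KUEP and PPC_KUAP only become visible once the platform selects the matching PPC_HAVE_* symbol, which the hunks above add for Book3S 32, 8xx and the Radix MMU, and both default to y. A sketch of the resulting .config fragment on a Radix-MMU build, assuming the defaults are kept and the extra debugging is left off:

CONFIG_PPC_HAVE_KUEP=y
CONFIG_PPC_KUEP=y
CONFIG_PPC_HAVE_KUAP=y
CONFIG_PPC_KUAP=y
# CONFIG_PPC_KUAP_DEBUG is not set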
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 7f12c7b78c0f..6646f152d57b 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -194,7 +194,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr)
* faults need to be deferred to process context.
*/
if ((dsisr & MFC_DSISR_PTE_NOT_FOUND) &&
- (REGION_ID(ea) != USER_REGION_ID)) {
+ (get_region_id(ea) != USER_REGION_ID)) {
spin_unlock(&spu->register_lock);
ret = hash_page(ea,
@@ -224,7 +224,7 @@ static void __spu_kernel_slb(void *addr, struct copro_slb *slb)
unsigned long ea = (unsigned long)addr;
u64 llp;
- if (REGION_ID(ea) == KERNEL_REGION_ID)
+ if (get_region_id(ea) == LINEAR_MAP_REGION_ID)
llp = mmu_psize_defs[mmu_linear_psize].sllp;
else
llp = mmu_psize_defs[mmu_virtual_psize].sllp;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 48c2477e7e2a..bfb9ca99ac05 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -588,7 +588,7 @@ static int spufs_pipe_open(struct inode *inode, struct file *file)
struct spufs_inode_info *i = SPUFS_I(inode);
file->private_data = i->i_ctx;
- return nonseekable_open(inode, file);
+ return stream_open(inode, file);
}
/*
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index db329d4bf1c3..c1a75216050a 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -71,17 +71,11 @@ spufs_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
-static void spufs_i_callback(struct rcu_head *head)
+static void spufs_free_inode(struct inode *inode)
{
- struct inode *inode = container_of(head, struct inode, i_rcu);
kmem_cache_free(spufs_inode_cache, SPUFS_I(inode));
}
-static void spufs_destroy_inode(struct inode *inode)
-{
- call_rcu(&inode->i_rcu, spufs_i_callback);
-}
-
static void
spufs_init_once(void *p)
{
@@ -739,7 +733,7 @@ spufs_fill_super(struct super_block *sb, void *data, int silent)
struct spufs_sb_info *info;
static const struct super_operations s_ops = {
.alloc_inode = spufs_alloc_inode,
- .destroy_inode = spufs_destroy_inode,
+ .free_inode = spufs_free_inode,
.statfs = simple_statfs,
.evict_inode = spufs_evict_inode,
.show_options = spufs_show_options,
diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c
index 0409714e8070..829bf3697dc9 100644
--- a/arch/powerpc/platforms/embedded6xx/holly.c
+++ b/arch/powerpc/platforms/embedded6xx/holly.c
@@ -44,7 +44,8 @@
#define HOLLY_PCI_CFG_PHYS 0x7c000000
-int holly_exclude_device(struct pci_controller *hose, u_char bus, u_char devfn)
+static int holly_exclude_device(struct pci_controller *hose, u_char bus,
+ u_char devfn)
{
if (bus == 0 && PCI_SLOT(devfn) == 0)
return PCIBIOS_DEVICE_NOT_FOUND;
@@ -187,13 +188,13 @@ static void __init holly_init_IRQ(void)
tsi108_write_reg(TSI108_MPIC_OFFSET + 0x30c, 0);
}
-void holly_show_cpuinfo(struct seq_file *m)
+static void holly_show_cpuinfo(struct seq_file *m)
{
seq_printf(m, "vendor\t\t: IBM\n");
seq_printf(m, "machine\t\t: PPC750 GX/CL\n");
}
-void __noreturn holly_restart(char *cmd)
+static void __noreturn holly_restart(char *cmd)
{
__be32 __iomem *ocn_bar1 = NULL;
unsigned long bar;
@@ -233,18 +234,6 @@ void __noreturn holly_restart(char *cmd)
for (;;) ;
}
-void holly_power_off(void)
-{
- local_irq_disable();
- /* No way to shut power off with software */
- for (;;) ;
-}
-
-void holly_halt(void)
-{
- holly_power_off();
-}
-
/*
* Called very early, device-tree isn't unflattened
*/
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 20ebf35d7913..f4247ade71ca 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -2,6 +2,12 @@
CFLAGS_bootx_init.o += -fPIC
CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector)
+KASAN_SANITIZE_bootx_init.o := n
+
+ifdef CONFIG_KASAN
+CFLAGS_bootx_init.o += -DDISABLE_BRANCH_PROFILING
+endif
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index e52f9b06dd9c..c9133f7908ca 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/cpu.h>
+#include <asm/asm-prototypes.h>
#include <asm/firmware.h>
#include <asm/machdep.h>
#include <asm/opal.h>
@@ -48,10 +49,10 @@ static u64 pnv_default_stop_mask;
static bool default_stop_found;
/*
- * First deep stop state. Used to figure out when to save/restore
- * hypervisor context.
+ * First stop state levels when SPR and TB loss can occur.
*/
-u64 pnv_first_deep_stop_state = MAX_STOP_STATE;
+static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
/*
* psscr value and mask of the deepest stop idle state.
@@ -62,6 +63,8 @@ static u64 pnv_deepest_stop_psscr_mask;
static u64 pnv_deepest_stop_flag;
static bool deepest_stop_found;
+static unsigned long power7_offline_type;
+
static int pnv_save_sprs_for_deep_states(void)
{
int cpu;
@@ -72,12 +75,12 @@ static int pnv_save_sprs_for_deep_states(void)
* all cpus at boot. Get these reg values of current cpu and use the
* same across all cpus.
*/
- uint64_t lpcr_val = mfspr(SPRN_LPCR);
- uint64_t hid0_val = mfspr(SPRN_HID0);
- uint64_t hid1_val = mfspr(SPRN_HID1);
- uint64_t hid4_val = mfspr(SPRN_HID4);
- uint64_t hid5_val = mfspr(SPRN_HID5);
- uint64_t hmeer_val = mfspr(SPRN_HMEER);
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
+ uint64_t hid0_val = mfspr(SPRN_HID0);
+ uint64_t hid1_val = mfspr(SPRN_HID1);
+ uint64_t hid4_val = mfspr(SPRN_HID4);
+ uint64_t hid5_val = mfspr(SPRN_HID5);
+ uint64_t hmeer_val = mfspr(SPRN_HMEER);
uint64_t msr_val = MSR_IDLE;
uint64_t psscr_val = pnv_deepest_stop_psscr_val;
@@ -137,89 +140,6 @@ static int pnv_save_sprs_for_deep_states(void)
return 0;
}
-static void pnv_alloc_idle_core_states(void)
-{
- int i, j;
- int nr_cores = cpu_nr_cores();
- u32 *core_idle_state;
-
- /*
- * core_idle_state - The lower 8 bits track the idle state of
- * each thread of the core.
- *
- * The most significant bit is the lock bit.
- *
- * Initially all the bits corresponding to threads_per_core
- * are set. They are cleared when the thread enters deep idle
- * state like sleep and winkle/stop.
- *
- * Initially the lock bit is cleared. The lock bit has 2
- * purposes:
- * a. While the first thread in the core waking up from
- * idle is restoring core state, it prevents other
- * threads in the core from switching to process
- * context.
- * b. While the last thread in the core is saving the
- * core state, it prevents a different thread from
- * waking up.
- */
- for (i = 0; i < nr_cores; i++) {
- int first_cpu = i * threads_per_core;
- int node = cpu_to_node(first_cpu);
- size_t paca_ptr_array_size;
-
- core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
- *core_idle_state = (1 << threads_per_core) - 1;
- paca_ptr_array_size = (threads_per_core *
- sizeof(struct paca_struct *));
-
- for (j = 0; j < threads_per_core; j++) {
- int cpu = first_cpu + j;
-
- paca_ptrs[cpu]->core_idle_state_ptr = core_idle_state;
- paca_ptrs[cpu]->thread_idle_state = PNV_THREAD_RUNNING;
- paca_ptrs[cpu]->thread_mask = 1 << j;
- }
- }
-
- update_subcore_sibling_mask();
-
- if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
- int rc = pnv_save_sprs_for_deep_states();
-
- if (likely(!rc))
- return;
-
- /*
- * The stop-api is unable to restore hypervisor
- * resources on wakeup from platform idle states which
- * lose full context. So disable such states.
- */
- supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
- pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
- pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
-
- if (cpu_has_feature(CPU_FTR_ARCH_300) &&
- (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
- /*
- * Use the default stop state for CPU-Hotplug
- * if available.
- */
- if (default_stop_found) {
- pnv_deepest_stop_psscr_val =
- pnv_default_stop_val;
- pnv_deepest_stop_psscr_mask =
- pnv_default_stop_mask;
- pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
- pnv_deepest_stop_psscr_val);
- } else { /* Fallback to snooze loop for CPU-Hotplug */
- deepest_stop_found = false;
- pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
- }
- }
- }
-}
-
u32 pnv_get_supported_cpuidle_states(void)
{
return supported_cpuidle_states;
@@ -238,6 +158,9 @@ static void pnv_fastsleep_workaround_apply(void *info)
*err = 1;
}
+static bool power7_fastsleep_workaround_entry = true;
+static bool power7_fastsleep_workaround_exit = true;
+
/*
* Used to store fastsleep workaround state
* 0 - Workaround applied/undone at fastsleep entry/exit path (Default)
@@ -269,21 +192,15 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
* fastsleep_workaround_applyonce = 1 implies
* fastsleep workaround needs to be left in 'applied' state on all
* the cores. Do this by-
- * 1. Patching out the call to 'undo' workaround in fastsleep exit path
- * 2. Sending ipi to all the cores which have at least one online thread
- * 3. Patching out the call to 'apply' workaround in fastsleep entry
- * path
+ * 1. Disable the 'undo' workaround in fastsleep exit path
+ * 2. Send IPIs to all the cores which have at least one online thread
+ * 3. Disable the 'apply' workaround in fastsleep entry path
+ *
* There is no need to send ipi to cores which have all threads
* offlined, as last thread of the core entering fastsleep or deeper
* state would have applied workaround.
*/
- err = patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
- PPC_INST_NOP);
- if (err) {
- pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_exit");
- goto fail;
- }
+ power7_fastsleep_workaround_exit = false;
get_online_cpus();
primary_thread_mask = cpu_online_cores_map();
@@ -296,13 +213,7 @@ static ssize_t store_fastsleep_workaround_applyonce(struct device *dev,
goto fail;
}
- err = patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
- PPC_INST_NOP);
- if (err) {
- pr_err("fastsleep_workaround_applyonce change failed while patching pnv_fastsleep_workaround_at_entry");
- goto fail;
- }
+ power7_fastsleep_workaround_entry = false;
fastsleep_workaround_applyonce = 1;
@@ -315,27 +226,346 @@ static DEVICE_ATTR(fastsleep_workaround_applyonce, 0600,
show_fastsleep_workaround_applyonce,
store_fastsleep_workaround_applyonce);
-static unsigned long __power7_idle_type(unsigned long type)
+static inline void atomic_start_thread_idle(void)
{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ int thread_nr = cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ clear_bit(thread_nr, state);
+}
+
+static inline void atomic_stop_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ int thread_nr = cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ set_bit(thread_nr, state);
+}
+
+static inline void atomic_lock_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ while (unlikely(test_and_set_bit_lock(NR_PNV_CORE_IDLE_LOCK_BIT, state)))
+ barrier();
+}
+
+static inline void atomic_unlock_and_stop_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ u64 s = READ_ONCE(*state);
+ u64 new, tmp;
+
+ BUG_ON(!(s & PNV_CORE_IDLE_LOCK_BIT));
+ BUG_ON(s & thread);
+
+again:
+ new = (s | thread) & ~PNV_CORE_IDLE_LOCK_BIT;
+ tmp = cmpxchg(state, s, new);
+ if (unlikely(tmp != s)) {
+ s = tmp;
+ goto again;
+ }
+}
+
+static inline void atomic_unlock_thread_idle(void)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+
+ BUG_ON(!test_bit(NR_PNV_CORE_IDLE_LOCK_BIT, state));
+ clear_bit_unlock(NR_PNV_CORE_IDLE_LOCK_BIT, state);
+}
+
+/* P7 and P8 */
+struct p7_sprs {
+ /* per core */
+ u64 tscr;
+ u64 worc;
+
+ /* per subcore */
+ u64 sdr1;
+ u64 rpr;
+
+ /* per thread */
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 purr;
+ u64 spurr;
+ u64 dscr;
+ u64 wort;
+
+ /* per thread SPRs that get lost in shallow states */
+ u64 amr;
+ u64 iamr;
+ u64 amor;
+ u64 uamor;
+};
+
+static unsigned long power7_idle_insn(unsigned long type)
+{
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long thread = 1UL << cpu_thread_in_core(cpu);
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
unsigned long srr1;
+ bool full_winkle;
+ struct p7_sprs sprs = {}; /* avoid false use-uninitialised */
+ bool sprs_saved = false;
+ int rc;
- if (!prep_irq_for_idle_irqsoff())
- return 0;
+ if (unlikely(type != PNV_THREAD_NAP)) {
+ atomic_lock_thread_idle();
+
+ BUG_ON(!(*state & thread));
+ *state &= ~thread;
+
+ if (power7_fastsleep_workaround_entry) {
+ if ((*state & core_thread_mask) == 0) {
+ rc = opal_config_cpu_idle_state(
+ OPAL_CONFIG_IDLE_FASTSLEEP,
+ OPAL_CONFIG_IDLE_APPLY);
+ BUG_ON(rc);
+ }
+ }
+
+ if (type == PNV_THREAD_WINKLE) {
+ sprs.tscr = mfspr(SPRN_TSCR);
+ sprs.worc = mfspr(SPRN_WORC);
+
+ sprs.sdr1 = mfspr(SPRN_SDR1);
+ sprs.rpr = mfspr(SPRN_RPR);
+
+ sprs.lpcr = mfspr(SPRN_LPCR);
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ sprs.hfscr = mfspr(SPRN_HFSCR);
+ sprs.fscr = mfspr(SPRN_FSCR);
+ }
+ sprs.purr = mfspr(SPRN_PURR);
+ sprs.spurr = mfspr(SPRN_SPURR);
+ sprs.dscr = mfspr(SPRN_DSCR);
+ sprs.wort = mfspr(SPRN_WORT);
+
+ sprs_saved = true;
+
+ /*
+ * Increment winkle counter and set all winkle bits if
+ * all threads are winkling. This allows wakeup side to
+ * distinguish between fast sleep and winkle state
+ * loss. Fast sleep still has to resync the timebase so
+ * this may not be a really big win.
+ */
+ *state += 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ if ((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS)
+ >> PNV_CORE_IDLE_WINKLE_COUNT_SHIFT
+ == threads_per_core)
+ *state |= PNV_CORE_IDLE_THREAD_WINKLE_BITS;
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ }
+
+ atomic_unlock_thread_idle();
+ }
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ sprs.amr = mfspr(SPRN_AMR);
+ sprs.iamr = mfspr(SPRN_IAMR);
+ sprs.amor = mfspr(SPRN_AMOR);
+ sprs.uamor = mfspr(SPRN_UAMOR);
+ }
+
+ local_paca->thread_idle_state = type;
+ srr1 = isa206_idle_insn_mayloss(type); /* go idle */
+ local_paca->thread_idle_state = PNV_THREAD_RUNNING;
+
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+ /*
+ * We don't need an isync after the mtsprs here because
+ * the upcoming mtmsrd is execution synchronizing.
+ */
+ mtspr(SPRN_AMR, sprs.amr);
+ mtspr(SPRN_IAMR, sprs.iamr);
+ mtspr(SPRN_AMOR, sprs.amor);
+ mtspr(SPRN_UAMOR, sprs.uamor);
+ }
+ }
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ if (likely((srr1 & SRR1_WAKESTATE) != SRR1_WS_HVLOSS)) {
+ if (unlikely(type != PNV_THREAD_NAP)) {
+ atomic_lock_thread_idle();
+ if (type == PNV_THREAD_WINKLE) {
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+ }
+ atomic_unlock_and_stop_thread_idle();
+ }
+ return srr1;
+ }
+
+ /* HV state loss */
+ BUG_ON(type == PNV_THREAD_NAP);
+
+ atomic_lock_thread_idle();
+
+ full_winkle = false;
+ if (type == PNV_THREAD_WINKLE) {
+ WARN_ON((*state & PNV_CORE_IDLE_WINKLE_COUNT_BITS) == 0);
+ *state -= 1 << PNV_CORE_IDLE_WINKLE_COUNT_SHIFT;
+ if (*state & (thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT)) {
+ *state &= ~(thread << PNV_CORE_IDLE_THREAD_WINKLE_BITS_SHIFT);
+ full_winkle = true;
+ BUG_ON(!sprs_saved);
+ }
+ }
+
+ WARN_ON(*state & thread);
+
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* Per-core SPRs */
+ if (full_winkle) {
+ mtspr(SPRN_TSCR, sprs.tscr);
+ mtspr(SPRN_WORC, sprs.worc);
+ }
+
+ if (power7_fastsleep_workaround_exit) {
+ rc = opal_config_cpu_idle_state(OPAL_CONFIG_IDLE_FASTSLEEP,
+ OPAL_CONFIG_IDLE_UNDO);
+ BUG_ON(rc);
+ }
+
+ /* TB */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+
+core_woken:
+ if (!full_winkle)
+ goto subcore_woken;
+
+ if ((*state & local_paca->subcore_sibling_mask) != 0)
+ goto subcore_woken;
+
+ /* Per-subcore SPRs */
+ mtspr(SPRN_SDR1, sprs.sdr1);
+ mtspr(SPRN_RPR, sprs.rpr);
+
+subcore_woken:
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+ atomic_unlock_and_stop_thread_idle();
+
+ /* Fast sleep does not lose SPRs */
+ if (!full_winkle)
+ return srr1;
+
+ /* Per-thread SPRs */
+ mtspr(SPRN_LPCR, sprs.lpcr);
+ if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ mtspr(SPRN_HFSCR, sprs.hfscr);
+ mtspr(SPRN_FSCR, sprs.fscr);
+ }
+ mtspr(SPRN_PURR, sprs.purr);
+ mtspr(SPRN_SPURR, sprs.spurr);
+ mtspr(SPRN_DSCR, sprs.dscr);
+ mtspr(SPRN_WORT, sprs.wort);
+
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+
+ /*
+ * The SLB has to be restored here, but it sometimes still
+ * contains entries, so the __ variant must be used to prevent
+ * multi hits.
+ */
+ __slb_restore_bolted_realmode();
+
+ return srr1;
+}
+
+extern unsigned long idle_kvm_start_guest(unsigned long srr1);
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long power7_offline(void)
+{
+ unsigned long srr1;
+
+ mtmsr(MSR_IDLE);
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Tell KVM we're entering idle. */
+ /******************************************************/
+ /* N O T E W E L L ! ! ! N O T E W E L L */
+ /* The following store to HSTATE_HWTHREAD_STATE(r13) */
+ /* MUST occur in real mode, i.e. with the MMU off, */
+ /* and the MMU must stay off until we clear this flag */
+ /* and test HSTATE_HWTHREAD_REQ(r13) in */
+ /* pnv_powersave_wakeup in this file. */
+ /* The reason is that another thread can switch the */
+ /* MMU to a guest context whenever this flag is set */
+ /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
+ /* that would potentially cause this thread to start */
+ /* executing instructions from guest memory in */
+ /* hypervisor mode, leading to a host crash or data */
+ /* corruption, or worse. */
+ /******************************************************/
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
+#endif
__ppc64_runlatch_off();
- srr1 = power7_idle_insn(type);
+ srr1 = power7_idle_insn(power7_offline_type);
__ppc64_runlatch_on();
- fini_irq_for_idle_irqsoff();
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+#endif
+
+ mtmsr(MSR_KERNEL);
return srr1;
}
+#endif
void power7_idle_type(unsigned long type)
{
unsigned long srr1;
- srr1 = __power7_idle_type(type);
+ if (!prep_irq_for_idle_irqsoff())
+ return;
+
+ mtmsr(MSR_IDLE);
+ __ppc64_runlatch_off();
+ srr1 = power7_idle_insn(type);
+ __ppc64_runlatch_on();
+ mtmsr(MSR_KERNEL);
+
+ fini_irq_for_idle_irqsoff();
irq_set_pending_from_srr1(srr1);
}
@@ -347,33 +577,292 @@ void power7_idle(void)
power7_idle_type(PNV_THREAD_NAP);
}
-static unsigned long __power9_idle_type(unsigned long stop_psscr_val,
- unsigned long stop_psscr_mask)
+struct p9_sprs {
+ /* per core */
+ u64 ptcr;
+ u64 rpr;
+ u64 tscr;
+ u64 ldbar;
+
+ /* per thread */
+ u64 lpcr;
+ u64 hfscr;
+ u64 fscr;
+ u64 pid;
+ u64 purr;
+ u64 spurr;
+ u64 dscr;
+ u64 wort;
+
+ u64 mmcra;
+ u32 mmcr0;
+ u32 mmcr1;
+ u64 mmcr2;
+
+ /* per thread SPRs that get lost in shallow states */
+ u64 amr;
+ u64 iamr;
+ u64 amor;
+ u64 uamor;
+};
+
+static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
{
- unsigned long psscr;
+ int cpu = raw_smp_processor_id();
+ int first = cpu_first_thread_sibling(cpu);
+ unsigned long *state = &paca_ptrs[first]->idle_state;
+ unsigned long core_thread_mask = (1UL << threads_per_core) - 1;
unsigned long srr1;
+ unsigned long pls;
+ unsigned long mmcr0 = 0;
+ struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
+ bool sprs_saved = false;
- if (!prep_irq_for_idle_irqsoff())
- return 0;
+ if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
+ /* EC=ESL=0 case */
+
+ BUG_ON(!mmu_on);
+
+ /*
+ * Wake synchronously. SRESET via xscom may still cause
+ * a 0x100 powersave wakeup with SRR1 reason!
+ */
+ srr1 = isa300_idle_stop_noloss(psscr); /* go idle */
+ if (likely(!srr1))
+ return 0;
+
+ /*
+ * Registers not saved, can't recover!
+ * This would be a hardware bug
+ */
+ BUG_ON((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS);
+
+ goto out;
+ }
+
+ /* EC=ESL=1 case */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (cpu_has_feature(CPU_FTR_P9_TM_XER_SO_BUG)) {
+ local_paca->requested_psscr = psscr;
+ /* order setting requested_psscr vs testing dont_stop */
+ smp_mb();
+ if (atomic_read(&local_paca->dont_stop)) {
+ local_paca->requested_psscr = 0;
+ return 0;
+ }
+ }
+#endif
+
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+ /*
+ * POWER9 DD2 can incorrectly set PMAO when waking up
+ * after a state-loss idle. Saving and restoring MMCR0
+ * over idle is a workaround.
+ */
+ mmcr0 = mfspr(SPRN_MMCR0);
+ }
+ if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) {
+ sprs.lpcr = mfspr(SPRN_LPCR);
+ sprs.hfscr = mfspr(SPRN_HFSCR);
+ sprs.fscr = mfspr(SPRN_FSCR);
+ sprs.pid = mfspr(SPRN_PID);
+ sprs.purr = mfspr(SPRN_PURR);
+ sprs.spurr = mfspr(SPRN_SPURR);
+ sprs.dscr = mfspr(SPRN_DSCR);
+ sprs.wort = mfspr(SPRN_WORT);
+
+ sprs.mmcra = mfspr(SPRN_MMCRA);
+ sprs.mmcr0 = mfspr(SPRN_MMCR0);
+ sprs.mmcr1 = mfspr(SPRN_MMCR1);
+ sprs.mmcr2 = mfspr(SPRN_MMCR2);
+
+ sprs.ptcr = mfspr(SPRN_PTCR);
+ sprs.rpr = mfspr(SPRN_RPR);
+ sprs.tscr = mfspr(SPRN_TSCR);
+ sprs.ldbar = mfspr(SPRN_LDBAR);
+
+ sprs_saved = true;
+
+ atomic_start_thread_idle();
+ }
+
+ sprs.amr = mfspr(SPRN_AMR);
+ sprs.iamr = mfspr(SPRN_IAMR);
+ sprs.amor = mfspr(SPRN_AMOR);
+ sprs.uamor = mfspr(SPRN_UAMOR);
+
+ srr1 = isa300_idle_stop_mayloss(psscr); /* go idle */
+
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ local_paca->requested_psscr = 0;
+#endif
psscr = mfspr(SPRN_PSSCR);
- psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+ WARN_ON_ONCE(!srr1);
+ WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
+
+ if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
+ unsigned long mmcra;
+
+ /*
+ * We don't need an isync after the mtsprs here because the
+ * upcoming mtmsrd is execution synchronizing.
+ */
+ mtspr(SPRN_AMR, sprs.amr);
+ mtspr(SPRN_IAMR, sprs.iamr);
+ mtspr(SPRN_AMOR, sprs.amor);
+ mtspr(SPRN_UAMOR, sprs.uamor);
+
+ /*
+ * Workaround for POWER9 DD2.0, if we lost resources, the ERAT
+ * might have been corrupted and needs flushing. We also need
+ * to reload MMCR0 (see mmcr0 comment above).
+ */
+ if (!cpu_has_feature(CPU_FTR_POWER9_DD2_1)) {
+ asm volatile(PPC_INVALIDATE_ERAT);
+ mtspr(SPRN_MMCR0, mmcr0);
+ }
+
+ /*
+ * DD2.2 and earlier need to set then clear bit 60 in MMCRA
+ * to ensure the PMU starts running.
+ */
+ mmcra = mfspr(SPRN_MMCRA);
+ mmcra |= PPC_BIT(60);
+ mtspr(SPRN_MMCRA, mmcra);
+ mmcra &= ~PPC_BIT(60);
+ mtspr(SPRN_MMCRA, mmcra);
+ }
+
+ if (unlikely((srr1 & SRR1_WAKEMASK_P8) == SRR1_WAKEHMI))
+ hmi_exception_realmode(NULL);
+
+ /*
+ * On POWER9, SRR1 bits do not match exactly as expected.
+ * SRR1_WS_GPRLOSS (10b) can also result in SPR loss, so
+ * just always test PSSCR for SPR/TB state loss.
+ */
+ pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
+ if (likely(pls < pnv_first_spr_loss_level)) {
+ if (sprs_saved)
+ atomic_stop_thread_idle();
+ goto out;
+ }
+
+ /* HV state loss */
+ BUG_ON(!sprs_saved);
+
+ atomic_lock_thread_idle();
+
+ if ((*state & core_thread_mask) != 0)
+ goto core_woken;
+
+ /* Per-core SPRs */
+ mtspr(SPRN_PTCR, sprs.ptcr);
+ mtspr(SPRN_RPR, sprs.rpr);
+ mtspr(SPRN_TSCR, sprs.tscr);
+ mtspr(SPRN_LDBAR, sprs.ldbar);
+
+ if (pls >= pnv_first_tb_loss_level) {
+ /* TB loss */
+ if (opal_resync_timebase() != OPAL_SUCCESS)
+ BUG();
+ }
+
+ /*
+ * isync after restoring shared SPRs and before unlocking. Unlock
+ * only contains hwsync which does not necessarily do the right
+ * thing for SPRs.
+ */
+ isync();
+
+core_woken:
+ atomic_unlock_and_stop_thread_idle();
+
+ /* Per-thread SPRs */
+ mtspr(SPRN_LPCR, sprs.lpcr);
+ mtspr(SPRN_HFSCR, sprs.hfscr);
+ mtspr(SPRN_FSCR, sprs.fscr);
+ mtspr(SPRN_PID, sprs.pid);
+ mtspr(SPRN_PURR, sprs.purr);
+ mtspr(SPRN_SPURR, sprs.spurr);
+ mtspr(SPRN_DSCR, sprs.dscr);
+ mtspr(SPRN_WORT, sprs.wort);
+
+ mtspr(SPRN_MMCRA, sprs.mmcra);
+ mtspr(SPRN_MMCR0, sprs.mmcr0);
+ mtspr(SPRN_MMCR1, sprs.mmcr1);
+ mtspr(SPRN_MMCR2, sprs.mmcr2);
+
+ mtspr(SPRN_SPRG3, local_paca->sprg_vdso);
+
+ if (!radix_enabled())
+ __slb_restore_bolted_realmode();
+
+out:
+ if (mmu_on)
+ mtmsr(MSR_KERNEL);
+
+ return srr1;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static unsigned long power9_offline_stop(unsigned long psscr)
+{
+ unsigned long srr1;
+
+#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
__ppc64_runlatch_off();
- srr1 = power9_idle_stop(psscr);
+ srr1 = power9_idle_stop(psscr, true);
__ppc64_runlatch_on();
+#else
+ /*
+ * Tell KVM we're entering idle.
+ * This does not have to be done in real mode because the P9 MMU
+ * is independent per-thread. Some steppings share radix/hash mode
+ * between threads, but in that case KVM has a barrier sync in real
+ * mode before and after switching between radix and hash.
+ *
+ * kvm_start_guest must still be called in real mode though, hence
+ * the false argument.
+ */
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
- fini_irq_for_idle_irqsoff();
+ __ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr, false);
+ __ppc64_runlatch_on();
+
+ local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ smp_mb();
+ if (local_paca->kvm_hstate.hwthread_req)
+ srr1 = idle_kvm_start_guest(srr1);
+ mtmsr(MSR_KERNEL);
+#endif
return srr1;
}
+#endif
void power9_idle_type(unsigned long stop_psscr_val,
unsigned long stop_psscr_mask)
{
+ unsigned long psscr;
unsigned long srr1;
- srr1 = __power9_idle_type(stop_psscr_val, stop_psscr_mask);
+ if (!prep_irq_for_idle_irqsoff())
+ return;
+
+ psscr = mfspr(SPRN_PSSCR);
+ psscr = (psscr & ~stop_psscr_mask) | stop_psscr_val;
+
+ __ppc64_runlatch_off();
+ srr1 = power9_idle_stop(psscr, true);
+ __ppc64_runlatch_on();
+
+ fini_irq_for_idle_irqsoff();
+
irq_set_pending_from_srr1(srr1);
}
@@ -409,7 +898,7 @@ void pnv_power9_force_smt4_catch(void)
atomic_inc(&paca_ptrs[cpu0+thr]->dont_stop);
}
/* order setting dont_stop vs testing requested_psscr */
- mb();
+ smp_mb();
for (thr = 0; thr < threads_per_core; ++thr) {
if (!paca_ptrs[cpu0+thr]->requested_psscr)
++awake_threads;
@@ -481,7 +970,6 @@ void pnv_program_cpu_hotplug_lpcr(unsigned int cpu, u64 lpcr_val)
unsigned long pnv_cpu_offline(unsigned int cpu)
{
unsigned long srr1;
- u32 idle_states = pnv_get_supported_cpuidle_states();
__ppc64_runlatch_off();
@@ -492,15 +980,8 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
psscr = (psscr & ~pnv_deepest_stop_psscr_mask) |
pnv_deepest_stop_psscr_val;
srr1 = power9_offline_stop(psscr);
-
- } else if ((idle_states & OPAL_PM_WINKLE_ENABLED) &&
- (idle_states & OPAL_PM_LOSE_FULL_CONTEXT)) {
- srr1 = power7_idle_insn(PNV_THREAD_WINKLE);
- } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
- (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- srr1 = power7_idle_insn(PNV_THREAD_SLEEP);
- } else if (idle_states & OPAL_PM_NAP_ENABLED) {
- srr1 = power7_idle_insn(PNV_THREAD_NAP);
+ } else if (cpu_has_feature(CPU_FTR_ARCH_206) && power7_offline_type) {
+ srr1 = power7_offline();
} else {
/* This is the fallback method. We emulate snooze */
while (!generic_check_cpu_restart(cpu)) {
@@ -596,33 +1077,44 @@ int validate_psscr_val_mask(u64 *psscr_val, u64 *psscr_mask, u32 flags)
* @dt_idle_states: Number of idle state entries
* Returns 0 on success
*/
-static int __init pnv_power9_idle_init(void)
+static void __init pnv_power9_idle_init(void)
{
u64 max_residency_ns = 0;
int i;
/*
- * Set pnv_first_deep_stop_state, pnv_deepest_stop_psscr_{val,mask},
- * and the pnv_default_stop_{val,mask}.
- *
- * pnv_first_deep_stop_state should be set to the first stop
- * level to cause hypervisor state loss.
- *
* pnv_deepest_stop_{val,mask} should be set to values corresponding to
* the deepest stop state.
*
* pnv_default_stop_{val,mask} should be set to values corresponding to
- * the shallowest (OPAL_PM_STOP_INST_FAST) loss-less stop state.
+ * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
*/
- pnv_first_deep_stop_state = MAX_STOP_STATE;
+ pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
+ pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
for (i = 0; i < nr_pnv_idle_states; i++) {
int err;
struct pnv_idle_states_t *state = &pnv_idle_states[i];
u64 psscr_rl = state->psscr_val & PSSCR_RL_MASK;
+ if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+ (pnv_first_tb_loss_level > psscr_rl))
+ pnv_first_tb_loss_level = psscr_rl;
+
if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
- pnv_first_deep_stop_state > psscr_rl)
- pnv_first_deep_stop_state = psscr_rl;
+ (pnv_first_spr_loss_level > psscr_rl))
+ pnv_first_spr_loss_level = psscr_rl;
+
+ /*
+ * The idle code does not deal with TB loss occurring
+ * in a shallower state than SPR loss, so force it to
+ * behave like SPRs are lost if TB is lost. POWER9 would
+ * never encounter this, but a POWER8 core would if it
+ * implemented the stop instruction. So this is for forward
+ * compatibility.
+ */
+ if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
+ (pnv_first_spr_loss_level > psscr_rl))
+ pnv_first_spr_loss_level = psscr_rl;
err = validate_psscr_val_mask(&state->psscr_val,
&state->psscr_mask,
@@ -647,6 +1139,7 @@ static int __init pnv_power9_idle_init(void)
pnv_default_stop_val = state->psscr_val;
pnv_default_stop_mask = state->psscr_mask;
default_stop_found = true;
+ WARN_ON(state->flags & OPAL_PM_LOSE_FULL_CONTEXT);
}
}
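As a hypothetical worked example (state names and RL values invented for illustration): if firmware advertised stop0 with PSSCR RL = 0 and no loss flags, stop4 with RL = 4 and OPAL_PM_LOSE_FULL_CONTEXT, and stop5 with RL = 5 plus OPAL_PM_TIMEBASE_STOP, the loop above would end with pnv_first_spr_loss_level = 4 and pnv_first_tb_loss_level = 5. A state that stopped the timebase at a shallower level than it lost SPRs would, per the forward-compatibility comment, also pull pnv_first_spr_loss_level down to that level.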
@@ -666,10 +1159,40 @@ static int __init pnv_power9_idle_init(void)
pnv_deepest_stop_psscr_mask);
}
- pr_info("cpuidle-powernv: Requested Level (RL) value of first deep stop = 0x%llx\n",
- pnv_first_deep_stop_state);
+ pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%lld\n",
+ pnv_first_spr_loss_level);
- return 0;
+ pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%lld\n",
+ pnv_first_tb_loss_level);
+}
+
+static void __init pnv_disable_deep_states(void)
+{
+ /*
+ * The stop-api is unable to restore hypervisor
+ * resources on wakeup from platform idle states which
+ * lose full context. So disable such states.
+ */
+ supported_cpuidle_states &= ~OPAL_PM_LOSE_FULL_CONTEXT;
+ pr_warn("cpuidle-powernv: Disabling idle states that lose full context\n");
+ pr_warn("cpuidle-powernv: Idle power-savings, CPU-Hotplug affected\n");
+
+ if (cpu_has_feature(CPU_FTR_ARCH_300) &&
+ (pnv_deepest_stop_flag & OPAL_PM_LOSE_FULL_CONTEXT)) {
+ /*
+ * Use the default stop state for CPU-Hotplug
+ * if available.
+ */
+ if (default_stop_found) {
+ pnv_deepest_stop_psscr_val = pnv_default_stop_val;
+ pnv_deepest_stop_psscr_mask = pnv_default_stop_mask;
+ pr_warn("cpuidle-powernv: Offlined CPUs will stop with psscr = 0x%016llx\n",
+ pnv_deepest_stop_psscr_val);
+ } else { /* Fallback to snooze loop for CPU-Hotplug */
+ deepest_stop_found = false;
+ pr_warn("cpuidle-powernv: Offlined CPUs will busy wait\n");
+ }
+ }
}
/*
@@ -684,10 +1207,8 @@ static void __init pnv_probe_idle_states(void)
return;
}
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (pnv_power9_idle_init())
- return;
- }
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ pnv_power9_idle_init();
for (i = 0; i < nr_pnv_idle_states; i++)
supported_cpuidle_states |= pnv_idle_states[i].flags;
@@ -807,11 +1328,33 @@ out:
static int __init pnv_init_idle_states(void)
{
+ int cpu;
int rc = 0;
- supported_cpuidle_states = 0;
+
+ /* Set up PACA fields */
+ for_each_present_cpu(cpu) {
+ struct paca_struct *p = paca_ptrs[cpu];
+
+ p->idle_state = 0;
+ if (cpu == cpu_first_thread_sibling(cpu))
+ p->idle_state = (1 << threads_per_core) - 1;
+
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* P7/P8 nap */
+ p->thread_idle_state = PNV_THREAD_RUNNING;
+ } else {
+ /* P9 stop */
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ p->requested_psscr = 0;
+ atomic_set(&p->dont_stop, 0);
+#endif
+ }
+ }
/* In case we error out nr_pnv_idle_states will be zero */
nr_pnv_idle_states = 0;
+ supported_cpuidle_states = 0;
+
if (cpuidle_disable != IDLE_NO_OVERRIDE)
goto out;
rc = pnv_parse_cpuidle_dt();
@@ -819,27 +1362,40 @@ static int __init pnv_init_idle_states(void)
return rc;
pnv_probe_idle_states();
- if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
- patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_entry,
- PPC_INST_NOP);
- patch_instruction(
- (unsigned int *)pnv_fastsleep_workaround_at_exit,
- PPC_INST_NOP);
- } else {
- /*
- * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
- * workaround is needed to use fastsleep. Provide sysfs
- * control to choose how this workaround has to be applied.
- */
- device_create_file(cpu_subsys.dev_root,
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+ power7_fastsleep_workaround_entry = false;
+ power7_fastsleep_workaround_exit = false;
+ } else {
+ /*
+ * OPAL_PM_SLEEP_ENABLED_ER1 is set. It indicates that
+ * workaround is needed to use fastsleep. Provide sysfs
+ * control to choose how this workaround has to be
+ * applied.
+ */
+ device_create_file(cpu_subsys.dev_root,
&dev_attr_fastsleep_workaround_applyonce);
- }
+ }
+
+ update_subcore_sibling_mask();
+
+ if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED) {
+ ppc_md.power_save = power7_idle;
+ power7_offline_type = PNV_THREAD_NAP;
+ }
- pnv_alloc_idle_core_states();
+ if ((supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) &&
+ (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT))
+ power7_offline_type = PNV_THREAD_WINKLE;
+ else if ((supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED) ||
+ (supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+ power7_offline_type = PNV_THREAD_SLEEP;
+ }
- if (supported_cpuidle_states & OPAL_PM_NAP_ENABLED)
- ppc_md.power_save = power7_idle;
+ if (supported_cpuidle_states & OPAL_PM_LOSE_FULL_CONTEXT) {
+ if (pnv_save_sprs_for_deep_states())
+ pnv_disable_deep_states();
+ }
out:
return 0;
diff --git a/arch/powerpc/platforms/powernv/opal-call.c b/arch/powerpc/platforms/powernv/opal-call.c
index daad8c45c8e7..36c8fa3647a2 100644
--- a/arch/powerpc/platforms/powernv/opal-call.c
+++ b/arch/powerpc/platforms/powernv/opal-call.c
@@ -121,6 +121,8 @@ static int64_t opal_call(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
#define OPAL_CALL(name, opcode) \
int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
+ int64_t a4, int64_t a5, int64_t a6, int64_t a7); \
+int64_t name(int64_t a0, int64_t a1, int64_t a2, int64_t a3, \
int64_t a4, int64_t a5, int64_t a6, int64_t a7) \
{ \
return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, opcode); \
@@ -218,6 +220,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2);
OPAL_CALL(opal_config_cpu_idle_state, OPAL_CONFIG_CPU_IDLE_STATE);
OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
@@ -260,6 +263,9 @@ OPAL_CALL(opal_xive_get_vp_info, OPAL_XIVE_GET_VP_INFO);
OPAL_CALL(opal_xive_set_vp_info, OPAL_XIVE_SET_VP_INFO);
OPAL_CALL(opal_xive_sync, OPAL_XIVE_SYNC);
OPAL_CALL(opal_xive_dump, OPAL_XIVE_DUMP);
+OPAL_CALL(opal_xive_get_queue_state, OPAL_XIVE_GET_QUEUE_STATE);
+OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE);
+OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE);
OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
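The extra line added to OPAL_CALL() above declares the wrapper immediately before defining it, giving each generated function a prototype. Hand-expanded for illustration (not part of the patch), OPAL_CALL(opal_handle_hmi2, OPAL_HANDLE_HMI2) now produces roughly:

int64_t opal_handle_hmi2(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
			 int64_t a4, int64_t a5, int64_t a6, int64_t a7);
int64_t opal_handle_hmi2(int64_t a0, int64_t a1, int64_t a2, int64_t a3,
			 int64_t a4, int64_t a5, int64_t a6, int64_t a7)
{
	return opal_call(a0, a1, a2, a3, a4, a5, a6, a7, OPAL_HANDLE_HMI2);
}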
diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c
index 58a07948c76e..3e497b91d210 100644
--- a/arch/powerpc/platforms/powernv/opal-imc.c
+++ b/arch/powerpc/platforms/powernv/opal-imc.c
@@ -127,7 +127,7 @@ static int imc_get_mem_addr_nest(struct device_node *node,
nr_chips))
goto error;
- pmu_ptr->mem_info = kcalloc(nr_chips, sizeof(*pmu_ptr->mem_info),
+ pmu_ptr->mem_info = kcalloc(nr_chips + 1, sizeof(*pmu_ptr->mem_info),
GFP_KERNEL);
if (!pmu_ptr->mem_info)
goto error;
@@ -284,6 +284,9 @@ static int opal_imc_counters_probe(struct platform_device *pdev)
case IMC_TYPE_THREAD:
domain = IMC_DOMAIN_THREAD;
break;
+ case IMC_TYPE_TRACE:
+ domain = IMC_DOMAIN_TRACE;
+ break;
default:
pr_warn("IMC Unknown Device type \n");
domain = -1;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 2b0eca104f86..f2b063b027f0 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -505,7 +505,7 @@ static int opal_recover_mce(struct pt_regs *regs,
recovered = 0;
}
- if (!recovered && evt->severity == MCE_SEV_ERROR_SYNC) {
+ if (!recovered && evt->sync_error) {
/*
* Try to kill processes if we get a synchronous machine check
* (e.g., one caused by execution of this instruction). This
@@ -614,6 +614,27 @@ int opal_hmi_exception_early(struct pt_regs *regs)
return 0;
}
+int opal_hmi_exception_early2(struct pt_regs *regs)
+{
+ s64 rc;
+ __be64 out_flags;
+
+ /*
+ * Call the OPAL HMI handler.
+ * Check the 64-bit flag mask to find out whether an event was generated
+ * and whether the TB is still valid.
+ */
+ rc = opal_handle_hmi2(&out_flags);
+ if (rc != OPAL_SUCCESS)
+ return 0;
+
+ if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_NEW_EVENT)
+ local_paca->hmi_event_available = 1;
+ if (be64_to_cpu(out_flags) & OPAL_HMI_FLAGS_TOD_TB_FAIL)
+ tb_invalid = true;
+ return 1;
+}
+
/* HMI exception handler called in virtual mode during check_irq_replay. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3ead4c237ed0..126602b4e399 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -847,11 +847,11 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
if (rc)
- pe_warn(pe, "OPAL error %ld remove self from PELTV\n", rc);
+ pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
if (rc)
- pe_err(pe, "OPAL error %ld trying to setup PELT table\n", rc);
+ pe_err(pe, "OPAL error %lld trying to setup PELT table\n", rc);
pe->pbus = NULL;
pe->pdev = NULL;
@@ -1174,11 +1174,12 @@ static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
pe->rid = bus->busn_res.start << 8;
if (all)
- pe_info(pe, "Secondary bus %d..%d associated with PE#%x\n",
- bus->busn_res.start, bus->busn_res.end, pe->pe_number);
+ pe_info(pe, "Secondary bus %pad..%pad associated with PE#%x\n",
+ &bus->busn_res.start, &bus->busn_res.end,
+ pe->pe_number);
else
- pe_info(pe, "Secondary bus %d associated with PE#%x\n",
- bus->busn_res.start, pe->pe_number);
+ pe_info(pe, "Secondary bus %pad associated with PE#%x\n",
+ &bus->busn_res.start, pe->pe_number);
if (pnv_ioda_configure_pe(phb, pe)) {
/* XXX What do we do here ? */
@@ -1448,7 +1449,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
tbl = pe->table_group.tables[0];
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
- pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
pnv_pci_ioda2_set_bypass(pe, false);
if (pe->table_group.group) {
@@ -1836,7 +1837,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
struct pnv_ioda_pe *pe;
if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return -ENODEV;
+ return false;
pe = &phb->ioda.pe_array[pdn->pe_number];
if (pe->tce_bypass_enabled) {
@@ -1859,7 +1860,7 @@ static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
/* Configure the bypass mode */
s64 rc = pnv_pci_ioda_dma_64bit_bypass(pe);
if (rc)
- return rc;
+ return false;
/* 4GB offset bypasses 32-bit space */
pdev->dev.archdata.dma_offset = (1ULL << 32);
return true;
@@ -2286,8 +2287,8 @@ found:
__pa(addr) + tce32_segsz * i,
tce32_segsz, IOMMU_PAGE_SIZE_4K);
if (rc) {
- pe_err(pe, " Failed to configure 32-bit TCE table,"
- " err %ld\n", rc);
+ pe_err(pe, " Failed to configure 32-bit TCE table, err %lld\n",
+ rc);
goto fail;
}
}
@@ -2332,9 +2333,9 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
const __u64 win_size = tbl->it_size << tbl->it_page_shift;
- pe_info(pe, "Setting up window#%d %llx..%llx pg=%x\n", num,
- start_addr, start_addr + win_size - 1,
- IOMMU_PAGE_SIZE(tbl));
+ pe_info(pe, "Setting up window#%d %llx..%llx pg=%lx\n",
+ num, start_addr, start_addr + win_size - 1,
+ IOMMU_PAGE_SIZE(tbl));
/*
* Map TCE table through TVT. The TVE index is the PE number
@@ -2348,7 +2349,7 @@ static long pnv_pci_ioda2_set_window(struct iommu_table_group *table_group,
size << 3,
IOMMU_PAGE_SIZE(tbl));
if (rc) {
- pe_err(pe, "Failed to configure TCE table, err %ld\n", rc);
+ pe_err(pe, "Failed to configure TCE table, err %lld\n", rc);
return rc;
}
@@ -3450,7 +3451,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
#ifdef CONFIG_IOMMU_API
rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (rc)
- pe_warn(pe, "OPAL error %ld release DMA window\n", rc);
+ pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
#endif
pnv_pci_ioda2_set_bypass(pe, false);
@@ -3484,7 +3485,7 @@ static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
phb->ioda.reserved_pe_idx, win, 0, idx);
if (rc != OPAL_SUCCESS)
- pe_warn(pe, "Error %ld unmapping (%d) segment#%d\n",
+ pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
rc, win, idx);
map[idx] = IODA_INVALID_PE;
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 8e36da379252..be26ab3d99e0 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -2,6 +2,7 @@
#ifndef __POWERNV_PCI_H
#define __POWERNV_PCI_H
+#include <linux/compiler.h> /* for __printf */
#include <linux/iommu.h>
#include <asm/iommu.h>
#include <asm/msi_bitmap.h>
@@ -204,6 +205,7 @@ extern unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
__u64 window_size, __u32 levels);
extern int pnv_eeh_post_init(void);
+__printf(3, 4)
extern void pe_level_printk(const struct pnv_ioda_pe *pe, const char *level,
const char *fmt, ...);
#define pe_err(pe, fmt, ...) \
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 14befee4b3f1..3cf40f689aac 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -401,7 +401,10 @@ static void __init pnv_setup_machdep_opal(void)
/* ppc_md.system_reset_exception gets filled in by pnv_smp_init() */
ppc_md.machine_check_exception = opal_machine_check;
ppc_md.mce_check_early_recovery = opal_mce_check_early_recovery;
- ppc_md.hmi_exception_early = opal_hmi_exception_early;
+ if (opal_check_token(OPAL_HANDLE_HMI2))
+ ppc_md.hmi_exception_early = opal_hmi_exception_early2;
+ else
+ ppc_md.hmi_exception_early = opal_hmi_exception_early;
ppc_md.handle_hmi_exception = opal_handle_hmi_exception;
}
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index 45563004feda..1d7a9fd30dd1 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -183,7 +183,7 @@ static void unsplit_core(void)
cpu = smp_processor_id();
if (cpu_thread_in_core(cpu) != 0) {
while (mfspr(SPRN_HID0) & mask)
- power7_idle_insn(PNV_THREAD_NAP);
+ power7_idle_type(PNV_THREAD_NAP);
per_cpu(split_state, cpu).step = SYNC_STEP_UNSPLIT;
return;
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index d291b618a559..47087832f8b2 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -379,7 +379,7 @@ static int dlpar_add_lmb(struct drmem_lmb *);
static int dlpar_remove_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int nid, rc;
+ int rc;
if (!lmb_is_removable(lmb))
return -EINVAL;
@@ -389,14 +389,14 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
return rc;
block_sz = pseries_memory_block_size();
- nid = memory_add_physaddr_to_nid(lmb->base_addr);
- __remove_memory(nid, lmb->base_addr, block_sz);
+ __remove_memory(lmb->nid, lmb->base_addr, block_sz);
/* Update memory regions for memory remove */
memblock_remove(lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
+ lmb_clear_nid(lmb);
lmb->flags &= ~DRCONF_MEM_ASSIGNED;
return 0;
@@ -653,7 +653,7 @@ static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
static int dlpar_add_lmb(struct drmem_lmb *lmb)
{
unsigned long block_sz;
- int nid, rc;
+ int rc;
if (lmb->flags & DRCONF_MEM_ASSIGNED)
return -EINVAL;
@@ -664,13 +664,11 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
return rc;
}
+ lmb_set_nid(lmb);
block_sz = memory_block_size_bytes();
- /* Find the node id for this address */
- nid = memory_add_physaddr_to_nid(lmb->base_addr);
-
/* Add the memory */
- rc = __add_memory(nid, lmb->base_addr, block_sz);
+ rc = __add_memory(lmb->nid, lmb->base_addr, block_sz);
if (rc) {
invalidate_lmb_associativity_index(lmb);
return rc;
@@ -678,8 +676,9 @@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
rc = dlpar_online_lmb(lmb);
if (rc) {
- __remove_memory(nid, lmb->base_addr, block_sz);
+ __remove_memory(lmb->nid, lmb->base_addr, block_sz);
invalidate_lmb_associativity_index(lmb);
+ lmb_clear_nid(lmb);
} else {
lmb->flags |= DRCONF_MEM_ASSIGNED;
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 36eb1ddbac69..03bbb299320e 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -105,7 +105,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
unsigned long attrs)
{
u64 proto_tce;
- __be64 *tcep, *tces;
+ __be64 *tcep;
u64 rpn;
proto_tce = TCE_PCI_READ; // Read allowed
@@ -113,7 +113,7 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
if (direction != DMA_TO_DEVICE)
proto_tce |= TCE_PCI_WRITE;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--) {
/* can't move this out since we might cross MEMBLOCK boundary */
@@ -129,9 +129,9 @@ static int tce_build_pSeries(struct iommu_table *tbl, long index,
static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages)
{
- __be64 *tcep, *tces;
+ __be64 *tcep;
- tces = tcep = ((__be64 *)tbl->it_base) + index;
+ tcep = ((__be64 *)tbl->it_base) + index;
while (npages--)
*(tcep++) = 0;
@@ -945,7 +945,7 @@ static phys_addr_t ddw_memory_hotplug_max(void)
for_each_node_by_type(memory, "memory") {
unsigned long start, size;
- int ranges, n_mem_addr_cells, n_mem_size_cells, len;
+ int n_mem_addr_cells, n_mem_size_cells, len;
const __be32 *memcell_buf;
memcell_buf = of_get_property(memory, "reg", &len);
@@ -955,9 +955,6 @@ static phys_addr_t ddw_memory_hotplug_max(void)
n_mem_addr_cells = of_n_addr_cells(memory);
n_mem_size_cells = of_n_size_cells(memory);
- /* ranges in cell */
- ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
-
start = of_read_number(memcell_buf, n_mem_addr_cells);
memcell_buf += n_mem_addr_cells;
size = of_read_number(memcell_buf, n_mem_size_cells);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f2a9f0adc2d3..1034ef1fe2b4 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -901,8 +901,10 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
break;
case H_PARAMETER:
+ pr_warn("Invalid argument from H_RESIZE_HPT_PREPARE\n");
return -EINVAL;
case H_RESOURCE:
+ pr_warn("Operation not permitted from H_RESIZE_HPT_PREPARE\n");
return -EPERM;
default:
pr_warn("Unexpected error %d from H_RESIZE_HPT_PREPARE\n", rc);
@@ -918,7 +920,6 @@ static int pseries_lpar_resize_hpt(unsigned long shift)
if (rc != 0) {
switch (state.commit_rc) {
case H_PTEG_FULL:
- pr_warn("Hash collision while resizing HPT\n");
return -ENOSPC;
default:
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index 27f0a915c8a9..f860a897a9e0 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -106,7 +106,7 @@ static ssize_t pmem_drc_remove_node(u32 drc_index)
int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
{
- u32 count, drc_index;
+ u32 drc_index;
int rc;
/* slim chance, but we might get a hotplug event while booting */
@@ -123,7 +123,6 @@ int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
return -EINVAL;
}
- count = hp_elog->_drc_u.drc_count;
drc_index = hp_elog->_drc_u.drc_index;
lock_device_hotplug();
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index 452dcfd7e5dd..c97d15352f9f 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -539,44 +539,44 @@ static void pseries_print_mce_info(struct pt_regs *regs,
int disposition = rtas_error_disposition(errp);
static const char * const initiators[] = {
- "Unknown",
- "CPU",
- "PCI",
- "ISA",
- "Memory",
- "Power Mgmt",
+ [0] = "Unknown",
+ [1] = "CPU",
+ [2] = "PCI",
+ [3] = "ISA",
+ [4] = "Memory",
+ [5] = "Power Mgmt",
};
static const char * const mc_err_types[] = {
- "UE",
- "SLB",
- "ERAT",
- "Unknown",
- "TLB",
- "D-Cache",
- "Unknown",
- "I-Cache",
+ [0] = "UE",
+ [1] = "SLB",
+ [2] = "ERAT",
+ [3] = "Unknown",
+ [4] = "TLB",
+ [5] = "D-Cache",
+ [6] = "Unknown",
+ [7] = "I-Cache",
};
static const char * const mc_ue_types[] = {
- "Indeterminate",
- "Instruction fetch",
- "Page table walk ifetch",
- "Load/Store",
- "Page table walk Load/Store",
+ [0] = "Indeterminate",
+ [1] = "Instruction fetch",
+ [2] = "Page table walk ifetch",
+ [3] = "Load/Store",
+ [4] = "Page table walk Load/Store",
};
/* SLB sub errors valid values are 0x0, 0x1, 0x2 */
static const char * const mc_slb_types[] = {
- "Parity",
- "Multihit",
- "Indeterminate",
+ [0] = "Parity",
+ [1] = "Multihit",
+ [2] = "Indeterminate",
};
/* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
static const char * const mc_soft_types[] = {
- "Unknown",
- "Parity",
- "Multihit",
- "Indeterminate",
+ [0] = "Unknown",
+ [1] = "Parity",
+ [2] = "Multihit",
+ [3] = "Indeterminate",
};
if (!rtas_error_extended(errp)) {
@@ -707,6 +707,87 @@ out:
return disposition;
}
+#ifdef CONFIG_MEMORY_FAILURE
+
+static DEFINE_PER_CPU(int, rtas_ue_count);
+static DEFINE_PER_CPU(unsigned long, rtas_ue_paddr[MAX_MC_EVT]);
+
+#define UE_EFFECTIVE_ADDR_PROVIDED 0x40
+#define UE_LOGICAL_ADDR_PROVIDED 0x20
+
+
+static void pseries_hwpoison_work_fn(struct work_struct *work)
+{
+ unsigned long paddr;
+ int index;
+
+ while (__this_cpu_read(rtas_ue_count) > 0) {
+ index = __this_cpu_read(rtas_ue_count) - 1;
+ paddr = __this_cpu_read(rtas_ue_paddr[index]);
+ memory_failure(paddr >> PAGE_SHIFT, 0);
+ __this_cpu_dec(rtas_ue_count);
+ }
+}
+
+static DECLARE_WORK(hwpoison_work, pseries_hwpoison_work_fn);
+
+static void queue_ue_paddr(unsigned long paddr)
+{
+ int index;
+
+ index = __this_cpu_inc_return(rtas_ue_count) - 1;
+ if (index >= MAX_MC_EVT) {
+ __this_cpu_dec(rtas_ue_count);
+ return;
+ }
+ this_cpu_write(rtas_ue_paddr[index], paddr);
+ schedule_work(&hwpoison_work);
+}
+
+static void pseries_do_memory_failure(struct pt_regs *regs,
+ struct pseries_mc_errorlog *mce_log)
+{
+ unsigned long paddr;
+
+ if (mce_log->sub_err_type & UE_LOGICAL_ADDR_PROVIDED) {
+ paddr = be64_to_cpu(mce_log->logical_address);
+ } else if (mce_log->sub_err_type & UE_EFFECTIVE_ADDR_PROVIDED) {
+ unsigned long pfn;
+
+ pfn = addr_to_pfn(regs,
+ be64_to_cpu(mce_log->effective_address));
+ if (pfn == ULONG_MAX)
+ return;
+ paddr = pfn << PAGE_SHIFT;
+ } else {
+ return;
+ }
+ queue_ue_paddr(paddr);
+}
+
+static void pseries_process_ue(struct pt_regs *regs,
+ struct rtas_error_log *errp)
+{
+ struct pseries_errorlog *pseries_log;
+ struct pseries_mc_errorlog *mce_log;
+
+ if (!rtas_error_extended(errp))
+ return;
+
+ pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+ if (!pseries_log)
+ return;
+
+ mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+ if (mce_log->error_type == MC_ERROR_TYPE_UE)
+ pseries_do_memory_failure(regs, mce_log);
+}
+#else
+static inline void pseries_process_ue(struct pt_regs *regs,
+ struct rtas_error_log *errp) { }
+#endif /*CONFIG_MEMORY_FAILURE */
+
/*
* Process MCE rtas errlog event.
*/
@@ -765,6 +846,8 @@ static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err)
recovered = 1;
}
+ pseries_process_ue(regs, err);
+
/* Queue irq work to log this rtas event later. */
irq_work_queue(&mce_errlog_process_work);
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index 4314ba5baf43..7c6d8b14f440 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
+
+KASAN_SANITIZE := n
+
targets += trampoline.o purgatory.ro kexec-purgatory.c
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
diff --git a/arch/powerpc/sysdev/tsi108_dev.c b/arch/powerpc/sysdev/tsi108_dev.c
index 1f1af12f23e2..c92dcac85231 100644
--- a/arch/powerpc/sysdev/tsi108_dev.c
+++ b/arch/powerpc/sysdev/tsi108_dev.c
@@ -105,7 +105,7 @@ static int __init tsi108_eth_of_init(void)
}
mac_addr = of_get_mac_address(np);
- if (mac_addr)
+ if (!IS_ERR(mac_addr))
memcpy(tsi_eth_data.mac_addr, mac_addr, 6);
ph = of_get_property(np, "mdio-handle", NULL);
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 1ca127d052a6..0c037e933e55 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -437,6 +437,12 @@ void xive_native_sync_source(u32 hw_irq)
}
EXPORT_SYMBOL_GPL(xive_native_sync_source);
+void xive_native_sync_queue(u32 hw_irq)
+{
+ opal_xive_sync(XIVE_SYNC_QUEUE, hw_irq);
+}
+EXPORT_SYMBOL_GPL(xive_native_sync_queue);
+
static const struct xive_ops xive_native_ops = {
.populate_irq_data = xive_native_populate_irq_data,
.configure_irq = xive_native_configure_irq,
@@ -711,3 +717,96 @@ bool xive_native_has_single_escalation(void)
return xive_has_single_esc;
}
EXPORT_SYMBOL_GPL(xive_native_has_single_escalation);
+
+int xive_native_get_queue_info(u32 vp_id, u32 prio,
+ u64 *out_qpage,
+ u64 *out_qsize,
+ u64 *out_qeoi_page,
+ u32 *out_escalate_irq,
+ u64 *out_qflags)
+{
+ __be64 qpage;
+ __be64 qsize;
+ __be64 qeoi_page;
+ __be32 escalate_irq;
+ __be64 qflags;
+ s64 rc;
+
+ rc = opal_xive_get_queue_info(vp_id, prio, &qpage, &qsize,
+ &qeoi_page, &escalate_irq, &qflags);
+ if (rc) {
+ pr_err("OPAL failed to get queue info for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ if (out_qpage)
+ *out_qpage = be64_to_cpu(qpage);
+ if (out_qsize)
+ *out_qsize = be32_to_cpu(qsize);
+ if (out_qeoi_page)
+ *out_qeoi_page = be64_to_cpu(qeoi_page);
+ if (out_escalate_irq)
+ *out_escalate_irq = be32_to_cpu(escalate_irq);
+ if (out_qflags)
+ *out_qflags = be64_to_cpu(qflags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_info);
+
+int xive_native_get_queue_state(u32 vp_id, u32 prio, u32 *qtoggle, u32 *qindex)
+{
+ __be32 opal_qtoggle;
+ __be32 opal_qindex;
+ s64 rc;
+
+ rc = opal_xive_get_queue_state(vp_id, prio, &opal_qtoggle,
+ &opal_qindex);
+ if (rc) {
+ pr_err("OPAL failed to get queue state for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ if (qtoggle)
+ *qtoggle = be32_to_cpu(opal_qtoggle);
+ if (qindex)
+ *qindex = be32_to_cpu(opal_qindex);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_queue_state);
+
+int xive_native_set_queue_state(u32 vp_id, u32 prio, u32 qtoggle, u32 qindex)
+{
+ s64 rc;
+
+ rc = opal_xive_set_queue_state(vp_id, prio, qtoggle, qindex);
+ if (rc) {
+ pr_err("OPAL failed to set queue state for VCPU %d/%d : %lld\n",
+ vp_id, prio, rc);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
+
+int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
+{
+ __be64 state;
+ s64 rc;
+
+ rc = opal_xive_get_vp_state(vp_id, &state);
+ if (rc) {
+ pr_err("OPAL failed to get vp state for VCPU %d : %lld\n",
+ vp_id, rc);
+ return -EIO;
+ }
+
+ if (out_state)
+ *out_state = be64_to_cpu(state);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(xive_native_get_vp_state);
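A usage sketch for the new accessors (hypothetical caller, not part of this patch), e.g. checkpointing and restoring a queue position, assuming vp_id and prio name a configured queue:

	u32 qtoggle, qindex;

	if (!xive_native_get_queue_state(vp_id, prio, &qtoggle, &qindex)) {
		/* ... save qtoggle/qindex with the rest of the vCPU state ... */
		if (xive_native_set_queue_state(vp_id, prio, qtoggle, qindex))
			pr_warn("failed to restore queue state for VP %x/%u\n",
				vp_id, prio);
	}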
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index 3050f9323254..f142570ad860 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -7,6 +7,7 @@ subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
UBSAN_SANITIZE := n
+KASAN_SANITIZE := n
# Disable ftrace for the entire directory
ORIG_CFLAGS := $(KBUILD_CFLAGS)
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index a0f44f992360..1b0149b2bb6c 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -80,6 +80,7 @@ static int set_indicator_token = RTAS_UNKNOWN_SERVICE;
#endif
static unsigned long in_xmon __read_mostly = 0;
static int xmon_on = IS_ENABLED(CONFIG_XMON_DEFAULT);
+static bool xmon_is_ro = IS_ENABLED(CONFIG_XMON_DEFAULT_RO_MODE);
static unsigned long adrs;
static int size = 1;
@@ -202,6 +203,8 @@ static void dump_tlb_book3e(void);
#define GETWORD(v) (((v)[0] << 24) + ((v)[1] << 16) + ((v)[2] << 8) + (v)[3])
#endif
+static const char *xmon_ro_msg = "Operation disabled: xmon in read-only mode\n";
+
static char *help_string = "\
Commands:\n\
b show breakpoints\n\
@@ -989,6 +992,10 @@ cmds(struct pt_regs *excp)
memlocate();
break;
case 'z':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
memzcan();
break;
case 'i':
@@ -1042,6 +1049,10 @@ cmds(struct pt_regs *excp)
set_lpp_cmd();
break;
case 'b':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
bpt_cmds();
break;
case 'C':
@@ -1055,6 +1066,10 @@ cmds(struct pt_regs *excp)
bootcmds();
break;
case 'p':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
proccall();
break;
case 'P':
@@ -1777,6 +1792,11 @@ read_spr(int n, unsigned long *vp)
static void
write_spr(int n, unsigned long val)
{
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ return;
+ }
+
if (setjmp(bus_error_jmp) == 0) {
catch_spr_faults = 1;
sync();
@@ -2016,6 +2036,12 @@ mwrite(unsigned long adrs, void *buf, int size)
char *p, *q;
n = 0;
+
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ return n;
+ }
+
if (setjmp(bus_error_jmp) == 0) {
catch_memory_errors = 1;
sync();
@@ -2429,9 +2455,11 @@ static void dump_one_paca(int cpu)
DUMP(p, trap_save, "%#-*x");
DUMP(p, irq_soft_mask, "%#-*x");
DUMP(p, irq_happened, "%#-*x");
- DUMP(p, io_sync, "%#-*x");
+#ifdef CONFIG_MMIOWB
+ DUMP(p, mmiowb_state.nesting_count, "%#-*x");
+ DUMP(p, mmiowb_state.mmiowb_pending, "%#-*x");
+#endif
DUMP(p, irq_work_pending, "%#-*x");
- DUMP(p, nap_state_lost, "%#-*x");
DUMP(p, sprg_vdso, "%#-*llx");
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -2439,19 +2467,16 @@ static void dump_one_paca(int cpu)
#endif
#ifdef CONFIG_PPC_POWERNV
- DUMP(p, core_idle_state_ptr, "%-*px");
- DUMP(p, thread_idle_state, "%#-*x");
- DUMP(p, thread_mask, "%#-*x");
- DUMP(p, subcore_sibling_mask, "%#-*x");
- DUMP(p, requested_psscr, "%#-*llx");
- DUMP(p, stop_sprs.pid, "%#-*llx");
- DUMP(p, stop_sprs.ldbar, "%#-*llx");
- DUMP(p, stop_sprs.fscr, "%#-*llx");
- DUMP(p, stop_sprs.hfscr, "%#-*llx");
- DUMP(p, stop_sprs.mmcr1, "%#-*llx");
- DUMP(p, stop_sprs.mmcr2, "%#-*llx");
- DUMP(p, stop_sprs.mmcra, "%#-*llx");
- DUMP(p, dont_stop.counter, "%#-*x");
+ DUMP(p, idle_state, "%#-*lx");
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+ DUMP(p, thread_idle_state, "%#-*x");
+ DUMP(p, subcore_sibling_mask, "%#-*x");
+ } else {
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ DUMP(p, requested_psscr, "%#-*llx");
+ DUMP(p, dont_stop.counter, "%#-*x");
+#endif
+ }
#endif
DUMP(p, accounting.utime, "%#-*lx");
@@ -2884,9 +2909,17 @@ memops(int cmd)
scanhex((void *)&mcount);
switch( cmd ){
case 'm':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
memmove((void *)mdest, (void *)msrc, mcount);
break;
case 's':
+ if (xmon_is_ro) {
+ printf(xmon_ro_msg);
+ break;
+ }
memset((void *)mdest, mval, mcount);
break;
case 'd':
@@ -3796,6 +3829,14 @@ static int __init early_parse_xmon(char *p)
} else if (strncmp(p, "on", 2) == 0) {
xmon_init(1);
xmon_on = 1;
+ } else if (strncmp(p, "rw", 2) == 0) {
+ xmon_init(1);
+ xmon_on = 1;
+ xmon_is_ro = false;
+ } else if (strncmp(p, "ro", 2) == 0) {
+ xmon_init(1);
+ xmon_on = 1;
+ xmon_is_ro = true;
} else if (strncmp(p, "off", 3) == 0)
xmon_on = 0;
else
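In use (a sketch of the boot-time options parsed above; the read-only behaviour is whatever the xmon_is_ro checks cover):

	xmon=on		interactive xmon; writes allowed unless CONFIG_XMON_DEFAULT_RO_MODE=y
	xmon=rw		interactive xmon, writes explicitly allowed
	xmon=ro		interactive xmon; mwrite(), write_spr(), breakpoints, proccall and
			the memory zero/move/set commands print the read-only message
	xmon=off	xmon disabled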
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index eb56c82d8aa1..e66745decea1 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -48,6 +48,7 @@ config RISCV
select RISCV_TIMER
select GENERIC_IRQ_MULTI_HANDLER
select ARCH_HAS_PTE_SPECIAL
+ select ARCH_HAS_MMIOWB
select HAVE_EBPF_JIT if 64BIT
config MMU
@@ -69,9 +70,6 @@ config STACKTRACE_SUPPORT
config TRACE_IRQFLAGS_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
config GENERIC_BUG
def_bool y
depends on BUG
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 1d9c1376dc64..744fd92e77bc 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -20,6 +20,7 @@
#define _ASM_RISCV_IO_H
#include <linux/types.h>
+#include <asm/mmiowb.h>
extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
@@ -100,18 +101,6 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
#endif
/*
- * FIXME: I'm flip-flopping on whether or not we should keep this or enforce
- * the ordering with I/O on spinlocks like PowerPC does. The worry is that
- * drivers won't get this correct, but I also don't want to introduce a fence
- * into the lock code that otherwise only uses AMOs (and is essentially defined
- * by the ISA to be correct). For now I'm leaving this here: "o,w" is
- * sufficient to ensure that all writes to the device have completed before the
- * write to the spinlock is allowed to commit. I surmised this from reading
- * "ACQUIRES VS I/O ACCESSES" in memory-barriers.txt.
- */
-#define mmiowb() __asm__ __volatile__ ("fence o,w" : : : "memory");
-
-/*
* Unordered I/O memory access primitives. These are even more relaxed than
* the relaxed versions, as they don't even order accesses between successive
* operations to the I/O regions.
@@ -165,7 +154,7 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
#define __io_br() do {} while (0)
#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory");
#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory");
-#define __io_aw() do {} while (0)
+#define __io_aw() mmiowb_set_pending()
#define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
#define readw(c) ({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(__v); __v; })
diff --git a/arch/riscv/include/asm/mmiowb.h b/arch/riscv/include/asm/mmiowb.h
new file mode 100644
index 000000000000..5d7e3a2b4e3b
--- /dev/null
+++ b/arch/riscv/include/asm/mmiowb.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_MMIOWB_H
+#define _ASM_RISCV_MMIOWB_H
+
+/*
+ * "o,w" is sufficient to ensure that all writes to the device have completed
+ * before the write to the spinlock is allowed to commit.
+ */
+#define mmiowb() __asm__ __volatile__ ("fence o,w" : : : "memory");
+
+#include <asm-generic/mmiowb.h>
+
+#endif /* ASM_RISCV_MMIOWB_H */
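For context, a simplified sketch of the generic helpers this header now plugs into (an approximation of asm-generic/mmiowb.h, not a verbatim copy): __io_aw() records a pending MMIO write per CPU, and the spinlock release path issues the fence only when one was seen inside the critical section.

struct mmiowb_state {
	u16	nesting_count;		/* depth of spinlocks held */
	u16	mmiowb_pending;		/* MMIO write seen under a lock */
};

static inline void mmiowb_set_pending(void)	/* called from __io_aw() */
{
	struct mmiowb_state *ms = __mmiowb_state();

	if (likely(ms->nesting_count))
		ms->mmiowb_pending = ms->nesting_count;
}

static inline void mmiowb_spin_unlock(void)	/* called on spin_unlock() */
{
	struct mmiowb_state *ms = __mmiowb_state();

	if (unlikely(ms->mmiowb_pending)) {
		ms->mmiowb_pending = 0;
		mmiowb();			/* "fence o,w" on RISC-V */
	}
	ms->nesting_count--;
}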
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index a3d5273ded7c..0f2fe1794c8f 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -88,7 +88,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
#ifdef CONFIG_64BIT
return AUDIT_ARCH_RISCV64;
diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
index 439dc7072e05..1ad8d093c58b 100644
--- a/arch/riscv/include/asm/tlb.h
+++ b/arch/riscv/include/asm/tlb.h
@@ -18,6 +18,7 @@ struct mmu_gather;
static void tlb_flush(struct mmu_gather *tlb);
+#define tlb_flush tlb_flush
#include <asm-generic/tlb.h>
static inline void tlb_flush(struct mmu_gather *tlb)
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index a4b1d94371a0..4d403274c2e8 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -169,8 +169,6 @@ static bool save_trace(unsigned long pc, void *arg)
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
walk_stackframe(tsk, NULL, save_trace, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index b6e3d0653002..07485582d027 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -14,12 +14,6 @@ config LOCKDEP_SUPPORT
config STACKTRACE_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- bool
-
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config ARCH_HAS_ILOG2_U32
def_bool n
@@ -149,6 +143,7 @@ config S390
select HAVE_FUNCTION_TRACER
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
+ select HAVE_GENERIC_GUP
select HAVE_KERNEL_BZIP2
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZ4
@@ -164,11 +159,13 @@ config S390
select HAVE_PERF_USER_STACK_DUMP
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MEMBLOCK_PHYS_MAP
+ select HAVE_MMU_GATHER_NO_GATHER
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NOP_MCOUNT
select HAVE_OPROFILE
select HAVE_PCI
select HAVE_PERF_EVENTS
+ select HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
select HAVE_SYSCALL_TRACEPOINTS
@@ -188,7 +185,6 @@ config S390
select TTY
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
- select VIRT_TO_BUS
select HAVE_NMI
@@ -240,6 +236,7 @@ choice
config MARCH_Z900
bool "IBM zSeries model z800 and z900"
+ depends on !CC_IS_CLANG
select HAVE_MARCH_Z900_FEATURES
help
Select this to enable optimizations for model z800/z900 (2064 and
@@ -248,6 +245,7 @@ config MARCH_Z900
config MARCH_Z990
bool "IBM zSeries model z890 and z990"
+ depends on !CC_IS_CLANG
select HAVE_MARCH_Z990_FEATURES
help
Select this to enable optimizations for model z890/z990 (2084 and
@@ -256,6 +254,7 @@ config MARCH_Z990
config MARCH_Z9_109
bool "IBM System z9"
+ depends on !CC_IS_CLANG
select HAVE_MARCH_Z9_109_FEATURES
help
Select this to enable optimizations for IBM System z9 (2094 and
@@ -347,12 +346,15 @@ config TUNE_DEFAULT
config TUNE_Z900
bool "IBM zSeries model z800 and z900"
+ depends on !CC_IS_CLANG
config TUNE_Z990
bool "IBM zSeries model z890 and z990"
+ depends on !CC_IS_CLANG
config TUNE_Z9_109
bool "IBM System z9"
+ depends on !CC_IS_CLANG
config TUNE_Z10
bool "IBM System z10"
@@ -388,6 +390,9 @@ config COMPAT
(and some other stuff like libraries and such) is needed for
executing 31 bit applications. It is safe to say "Y".
+config COMPAT_VDSO
+ def_bool COMPAT && !CC_IS_CLANG
+
config SYSVIPC_COMPAT
def_bool y if COMPAT && SYSVIPC
@@ -549,6 +554,17 @@ config ARCH_HAS_KEXEC_PURGATORY
def_bool y
depends on KEXEC_FILE
+config KEXEC_VERIFY_SIG
+ bool "Verify kernel signature during kexec_file_load() syscall"
+ depends on KEXEC_FILE && SYSTEM_DATA_VERIFICATION
+ help
+ This option makes kernel signature verification mandatory for
+ the kexec_file_load() syscall.
+
+ In addition to that option, you need to enable signature
+ verification for the corresponding kernel image type being
+ loaded in order for this to work.
+
config ARCH_RANDOM
def_bool y
prompt "s390 architectural random number generation API"
@@ -609,6 +625,29 @@ config EXPOLINE_FULL
endchoice
+config RELOCATABLE
+ bool "Build a relocatable kernel"
+ select MODULE_REL_CRCS if MODVERSIONS
+ default y
+ help
+ This builds a kernel image that retains relocation information
+ so it can be loaded at an arbitrary address.
+ The kernel is linked as a position-independent executable (PIE)
+ and contains dynamic relocations which are processed early in the
+ bootup process.
+ The relocations make the kernel image about 15% larger (compressed
+ 10%), but are discarded at runtime.
+
+config RANDOMIZE_BASE
+ bool "Randomize the address of the kernel image (KASLR)"
+ depends on RELOCATABLE
+ default y
+ help
+ In support of Kernel Address Space Layout Randomization (KASLR),
+ this randomizes the address at which the kernel image is loaded,
+ as a security feature that deters exploit attempts relying on
+ knowledge of the location of kernel internals.
+
endmenu
menu "Memory setup"
@@ -837,6 +876,17 @@ config HAVE_PNETID
menu "Virtualization"
+config PROTECTED_VIRTUALIZATION_GUEST
+ def_bool n
+ prompt "Protected virtualization guest support"
+ help
+ Select this option, if you want to be able to run this
+ kernel as a protected virtualization KVM guest.
+ Protected virtualization capable machines have a mini hypervisor
+ located at machine level (an ultravisor). With help of the
+ Ultravisor, KVM will be able to run "protected" VMs, special
+ VMs whose memory and management data are unavailable to KVM.
+
config PFAULT
def_bool y
prompt "Pseudo page fault support"
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index e21053e5e0da..df1d6a150f30 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -16,10 +16,14 @@ KBUILD_AFLAGS_MODULE += -fPIC
KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_AFLAGS += -m64
KBUILD_CFLAGS += -m64
+ifeq ($(CONFIG_RELOCATABLE),y)
+KBUILD_CFLAGS += -fPIE
+LDFLAGS_vmlinux := -pie
+endif
aflags_dwarf := -Wa,-gdwarf-2
-KBUILD_AFLAGS_DECOMPRESSOR := -m64 -D__ASSEMBLY__
+KBUILD_AFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -D__ASSEMBLY__
KBUILD_AFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),$(aflags_dwarf))
-KBUILD_CFLAGS_DECOMPRESSOR := -m64 -O2
+KBUILD_CFLAGS_DECOMPRESSOR := $(CLANG_FLAGS) -m64 -O2
KBUILD_CFLAGS_DECOMPRESSOR += -DDISABLE_BRANCH_PROFILING -D__NO_FORTIFY
KBUILD_CFLAGS_DECOMPRESSOR += -fno-delete-null-pointer-checks -msoft-float
KBUILD_CFLAGS_DECOMPRESSOR += -fno-asynchronous-unwind-tables
@@ -111,7 +115,7 @@ endif
cfi := $(call as-instr,.cfi_startproc\n.cfi_val_offset 15$(comma)-160\n.cfi_endproc,-DCONFIG_AS_CFI_VAL_OFFSET=1)
KBUILD_CFLAGS += -mbackchain -msoft-float $(cflags-y)
-KBUILD_CFLAGS += -pipe -fno-strength-reduce -Wno-sign-compare
+KBUILD_CFLAGS += -pipe -Wno-sign-compare
KBUILD_CFLAGS += -fno-asynchronous-unwind-tables $(cfi)
KBUILD_AFLAGS += $(aflags-y) $(cfi)
export KBUILD_AFLAGS_DECOMPRESSOR
diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile
index c844eaf24ed7..c51496bbac19 100644
--- a/arch/s390/boot/Makefile
+++ b/arch/s390/boot/Makefile
@@ -12,25 +12,35 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS_DECOMPRESSOR)
KBUILD_CFLAGS := $(KBUILD_CFLAGS_DECOMPRESSOR)
#
-# Use -march=z900 for als.c to be able to print an error
+# Use minimum architecture for als.c to be able to print an error
# message if the kernel is started on a machine which is too old
#
-ifneq ($(CC_FLAGS_MARCH),-march=z900)
+ifndef CONFIG_CC_IS_CLANG
+CC_FLAGS_MARCH_MINIMUM := -march=z900
+else
+CC_FLAGS_MARCH_MINIMUM := -march=z10
+endif
+
+ifneq ($(CC_FLAGS_MARCH),$(CC_FLAGS_MARCH_MINIMUM))
AFLAGS_REMOVE_head.o += $(CC_FLAGS_MARCH)
-AFLAGS_head.o += -march=z900
+AFLAGS_head.o += $(CC_FLAGS_MARCH_MINIMUM)
AFLAGS_REMOVE_mem.o += $(CC_FLAGS_MARCH)
-AFLAGS_mem.o += -march=z900
+AFLAGS_mem.o += $(CC_FLAGS_MARCH_MINIMUM)
CFLAGS_REMOVE_als.o += $(CC_FLAGS_MARCH)
-CFLAGS_als.o += -march=z900
+CFLAGS_als.o += $(CC_FLAGS_MARCH_MINIMUM)
CFLAGS_REMOVE_sclp_early_core.o += $(CC_FLAGS_MARCH)
-CFLAGS_sclp_early_core.o += -march=z900
+CFLAGS_sclp_early_core.o += $(CC_FLAGS_MARCH_MINIMUM)
endif
CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
-obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o string.o ebcdic.o
-obj-y += sclp_early_core.o mem.o ipl_vmparm.o cmdline.o ctype.o
-targets := bzImage startup.a section_cmp.boot.data $(obj-y)
+obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
+obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
+obj-y += ctype.o text_dma.o
+obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o
+obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
+obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
+targets := bzImage startup.a section_cmp.boot.data section_cmp.boot.preserved.data $(obj-y)
subdir- := compressed
OBJECTS := $(addprefix $(obj)/,$(obj-y))
@@ -48,7 +58,8 @@ define cmd_section_cmp
touch $@
endef
-$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data FORCE
+OBJCOPYFLAGS_bzImage := --pad-to $$(readelf -s $(obj)/compressed/vmlinux | awk '/\<_end\>/ {print or(strtonum("0x"$$2),4095)+1}')
+$(obj)/bzImage: $(obj)/compressed/vmlinux $(obj)/section_cmp.boot.data $(obj)/section_cmp.boot.preserved.data FORCE
$(call if_changed,objcopy)
$(obj)/section_cmp%: vmlinux $(obj)/compressed/vmlinux FORCE
diff --git a/arch/s390/boot/als.c b/arch/s390/boot/als.c
index f902215e9cd9..ff6801d401c4 100644
--- a/arch/s390/boot/als.c
+++ b/arch/s390/boot/als.c
@@ -99,7 +99,7 @@ static void facility_mismatch(void)
print_machine_type();
print_missing_facilities();
sclp_early_printk("See Principles of Operations for facility bits\n");
- disabled_wait(0x8badcccc);
+ disabled_wait();
}
void verify_facilities(void)
diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h
index 82bc06346e05..ad57c2205a71 100644
--- a/arch/s390/boot/boot.h
+++ b/arch/s390/boot/boot.h
@@ -9,5 +9,10 @@ void setup_boot_command_line(void);
void parse_boot_command_line(void);
void setup_memory_end(void);
void print_missing_facilities(void);
+unsigned long get_random_base(unsigned long safe_addr);
+
+extern int kaslr_enabled;
+
+unsigned long read_ipl_report(unsigned long safe_offset);
#endif /* BOOT_BOOT_H */
diff --git a/arch/s390/boot/compressed/decompressor.h b/arch/s390/boot/compressed/decompressor.h
index e1c1f2ec60f4..c15eb7114d83 100644
--- a/arch/s390/boot/compressed/decompressor.h
+++ b/arch/s390/boot/compressed/decompressor.h
@@ -17,6 +17,11 @@ struct vmlinux_info {
unsigned long bss_size; /* uncompressed image .bss size */
unsigned long bootdata_off;
unsigned long bootdata_size;
+ unsigned long bootdata_preserved_off;
+ unsigned long bootdata_preserved_size;
+ unsigned long dynsym_start;
+ unsigned long rela_dyn_start;
+ unsigned long rela_dyn_end;
};
extern char _vmlinux_info[];
diff --git a/arch/s390/boot/compressed/vmlinux.lds.S b/arch/s390/boot/compressed/vmlinux.lds.S
index 7efc3938f595..112b8d9f1e4c 100644
--- a/arch/s390/boot/compressed/vmlinux.lds.S
+++ b/arch/s390/boot/compressed/vmlinux.lds.S
@@ -33,7 +33,29 @@ SECTIONS
*(.data.*)
_edata = . ;
}
+ /*
+ * .dma section for code, data, ex_table that need to stay below 2 GB,
+ * even when the kernel is relocated above 2 GB.
+ */
+ _sdma = .;
+ .dma.text : {
+ . = ALIGN(PAGE_SIZE);
+ _stext_dma = .;
+ *(.dma.text)
+ . = ALIGN(PAGE_SIZE);
+ _etext_dma = .;
+ }
+ . = ALIGN(16);
+ .dma.ex_table : {
+ _start_dma_ex_table = .;
+ KEEP(*(.dma.ex_table))
+ _stop_dma_ex_table = .;
+ }
+ .dma.data : { *(.dma.data) }
+ _edma = .;
+
BOOT_DATA
+ BOOT_DATA_PRESERVED
/*
* uncompressed image info used by the decompressor; it should match
diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S
index ce2cbbc41742..028aab03a9e7 100644
--- a/arch/s390/boot/head.S
+++ b/arch/s390/boot/head.S
@@ -305,7 +305,7 @@ ENTRY(startup_kdump)
xc 0x300(256),0x300
xc 0xe00(256),0xe00
xc 0xf00(256),0xf00
- lctlg %c0,%c15,0x200(%r0) # initialize control registers
+ lctlg %c0,%c15,.Lctl-.LPG0(%r13) # load control registers
stcke __LC_BOOT_CLOCK
mvc __LC_LAST_UPDATE_CLOCK(8),__LC_BOOT_CLOCK+1
spt 6f-.LPG0(%r13)
@@ -319,20 +319,54 @@ ENTRY(startup_kdump)
.align 8
6: .long 0x7fffffff,0xffffffff
+.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
+ .quad 0 # cr1: primary space segment table
+ .quad .Lduct # cr2: dispatchable unit control table
+ .quad 0 # cr3: instruction authorization
+ .quad 0xffff # cr4: instruction authorization
+ .quad .Lduct # cr5: primary-aste origin
+ .quad 0 # cr6: I/O interrupts
+ .quad 0 # cr7: secondary space segment table
+ .quad 0 # cr8: access registers translation
+ .quad 0 # cr9: tracing off
+ .quad 0 # cr10: tracing off
+ .quad 0 # cr11: tracing off
+ .quad 0 # cr12: tracing off
+ .quad 0 # cr13: home space segment table
+ .quad 0xc0000000 # cr14: machine check handling off
+ .quad .Llinkage_stack # cr15: linkage stack operations
+
+ .section .dma.data,"aw",@progbits
+.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
+ .long 0,0,0,0,0,0,0,0
+.Llinkage_stack:
+ .long 0,0,0x89000000,0,0,0,0x8a000000,0
+ .align 64
+.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
+ .align 128
+.Lduald:.rept 8
+ .long 0x80000000,0,0,0 # invalid access-list entries
+ .endr
+ .previous
+
#include "head_kdump.S"
#
# params at 10400 (setup.h)
+# Must be kept in sync with struct parmarea in setup.h
#
.org PARMAREA
- .long 0,0 # IPL_DEVICE
- .long 0,0 # INITRD_START
- .long 0,0 # INITRD_SIZE
- .long 0,0 # OLDMEM_BASE
- .long 0,0 # OLDMEM_SIZE
+ .quad 0 # IPL_DEVICE
+ .quad 0 # INITRD_START
+ .quad 0 # INITRD_SIZE
+ .quad 0 # OLDMEM_BASE
+ .quad 0 # OLDMEM_SIZE
.org COMMAND_LINE
.byte "root=/dev/ram0 ro"
.byte 0
- .org 0x11000
+ .org EARLY_SCCB_OFFSET
+ .fill 4096
+
+ .org HEAD_END
diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c
index 36beb56de021..3c49bde8aa5e 100644
--- a/arch/s390/boot/ipl_parm.c
+++ b/arch/s390/boot/ipl_parm.c
@@ -7,16 +7,19 @@
#include <asm/sections.h>
#include <asm/boot_data.h>
#include <asm/facility.h>
+#include <asm/uv.h>
#include "boot.h"
char __bootdata(early_command_line)[COMMAND_LINE_SIZE];
-struct ipl_parameter_block __bootdata(early_ipl_block);
-int __bootdata(early_ipl_block_valid);
+struct ipl_parameter_block __bootdata_preserved(ipl_block);
+int __bootdata_preserved(ipl_block_valid);
unsigned long __bootdata(memory_end);
int __bootdata(memory_end_set);
int __bootdata(noexec_disabled);
+int kaslr_enabled __section(.data);
+
static inline int __diag308(unsigned long subcode, void *addr)
{
register unsigned long _addr asm("0") = (unsigned long)addr;
@@ -45,13 +48,15 @@ void store_ipl_parmblock(void)
{
int rc;
- rc = __diag308(DIAG308_STORE, &early_ipl_block);
+ uv_set_shared(__pa(&ipl_block));
+ rc = __diag308(DIAG308_STORE, &ipl_block);
+ uv_remove_shared(__pa(&ipl_block));
if (rc == DIAG308_RC_OK &&
- early_ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
- early_ipl_block_valid = 1;
+ ipl_block.hdr.version <= IPL_MAX_SUPPORTED_VERSION)
+ ipl_block_valid = 1;
}
-static size_t scpdata_length(const char *buf, size_t count)
+static size_t scpdata_length(const u8 *buf, size_t count)
{
while (count) {
if (buf[count - 1] != '\0' && buf[count - 1] != ' ')
@@ -68,26 +73,26 @@ static size_t ipl_block_get_ascii_scpdata(char *dest, size_t size,
size_t i;
int has_lowercase;
- count = min(size - 1, scpdata_length(ipb->ipl_info.fcp.scp_data,
- ipb->ipl_info.fcp.scp_data_len));
+ count = min(size - 1, scpdata_length(ipb->fcp.scp_data,
+ ipb->fcp.scp_data_len));
if (!count)
goto out;
has_lowercase = 0;
for (i = 0; i < count; i++) {
- if (!isascii(ipb->ipl_info.fcp.scp_data[i])) {
+ if (!isascii(ipb->fcp.scp_data[i])) {
count = 0;
goto out;
}
- if (!has_lowercase && islower(ipb->ipl_info.fcp.scp_data[i]))
+ if (!has_lowercase && islower(ipb->fcp.scp_data[i]))
has_lowercase = 1;
}
if (has_lowercase)
- memcpy(dest, ipb->ipl_info.fcp.scp_data, count);
+ memcpy(dest, ipb->fcp.scp_data, count);
else
for (i = 0; i < count; i++)
- dest[i] = tolower(ipb->ipl_info.fcp.scp_data[i]);
+ dest[i] = tolower(ipb->fcp.scp_data[i]);
out:
dest[count] = '\0';
return count;
@@ -103,14 +108,14 @@ static void append_ipl_block_parm(void)
delim = early_command_line + len; /* '\0' character position */
parm = early_command_line + len + 1; /* append right after '\0' */
- switch (early_ipl_block.hdr.pbt) {
- case DIAG308_IPL_TYPE_CCW:
+ switch (ipl_block.pb0_hdr.pbt) {
+ case IPL_PBT_CCW:
rc = ipl_block_get_ascii_vmparm(
- parm, COMMAND_LINE_SIZE - len - 1, &early_ipl_block);
+ parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
break;
- case DIAG308_IPL_TYPE_FCP:
+ case IPL_PBT_FCP:
rc = ipl_block_get_ascii_scpdata(
- parm, COMMAND_LINE_SIZE - len - 1, &early_ipl_block);
+ parm, COMMAND_LINE_SIZE - len - 1, &ipl_block);
break;
}
if (rc) {
@@ -141,7 +146,7 @@ void setup_boot_command_line(void)
strcpy(early_command_line, strim(COMMAND_LINE));
/* append IPL PARM data to the boot command line */
- if (early_ipl_block_valid)
+ if (!is_prot_virt_guest() && ipl_block_valid)
append_ipl_block_parm();
}
@@ -211,6 +216,7 @@ void parse_boot_command_line(void)
char *args;
int rc;
+ kaslr_enabled = IS_ENABLED(CONFIG_RANDOMIZE_BASE);
args = strcpy(command_line_buf, early_command_line);
while (*args) {
args = next_arg(args, &param, &val);
@@ -228,15 +234,21 @@ void parse_boot_command_line(void)
if (!strcmp(param, "facilities"))
modify_fac_list(val);
+
+ if (!strcmp(param, "nokaslr"))
+ kaslr_enabled = 0;
}
}
void setup_memory_end(void)
{
#ifdef CONFIG_CRASH_DUMP
- if (!OLDMEM_BASE && early_ipl_block_valid &&
- early_ipl_block.hdr.pbt == DIAG308_IPL_TYPE_FCP &&
- early_ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP) {
+ if (OLDMEM_BASE) {
+ kaslr_enabled = 0;
+ } else if (ipl_block_valid &&
+ ipl_block.pb0_hdr.pbt == IPL_PBT_FCP &&
+ ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP) {
+ kaslr_enabled = 0;
if (!sclp_early_get_hsa_size(&memory_end) && memory_end)
memory_end_set = 1;
}
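A usage note (not part of the patch): with CONFIG_RANDOMIZE_BASE=y, kaslr_enabled defaults to on and can be switched off from the boot command line, e.g.

	root=/dev/ram0 ro nokaslr

It is also cleared automatically when OLDMEM_BASE is set (kdump kernel) or when the IPL block describes an FCP dump, as handled in setup_memory_end() above.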
diff --git a/arch/s390/boot/ipl_report.c b/arch/s390/boot/ipl_report.c
new file mode 100644
index 000000000000..0b4965573656
--- /dev/null
+++ b/arch/s390/boot/ipl_report.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/init.h>
+#include <linux/ctype.h>
+#include <asm/ebcdic.h>
+#include <asm/sclp.h>
+#include <asm/sections.h>
+#include <asm/boot_data.h>
+#include <uapi/asm/ipl.h>
+#include "boot.h"
+
+int __bootdata_preserved(ipl_secure_flag);
+
+unsigned long __bootdata_preserved(ipl_cert_list_addr);
+unsigned long __bootdata_preserved(ipl_cert_list_size);
+
+unsigned long __bootdata(early_ipl_comp_list_addr);
+unsigned long __bootdata(early_ipl_comp_list_size);
+
+#define for_each_rb_entry(entry, rb) \
+ for (entry = rb->entries; \
+ (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \
+ entry++)
+
+static inline bool intersects(unsigned long addr0, unsigned long size0,
+ unsigned long addr1, unsigned long size1)
+{
+ return addr0 + size0 > addr1 && addr1 + size1 > addr0;
+}
+
+static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
+ struct ipl_rb_certificates *certs,
+ unsigned long safe_addr)
+{
+ struct ipl_rb_certificate_entry *cert;
+ struct ipl_rb_component_entry *comp;
+ size_t size;
+
+ /*
+ * Find the length for the IPL report boot data
+ */
+ early_ipl_comp_list_size = 0;
+ for_each_rb_entry(comp, comps)
+ early_ipl_comp_list_size += sizeof(*comp);
+ ipl_cert_list_size = 0;
+ for_each_rb_entry(cert, certs)
+ ipl_cert_list_size += sizeof(unsigned int) + cert->len;
+ size = ipl_cert_list_size + early_ipl_comp_list_size;
+
+ /*
+ * Start from safe_addr to find a free memory area large
+ * enough for the IPL report boot data. This area is used
+ * for ipl_cert_list_addr/ipl_cert_list_size and
+ * early_ipl_comp_list_addr/early_ipl_comp_list_size. It must
+ * not overlap with any component or any certificate.
+ */
+repeat:
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE &&
+ intersects(INITRD_START, INITRD_SIZE, safe_addr, size))
+ safe_addr = INITRD_START + INITRD_SIZE;
+ for_each_rb_entry(comp, comps)
+ if (intersects(safe_addr, size, comp->addr, comp->len)) {
+ safe_addr = comp->addr + comp->len;
+ goto repeat;
+ }
+ for_each_rb_entry(cert, certs)
+ if (intersects(safe_addr, size, cert->addr, cert->len)) {
+ safe_addr = cert->addr + cert->len;
+ goto repeat;
+ }
+ early_ipl_comp_list_addr = safe_addr;
+ ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size;
+
+ return safe_addr + size;
+}
+
+static void copy_components_bootdata(struct ipl_rb_components *comps)
+{
+ struct ipl_rb_component_entry *comp, *ptr;
+
+ ptr = (struct ipl_rb_component_entry *) early_ipl_comp_list_addr;
+ for_each_rb_entry(comp, comps)
+ memcpy(ptr++, comp, sizeof(*ptr));
+}
+
+static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
+{
+ struct ipl_rb_certificate_entry *cert;
+ void *ptr;
+
+ ptr = (void *) ipl_cert_list_addr;
+ for_each_rb_entry(cert, certs) {
+ *(unsigned int *) ptr = cert->len;
+ ptr += sizeof(unsigned int);
+ memcpy(ptr, (void *) cert->addr, cert->len);
+ ptr += cert->len;
+ }
+}
+
+unsigned long read_ipl_report(unsigned long safe_addr)
+{
+ struct ipl_rb_certificates *certs;
+ struct ipl_rb_components *comps;
+ struct ipl_pl_hdr *pl_hdr;
+ struct ipl_rl_hdr *rl_hdr;
+ struct ipl_rb_hdr *rb_hdr;
+ unsigned long tmp;
+ void *rl_end;
+
+ /*
+ * Check if there is an IPL report by looking at the copy
+ * of the IPL parameter information block.
+ */
+ if (!ipl_block_valid ||
+ !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR))
+ return safe_addr;
+ ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL);
+ /*
+ * There is an IPL report; to find it, load the pointer to the
+ * IPL parameter information block from lowcore and skip past
+ * the IPL parameter list, then align the address to a double
+ * word boundary.
+ */
+ tmp = (unsigned long) S390_lowcore.ipl_parmblock_ptr;
+ pl_hdr = (struct ipl_pl_hdr *) tmp;
+ tmp = (tmp + pl_hdr->len + 7) & -8UL;
+ rl_hdr = (struct ipl_rl_hdr *) tmp;
+ /* Walk through the IPL report blocks in the IPL Report list */
+ certs = NULL;
+ comps = NULL;
+ rl_end = (void *) rl_hdr + rl_hdr->len;
+ rb_hdr = (void *) rl_hdr + sizeof(*rl_hdr);
+ while ((void *) rb_hdr + sizeof(*rb_hdr) < rl_end &&
+ (void *) rb_hdr + rb_hdr->len <= rl_end) {
+
+ switch (rb_hdr->rbt) {
+ case IPL_RBT_CERTIFICATES:
+ certs = (struct ipl_rb_certificates *) rb_hdr;
+ break;
+ case IPL_RBT_COMPONENTS:
+ comps = (struct ipl_rb_components *) rb_hdr;
+ break;
+ default:
+ break;
+ }
+
+ rb_hdr = (void *) rb_hdr + rb_hdr->len;
+ }
+
+ /*
+ * If either the component list or the certificate list is
+ * missing, the kernel will stay ignorant of secure IPL.
+ */
+ if (!comps || !certs)
+ return safe_addr;
+
+ /*
+ * Copy component and certificate list to a safe area
+ * where the decompressed kernel can find them.
+ */
+ safe_addr = find_bootdata_space(comps, certs, safe_addr);
+ copy_components_bootdata(comps);
+ copy_certificates_bootdata(certs);
+
+ return safe_addr;
+}
diff --git a/arch/s390/boot/kaslr.c b/arch/s390/boot/kaslr.c
new file mode 100644
index 000000000000..3bdd8132e56b
--- /dev/null
+++ b/arch/s390/boot/kaslr.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corp. 2019
+ */
+#include <asm/mem_detect.h>
+#include <asm/cpacf.h>
+#include <asm/timex.h>
+#include <asm/sclp.h>
+#include "compressed/decompressor.h"
+
+#define PRNG_MODE_TDES 1
+#define PRNG_MODE_SHA512 2
+#define PRNG_MODE_TRNG 3
+
+struct prno_parm {
+ u32 res;
+ u32 reseed_counter;
+ u64 stream_bytes;
+ u8 V[112];
+ u8 C[112];
+};
+
+struct prng_parm {
+ u8 parm_block[32];
+ u32 reseed_counter;
+ u64 byte_counter;
+};
+
+static int check_prng(void)
+{
+ if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG)) {
+ sclp_early_printk("KASLR disabled: CPU has no PRNG\n");
+ return 0;
+ }
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ return PRNG_MODE_TRNG;
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN))
+ return PRNG_MODE_SHA512;
+ else
+ return PRNG_MODE_TDES;
+}
+
+static unsigned long get_random(unsigned long limit)
+{
+ struct prng_parm prng = {
+ /* initial parameter block for tdes mode, copied from libica */
+ .parm_block = {
+ 0x0F, 0x2B, 0x8E, 0x63, 0x8C, 0x8E, 0xD2, 0x52,
+ 0x64, 0xB7, 0xA0, 0x7B, 0x75, 0x28, 0xB8, 0xF4,
+ 0x75, 0x5F, 0xD2, 0xA6, 0x8D, 0x97, 0x11, 0xFF,
+ 0x49, 0xD8, 0x23, 0xF3, 0x7E, 0x21, 0xEC, 0xA0
+ },
+ };
+ unsigned long seed, random;
+ struct prno_parm prno;
+ __u64 entropy[4];
+ int mode, i;
+
+ mode = check_prng();
+ seed = get_tod_clock_fast();
+ switch (mode) {
+ case PRNG_MODE_TRNG:
+ cpacf_trng(NULL, 0, (u8 *) &random, sizeof(random));
+ break;
+ case PRNG_MODE_SHA512:
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, &prno, NULL, 0,
+ (u8 *) &seed, sizeof(seed));
+ cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prno, (u8 *) &random,
+ sizeof(random), NULL, 0);
+ break;
+ case PRNG_MODE_TDES:
+ /* add entropy */
+ *(unsigned long *) prng.parm_block ^= seed;
+ for (i = 0; i < 16; i++) {
+ cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block,
+ (char *) entropy, (char *) entropy,
+ sizeof(entropy));
+ memcpy(prng.parm_block, entropy, sizeof(entropy));
+ }
+ random = seed;
+ cpacf_kmc(CPACF_KMC_PRNG, prng.parm_block, (u8 *) &random,
+ (u8 *) &random, sizeof(random));
+ break;
+ default:
+ random = 0;
+ }
+ return random % limit;
+}
+
+unsigned long get_random_base(unsigned long safe_addr)
+{
+ unsigned long base, start, end, kernel_size;
+ unsigned long block_sum, offset;
+ int i;
+
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && INITRD_START && INITRD_SIZE) {
+ if (safe_addr < INITRD_START + INITRD_SIZE)
+ safe_addr = INITRD_START + INITRD_SIZE;
+ }
+ safe_addr = ALIGN(safe_addr, THREAD_SIZE);
+
+ kernel_size = vmlinux.image_size + vmlinux.bss_size;
+ block_sum = 0;
+ for_each_mem_detect_block(i, &start, &end) {
+ if (memory_end_set) {
+ if (start >= memory_end)
+ break;
+ if (end > memory_end)
+ end = memory_end;
+ }
+ if (end - start < kernel_size)
+ continue;
+ block_sum += end - start - kernel_size;
+ }
+ if (!block_sum) {
+ sclp_early_printk("KASLR disabled: not enough memory\n");
+ return 0;
+ }
+
+ base = get_random(block_sum);
+ if (base == 0)
+ return 0;
+ if (base < safe_addr)
+ base = safe_addr;
+ block_sum = offset = 0;
+ for_each_mem_detect_block(i, &start, &end) {
+ if (memory_end_set) {
+ if (start >= memory_end)
+ break;
+ if (end > memory_end)
+ end = memory_end;
+ }
+ if (end - start < kernel_size)
+ continue;
+ block_sum += end - start - kernel_size;
+ if (base <= block_sum) {
+ base = start + base - offset;
+ base = ALIGN_DOWN(base, THREAD_SIZE);
+ break;
+ }
+ offset = block_sum;
+ }
+ return base;
+}
diff --git a/arch/s390/boot/machine_kexec_reloc.c b/arch/s390/boot/machine_kexec_reloc.c
new file mode 100644
index 000000000000..b7a5d0f72097
--- /dev/null
+++ b/arch/s390/boot/machine_kexec_reloc.c
@@ -0,0 +1,2 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "../kernel/machine_kexec_reloc.c"
diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c
index bdfc5549a299..7b0d05414618 100644
--- a/arch/s390/boot/startup.c
+++ b/arch/s390/boot/startup.c
@@ -1,11 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/string.h>
+#include <linux/elf.h>
+#include <asm/sections.h>
#include <asm/setup.h>
+#include <asm/kexec.h>
#include <asm/sclp.h>
+#include <asm/diag.h>
+#include <asm/uv.h>
#include "compressed/decompressor.h"
#include "boot.h"
extern char __boot_data_start[], __boot_data_end[];
+extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
+unsigned long __bootdata_preserved(__kaslr_offset);
+
+/*
+ * Some code and data needs to stay below 2 GB even when the kernel is
+ * relocated above 2 GB, because it has to use 31 bit addresses.
+ * Such code and data is part of the .dma section, and its location is passed
+ * over to the decompressed / relocated kernel via the .boot.preserved.data
+ * section.
+ */
+extern char _sdma[], _edma[];
+extern char _stext_dma[], _etext_dma[];
+extern struct exception_table_entry _start_dma_ex_table[];
+extern struct exception_table_entry _stop_dma_ex_table[];
+unsigned long __bootdata_preserved(__sdma) = __pa(&_sdma);
+unsigned long __bootdata_preserved(__edma) = __pa(&_edma);
+unsigned long __bootdata_preserved(__stext_dma) = __pa(&_stext_dma);
+unsigned long __bootdata_preserved(__etext_dma) = __pa(&_etext_dma);
+struct exception_table_entry *
+ __bootdata_preserved(__start_dma_ex_table) = _start_dma_ex_table;
+struct exception_table_entry *
+ __bootdata_preserved(__stop_dma_ex_table) = _stop_dma_ex_table;
+
+int _diag210_dma(struct diag210 *addr);
+int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode);
+int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode);
+void _diag0c_dma(struct hypfs_diag0c_entry *entry);
+void _diag308_reset_dma(void);
+struct diag_ops __bootdata_preserved(diag_dma_ops) = {
+ .diag210 = _diag210_dma,
+ .diag26c = _diag26c_dma,
+ .diag14 = _diag14_dma,
+ .diag0c = _diag0c_dma,
+ .diag308_reset = _diag308_reset_dma
+};
+static struct diag210 _diag210_tmp_dma __section(".dma.data");
+struct diag210 *__bootdata_preserved(__diag210_tmp_dma) = &_diag210_tmp_dma;
+void _swsusp_reset_dma(void);
+unsigned long __bootdata_preserved(__swsusp_reset_dma) = __pa(_swsusp_reset_dma);
void error(char *x)
{
@@ -13,7 +57,7 @@ void error(char *x)
sclp_early_printk(x);
sclp_early_printk("\n\n -- System halted");
- disabled_wait(0xdeadbeef);
+ disabled_wait();
}
#ifdef CONFIG_KERNEL_UNCOMPRESSED
@@ -23,19 +67,16 @@ unsigned long mem_safe_offset(void)
}
#endif
-static void rescue_initrd(void)
+static void rescue_initrd(unsigned long addr)
{
- unsigned long min_initrd_addr;
-
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
return;
if (!INITRD_START || !INITRD_SIZE)
return;
- min_initrd_addr = mem_safe_offset();
- if (min_initrd_addr <= INITRD_START)
+ if (addr <= INITRD_START)
return;
- memmove((void *)min_initrd_addr, (void *)INITRD_START, INITRD_SIZE);
- INITRD_START = min_initrd_addr;
+ memmove((void *)addr, (void *)INITRD_START, INITRD_SIZE);
+ INITRD_START = addr;
}
static void copy_bootdata(void)
@@ -43,23 +84,81 @@ static void copy_bootdata(void)
if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
error(".boot.data section size mismatch");
memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
+ if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size)
+ error(".boot.preserved.data section size mismatch");
+ memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
+}
+
+static void handle_relocs(unsigned long offset)
+{
+ Elf64_Rela *rela_start, *rela_end, *rela;
+ int r_type, r_sym, rc;
+ Elf64_Addr loc, val;
+ Elf64_Sym *dynsym;
+
+ rela_start = (Elf64_Rela *) vmlinux.rela_dyn_start;
+ rela_end = (Elf64_Rela *) vmlinux.rela_dyn_end;
+ dynsym = (Elf64_Sym *) vmlinux.dynsym_start;
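+ /*
+ * Apply the load offset to every relocation entry; entries that
+ * reference a dynamic symbol also get that symbol's value added.
+ */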
+ for (rela = rela_start; rela < rela_end; rela++) {
+ loc = rela->r_offset + offset;
+ val = rela->r_addend + offset;
+ r_sym = ELF64_R_SYM(rela->r_info);
+ if (r_sym)
+ val += dynsym[r_sym].st_value;
+ r_type = ELF64_R_TYPE(rela->r_info);
+ rc = arch_kexec_do_relocs(r_type, (void *) loc, val, 0);
+ if (rc)
+ error("Unknown relocation type");
+ }
}
void startup_kernel(void)
{
+ unsigned long random_lma;
+ unsigned long safe_addr;
void *img;
- rescue_initrd();
- sclp_early_read_info();
store_ipl_parmblock();
+ safe_addr = mem_safe_offset();
+ safe_addr = read_ipl_report(safe_addr);
+ uv_query_info();
+ rescue_initrd(safe_addr);
+ sclp_early_read_info();
setup_boot_command_line();
parse_boot_command_line();
setup_memory_end();
detect_memory();
+
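+ /*
+ * If KASLR is enabled, pick a random load address and shift all
+ * precomputed vmlinux offsets by the resulting offset.
+ */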
+ random_lma = __kaslr_offset = 0;
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
+ random_lma = get_random_base(safe_addr);
+ if (random_lma) {
+ __kaslr_offset = random_lma - vmlinux.default_lma;
+ img = (void *)vmlinux.default_lma;
+ vmlinux.default_lma += __kaslr_offset;
+ vmlinux.entry += __kaslr_offset;
+ vmlinux.bootdata_off += __kaslr_offset;
+ vmlinux.bootdata_preserved_off += __kaslr_offset;
+ vmlinux.rela_dyn_start += __kaslr_offset;
+ vmlinux.rela_dyn_end += __kaslr_offset;
+ vmlinux.dynsym_start += __kaslr_offset;
+ }
+ }
+
if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
img = decompress_kernel();
memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
- }
+ } else if (__kaslr_offset)
+ memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+
copy_bootdata();
+ if (IS_ENABLED(CONFIG_RELOCATABLE))
+ handle_relocs(__kaslr_offset);
+
+ if (__kaslr_offset) {
+ /* Clear non-relocated kernel */
+ if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
+ memset(img, 0, vmlinux.image_size);
+ }
vmlinux.entry();
}
diff --git a/arch/s390/boot/text_dma.S b/arch/s390/boot/text_dma.S
new file mode 100644
index 000000000000..9715715c4c28
--- /dev/null
+++ b/arch/s390/boot/text_dma.S
@@ -0,0 +1,184 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Code that needs to run below 2 GB.
+ *
+ * Copyright IBM Corp. 2019
+ */
+
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/sigp.h>
+
+#ifdef CC_USING_EXPOLINE
+ .pushsection .dma.text.__s390_indirect_jump_r14,"axG"
+__dma__s390_indirect_jump_r14:
+ larl %r1,0f
+ ex 0,0(%r1)
+ j .
+0: br %r14
+ .popsection
+#endif
+
+ .section .dma.text,"ax"
+/*
+ * Simplified version of the expoline thunk. The normal thunks cannot be used
+ * here, because they might be more than 2 GB away and thus not reachable by
+ * the relative branch. No comdat, exrl, etc. optimizations are used here,
+ * because this only affects a few functions that are not performance-relevant.
+ */
+ .macro BR_EX_DMA_r14
+#ifdef CC_USING_EXPOLINE
+ jg __dma__s390_indirect_jump_r14
+#else
+ br %r14
+#endif
+ .endm
+
+/*
+ * int _diag14_dma(unsigned long rx, unsigned long ry1, unsigned long subcode)
+ */
+ENTRY(_diag14_dma)
+ lgr %r1,%r2
+ lgr %r2,%r3
+ lgr %r3,%r4
+ lhi %r5,-EIO
+ sam31
+ diag %r1,%r2,0x14
+.Ldiag14_ex:
+ ipm %r5
+ srl %r5,28
+.Ldiag14_fault:
+ sam64
+ lgfr %r2,%r5
+ BR_EX_DMA_r14
+ EX_TABLE_DMA(.Ldiag14_ex, .Ldiag14_fault)
+ENDPROC(_diag14_dma)
+
+/*
+ * int _diag210_dma(struct diag210 *addr)
+ */
+ENTRY(_diag210_dma)
+ lgr %r1,%r2
+ lhi %r2,-1
+ sam31
+ diag %r1,%r0,0x210
+.Ldiag210_ex:
+ ipm %r2
+ srl %r2,28
+.Ldiag210_fault:
+ sam64
+ lgfr %r2,%r2
+ BR_EX_DMA_r14
+ EX_TABLE_DMA(.Ldiag210_ex, .Ldiag210_fault)
+ENDPROC(_diag210_dma)
+
+/*
+ * int _diag26c_dma(void *req, void *resp, enum diag26c_sc subcode)
+ */
+ENTRY(_diag26c_dma)
+ lghi %r5,-EOPNOTSUPP
+ sam31
+ diag %r2,%r4,0x26c
+.Ldiag26c_ex:
+ sam64
+ lgfr %r2,%r5
+ BR_EX_DMA_r14
+ EX_TABLE_DMA(.Ldiag26c_ex, .Ldiag26c_ex)
+ENDPROC(_diag26c_dma)
+
+/*
+ * void _diag0c_dma(struct hypfs_diag0c_entry *entry)
+ */
+ENTRY(_diag0c_dma)
+ sam31
+ diag %r2,%r2,0x0c
+ sam64
+ BR_EX_DMA_r14
+ENDPROC(_diag0c_dma)
+
+/*
+ * void _swsusp_reset_dma(void)
+ */
+ENTRY(_swsusp_reset_dma)
+ larl %r1,restart_entry
+ larl %r2,.Lrestart_diag308_psw
+ og %r1,0(%r2)
+ stg %r1,0(%r0)
+ lghi %r0,0
+ diag %r0,%r0,0x308
+restart_entry:
+ lhi %r1,1
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE
+ sam64
+ BR_EX_DMA_r14
+ENDPROC(_swsusp_reset_dma)
+
+/*
+ * void _diag308_reset_dma(void)
+ *
+ * Calls diag 308 subcode 1 and continues execution
+ */
+ENTRY(_diag308_reset_dma)
+ larl %r4,.Lctlregs # Save control registers
+ stctg %c0,%c15,0(%r4)
+ lg %r2,0(%r4) # Disable lowcore protection
+ nilh %r2,0xefff
+ larl %r4,.Lctlreg0
+ stg %r2,0(%r4)
+ lctlg %c0,%c0,0(%r4)
+ larl %r4,.Lfpctl # Floating point control register
+ stfpc 0(%r4)
+ larl %r4,.Lprefix # Save prefix register
+ stpx 0(%r4)
+ larl %r4,.Lprefix_zero # Set prefix register to 0
+ spx 0(%r4)
+ larl %r4,.Lcontinue_psw # Save PSW flags
+ epsw %r2,%r3
+ stm %r2,%r3,0(%r4)
+ larl %r4,restart_part2 # Setup restart PSW at absolute 0
+ larl %r3,.Lrestart_diag308_psw
+ og %r4,0(%r3) # Save PSW
+ lghi %r3,0
+ sturg %r4,%r3 # Use sturg, because of large pages
+ lghi %r1,1
+ lghi %r0,0
+ diag %r0,%r1,0x308
+restart_part2:
+ lhi %r0,0 # Load r0 with zero
+ lhi %r1,2 # Use mode 2 = ESAME (dump)
+ sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
+ sam64 # Switch to 64 bit addressing mode
+ larl %r4,.Lctlregs # Restore control registers
+ lctlg %c0,%c15,0(%r4)
+ larl %r4,.Lfpctl # Restore floating point ctl register
+ lfpc 0(%r4)
+ larl %r4,.Lprefix # Restore prefix register
+ spx 0(%r4)
+ larl %r4,.Lcontinue_psw # Restore PSW flags
+ lpswe 0(%r4)
+.Lcontinue:
+ BR_EX_DMA_r14
+ENDPROC(_diag308_reset_dma)
+
+ .section .dma.data,"aw",@progbits
+.align 8
+.Lrestart_diag308_psw:
+ .long 0x00080000,0x80000000
+
+.align 8
+.Lcontinue_psw:
+ .quad 0,.Lcontinue
+
+.align 8
+.Lctlreg0:
+ .quad 0
+.Lctlregs:
+ .rept 16
+ .quad 0
+ .endr
+.Lfpctl:
+ .long 0
+.Lprefix:
+ .long 0
+.Lprefix_zero:
+ .long 0
diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c
new file mode 100644
index 000000000000..ed007f4a6444
--- /dev/null
+++ b/arch/s390/boot/uv.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/uv.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+
+int __bootdata_preserved(prot_virt_guest);
+
+void uv_query_info(void)
+{
+ struct uv_cb_qui uvcb = {
+ .header.cmd = UVC_CMD_QUI,
+ .header.len = sizeof(uvcb)
+ };
+
+ if (!test_facility(158))
+ return;
+
+ if (uv_call(0, (uint64_t)&uvcb))
+ return;
+
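+ /*
+ * Consider this a protected virtualization guest only if the
+ * ultravisor supports both the set and remove shared access calls.
+ */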
+ if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
+ test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
+ prot_virt_guest = 1;
+}
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 9824c7bad9d4..b0920b35f87b 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -64,6 +64,7 @@ CONFIG_NUMA=y
CONFIG_PREEMPT=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_VERIFY_SIG=y
CONFIG_EXPOLINE=y
CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 4fcbe5792744..09aa5cb14873 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -65,6 +65,7 @@ CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_VERIFY_SIG=y
CONFIG_EXPOLINE=y
CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S
index 2bf01ba44107..0099044e2c86 100644
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -207,5 +207,6 @@ ENTRY(crc32_be_vgfm_16)
.Ldone:
VLGVF %r2,%v2,3
BR_EX %r14
+ENDPROC(crc32_be_vgfm_16)
.previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
index 7d6f568bd3ad..71caf0f4ec08 100644
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -105,13 +105,14 @@
ENTRY(crc32_le_vgfm_16)
larl %r5,.Lconstants_CRC_32_LE
j crc32_le_vgfm_generic
+ENDPROC(crc32_le_vgfm_16)
ENTRY(crc32c_le_vgfm_16)
larl %r5,.Lconstants_CRC_32C_LE
j crc32_le_vgfm_generic
+ENDPROC(crc32c_le_vgfm_16)
-
-crc32_le_vgfm_generic:
+ENTRY(crc32_le_vgfm_generic)
/* Load CRC-32 constants */
VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
@@ -267,5 +268,6 @@ crc32_le_vgfm_generic:
.Ldone:
VLGVF %r2,%v2,2
BR_EX %r14
+ENDPROC(crc32_le_vgfm_generic)
.previous
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 0d15383d0ff1..1f9ab24dc048 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -224,24 +224,11 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int key_len)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
+ int err;
- if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
- crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
- DES_KEY_SIZE)) &&
- (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
- return -EINVAL;
- }
-
- /* in fips mode, ensure k1 != k2 and k2 != k3 and k1 != k3 */
- if (fips_enabled &&
- !(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
- crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
- DES_KEY_SIZE) &&
- crypto_memneq(key, &key[DES_KEY_SIZE * 2], DES_KEY_SIZE))) {
- tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
- return -EINVAL;
- }
+ err = __des3_verify_key(&tfm->crt_flags, key);
+ if (unlikely(err))
+ return err;
memcpy(ctx->key, key, key_len);
return 0;
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index a97a1802cfb4..12cca467af7d 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -61,6 +61,7 @@ static unsigned int prng_reseed_limit;
module_param_named(reseed_limit, prng_reseed_limit, int, 0);
MODULE_PARM_DESC(prng_reseed_limit, "PRNG reseed limit");
+static bool trng_available;
/*
* Any one who considers arithmetical methods of producing random digits is,
@@ -115,46 +116,68 @@ static const u8 initial_parm_block[32] __initconst = {
/*
* generate_entropy:
- * This algorithm produces 64 bytes of entropy data based on 1024
- * individual stckf() invocations assuming that each stckf() value
- * contributes 0.25 bits of entropy. So the caller gets 256 bit
- * entropy per 64 byte or 4 bits entropy per byte.
+ * This function fills a given buffer with random bytes. The entropy within
+ * the returned random bytes is assumed to be at least 50% - meaning a
+ * 64 byte buffer has at least 64 * 8 / 2 = 256 bits of entropy.
+ * Within the function the entropy generation is done in chunks of 64 bytes.
+ * So the caller should also ask for buffer fills in multiples of 64 bytes.
+ * The generation of the entropy is based on the assumption that every stckf()
+ * invocation produces 0.5 bits of entropy. To accumulate 256 bits of entropy
+ * at least 512 stckf() values are needed. The entropy-relevant part of the
+ * stckf value is bit 51 (counting starts at the left with bit nr 0), so
+ * here we use the lower 4 bytes and xor the values into 2k of buffer space.
+ * To be on the safe side, if there is ever a problem with stckf(), the
+ * other half of the page buffer is filled with bytes from urandom via
+ * get_random_bytes(), so this function consumes 2k of urandom for each
+ * requested 64 bytes of output data. Finally the buffer page is condensed
+ * into a 64 byte value by hashing with SHA-512.
*/
static int generate_entropy(u8 *ebuf, size_t nbytes)
{
int n, ret = 0;
- u8 *pg, *h, hash[64];
-
- /* allocate 2 pages */
- pg = (u8 *) __get_free_pages(GFP_KERNEL, 1);
+ u8 *pg, pblock[80] = {
+ /* 8 x 64 bit init values */
+ 0x6A, 0x09, 0xE6, 0x67, 0xF3, 0xBC, 0xC9, 0x08,
+ 0xBB, 0x67, 0xAE, 0x85, 0x84, 0xCA, 0xA7, 0x3B,
+ 0x3C, 0x6E, 0xF3, 0x72, 0xFE, 0x94, 0xF8, 0x2B,
+ 0xA5, 0x4F, 0xF5, 0x3A, 0x5F, 0x1D, 0x36, 0xF1,
+ 0x51, 0x0E, 0x52, 0x7F, 0xAD, 0xE6, 0x82, 0xD1,
+ 0x9B, 0x05, 0x68, 0x8C, 0x2B, 0x3E, 0x6C, 0x1F,
+ 0x1F, 0x83, 0xD9, 0xAB, 0xFB, 0x41, 0xBD, 0x6B,
+ 0x5B, 0xE0, 0xCD, 0x19, 0x13, 0x7E, 0x21, 0x79,
+ /* 128 bit counter total message bit length */
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00 };
+
+ /* allocate one page stckf buffer */
+ pg = (u8 *) __get_free_page(GFP_KERNEL);
if (!pg) {
prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
return -ENOMEM;
}
+ /* fill the ebuf in chunks of 64 byte each */
while (nbytes) {
- /* fill pages with urandom bytes */
- get_random_bytes(pg, 2*PAGE_SIZE);
- /* exor pages with 1024 stckf values */
- for (n = 0; n < 2 * PAGE_SIZE / sizeof(u64); n++) {
- u64 *p = ((u64 *)pg) + n;
+ /* fill lower 2k with urandom bytes */
+ get_random_bytes(pg, PAGE_SIZE / 2);
+ /* exor upper 2k with 512 stckf values, offset 4 bytes each */
+ for (n = 0; n < 512; n++) {
+ int offset = (PAGE_SIZE / 2) + (n * 4) - 4;
+ u64 *p = (u64 *)(pg + offset);
*p ^= get_tod_clock_fast();
}
- n = (nbytes < sizeof(hash)) ? nbytes : sizeof(hash);
- if (n < sizeof(hash))
- h = hash;
- else
- h = ebuf;
- /* hash over the filled pages */
- cpacf_kimd(CPACF_KIMD_SHA_512, h, pg, 2*PAGE_SIZE);
- if (n < sizeof(hash))
- memcpy(ebuf, hash, n);
+ /* hash over the filled page */
+ cpacf_klmd(CPACF_KLMD_SHA_512, pblock, pg, PAGE_SIZE);
+ n = (nbytes < 64) ? nbytes : 64;
+ memcpy(ebuf, pblock, n);
ret += n;
ebuf += n;
nbytes -= n;
}
- free_pages((unsigned long)pg, 1);
+ memzero_explicit(pblock, sizeof(pblock));
+ memzero_explicit(pg, PAGE_SIZE);
+ free_page((unsigned long)pg);
return ret;
}
@@ -344,8 +367,8 @@ static int __init prng_sha512_selftest(void)
static int __init prng_sha512_instantiate(void)
{
- int ret, datalen;
- u8 seed[64 + 32 + 16];
+ int ret, datalen, seedlen;
+ u8 seed[128 + 16];
pr_debug("prng runs in SHA-512 mode "
"with chunksize=%d and reseed_limit=%u\n",
@@ -368,16 +391,36 @@ static int __init prng_sha512_instantiate(void)
if (ret)
goto outfree;
- /* generate initial seed bytestring, with 256 + 128 bits entropy */
- ret = generate_entropy(seed, 64 + 32);
- if (ret != 64 + 32)
- goto outfree;
- /* followed by 16 bytes of unique nonce */
- get_tod_clock_ext(seed + 64 + 32);
+ /* generate initial seed, we need at least 256 + 128 bits entropy. */
+ if (trng_available) {
+ /*
+ * Trng available, so use it. The trng works in chunks of
+ * 32 bytes and produces 100% entropy. So we pull 64 bytes
+ * which gives us 512 bits of entropy.
+ */
+ seedlen = 2 * 32;
+ cpacf_trng(NULL, 0, seed, seedlen);
+ } else {
+ /*
+ * No trng available, so use the generate_entropy() function.
+ * This function works in 64 byte chunks and produces
+ * 50% entropy. So we pull 2*64 bytes which gives us 512 bits
+ * of entropy.
+ */
+ seedlen = 2 * 64;
+ ret = generate_entropy(seed, seedlen);
+ if (ret != seedlen)
+ goto outfree;
+ }
+
+ /* append 16 bytes of unique nonce to the seed */
+ get_tod_clock_ext(seed + seedlen);
+ seedlen += 16;
- /* initial seed of the prno drng */
+ /* now do the initial seeding of the prno drng */
cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_data->prnows, NULL, 0, seed, sizeof(seed));
+ &prng_data->prnows, NULL, 0, seed, seedlen);
+ memzero_explicit(seed, sizeof(seed));
/* if fips mode is enabled, generate a first block of random
bytes for the FIPS 140-2 Conditional Self Test */
@@ -405,17 +448,26 @@ static void prng_sha512_deinstantiate(void)
static int prng_sha512_reseed(void)
{
- int ret;
+ int ret, seedlen;
u8 seed[64];
- /* fetch 256 bits of fresh entropy */
- ret = generate_entropy(seed, sizeof(seed));
- if (ret != sizeof(seed))
- return ret;
+ /* We need at least 256 bits of fresh entropy for reseeding */
+ if (trng_available) {
+ /* trng produces 256 bits entropy in 32 bytes */
+ seedlen = 32;
+ cpacf_trng(NULL, 0, seed, seedlen);
+ } else {
+ /* generate_entropy() produces 256 bits entropy in 64 bytes */
+ seedlen = 64;
+ ret = generate_entropy(seed, seedlen);
+ if (ret != sizeof(seed))
+ return ret;
+ }
/* do a reseed of the prno drng with this bytestring */
cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_data->prnows, NULL, 0, seed, sizeof(seed));
+ &prng_data->prnows, NULL, 0, seed, seedlen);
+ memzero_explicit(seed, sizeof(seed));
return 0;
}
@@ -592,6 +644,7 @@ static ssize_t prng_sha512_read(struct file *file, char __user *ubuf,
ret = -EFAULT;
break;
}
+ memzero_explicit(p, n);
ubuf += n;
nbytes -= n;
ret += n;
@@ -773,6 +826,10 @@ static int __init prng_init(void)
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
return -EOPNOTSUPP;
+ /* check if TRNG subfunction is available */
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ trng_available = true;
+
/* choose prng mode */
if (prng_mode != PRNG_MODE_TDES) {
/* check for MSA5 support for PRNO operations */
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 4d58a92b5d97..c59b922cb6c5 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -39,6 +39,7 @@ CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
+CONFIG_KEXEC_VERIFY_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
diff --git a/arch/s390/hypfs/hypfs_diag0c.c b/arch/s390/hypfs/hypfs_diag0c.c
index 72e3140fafb5..3235e4d82f2d 100644
--- a/arch/s390/hypfs/hypfs_diag0c.c
+++ b/arch/s390/hypfs/hypfs_diag0c.c
@@ -16,26 +16,12 @@
#define DBFS_D0C_HDR_VERSION 0
/*
- * Execute diagnose 0c in 31 bit mode
- */
-static void diag0c(struct hypfs_diag0c_entry *entry)
-{
- diag_stat_inc(DIAG_STAT_X00C);
- asm volatile (
- " sam31\n"
- " diag %0,%0,0x0c\n"
- " sam64\n"
- : /* no output register */
- : "a" (entry)
- : "memory");
-}
-
-/*
* Get hypfs_diag0c_entry from CPU vector and store diag0c data
*/
static void diag0c_fn(void *data)
{
- diag0c(((void **) data)[smp_processor_id()]);
+ diag_stat_inc(DIAG_STAT_X00C);
+ diag_dma_ops.diag0c(((void **) data)[smp_processor_id()]);
}
/*
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 12d77cb11fe5..2531f673f099 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -20,7 +20,7 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
-generic-y += rwsem.h
+generic-y += mmiowb.h
generic-y += trace_clock.h
generic-y += unaligned.h
generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/airq.h b/arch/s390/include/asm/airq.h
index fcf539efb32f..c10d2ee2dfda 100644
--- a/arch/s390/include/asm/airq.h
+++ b/arch/s390/include/asm/airq.h
@@ -14,7 +14,7 @@
struct airq_struct {
struct hlist_node list; /* Handler queueing. */
- void (*handler)(struct airq_struct *); /* Thin-interrupt handler */
+ void (*handler)(struct airq_struct *airq, bool floating);
u8 *lsi_ptr; /* Local-Summary-Indicator pointer */
u8 lsi_mask; /* Local-Summary-Indicator mask */
u8 isc; /* Interrupt-subclass */
@@ -35,13 +35,15 @@ struct airq_iv {
unsigned int *data; /* 32 bit value associated with each bit */
unsigned long bits; /* Number of bits in the vector */
unsigned long end; /* Number of highest allocated bit + 1 */
+ unsigned long flags; /* Allocation flags */
spinlock_t lock; /* Lock to protect alloc & free */
};
-#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
-#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
-#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
-#define AIRQ_IV_DATA 8 /* Allocate the data array */
+#define AIRQ_IV_ALLOC 1 /* Use an allocation bit mask */
+#define AIRQ_IV_BITLOCK 2 /* Allocate the lock bit mask */
+#define AIRQ_IV_PTR 4 /* Allocate the ptr array */
+#define AIRQ_IV_DATA 8 /* Allocate the data array */
+#define AIRQ_IV_CACHELINE 16 /* Cacheline alignment for the vector */
struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags);
void airq_iv_release(struct airq_iv *iv);
diff --git a/arch/s390/include/asm/bitops.h b/arch/s390/include/asm/bitops.h
index d1f8a4d94cca..9900d655014c 100644
--- a/arch/s390/include/asm/bitops.h
+++ b/arch/s390/include/asm/bitops.h
@@ -73,7 +73,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- __atomic64_or(mask, addr);
+ __atomic64_or(mask, (long *)addr);
}
static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
@@ -94,7 +94,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
- __atomic64_and(mask, addr);
+ __atomic64_and(mask, (long *)addr);
}
static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
@@ -115,7 +115,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
}
#endif
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- __atomic64_xor(mask, addr);
+ __atomic64_xor(mask, (long *)addr);
}
static inline int
@@ -125,7 +125,7 @@ test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- old = __atomic64_or_barrier(mask, addr);
+ old = __atomic64_or_barrier(mask, (long *)addr);
return (old & mask) != 0;
}
@@ -136,7 +136,7 @@ test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = ~(1UL << (nr & (BITS_PER_LONG - 1)));
- old = __atomic64_and_barrier(mask, addr);
+ old = __atomic64_and_barrier(mask, (long *)addr);
return (old & ~mask) != 0;
}
@@ -147,7 +147,7 @@ test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
unsigned long old, mask;
mask = 1UL << (nr & (BITS_PER_LONG - 1));
- old = __atomic64_xor_barrier(mask, addr);
+ old = __atomic64_xor_barrier(mask, (long *)addr);
return (old & mask) != 0;
}
diff --git a/arch/s390/include/asm/boot_data.h b/arch/s390/include/asm/boot_data.h
index 2d999ccb977a..f7eed27b3220 100644
--- a/arch/s390/include/asm/boot_data.h
+++ b/arch/s390/include/asm/boot_data.h
@@ -5,7 +5,14 @@
#include <asm/ipl.h>
extern char early_command_line[COMMAND_LINE_SIZE];
-extern struct ipl_parameter_block early_ipl_block;
-extern int early_ipl_block_valid;
+extern struct ipl_parameter_block ipl_block;
+extern int ipl_block_valid;
+extern int ipl_secure_flag;
+
+extern unsigned long ipl_cert_list_addr;
+extern unsigned long ipl_cert_list_size;
+
+extern unsigned long early_ipl_comp_list_addr;
+extern unsigned long early_ipl_comp_list_size;
#endif /* _ASM_S390_BOOT_DATA_H */
diff --git a/arch/s390/include/asm/bug.h b/arch/s390/include/asm/bug.h
index 429f43a8a8e8..713fc9735ffb 100644
--- a/arch/s390/include/asm/bug.h
+++ b/arch/s390/include/asm/bug.h
@@ -15,7 +15,7 @@
".section .rodata.str,\"aMS\",@progbits,1\n" \
"2: .asciz \""__FILE__"\"\n" \
".previous\n" \
- ".section __bug_table,\"aw\"\n" \
+ ".section __bug_table,\"awM\",@progbits,%2\n" \
"3: .long 1b-3b,2b-3b\n" \
" .short %0,%1\n" \
" .org 3b+%2\n" \
@@ -27,17 +27,17 @@
#else /* CONFIG_DEBUG_BUGVERBOSE */
-#define __EMIT_BUG(x) do { \
- asm volatile( \
- "0: j 0b+2\n" \
- "1:\n" \
- ".section __bug_table,\"aw\"\n" \
- "2: .long 1b-2b\n" \
- " .short %0\n" \
- " .org 2b+%1\n" \
- ".previous\n" \
- : : "i" (x), \
- "i" (sizeof(struct bug_entry))); \
+#define __EMIT_BUG(x) do { \
+ asm volatile( \
+ "0: j 0b+2\n" \
+ "1:\n" \
+ ".section __bug_table,\"awM\",@progbits,%1\n" \
+ "2: .long 1b-2b\n" \
+ " .short %0\n" \
+ " .org 2b+%1\n" \
+ ".previous\n" \
+ : : "i" (x), \
+ "i" (sizeof(struct bug_entry))); \
} while (0)
#endif /* CONFIG_DEBUG_BUGVERBOSE */
diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h
index 19562be22b7e..0036eab14391 100644
--- a/arch/s390/include/asm/diag.h
+++ b/arch/s390/include/asm/diag.h
@@ -308,4 +308,17 @@ union diag318_info {
int diag204(unsigned long subcode, unsigned long size, void *addr);
int diag224(void *ptr);
int diag26c(void *req, void *resp, enum diag26c_sc subcode);
+
+struct hypfs_diag0c_entry;
+
+struct diag_ops {
+ int (*diag210)(struct diag210 *addr);
+ int (*diag26c)(void *req, void *resp, enum diag26c_sc subcode);
+ int (*diag14)(unsigned long rx, unsigned long ry1, unsigned long subcode);
+ void (*diag0c)(struct hypfs_diag0c_entry *entry);
+ void (*diag308_reset)(void);
+};
+
+extern struct diag_ops diag_dma_ops;
+extern struct diag210 *__diag210_tmp_dma;
#endif /* _ASM_S390_DIAG_H */
diff --git a/arch/s390/include/asm/ebcdic.h b/arch/s390/include/asm/ebcdic.h
index 29441beb92e6..efb50fc6866c 100644
--- a/arch/s390/include/asm/ebcdic.h
+++ b/arch/s390/include/asm/ebcdic.h
@@ -20,7 +20,7 @@ extern __u8 _ebc_tolower[256]; /* EBCDIC -> lowercase */
extern __u8 _ebc_toupper[256]; /* EBCDIC -> uppercase */
static inline void
-codepage_convert(const __u8 *codepage, volatile __u8 * addr, unsigned long nr)
+codepage_convert(const __u8 *codepage, volatile char *addr, unsigned long nr)
{
if (nr-- <= 0)
return;
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index f74639a05f0f..5775fc22f410 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -107,6 +107,10 @@
#define HWCAP_S390_VXRS_BCD 4096
#define HWCAP_S390_VXRS_EXT 8192
#define HWCAP_S390_GS 16384
+#define HWCAP_S390_VXRS_EXT2 32768
+#define HWCAP_S390_VXRS_PDE 65536
+#define HWCAP_S390_SORT 131072
+#define HWCAP_S390_DFLT 262144
/* Internal bits, not exposed via elf */
#define HWCAP_INT_SIE 1UL
diff --git a/arch/s390/include/asm/extable.h b/arch/s390/include/asm/extable.h
index 80a4e5a9cb46..ae27f756b409 100644
--- a/arch/s390/include/asm/extable.h
+++ b/arch/s390/include/asm/extable.h
@@ -19,6 +19,11 @@ struct exception_table_entry
int insn, fixup;
};
+extern struct exception_table_entry *__start_dma_ex_table;
+extern struct exception_table_entry *__stop_dma_ex_table;
+
+const struct exception_table_entry *s390_search_extables(unsigned long addr);
+
static inline unsigned long extable_fixup(const struct exception_table_entry *x)
{
return (unsigned long)&x->fixup + x->fixup;
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 5a3c95b11952..68d362f8d6c1 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -11,9 +11,16 @@
#define MCOUNT_RETURN_FIXUP 18
#endif
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+
#ifndef __ASSEMBLY__
+#ifdef CONFIG_CC_IS_CLANG
+/* https://bugs.llvm.org/show_bug.cgi?id=41424 */
+#define ftrace_return_address(n) 0UL
+#else
#define ftrace_return_address(n) __builtin_return_address(n)
+#endif
void _mcount(void);
void ftrace_caller(void);
diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
index f34d729347e4..ca421614722f 100644
--- a/arch/s390/include/asm/io.h
+++ b/arch/s390/include/asm/io.h
@@ -30,14 +30,8 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
#define ioremap_wc ioremap_nocache
#define ioremap_wt ioremap_nocache
-static inline void __iomem *ioremap(unsigned long offset, unsigned long size)
-{
- return (void __iomem *) offset;
-}
-
-static inline void iounmap(volatile void __iomem *addr)
-{
-}
+void __iomem *ioremap(unsigned long offset, unsigned long size);
+void iounmap(volatile void __iomem *addr);
static inline void __iomem *ioport_map(unsigned long port, unsigned int nr)
{
@@ -57,14 +51,17 @@ static inline void ioport_unmap(void __iomem *p)
* the corresponding device and create the mapping cookie.
*/
#define pci_iomap pci_iomap
+#define pci_iomap_range pci_iomap_range
#define pci_iounmap pci_iounmap
-#define pci_iomap_wc pci_iomap
-#define pci_iomap_wc_range pci_iomap_range
+#define pci_iomap_wc pci_iomap_wc
+#define pci_iomap_wc_range pci_iomap_wc_range
#define memcpy_fromio(dst, src, count) zpci_memcpy_fromio(dst, src, count)
#define memcpy_toio(dst, src, count) zpci_memcpy_toio(dst, src, count)
#define memset_io(dst, val, count) zpci_memset_io(dst, val, count)
+#define mmiowb() zpci_barrier()
+
#define __raw_readb zpci_read_u8
#define __raw_readw zpci_read_u16
#define __raw_readl zpci_read_u32
diff --git a/arch/s390/include/asm/ipl.h b/arch/s390/include/asm/ipl.h
index a8389e2d2f03..084e71b7272a 100644
--- a/arch/s390/include/asm/ipl.h
+++ b/arch/s390/include/asm/ipl.h
@@ -12,74 +12,36 @@
#include <asm/types.h>
#include <asm/cio.h>
#include <asm/setup.h>
+#include <uapi/asm/ipl.h>
-#define NSS_NAME_SIZE 8
-
-#define IPL_PARM_BLK_FCP_LEN (sizeof(struct ipl_list_hdr) + \
- sizeof(struct ipl_block_fcp))
-
-#define IPL_PARM_BLK0_FCP_LEN (sizeof(struct ipl_block_fcp) + 16)
+struct ipl_parameter_block {
+ struct ipl_pl_hdr hdr;
+ union {
+ struct ipl_pb_hdr pb0_hdr;
+ struct ipl_pb0_common common;
+ struct ipl_pb0_fcp fcp;
+ struct ipl_pb0_ccw ccw;
+ char raw[PAGE_SIZE - sizeof(struct ipl_pl_hdr)];
+ };
+} __packed __aligned(PAGE_SIZE);
-#define IPL_PARM_BLK_CCW_LEN (sizeof(struct ipl_list_hdr) + \
- sizeof(struct ipl_block_ccw))
+#define NSS_NAME_SIZE 8
-#define IPL_PARM_BLK0_CCW_LEN (sizeof(struct ipl_block_ccw) + 16)
+#define IPL_BP_FCP_LEN (sizeof(struct ipl_pl_hdr) + \
+ sizeof(struct ipl_pb0_fcp))
+#define IPL_BP0_FCP_LEN (sizeof(struct ipl_pb0_fcp))
+#define IPL_BP_CCW_LEN (sizeof(struct ipl_pl_hdr) + \
+ sizeof(struct ipl_pb0_ccw))
+#define IPL_BP0_CCW_LEN (sizeof(struct ipl_pb0_ccw))
#define IPL_MAX_SUPPORTED_VERSION (0)
-struct ipl_list_hdr {
- u32 len;
- u8 reserved1[3];
- u8 version;
- u32 blk0_len;
- u8 pbt;
- u8 flags;
- u16 reserved2;
- u8 loadparm[8];
-} __attribute__((packed));
-
-struct ipl_block_fcp {
- u8 reserved1[305-1];
- u8 opt;
- u8 reserved2[3];
- u16 reserved3;
- u16 devno;
- u8 reserved4[4];
- u64 wwpn;
- u64 lun;
- u32 bootprog;
- u8 reserved5[12];
- u64 br_lba;
- u32 scp_data_len;
- u8 reserved6[260];
- u8 scp_data[];
-} __attribute__((packed));
-
-#define DIAG308_VMPARM_SIZE 64
-#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - (sizeof(struct ipl_list_hdr) + \
- offsetof(struct ipl_block_fcp, scp_data)))
-
-struct ipl_block_ccw {
- u8 reserved1[84];
- u16 reserved2 : 13;
- u8 ssid : 3;
- u16 devno;
- u8 vm_flags;
- u8 reserved3[3];
- u32 vm_parm_len;
- u8 nss_name[8];
- u8 vm_parm[DIAG308_VMPARM_SIZE];
- u8 reserved4[8];
-} __attribute__((packed));
+#define IPL_RB_CERT_UNKNOWN ((unsigned short)-1)
-struct ipl_parameter_block {
- struct ipl_list_hdr hdr;
- union {
- struct ipl_block_fcp fcp;
- struct ipl_block_ccw ccw;
- char raw[PAGE_SIZE - sizeof(struct ipl_list_hdr)];
- } ipl_info;
-} __packed __aligned(PAGE_SIZE);
+#define DIAG308_VMPARM_SIZE (64)
+#define DIAG308_SCPDATA_OFFSET offsetof(struct ipl_parameter_block, \
+ fcp.scp_data)
+#define DIAG308_SCPDATA_SIZE (PAGE_SIZE - DIAG308_SCPDATA_OFFSET)
struct save_area;
struct save_area * __init save_area_alloc(bool is_boot_cpu);
@@ -88,7 +50,6 @@ void __init save_area_add_regs(struct save_area *, void *regs);
void __init save_area_add_vxrs(struct save_area *, __vector128 *vxrs);
extern void s390_reset_system(void);
-extern void ipl_store_parameters(void);
extern size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
const struct ipl_parameter_block *ipb);
@@ -122,6 +83,33 @@ extern struct ipl_info ipl_info;
extern void setup_ipl(void);
extern void set_os_info_reipl_block(void);
+struct ipl_report {
+ struct ipl_parameter_block *ipib;
+ struct list_head components;
+ struct list_head certificates;
+ size_t size;
+};
+
+struct ipl_report_component {
+ struct list_head list;
+ struct ipl_rb_component_entry entry;
+};
+
+struct ipl_report_certificate {
+ struct list_head list;
+ struct ipl_rb_certificate_entry entry;
+ void *key;
+};
+
+struct kexec_buf;
+struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib);
+void *ipl_report_finish(struct ipl_report *report);
+int ipl_report_free(struct ipl_report *report);
+int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
+ unsigned char flags, unsigned short cert);
+int ipl_report_add_certificate(struct ipl_report *report, void *key,
+ unsigned long addr, unsigned long len);
+
/*
* DIAG 308 support
*/
@@ -133,32 +121,12 @@ enum diag308_subcode {
DIAG308_STORE = 6,
};
-enum diag308_ipl_type {
- DIAG308_IPL_TYPE_FCP = 0,
- DIAG308_IPL_TYPE_CCW = 2,
-};
-
-enum diag308_opt {
- DIAG308_IPL_OPT_IPL = 0x10,
- DIAG308_IPL_OPT_DUMP = 0x20,
-};
-
-enum diag308_flags {
- DIAG308_FLAGS_LP_VALID = 0x80,
-};
-
-enum diag308_vm_flags {
- DIAG308_VM_FLAGS_NSS_VALID = 0x80,
- DIAG308_VM_FLAGS_VP_VALID = 0x40,
-};
-
enum diag308_rc {
DIAG308_RC_OK = 0x0001,
DIAG308_RC_NOCONFIG = 0x0102,
};
extern int diag308(unsigned long subcode, void *addr);
-extern void diag308_reset(void);
extern void store_status(void (*fn)(void *), void *data);
extern void lgr_info_log(void);
diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h
index afaf5e3c57fd..9f75d67b8c20 100644
--- a/arch/s390/include/asm/irq.h
+++ b/arch/s390/include/asm/irq.h
@@ -47,7 +47,6 @@ enum interruption_class {
IRQEXT_CMC,
IRQEXT_FTP,
IRQIO_CIO,
- IRQIO_QAI,
IRQIO_DAS,
IRQIO_C15,
IRQIO_C70,
@@ -55,12 +54,14 @@ enum interruption_class {
IRQIO_VMR,
IRQIO_LCS,
IRQIO_CTC,
- IRQIO_APB,
IRQIO_ADM,
IRQIO_CSC,
- IRQIO_PCI,
- IRQIO_MSI,
IRQIO_VIR,
+ IRQIO_QAI,
+ IRQIO_APB,
+ IRQIO_PCF,
+ IRQIO_PCD,
+ IRQIO_MSI,
IRQIO_VAI,
IRQIO_GAL,
NMI_NMI,
diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
index 825dd0f7f221..ea398a05f643 100644
--- a/arch/s390/include/asm/kexec.h
+++ b/arch/s390/include/asm/kexec.h
@@ -11,6 +11,7 @@
#include <asm/processor.h>
#include <asm/page.h>
+#include <asm/setup.h>
/*
* KEXEC_SOURCE_MEMORY_LIMIT maximum page get_free_page can return.
* I.e. Maximum page that is mapped directly into kernel memory,
@@ -42,6 +43,9 @@
/* The native architecture */
#define KEXEC_ARCH KEXEC_ARCH_S390
+/* Allow kexec_file to load a segment to 0 */
+#define KEXEC_BUF_MEM_UNKNOWN -1
+
/* Provide a dummy definition to avoid build failures. */
static inline void crash_setup_regs(struct pt_regs *newregs,
struct pt_regs *oldregs) { }
@@ -51,20 +55,24 @@ struct s390_load_data {
/* Pointer to the kernel buffer. Used to register cmdline etc.. */
void *kernel_buf;
+ /* Load address of the kernel_buf. */
+ unsigned long kernel_mem;
+
+ /* Parmarea in the kernel buffer. */
+ struct parmarea *parm;
+
/* Total size of loaded segments in memory. Used as an offset. */
size_t memsz;
- /* Load address of initrd. Used to register INITRD_START in kernel. */
- unsigned long initrd_load_addr;
+ struct ipl_report *report;
};
-int kexec_file_add_purgatory(struct kimage *image,
- struct s390_load_data *data);
-int kexec_file_add_initrd(struct kimage *image,
- struct s390_load_data *data,
- char *initrd, unsigned long initrd_len);
-int *kexec_file_update_kernel(struct kimage *iamge,
- struct s390_load_data *data);
+int s390_verify_sig(const char *kernel, unsigned long kernel_len);
+void *kexec_file_add_components(struct kimage *image,
+ int (*add_kernel)(struct kimage *image,
+ struct s390_load_data *data));
+int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+ unsigned long addr);
extern const struct kexec_file_ops s390_kexec_image_ops;
extern const struct kexec_file_ops s390_kexec_elf_ops;
diff --git a/arch/s390/include/asm/linkage.h b/arch/s390/include/asm/linkage.h
index 1b95da3fdd64..7f22262b0e46 100644
--- a/arch/s390/include/asm/linkage.h
+++ b/arch/s390/include/asm/linkage.h
@@ -28,5 +28,12 @@
.long (_target) - . ; \
.previous
+#define EX_TABLE_DMA(_fault, _target) \
+ .section .dma.ex_table, "a" ; \
+ .align 4 ; \
+ .long (_fault) - . ; \
+ .long (_target) - . ; \
+ .previous
+
#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 5b9f10b1e55d..237ee0c4169f 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -129,7 +129,7 @@ struct lowcore {
/* SMP info area */
__u32 cpu_nr; /* 0x03a0 */
__u32 softirq_pending; /* 0x03a4 */
- __u32 preempt_count; /* 0x03a8 */
+ __s32 preempt_count; /* 0x03a8 */
__u32 spinlock_lockval; /* 0x03ac */
__u32 spinlock_index; /* 0x03b0 */
__u32 fpu_flags; /* 0x03b4 */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
index 123dac3717b3..0033dcd663b1 100644
--- a/arch/s390/include/asm/nospec-insn.h
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -32,23 +32,23 @@ _LC_BR_R1 = __LC_BR_R1
.endm
.macro __THUNK_PROLOG_BR r1,r2
- __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1
+ __THUNK_PROLOG_NAME __s390_indirect_jump_r\r2\()use_r\r1
.endm
.macro __THUNK_PROLOG_BC d0,r1,r2
- __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ __THUNK_PROLOG_NAME __s390_indirect_branch_\d0\()_\r2\()use_\r1
.endm
.macro __THUNK_BR r1,r2
- jg __s390x_indirect_jump_r\r2\()use_r\r1
+ jg __s390_indirect_jump_r\r2\()use_r\r1
.endm
.macro __THUNK_BC d0,r1,r2
- jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ jg __s390_indirect_branch_\d0\()_\r2\()use_\r1
.endm
.macro __THUNK_BRASL r1,r2,r3
- brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2
+ brasl \r1,__s390_indirect_jump_r\r3\()use_r\r2
.endm
.macro __DECODE_RR expand,reg,ruse
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 4e0efebc56a9..305befd55326 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -26,6 +26,9 @@ int pci_proc_domain(struct pci_bus *);
#define ZPCI_BUS_NR 0 /* default bus number */
#define ZPCI_DEVFN 0 /* default device number */
+#define ZPCI_NR_DMA_SPACES 1
+#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
+
/* PCI Function Controls */
#define ZPCI_FC_FN_ENABLED 0x80
#define ZPCI_FC_ERROR 0x40
@@ -83,6 +86,8 @@ enum zpci_state {
struct zpci_bar_struct {
struct resource *res; /* bus resource */
+ void __iomem *mio_wb;
+ void __iomem *mio_wt;
u32 val; /* bar start & 3 flag bits */
u16 map_idx; /* index into bar mapping array */
u8 size; /* order 2 exponent */
@@ -112,6 +117,8 @@ struct zpci_dev {
/* IRQ stuff */
u64 msi_addr; /* MSI address */
unsigned int max_msi; /* maximum number of MSI's */
+ unsigned int msi_first_bit;
+ unsigned int msi_nr_irqs;
struct airq_iv *aibv; /* adapter interrupt bit vector */
unsigned long aisb; /* number of the summary bit */
@@ -130,6 +137,7 @@ struct zpci_dev {
struct iommu_device iommu_dev; /* IOMMU core handle */
char res_name[16];
+ bool mio_capable;
struct zpci_bar_struct bars[PCI_BAR_COUNT];
u64 start_dma; /* Start of available DMA addresses */
@@ -158,6 +166,7 @@ static inline bool zdev_enabled(struct zpci_dev *zdev)
}
extern const struct attribute_group *zpci_attr_groups[];
+extern unsigned int s390_pci_force_floating __initdata;
/* -----------------------------------------------------------------------------
Prototypes
@@ -219,6 +228,9 @@ struct zpci_dev *get_zdev_by_fid(u32);
int zpci_dma_init(void);
void zpci_dma_exit(void);
+int __init zpci_irq_init(void);
+void __init zpci_irq_exit(void);
+
/* FMB */
int zpci_fmb_enable_device(struct zpci_dev *);
int zpci_fmb_disable_device(struct zpci_dev *);
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index b3b31b31f0d3..3ec52a05d500 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -43,6 +43,8 @@ struct clp_fh_list_entry {
#define CLP_SET_ENABLE_PCI_FN 0 /* Yes, 0 enables it */
#define CLP_SET_DISABLE_PCI_FN 1 /* Yes, 1 disables it */
+#define CLP_SET_ENABLE_MIO 2
+#define CLP_SET_DISABLE_MIO 3
#define CLP_UTIL_STR_LEN 64
#define CLP_PFIP_NR_SEGMENTS 4
@@ -80,7 +82,8 @@ struct clp_req_query_pci {
struct clp_rsp_query_pci {
struct clp_rsp_hdr hdr;
u16 vfn; /* virtual fn number */
- u16 : 7;
+ u16 : 6;
+ u16 mio_addr_avail : 1;
u16 util_str_avail : 1; /* utility string available? */
u16 pfgid : 8; /* pci function group id */
u32 fid; /* pci function id */
@@ -96,6 +99,15 @@ struct clp_rsp_query_pci {
u32 reserved[11];
u32 uid; /* user defined id */
u8 util_str[CLP_UTIL_STR_LEN]; /* utility string */
+ u32 reserved2[16];
+ u32 mio_valid : 6;
+ u32 : 26;
+ u32 : 32;
+ struct {
+ u64 wb;
+ u64 wt;
+ } addr[PCI_BAR_COUNT];
+ u32 reserved3[6];
} __packed;
/* Query PCI function group request */
@@ -118,7 +130,11 @@ struct clp_rsp_query_pci_grp {
u8 refresh : 1; /* TLB refresh mode */
u16 reserved2;
u16 mui;
- u64 reserved3;
+ u16 : 16;
+ u16 maxfaal;
+ u16 : 4;
+ u16 dnoi : 12;
+ u16 maxcpu;
u64 dasm; /* dma address space mask */
u64 msia; /* MSI address */
u64 reserved4;
diff --git a/arch/s390/include/asm/pci_insn.h b/arch/s390/include/asm/pci_insn.h
index ba22a6ea51a1..ff81ed19c506 100644
--- a/arch/s390/include/asm/pci_insn.h
+++ b/arch/s390/include/asm/pci_insn.h
@@ -2,6 +2,8 @@
#ifndef _ASM_S390_PCI_INSN_H
#define _ASM_S390_PCI_INSN_H
+#include <linux/jump_label.h>
+
/* Load/Store status codes */
#define ZPCI_PCI_ST_FUNC_NOT_ENABLED 4
#define ZPCI_PCI_ST_FUNC_IN_ERR 8
@@ -38,6 +40,8 @@
#define ZPCI_MOD_FC_RESET_ERROR 7
#define ZPCI_MOD_FC_RESET_BLOCK 9
#define ZPCI_MOD_FC_SET_MEASURE 10
+#define ZPCI_MOD_FC_REG_INT_D 16
+#define ZPCI_MOD_FC_DEREG_INT_D 17
/* FIB function controls */
#define ZPCI_FIB_FC_ENABLED 0x80
@@ -51,16 +55,7 @@
#define ZPCI_FIB_FC_LS_BLOCKED 0x20
#define ZPCI_FIB_FC_DMAAS_REG 0x10
-/* Function Information Block */
-struct zpci_fib {
- u32 fmt : 8; /* format */
- u32 : 24;
- u32 : 32;
- u8 fc; /* function controls */
- u64 : 56;
- u64 pba; /* PCI base address */
- u64 pal; /* PCI address limit */
- u64 iota; /* I/O Translation Anchor */
+struct zpci_fib_fmt0 {
u32 : 1;
u32 isc : 3; /* Interrupt subclass */
u32 noi : 12; /* Number of interrupts */
@@ -72,16 +67,90 @@ struct zpci_fib {
u32 : 32;
u64 aibv; /* Adapter int bit vector address */
u64 aisb; /* Adapter int summary bit address */
+};
+
+struct zpci_fib_fmt1 {
+ u32 : 4;
+ u32 noi : 12;
+ u32 : 16;
+ u32 dibvo : 16;
+ u32 : 16;
+ u64 : 64;
+ u64 : 64;
+};
+
+/* Function Information Block */
+struct zpci_fib {
+ u32 fmt : 8; /* format */
+ u32 : 24;
+ u32 : 32;
+ u8 fc; /* function controls */
+ u64 : 56;
+ u64 pba; /* PCI base address */
+ u64 pal; /* PCI address limit */
+ u64 iota; /* I/O Translation Anchor */
+ union {
+ struct zpci_fib_fmt0 fmt0;
+ struct zpci_fib_fmt1 fmt1;
+ };
u64 fmb_addr; /* Function measurement block address and key */
u32 : 32;
u32 gd;
} __packed __aligned(8);
+/* directed interruption information block */
+struct zpci_diib {
+ u32 : 1;
+ u32 isc : 3;
+ u32 : 28;
+ u16 : 16;
+ u16 nr_cpus;
+ u64 disb_addr;
+ u64 : 64;
+ u64 : 64;
+} __packed __aligned(8);
+
+/* cpu directed interruption information block */
+struct zpci_cdiib {
+ u64 : 64;
+ u64 dibv_addr;
+ u64 : 64;
+ u64 : 64;
+ u64 : 64;
+} __packed __aligned(8);
+
+union zpci_sic_iib {
+ struct zpci_diib diib;
+ struct zpci_cdiib cdiib;
+};
+
+DECLARE_STATIC_KEY_FALSE(have_mio);
+
u8 zpci_mod_fc(u64 req, struct zpci_fib *fib, u8 *status);
int zpci_refresh_trans(u64 fn, u64 addr, u64 range);
-int zpci_load(u64 *data, u64 req, u64 offset);
-int zpci_store(u64 data, u64 req, u64 offset);
-int zpci_store_block(const u64 *data, u64 req, u64 offset);
-int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc);
+int __zpci_load(u64 *data, u64 req, u64 offset);
+int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len);
+int __zpci_store(u64 data, u64 req, u64 offset);
+int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len);
+int __zpci_store_block(const u64 *data, u64 req, u64 offset);
+void zpci_barrier(void);
+int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib);
+
+static inline int zpci_set_irq_ctrl(u16 ctl, u8 isc)
+{
+ union zpci_sic_iib iib = {{0}};
+
+ return __zpci_set_irq_ctrl(ctl, isc, &iib);
+}
+
+#ifdef CONFIG_PCI
+static inline void enable_mio_ctl(void)
+{
+ if (static_branch_likely(&have_mio))
+ __ctl_set_bit(2, 5);
+}
+#else /* CONFIG_PCI */
+static inline void enable_mio_ctl(void) {}
+#endif /* CONFIG_PCI */
#endif
diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h
index cbb9cb9c6547..cd060b5dd8fd 100644
--- a/arch/s390/include/asm/pci_io.h
+++ b/arch/s390/include/asm/pci_io.h
@@ -37,12 +37,10 @@ extern struct zpci_iomap_entry *zpci_iomap_start;
#define zpci_read(LENGTH, RETTYPE) \
static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
{ \
- struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
u64 data; \
int rc; \
\
- rc = zpci_load(&data, req, ZPCI_OFFSET(addr)); \
+ rc = zpci_load(&data, addr, LENGTH); \
if (rc) \
data = -1ULL; \
return (RETTYPE) data; \
@@ -52,11 +50,9 @@ static inline RETTYPE zpci_read_##RETTYPE(const volatile void __iomem *addr) \
static inline void zpci_write_##VALTYPE(VALTYPE val, \
const volatile void __iomem *addr) \
{ \
- struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)]; \
- u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, LENGTH); \
u64 data = (VALTYPE) val; \
\
- zpci_store(data, req, ZPCI_OFFSET(addr)); \
+ zpci_store(addr, data, LENGTH); \
}
zpci_read(8, u64)
@@ -68,36 +64,38 @@ zpci_write(4, u32)
zpci_write(2, u16)
zpci_write(1, u8)
-static inline int zpci_write_single(u64 req, const u64 *data, u64 offset, u8 len)
+static inline int zpci_write_single(volatile void __iomem *dst, const void *src,
+ unsigned long len)
{
u64 val;
switch (len) {
case 1:
- val = (u64) *((u8 *) data);
+ val = (u64) *((u8 *) src);
break;
case 2:
- val = (u64) *((u16 *) data);
+ val = (u64) *((u16 *) src);
break;
case 4:
- val = (u64) *((u32 *) data);
+ val = (u64) *((u32 *) src);
break;
case 8:
- val = (u64) *((u64 *) data);
+ val = (u64) *((u64 *) src);
break;
default:
val = 0; /* let FW report error */
break;
}
- return zpci_store(val, req, offset);
+ return zpci_store(dst, val, len);
}
-static inline int zpci_read_single(u64 req, u64 *dst, u64 offset, u8 len)
+static inline int zpci_read_single(void *dst, const volatile void __iomem *src,
+ unsigned long len)
{
u64 data;
int cc;
- cc = zpci_load(&data, req, offset);
+ cc = zpci_load(&data, src, len);
if (cc)
goto out;
@@ -119,10 +117,8 @@ out:
return cc;
}
-static inline int zpci_write_block(u64 req, const u64 *data, u64 offset)
-{
- return zpci_store_block(data, req, offset);
-}
+int zpci_write_block(volatile void __iomem *dst, const void *src,
+ unsigned long len);
static inline u8 zpci_get_max_write_size(u64 src, u64 dst, int len, int max)
{
@@ -140,18 +136,15 @@ static inline int zpci_memcpy_fromio(void *dst,
const volatile void __iomem *src,
unsigned long n)
{
- struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(src)];
- u64 req, offset = ZPCI_OFFSET(src);
int size, rc = 0;
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) src,
(u64) dst, n, 8);
- req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
- rc = zpci_read_single(req, dst, offset, size);
+ rc = zpci_read_single(dst, src, size);
if (rc)
break;
- offset += size;
+ src += size;
dst += size;
n -= size;
}
@@ -161,8 +154,6 @@ static inline int zpci_memcpy_fromio(void *dst,
static inline int zpci_memcpy_toio(volatile void __iomem *dst,
const void *src, unsigned long n)
{
- struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)];
- u64 req, offset = ZPCI_OFFSET(dst);
int size, rc = 0;
if (!src)
@@ -171,16 +162,14 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst,
while (n > 0) {
size = zpci_get_max_write_size((u64 __force) dst,
(u64) src, n, 128);
- req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size);
-
if (size > 8) /* main path */
- rc = zpci_write_block(req, src, offset);
+ rc = zpci_write_block(dst, src, size);
else
- rc = zpci_write_single(req, src, offset, size);
+ rc = zpci_write_single(dst, src, size);
if (rc)
break;
- offset += size;
src += size;
+ dst += size;
n -= size;
}
return rc;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 76dc344edb8c..9f0195d5fa16 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -238,7 +238,7 @@ static inline int is_module_addr(void *addr)
#define _REGION_ENTRY_NOEXEC 0x100 /* region no-execute bit */
#define _REGION_ENTRY_OFFSET 0xc0 /* region table offset */
#define _REGION_ENTRY_INVALID 0x20 /* invalid region table entry */
-#define _REGION_ENTRY_TYPE_MASK 0x0c /* region/segment table type mask */
+#define _REGION_ENTRY_TYPE_MASK 0x0c /* region table type mask */
#define _REGION_ENTRY_TYPE_R1 0x0c /* region first table type */
#define _REGION_ENTRY_TYPE_R2 0x08 /* region second table type */
#define _REGION_ENTRY_TYPE_R3 0x04 /* region third table type */
@@ -277,6 +277,7 @@ static inline int is_module_addr(void *addr)
#define _SEGMENT_ENTRY_PROTECT 0x200 /* segment protection bit */
#define _SEGMENT_ENTRY_NOEXEC 0x100 /* segment no-execute bit */
#define _SEGMENT_ENTRY_INVALID 0x20 /* invalid segment table entry */
+#define _SEGMENT_ENTRY_TYPE_MASK 0x0c /* segment table type mask */
#define _SEGMENT_ENTRY (0)
#define _SEGMENT_ENTRY_EMPTY (_SEGMENT_ENTRY_INVALID)
@@ -614,15 +615,9 @@ static inline int pgd_none(pgd_t pgd)
static inline int pgd_bad(pgd_t pgd)
{
- /*
- * With dynamic page table levels the pgd can be a region table
- * entry or a segment table entry. Check for the bit that are
- * invalid for either table entry.
- */
- unsigned long mask =
- ~_SEGMENT_ENTRY_ORIGIN & ~_REGION_ENTRY_INVALID &
- ~_REGION_ENTRY_TYPE_MASK & ~_REGION_ENTRY_LENGTH;
- return (pgd_val(pgd) & mask) != 0;
+ if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R1)
+ return 0;
+ return (pgd_val(pgd) & ~_REGION_ENTRY_BITS) != 0;
}
static inline unsigned long pgd_pfn(pgd_t pgd)
@@ -703,6 +698,8 @@ static inline int pmd_large(pmd_t pmd)
static inline int pmd_bad(pmd_t pmd)
{
+ if ((pmd_val(pmd) & _SEGMENT_ENTRY_TYPE_MASK) > 0)
+ return 1;
if (pmd_large(pmd))
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS_LARGE) != 0;
return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0;
@@ -710,8 +707,12 @@ static inline int pmd_bad(pmd_t pmd)
static inline int pud_bad(pud_t pud)
{
- if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R3)
- return pmd_bad(__pmd(pud_val(pud)));
+ unsigned long type = pud_val(pud) & _REGION_ENTRY_TYPE_MASK;
+
+ if (type > _REGION_ENTRY_TYPE_R3)
+ return 1;
+ if (type < _REGION_ENTRY_TYPE_R3)
+ return 0;
if (pud_large(pud))
return (pud_val(pud) & ~_REGION_ENTRY_BITS_LARGE) != 0;
return (pud_val(pud) & ~_REGION_ENTRY_BITS) != 0;
@@ -719,8 +720,12 @@ static inline int pud_bad(pud_t pud)
static inline int p4d_bad(p4d_t p4d)
{
- if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) < _REGION_ENTRY_TYPE_R2)
- return pud_bad(__pud(p4d_val(p4d)));
+ unsigned long type = p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK;
+
+ if (type > _REGION_ENTRY_TYPE_R2)
+ return 1;
+ if (type < _REGION_ENTRY_TYPE_R2)
+ return 0;
return (p4d_val(p4d) & ~_REGION_ENTRY_BITS) != 0;
}
@@ -1204,41 +1209,78 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE-1))
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-#define pgd_offset_raw(pgd, addr) ((pgd) + pgd_index(addr))
-
#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
-static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
+/*
+ * The pgd_offset function *always* adds the index for the top-level
+ * region/segment table. This is done to get a sequence like the
+ * following to work:
+ * pgdp = pgd_offset(current->mm, addr);
+ * pgd = READ_ONCE(*pgdp);
+ * p4dp = p4d_offset(&pgd, addr);
+ * ...
+ * The subsequent p4d_offset, pud_offset and pmd_offset functions
+ * only add an index if they dereferenced the pointer.
+ */
+static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address)
{
- p4d_t *p4d = (p4d_t *) pgd;
+ unsigned long rste;
+ unsigned int shift;
- if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
- p4d = (p4d_t *) pgd_deref(*pgd);
- return p4d + p4d_index(address);
+ /* Get the first entry of the top level table */
+ rste = pgd_val(*pgd);
+ /* Pick up the shift from the table type of the first entry */
+ shift = ((rste & _REGION_ENTRY_TYPE_MASK) >> 2) * 11 + 20;
+ return pgd + ((address >> shift) & (PTRS_PER_PGD - 1));
}
-static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+#define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address)
+#define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
- pud_t *pud = (pud_t *) p4d;
+ if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1)
+ return (p4d_t *) pgd_deref(*pgd) + p4d_index(address);
+ return (p4d_t *) pgd;
+}
- if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- pud = (pud_t *) p4d_deref(*p4d);
- return pud + pud_index(address);
+static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
+{
+ if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2)
+ return (pud_t *) p4d_deref(*p4d) + pud_index(address);
+ return (pud_t *) p4d;
}
static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
- pmd_t *pmd = (pmd_t *) pud;
+ if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3)
+ return (pmd_t *) pud_deref(*pud) + pmd_index(address);
+ return (pmd_t *) pud;
+}
- if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
- pmd = (pmd_t *) pud_deref(*pud);
- return pmd + pmd_index(address);
+static inline pte_t *pte_offset(pmd_t *pmd, unsigned long address)
+{
+ return (pte_t *) pmd_deref(*pmd) + pte_index(address);
+}
+
+#define pte_offset_kernel(pmd, address) pte_offset(pmd, address)
+#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
+#define pte_unmap(pte) do { } while (0)
+
+static inline bool gup_fast_permitted(unsigned long start, int nr_pages)
+{
+ unsigned long len, end;
+
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ if (end < start)
+ return false;
+ return end <= current->mm->context.asce_limit;
}
+#define gup_fast_permitted gup_fast_permitted
#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
@@ -1249,12 +1291,6 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
#define p4d_page(p4d) pfn_to_page(p4d_pfn(p4d))
#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
-/* Find an entry in the lowest level page table.. */
-#define pte_offset(pmd, addr) ((pte_t *) pmd_deref(*(pmd)) + pte_index(addr))
-#define pte_offset_kernel(pmd, address) pte_offset(pmd,address)
-#define pte_offset_map(pmd, address) pte_offset_kernel(pmd, address)
-#define pte_unmap(pte) do { } while (0)
-
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
pmd_val(pmd) &= ~_SEGMENT_ENTRY_WRITE;
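
[Editor's aside — not part of the patch] As a reading aid, a hedged sketch of the lockless walk pattern that the new pgd_offset() comment above describes: the top-level entry is read once, and each lower-level helper only adds an index when it actually dereferences the entry. walk_to_pte() is a made-up helper that relies only on standard accessors (READ_ONCE, pXd_none, pmd_large) and omits locking and huge-page handling beyond bailing out.

static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgdp, pgd;
	p4d_t *p4dp, p4d;
	pud_t *pudp, pud;
	pmd_t *pmdp, pmd;

	pgdp = pgd_offset(mm, addr);
	pgd = READ_ONCE(*pgdp);
	if (pgd_none(pgd))
		return NULL;
	p4dp = p4d_offset(&pgd, addr);
	p4d = READ_ONCE(*p4dp);
	if (p4d_none(p4d))
		return NULL;
	pudp = pud_offset(&p4d, addr);
	pud = READ_ONCE(*pudp);
	if (pud_none(pud))
		return NULL;
	pmdp = pmd_offset(&pud, addr);
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd) || pmd_large(pmd))
		return NULL;
	return pte_offset(&pmd, addr);
}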
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index 81038ab357ce..b0fcbc37b637 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -156,25 +156,6 @@ struct thread_struct {
typedef struct thread_struct thread_struct;
-/*
- * Stack layout of a C stack frame.
- */
-#ifndef __PACK_STACK
-struct stack_frame {
- unsigned long back_chain;
- unsigned long empty1[5];
- unsigned long gprs[10];
- unsigned int empty2[8];
-};
-#else
-struct stack_frame {
- unsigned long empty1[5];
- unsigned int empty2[8];
- unsigned long gprs[10];
- unsigned long back_chain;
-};
-#endif
-
#define ARCH_MIN_TASKALIGN 8
#define INIT_THREAD { \
@@ -206,11 +187,7 @@ struct mm_struct;
struct seq_file;
struct pt_regs;
-typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
-void dump_trace(dump_trace_func_t func, void *data,
- struct task_struct *task, unsigned long sp);
void show_registers(struct pt_regs *regs);
-
void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
@@ -244,55 +221,6 @@ static __no_kasan_or_inline unsigned short stap(void)
return cpu_address;
}
-#define CALL_ARGS_0() \
- register unsigned long r2 asm("2")
-#define CALL_ARGS_1(arg1) \
- register unsigned long r2 asm("2") = (unsigned long)(arg1)
-#define CALL_ARGS_2(arg1, arg2) \
- CALL_ARGS_1(arg1); \
- register unsigned long r3 asm("3") = (unsigned long)(arg2)
-#define CALL_ARGS_3(arg1, arg2, arg3) \
- CALL_ARGS_2(arg1, arg2); \
- register unsigned long r4 asm("4") = (unsigned long)(arg3)
-#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \
- CALL_ARGS_3(arg1, arg2, arg3); \
- register unsigned long r4 asm("5") = (unsigned long)(arg4)
-#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \
- CALL_ARGS_4(arg1, arg2, arg3, arg4); \
- register unsigned long r4 asm("6") = (unsigned long)(arg5)
-
-#define CALL_FMT_0
-#define CALL_FMT_1 CALL_FMT_0, "0" (r2)
-#define CALL_FMT_2 CALL_FMT_1, "d" (r3)
-#define CALL_FMT_3 CALL_FMT_2, "d" (r4)
-#define CALL_FMT_4 CALL_FMT_3, "d" (r5)
-#define CALL_FMT_5 CALL_FMT_4, "d" (r6)
-
-#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
-#define CALL_CLOBBER_4 CALL_CLOBBER_5
-#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5"
-#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4"
-#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3"
-#define CALL_CLOBBER_0 CALL_CLOBBER_1
-
-#define CALL_ON_STACK(fn, stack, nr, args...) \
-({ \
- CALL_ARGS_##nr(args); \
- unsigned long prev; \
- \
- asm volatile( \
- " la %[_prev],0(15)\n" \
- " la 15,0(%[_stack])\n" \
- " stg %[_prev],%[_bc](15)\n" \
- " brasl 14,%[_fn]\n" \
- " la 15,0(%[_prev])\n" \
- : "+&d" (r2), [_prev] "=&a" (prev) \
- : [_stack] "a" (stack), \
- [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
- [_fn] "X" (fn) CALL_FMT_##nr : CALL_CLOBBER_##nr); \
- r2; \
-})
-
/*
* Give up the time slice of the virtual PU.
*/
@@ -339,10 +267,10 @@ static __no_kasan_or_inline void __load_psw_mask(unsigned long mask)
asm volatile(
" larl %0,1f\n"
- " stg %0,%O1+8(%R1)\n"
- " lpswe %1\n"
+ " stg %0,%1\n"
+ " lpswe %2\n"
"1:"
- : "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
+ : "=&d" (addr), "=Q" (psw.addr) : "Q" (psw) : "memory", "cc");
}
/*
@@ -387,12 +315,12 @@ void enabled_wait(void);
/*
* Function to drop a processor into disabled wait state
*/
-static inline void __noreturn disabled_wait(unsigned long code)
+static inline void __noreturn disabled_wait(void)
{
psw_t psw;
psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
- psw.addr = code;
+ psw.addr = _THIS_IP_;
__load_psw(psw);
while (1);
}
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index ef4c9dec06a4..f577c5f6031a 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -79,6 +79,9 @@ struct sclp_info {
unsigned char has_kss : 1;
unsigned char has_gisaf : 1;
unsigned char has_diag318 : 1;
+ unsigned char has_sipl : 1;
+ unsigned char has_sipl_g2 : 1;
+ unsigned char has_dirq : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/arch/s390/include/asm/sections.h b/arch/s390/include/asm/sections.h
index 7afe4620685c..42de04ad9c07 100644
--- a/arch/s390/include/asm/sections.h
+++ b/arch/s390/include/asm/sections.h
@@ -2,8 +2,20 @@
#ifndef _S390_SECTIONS_H
#define _S390_SECTIONS_H
+#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
+
#include <asm-generic/sections.h>
+extern bool initmem_freed;
+
+static inline int arch_is_kernel_initmem_freed(unsigned long addr)
+{
+ if (!initmem_freed)
+ return 0;
+ return addr >= (unsigned long)__init_begin &&
+ addr < (unsigned long)__init_end;
+}
+
/*
* .boot.data section contains variables "shared" between the decompressor and
* the decompressed kernel. The decompressor will store values in them, and
@@ -16,4 +28,14 @@
*/
#define __bootdata(var) __section(.boot.data.var) var
+/*
+ * .boot.preserved.data is similar to .boot.data, but it is not part of the
+ * .init section and thus will be preserved for later use in the decompressed
+ * kernel.
+ */
+#define __bootdata_preserved(var) __section(.boot.preserved.data.var) var
+
+extern unsigned long __sdma, __edma;
+extern unsigned long __stext_dma, __etext_dma;
+
#endif
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index efda97804aa4..925889d360c1 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -12,7 +12,10 @@
#define EP_OFFSET 0x10008
#define EP_STRING "S390EP"
#define PARMAREA 0x10400
-#define PARMAREA_END 0x11000
+#define EARLY_SCCB_OFFSET 0x11000
+#define HEAD_END 0x12000
+
+#define EARLY_SCCB_SIZE PAGE_SIZE
/*
* Machine features detected in early.c
@@ -65,6 +68,16 @@
#define OLDMEM_SIZE (*(unsigned long *) (OLDMEM_SIZE_OFFSET))
#define COMMAND_LINE ((char *) (COMMAND_LINE_OFFSET))
+struct parmarea {
+ unsigned long ipl_device; /* 0x10400 */
+ unsigned long initrd_start; /* 0x10408 */
+ unsigned long initrd_size; /* 0x10410 */
+ unsigned long oldmem_base; /* 0x10418 */
+ unsigned long oldmem_size; /* 0x10420 */
+ char pad1[0x10480 - 0x10428]; /* 0x10428 - 0x10480 */
+ char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */
+};
+
extern int noexec_disabled;
extern int memory_end_set;
extern unsigned long memory_end;
@@ -134,6 +147,12 @@ extern void (*_machine_restart)(char *command);
extern void (*_machine_halt)(void);
extern void (*_machine_power_off)(void);
+extern unsigned long __kaslr_offset;
+static inline unsigned long kaslr_offset(void)
+{
+ return __kaslr_offset;
+}
+
#else /* __ASSEMBLY__ */
#define IPL_DEVICE (IPL_DEVICE_OFFSET)
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
new file mode 100644
index 000000000000..49634bfbecdd
--- /dev/null
+++ b/arch/s390/include/asm/stacktrace.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_STACKTRACE_H
+#define _ASM_S390_STACKTRACE_H
+
+#include <linux/uaccess.h>
+#include <linux/ptrace.h>
+#include <asm/switch_to.h>
+
+enum stack_type {
+ STACK_TYPE_UNKNOWN,
+ STACK_TYPE_TASK,
+ STACK_TYPE_IRQ,
+ STACK_TYPE_NODAT,
+ STACK_TYPE_RESTART,
+};
+
+struct stack_info {
+ enum stack_type type;
+ unsigned long begin, end;
+};
+
+const char *stack_type_name(enum stack_type type);
+int get_stack_info(unsigned long sp, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask);
+
+static inline bool on_stack(struct stack_info *info,
+ unsigned long addr, size_t len)
+{
+ if (info->type == STACK_TYPE_UNKNOWN)
+ return false;
+ if (addr + len < addr)
+ return false;
+ return addr >= info->begin && addr + len < info->end;
+}
+
+static inline unsigned long get_stack_pointer(struct task_struct *task,
+ struct pt_regs *regs)
+{
+ if (regs)
+ return (unsigned long) kernel_stack_pointer(regs);
+ if (task == current)
+ return current_stack_pointer();
+ return (unsigned long) task->thread.ksp;
+}
+
+/*
+ * Stack layout of a C stack frame.
+ */
+#ifndef __PACK_STACK
+struct stack_frame {
+ unsigned long back_chain;
+ unsigned long empty1[5];
+ unsigned long gprs[10];
+ unsigned int empty2[8];
+};
+#else
+struct stack_frame {
+ unsigned long empty1[5];
+ unsigned int empty2[8];
+ unsigned long gprs[10];
+ unsigned long back_chain;
+};
+#endif
+
+#define CALL_ARGS_0() \
+ register unsigned long r2 asm("2")
+#define CALL_ARGS_1(arg1) \
+ register unsigned long r2 asm("2") = (unsigned long)(arg1)
+#define CALL_ARGS_2(arg1, arg2) \
+ CALL_ARGS_1(arg1); \
+ register unsigned long r3 asm("3") = (unsigned long)(arg2)
+#define CALL_ARGS_3(arg1, arg2, arg3) \
+ CALL_ARGS_2(arg1, arg2); \
+ register unsigned long r4 asm("4") = (unsigned long)(arg3)
+#define CALL_ARGS_4(arg1, arg2, arg3, arg4) \
+ CALL_ARGS_3(arg1, arg2, arg3); \
+ register unsigned long r4 asm("5") = (unsigned long)(arg4)
+#define CALL_ARGS_5(arg1, arg2, arg3, arg4, arg5) \
+ CALL_ARGS_4(arg1, arg2, arg3, arg4); \
+ register unsigned long r4 asm("6") = (unsigned long)(arg5)
+
+#define CALL_FMT_0 "=&d" (r2) :
+#define CALL_FMT_1 "+&d" (r2) :
+#define CALL_FMT_2 CALL_FMT_1 "d" (r3),
+#define CALL_FMT_3 CALL_FMT_2 "d" (r4),
+#define CALL_FMT_4 CALL_FMT_3 "d" (r5),
+#define CALL_FMT_5 CALL_FMT_4 "d" (r6),
+
+#define CALL_CLOBBER_5 "0", "1", "14", "cc", "memory"
+#define CALL_CLOBBER_4 CALL_CLOBBER_5
+#define CALL_CLOBBER_3 CALL_CLOBBER_4, "5"
+#define CALL_CLOBBER_2 CALL_CLOBBER_3, "4"
+#define CALL_CLOBBER_1 CALL_CLOBBER_2, "3"
+#define CALL_CLOBBER_0 CALL_CLOBBER_1
+
+#define CALL_ON_STACK(fn, stack, nr, args...) \
+({ \
+ CALL_ARGS_##nr(args); \
+ unsigned long prev; \
+ \
+ asm volatile( \
+ " la %[_prev],0(15)\n" \
+ " la 15,0(%[_stack])\n" \
+ " stg %[_prev],%[_bc](15)\n" \
+ " brasl 14,%[_fn]\n" \
+ " la 15,0(%[_prev])\n" \
+ : [_prev] "=&a" (prev), CALL_FMT_##nr \
+ [_stack] "a" (stack), \
+ [_bc] "i" (offsetof(struct stack_frame, back_chain)), \
+ [_fn] "X" (fn) : CALL_CLOBBER_##nr); \
+ r2; \
+})
+
+#endif /* _ASM_S390_STACKTRACE_H */
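
[Editor's aside — not part of the patch] To illustrate the CALL_ON_STACK macro that moved into this header, a hypothetical caller is sketched below. do_stack_work() is an invented function and the lowcore nodat stack is just one plausible target stack; neither comes from this patch.

/* Hypothetical: run do_stack_work(addr) with %r15 switched to the nodat stack. */
static unsigned long do_stack_work(unsigned long addr);

static unsigned long run_on_nodat_stack(unsigned long addr)
{
	return CALL_ON_STACK(do_stack_work, S390_lowcore.nodat_stack, 1, addr);
}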
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 59c3e91f2cdb..f073292e9fdb 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -14,13 +14,8 @@
#include <linux/err.h>
#include <asm/ptrace.h>
-/*
- * The syscall table always contains 32 bit pointers since we know that the
- * address of the function to be called is (way) below 4GB. So the "int"
- * type here is what we want [need] for both 32 bit and 64 bit systems.
- */
-extern const unsigned int sys_call_table[];
-extern const unsigned int sys_call_table_emu[];
+extern const unsigned long sys_call_table[];
+extern const unsigned long sys_call_table_emu[];
static inline long syscall_get_nr(struct task_struct *task,
struct pt_regs *regs)
@@ -84,10 +79,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->orig_gpr2 = args[0];
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
#ifdef CONFIG_COMPAT
- if (test_tsk_thread_flag(current, TIF_31BIT))
+ if (test_tsk_thread_flag(task, TIF_31BIT))
return AUDIT_ARCH_S390;
#endif
return AUDIT_ARCH_S390X;
diff --git a/arch/s390/include/asm/syscall_wrapper.h b/arch/s390/include/asm/syscall_wrapper.h
index 5596c5c625d2..3c3d6fe8e2f0 100644
--- a/arch/s390/include/asm/syscall_wrapper.h
+++ b/arch/s390/include/asm/syscall_wrapper.h
@@ -119,8 +119,8 @@
"Type aliasing is used to sanitize syscall arguments");\
asmlinkage long __s390x_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_sys##name)))); \
- ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
- static long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
+ ALLOW_ERROR_INJECTION(__s390x_sys##name, ERRNO); \
+ long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
__S390_SYS_STUBx(x, name, __VA_ARGS__) \
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index b31c779cf581..aa406c05a350 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -22,98 +22,39 @@
 * Pages used for the page tables are a different story. FIXME: more
*/
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <asm/processor.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-
-struct mmu_gather {
- struct mm_struct *mm;
- struct mmu_table_batch *batch;
- unsigned int fullmm;
- unsigned long start, end;
-};
-
-struct mmu_table_batch {
- struct rcu_head rcu;
- unsigned int nr;
- void *tables[0];
-};
-
-#define MAX_TABLE_BATCH \
- ((PAGE_SIZE - sizeof(struct mmu_table_batch)) / sizeof(void *))
-
-extern void tlb_table_flush(struct mmu_gather *tlb);
-extern void tlb_remove_table(struct mmu_gather *tlb, void *table);
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
- tlb->batch = NULL;
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- __tlb_flush_mm_lazy(tlb->mm);
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- tlb_table_flush(tlb);
-}
-
+void __tlb_remove_table(void *_table);
+static inline void tlb_flush(struct mmu_gather *tlb);
+static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
+ struct page *page, int page_size);
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
+#define tlb_start_vma(tlb, vma) do { } while (0)
+#define tlb_end_vma(tlb, vma) do { } while (0)
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->start = start;
- tlb->end = end;
- }
+#define tlb_flush tlb_flush
+#define pte_free_tlb pte_free_tlb
+#define pmd_free_tlb pmd_free_tlb
+#define p4d_free_tlb p4d_free_tlb
+#define pud_free_tlb pud_free_tlb
- tlb_flush_mmu(tlb);
-}
+#include <asm/pgalloc.h>
+#include <asm/tlbflush.h>
+#include <asm-generic/tlb.h>
/*
* Release the page cache reference for a pte removed by
* tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
* has already been freed, so just do free_page_and_swap_cache.
*/
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
-}
-
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size)
{
- return __tlb_remove_page(tlb, page);
+ free_page_and_swap_cache(page);
+ return false;
}
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
+static inline void tlb_flush(struct mmu_gather *tlb)
{
- return tlb_remove_page(tlb, page);
+ __tlb_flush_mm_lazy(tlb->mm);
}
/*
@@ -121,8 +62,17 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
* page table from the tlb.
*/
static inline void pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
- unsigned long address)
+ unsigned long address)
{
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_ptes = 1;
+ /*
+ * page_table_free_rcu takes care of the allocation bit masks
+ * of the 2K table fragments in the 4K page table page,
+ * then calls tlb_remove_table.
+ */
page_table_free_rcu(tlb, (unsigned long *) pte, address);
}
@@ -139,6 +89,10 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
if (mm_pmd_folded(tlb->mm))
return;
pgtable_pmd_page_dtor(virt_to_page(pmd));
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_puds = 1;
tlb_remove_table(tlb, pmd);
}
@@ -154,6 +108,10 @@ static inline void p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
{
if (mm_p4d_folded(tlb->mm))
return;
+ __tlb_adjust_range(tlb, address, PAGE_SIZE);
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_p4ds = 1;
tlb_remove_table(tlb, p4d);
}
@@ -169,21 +127,11 @@ static inline void pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
{
if (mm_pud_folded(tlb->mm))
return;
+ tlb->mm->context.flush_mm = 1;
+ tlb->freed_tables = 1;
+ tlb->cleared_puds = 1;
tlb_remove_table(tlb, pud);
}
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define tlb_remove_tlb_entry(tlb, ptep, addr) do { } while (0)
-#define tlb_remove_pmd_tlb_entry(tlb, pmdp, addr) do { } while (0)
-#define tlb_migrate_finish(mm) do { } while (0)
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
#endif /* _S390_TLB_H */
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index 007fcb9aeeb8..bd2fd9a7821d 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -55,8 +55,10 @@ raw_copy_from_user(void *to, const void __user *from, unsigned long n);
unsigned long __must_check
raw_copy_to_user(void __user *to, const void *from, unsigned long n);
+#ifndef CONFIG_KASAN
#define INLINE_COPY_FROM_USER
#define INLINE_COPY_TO_USER
+#endif
#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
new file mode 100644
index 000000000000..6eb2ef105d87
--- /dev/null
+++ b/arch/s390/include/asm/unwind.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_UNWIND_H
+#define _ASM_S390_UNWIND_H
+
+#include <linux/sched.h>
+#include <linux/ftrace.h>
+#include <asm/ptrace.h>
+#include <asm/stacktrace.h>
+
+/*
+ * To use the stack unwinder it has to be initialized with unwind_start.
+ * There are four combinations for task and regs:
+ * 1) task==NULL, regs==NULL: the unwind starts for the task that is currently
+ * running, sp/ip picked up from the CPU registers
+ * 2) task==NULL, regs!=NULL: the unwind starts from the sp/ip found in
+ * the struct pt_regs of an interrupt frame for the current task
+ * 3) task!=NULL, regs==NULL: the unwind starts for an inactive task with
+ * the sp picked up from task->thread.ksp and the ip picked up from the
+ * return address stored by __switch_to
+ * 4) task!=NULL, regs!=NULL: the sp/ip are picked up from the interrupt
+ * frame 'regs' of an inactive task
+ * If 'first_frame' is not zero, unwind_start skips unwind frames until it
+ * reaches the specified stack pointer.
+ * The end of the unwinding is indicated with unwind_done; this can be true
+ * right after unwind_start, e.g. when a non-zero first_frame cannot be found.
+ * unwind_next_frame skips to the next frame.
+ * Once the unwind is completed, unwind_error() can be used to check if there
+ * has been a situation where the unwinder could not correctly understand
+ * the task's call chain.
+ */
+
+struct unwind_state {
+ struct stack_info stack_info;
+ unsigned long stack_mask;
+ struct task_struct *task;
+ struct pt_regs *regs;
+ unsigned long sp, ip;
+ int graph_idx;
+ bool reliable;
+ bool error;
+};
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long first_frame);
+bool unwind_next_frame(struct unwind_state *state);
+unsigned long unwind_get_return_address(struct unwind_state *state);
+
+static inline bool unwind_done(struct unwind_state *state)
+{
+ return state->stack_info.type == STACK_TYPE_UNKNOWN;
+}
+
+static inline bool unwind_error(struct unwind_state *state)
+{
+ return state->error;
+}
+
+static inline void unwind_start(struct unwind_state *state,
+ struct task_struct *task,
+ struct pt_regs *regs,
+ unsigned long sp)
+{
+ sp = sp ? : get_stack_pointer(task, regs);
+ __unwind_start(state, task, regs, sp);
+}
+
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+{
+ return unwind_done(state) ? NULL : state->regs;
+}
+
+#define unwind_for_each_frame(state, task, regs, first_frame) \
+ for (unwind_start(state, task, regs, first_frame); \
+ !unwind_done(state); \
+ unwind_next_frame(state))
+
+static inline void unwind_init(void) {}
+static inline void unwind_module_init(struct module *mod, void *orc_ip,
+ size_t orc_ip_size, void *orc,
+ size_t orc_size) {}
+
+#ifdef CONFIG_KASAN
+/*
+ * This disables KASAN checking when reading a value from another task's stack,
+ * since the other task could be running on another CPU and could have poisoned
+ * the stack in the meantime.
+ */
+#define READ_ONCE_TASK_STACK(task, x) \
+({ \
+ unsigned long val; \
+ if (task == current) \
+ val = READ_ONCE(x); \
+ else \
+ val = READ_ONCE_NOCHECK(x); \
+ val; \
+})
+#else
+#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x)
+#endif
+
+#endif /* _ASM_S390_UNWIND_H */
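
[Editor's aside — not part of the patch] A minimal usage sketch of the unwinder API documented above, assuming a built-in caller with printk available; it mirrors the pattern used by show_stack() further down in this patch.

/* Walk the currently running task's stack and print each return address. */
static void print_current_stack(void)
{
	struct unwind_state state;

	unwind_for_each_frame(&state, NULL, NULL, 0)
		printk("%s%pSR\n", state.reliable ? "" : "? ",
		       (void *) state.ip);
	if (unwind_error(&state))
		printk("unwind could not follow the full call chain\n");
}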
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
new file mode 100644
index 000000000000..ef3c00b049ab
--- /dev/null
+++ b/arch/s390/include/asm/uv.h
@@ -0,0 +1,132 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor Interfaces
+ *
+ * Copyright IBM Corp. 2019
+ *
+ * Author(s):
+ * Vasily Gorbik <gor@linux.ibm.com>
+ * Janosch Frank <frankja@linux.ibm.com>
+ */
+#ifndef _ASM_S390_UV_H
+#define _ASM_S390_UV_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/bug.h>
+#include <asm/page.h>
+
+#define UVC_RC_EXECUTED 0x0001
+#define UVC_RC_INV_CMD 0x0002
+#define UVC_RC_INV_STATE 0x0003
+#define UVC_RC_INV_LEN 0x0005
+#define UVC_RC_NO_RESUME 0x0007
+
+#define UVC_CMD_QUI 0x0001
+#define UVC_CMD_SET_SHARED_ACCESS 0x1000
+#define UVC_CMD_REMOVE_SHARED_ACCESS 0x1001
+
+/* Bits in installed uv calls */
+enum uv_cmds_inst {
+ BIT_UVC_CMD_QUI = 0,
+ BIT_UVC_CMD_SET_SHARED_ACCESS = 8,
+ BIT_UVC_CMD_REMOVE_SHARED_ACCESS = 9,
+};
+
+struct uv_cb_header {
+ u16 len;
+ u16 cmd; /* Command Code */
+ u16 rc; /* Response Code */
+ u16 rrc; /* Return Reason Code */
+} __packed __aligned(8);
+
+struct uv_cb_qui {
+ struct uv_cb_header header;
+ u64 reserved08;
+ u64 inst_calls_list[4];
+ u64 reserved30[15];
+} __packed __aligned(8);
+
+struct uv_cb_share {
+ struct uv_cb_header header;
+ u64 reserved08[3];
+ u64 paddr;
+ u64 reserved28;
+} __packed __aligned(8);
+
+static inline int uv_call(unsigned long r1, unsigned long r2)
+{
+ int cc;
+
+ asm volatile(
+ "0: .insn rrf,0xB9A40000,%[r1],%[r2],0,0\n"
+ " brc 3,0b\n"
+ " ipm %[cc]\n"
+ " srl %[cc],28\n"
+ : [cc] "=d" (cc)
+ : [r1] "a" (r1), [r2] "a" (r2)
+ : "memory", "cc");
+ return cc;
+}
+
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+extern int prot_virt_guest;
+
+static inline int is_prot_virt_guest(void)
+{
+ return prot_virt_guest;
+}
+
+static inline int share(unsigned long addr, u16 cmd)
+{
+ struct uv_cb_share uvcb = {
+ .header.cmd = cmd,
+ .header.len = sizeof(uvcb),
+ .paddr = addr
+ };
+
+ if (!is_prot_virt_guest())
+ return -ENOTSUPP;
+ /*
+ * Sharing is page wise; if we encounter addresses that are
+ * not page aligned, we assume something went wrong. If
+ * malloced structs are passed to this function, we could leak
+ * data to the hypervisor.
+ */
+ BUG_ON(addr & ~PAGE_MASK);
+
+ if (!uv_call(0, (u64)&uvcb))
+ return 0;
+ return -EINVAL;
+}
+
+/*
+ * Guest 2 request to the Ultravisor to make a page shared with the
+ * hypervisor for IO.
+ *
+ * @addr: Real or absolute address of the page to be shared
+ */
+static inline int uv_set_shared(unsigned long addr)
+{
+ return share(addr, UVC_CMD_SET_SHARED_ACCESS);
+}
+
+/*
+ * Guest 2 request to the Ultravisor to make a page unshared.
+ *
+ * @addr: Real or absolute address of the page to be unshared
+ */
+static inline int uv_remove_shared(unsigned long addr)
+{
+ return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
+}
+
+void uv_query_info(void);
+#else
+#define is_prot_virt_guest() 0
+static inline int uv_set_shared(unsigned long addr) { return 0; }
+static inline int uv_remove_shared(unsigned long addr) { return 0; }
+static inline void uv_query_info(void) {}
+#endif
+
+#endif /* _ASM_S390_UV_H */
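
[Editor's aside — not part of the patch] A hypothetical caller of the sharing interface above: under protected virtualization a page used for I/O has to be shared with the hypervisor first and unshared afterwards. do_shared_io() and the surrounding buffer handling are invented for illustration; only the page-alignment requirement is taken from share().

/* Hypothetical: share one page for I/O, then withdraw the sharing again. */
static int do_shared_io(unsigned long page_addr)
{
	int rc;

	/* page_addr must be page aligned, see share() above */
	rc = uv_set_shared(page_addr);
	if (rc)
		return rc;
	/* ... perform I/O on the now-shared page ... */
	return uv_remove_shared(page_addr);
}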
diff --git a/arch/s390/include/asm/vmlinux.lds.h b/arch/s390/include/asm/vmlinux.lds.h
index 2d127f900352..cbe670a6861b 100644
--- a/arch/s390/include/asm/vmlinux.lds.h
+++ b/arch/s390/include/asm/vmlinux.lds.h
@@ -18,3 +18,16 @@
*(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.data*))) \
__boot_data_end = .; \
}
+
+/*
+ * .boot.preserved.data is similar to .boot.data, but it is not part of the
+ * .init section and thus will be preserved for later use in the decompressed
+ * kernel.
+ */
+#define BOOT_DATA_PRESERVED \
+ . = ALIGN(PAGE_SIZE); \
+ .boot.preserved.data : { \
+ __boot_data_preserved_start = .; \
+ *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.boot.preserved.data*))) \
+ __boot_data_preserved_end = .; \
+ }
diff --git a/arch/s390/include/uapi/asm/ipl.h b/arch/s390/include/uapi/asm/ipl.h
new file mode 100644
index 000000000000..fd32b1cd80d2
--- /dev/null
+++ b/arch/s390/include/uapi/asm/ipl.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_UAPI_IPL_H
+#define _ASM_S390_UAPI_IPL_H
+
+#include <linux/types.h>
+
+/* IPL Parameter List header */
+struct ipl_pl_hdr {
+ __u32 len;
+ __u8 flags;
+ __u8 reserved1[2];
+ __u8 version;
+} __packed;
+
+#define IPL_PL_FLAG_IPLPS 0x80
+#define IPL_PL_FLAG_SIPL 0x40
+#define IPL_PL_FLAG_IPLSR 0x20
+
+/* IPL Parameter Block header */
+struct ipl_pb_hdr {
+ __u32 len;
+ __u8 pbt;
+} __packed;
+
+/* IPL Parameter Block types */
+enum ipl_pbt {
+ IPL_PBT_FCP = 0,
+ IPL_PBT_SCP_DATA = 1,
+ IPL_PBT_CCW = 2,
+};
+
+/* IPL Parameter Block 0 with common fields */
+struct ipl_pb0_common {
+ __u32 len;
+ __u8 pbt;
+ __u8 flags;
+ __u8 reserved1[2];
+ __u8 loadparm[8];
+ __u8 reserved2[84];
+} __packed;
+
+#define IPL_PB0_FLAG_LOADPARM 0x80
+
+/* IPL Parameter Block 0 for FCP */
+struct ipl_pb0_fcp {
+ __u32 len;
+ __u8 pbt;
+ __u8 reserved1[3];
+ __u8 loadparm[8];
+ __u8 reserved2[304];
+ __u8 opt;
+ __u8 reserved3[3];
+ __u8 cssid;
+ __u8 reserved4[1];
+ __u16 devno;
+ __u8 reserved5[4];
+ __u64 wwpn;
+ __u64 lun;
+ __u32 bootprog;
+ __u8 reserved6[12];
+ __u64 br_lba;
+ __u32 scp_data_len;
+ __u8 reserved7[260];
+ __u8 scp_data[];
+} __packed;
+
+#define IPL_PB0_FCP_OPT_IPL 0x10
+#define IPL_PB0_FCP_OPT_DUMP 0x20
+
+/* IPL Parameter Block 0 for CCW */
+struct ipl_pb0_ccw {
+ __u32 len;
+ __u8 pbt;
+ __u8 flags;
+ __u8 reserved1[2];
+ __u8 loadparm[8];
+ __u8 reserved2[84];
+ __u16 reserved3 : 13;
+ __u8 ssid : 3;
+ __u16 devno;
+ __u8 vm_flags;
+ __u8 reserved4[3];
+ __u32 vm_parm_len;
+ __u8 nss_name[8];
+ __u8 vm_parm[64];
+ __u8 reserved5[8];
+} __packed;
+
+#define IPL_PB0_CCW_VM_FLAG_NSS 0x80
+#define IPL_PB0_CCW_VM_FLAG_VP 0x40
+
+/* IPL Parameter Block 1 for additional SCP data */
+struct ipl_pb1_scp_data {
+ __u32 len;
+ __u8 pbt;
+ __u8 scp_data[];
+} __packed;
+
+/* IPL Report List header */
+struct ipl_rl_hdr {
+ __u32 len;
+ __u8 flags;
+ __u8 reserved1[2];
+ __u8 version;
+ __u8 reserved2[8];
+} __packed;
+
+/* IPL Report Block header */
+struct ipl_rb_hdr {
+ __u32 len;
+ __u8 rbt;
+ __u8 reserved1[11];
+} __packed;
+
+/* IPL Report Block types */
+enum ipl_rbt {
+ IPL_RBT_CERTIFICATES = 1,
+ IPL_RBT_COMPONENTS = 2,
+};
+
+/* IPL Report Block for the certificate list */
+struct ipl_rb_certificate_entry {
+ __u64 addr;
+ __u64 len;
+} __packed;
+
+struct ipl_rb_certificates {
+ __u32 len;
+ __u8 rbt;
+ __u8 reserved1[11];
+ struct ipl_rb_certificate_entry entries[];
+} __packed;
+
+/* IPL Report Block for the component list */
+struct ipl_rb_component_entry {
+ __u64 addr;
+ __u64 len;
+ __u8 flags;
+ __u8 reserved1[5];
+ __u16 certificate_index;
+ __u8 reserved2[8];
+};
+
+#define IPL_RB_COMPONENT_FLAG_SIGNED 0x80
+#define IPL_RB_COMPONENT_FLAG_VERIFIED 0x40
+
+struct ipl_rb_components {
+ __u32 len;
+ __u8 rbt;
+ __u8 reserved1[11];
+ struct ipl_rb_component_entry entries[];
+} __packed;
+
+#endif
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 8a62c7f72e1b..b0478d01a0c5 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -39,6 +39,7 @@ CFLAGS_smp.o := -Wno-nonnull
#
CFLAGS_stacktrace.o += -fno-optimize-sibling-calls
CFLAGS_dumpstack.o += -fno-optimize-sibling-calls
+CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls
#
# Pass UTS_MACHINE for user_regset definition
@@ -51,7 +52,7 @@ obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o early_nobss.o
obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
-obj-y += nospec-branch.o ipl_vmparm.o
+obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
extra-y += head64.o vmlinux.lds
@@ -77,6 +78,8 @@ obj-$(CONFIG_JUMP_LABEL) += jump_label.o
obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o
obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o
+obj-$(CONFIG_IMA) += ima_arch.o
+
obj-$(CONFIG_PERF_EVENTS) += perf_event.o perf_cpum_cf_common.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf.o perf_cpum_sf.o
obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
@@ -86,7 +89,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o
# vdso
obj-y += vdso64/
-obj-$(CONFIG_COMPAT) += vdso32/
+obj-$(CONFIG_COMPAT_VDSO) += vdso32/
chkbss := head64.o early_nobss.o
include $(srctree)/arch/s390/scripts/Makefile.chkbss
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 164bec175628..41ac4ad21311 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -16,6 +16,7 @@
#include <asm/pgtable.h>
#include <asm/gmap.h>
#include <asm/nmi.h>
+#include <asm/stacktrace.h>
int main(void)
{
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index f268fca67e82..2f39ea57f358 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -28,6 +28,7 @@ ENTRY(s390_base_mcck_handler)
1: la %r1,4095
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
lpswe __LC_MCK_OLD_PSW
+ENDPROC(s390_base_mcck_handler)
.section .bss
.align 8
@@ -48,6 +49,7 @@ ENTRY(s390_base_ext_handler)
1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC
ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
lpswe __LC_EXT_OLD_PSW
+ENDPROC(s390_base_ext_handler)
.section .bss
.align 8
@@ -68,6 +70,7 @@ ENTRY(s390_base_pgm_handler)
lmg %r0,%r15,__LC_SAVE_AREA_SYNC
lpswe __LC_PGM_OLD_PSW
1: lpswe disabled_wait_psw-0b(%r13)
+ENDPROC(s390_base_pgm_handler)
.align 8
disabled_wait_psw:
@@ -79,71 +82,3 @@ disabled_wait_psw:
s390_base_pgm_handler_fn:
.quad 0
.previous
-
-#
-# Calls diag 308 subcode 1 and continues execution
-#
-ENTRY(diag308_reset)
- larl %r4,.Lctlregs # Save control registers
- stctg %c0,%c15,0(%r4)
- lg %r2,0(%r4) # Disable lowcore protection
- nilh %r2,0xefff
- larl %r4,.Lctlreg0
- stg %r2,0(%r4)
- lctlg %c0,%c0,0(%r4)
- larl %r4,.Lfpctl # Floating point control register
- stfpc 0(%r4)
- larl %r4,.Lprefix # Save prefix register
- stpx 0(%r4)
- larl %r4,.Lprefix_zero # Set prefix register to 0
- spx 0(%r4)
- larl %r4,.Lcontinue_psw # Save PSW flags
- epsw %r2,%r3
- stm %r2,%r3,0(%r4)
- larl %r4,.Lrestart_psw # Setup restart PSW at absolute 0
- lghi %r3,0
- lg %r4,0(%r4) # Save PSW
- sturg %r4,%r3 # Use sturg, because of large pages
- lghi %r1,1
- lghi %r0,0
- diag %r0,%r1,0x308
-.Lrestart_part2:
- lhi %r0,0 # Load r0 with zero
- lhi %r1,2 # Use mode 2 = ESAME (dump)
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE # Switch to ESAME mode
- sam64 # Switch to 64 bit addressing mode
- larl %r4,.Lctlregs # Restore control registers
- lctlg %c0,%c15,0(%r4)
- larl %r4,.Lfpctl # Restore floating point ctl register
- lfpc 0(%r4)
- larl %r4,.Lprefix # Restore prefix register
- spx 0(%r4)
- larl %r4,.Lcontinue_psw # Restore PSW flags
- lpswe 0(%r4)
-.Lcontinue:
- BR_EX %r14
-.align 16
-.Lrestart_psw:
- .long 0x00080000,0x80000000 + .Lrestart_part2
-
- .section .data..nosave,"aw",@progbits
-.align 8
-.Lcontinue_psw:
- .quad 0,.Lcontinue
- .previous
-
- .section .bss
-.align 8
-.Lctlreg0:
- .quad 0
-.Lctlregs:
- .rept 16
- .quad 0
- .endr
-.Lfpctl:
- .long 0
-.Lprefix:
- .long 0
-.Lprefix_zero:
- .long 0
- .previous
diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c
index 7edaa733a77f..e9dac9a24d3f 100644
--- a/arch/s390/kernel/diag.c
+++ b/arch/s390/kernel/diag.c
@@ -13,6 +13,7 @@
#include <linux/debugfs.h>
#include <asm/diag.h>
#include <asm/trace/diag.h>
+#include <asm/sections.h>
struct diag_stat {
unsigned int counter[NR_DIAG_STAT];
@@ -49,6 +50,9 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = {
[DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" },
};
+struct diag_ops __bootdata_preserved(diag_dma_ops);
+struct diag210 *__bootdata_preserved(__diag210_tmp_dma);
+
static int show_diag_stat(struct seq_file *m, void *v)
{
struct diag_stat *stat;
@@ -139,30 +143,10 @@ EXPORT_SYMBOL(diag_stat_inc_norecursion);
/*
* Diagnose 14: Input spool file manipulation
*/
-static inline int __diag14(unsigned long rx, unsigned long ry1,
- unsigned long subcode)
-{
- register unsigned long _ry1 asm("2") = ry1;
- register unsigned long _ry2 asm("3") = subcode;
- int rc = 0;
-
- asm volatile(
- " sam31\n"
- " diag %2,2,0x14\n"
- " sam64\n"
- " ipm %0\n"
- " srl %0,28\n"
- : "=d" (rc), "+d" (_ry2)
- : "d" (rx), "d" (_ry1)
- : "cc");
-
- return rc;
-}
-
int diag14(unsigned long rx, unsigned long ry1, unsigned long subcode)
{
diag_stat_inc(DIAG_STAT_X014);
- return __diag14(rx, ry1, subcode);
+ return diag_dma_ops.diag14(rx, ry1, subcode);
}
EXPORT_SYMBOL(diag14);
@@ -195,30 +179,17 @@ EXPORT_SYMBOL(diag204);
*/
int diag210(struct diag210 *addr)
{
- /*
- * diag 210 needs its data below the 2GB border, so we
- * use a static data area to be sure
- */
- static struct diag210 diag210_tmp;
static DEFINE_SPINLOCK(diag210_lock);
unsigned long flags;
int ccode;
spin_lock_irqsave(&diag210_lock, flags);
- diag210_tmp = *addr;
+ *__diag210_tmp_dma = *addr;
diag_stat_inc(DIAG_STAT_X210);
- asm volatile(
- " lhi %0,-1\n"
- " sam31\n"
- " diag %1,0,0x210\n"
- "0: ipm %0\n"
- " srl %0,28\n"
- "1: sam64\n"
- EX_TABLE(0b, 1b)
- : "=&d" (ccode) : "a" (&diag210_tmp) : "cc", "memory");
-
- *addr = diag210_tmp;
+ ccode = diag_dma_ops.diag210(__diag210_tmp_dma);
+
+ *addr = *__diag210_tmp_dma;
spin_unlock_irqrestore(&diag210_lock, flags);
return ccode;
@@ -243,27 +214,9 @@ EXPORT_SYMBOL(diag224);
/*
* Diagnose 26C: Access Certain System Information
*/
-static inline int __diag26c(void *req, void *resp, enum diag26c_sc subcode)
-{
- register unsigned long _req asm("2") = (addr_t) req;
- register unsigned long _resp asm("3") = (addr_t) resp;
- register unsigned long _subcode asm("4") = subcode;
- register unsigned long _rc asm("5") = -EOPNOTSUPP;
-
- asm volatile(
- " sam31\n"
- " diag %[rx],%[ry],0x26c\n"
- "0: sam64\n"
- EX_TABLE(0b,0b)
- : "+d" (_rc)
- : [rx] "d" (_req), "d" (_resp), [ry] "d" (_subcode)
- : "cc", "memory");
- return _rc;
-}
-
int diag26c(void *req, void *resp, enum diag26c_sc subcode)
{
diag_stat_inc(DIAG_STAT_X26C);
- return __diag26c(req, resp, subcode);
+ return diag_dma_ops.diag26c(req, resp, subcode);
}
EXPORT_SYMBOL(diag26c);
diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c
index cb7f55bbe06e..9e87b68be21c 100644
--- a/arch/s390/kernel/dumpstack.c
+++ b/arch/s390/kernel/dumpstack.c
@@ -21,95 +21,124 @@
#include <asm/debug.h>
#include <asm/dis.h>
#include <asm/ipl.h>
+#include <asm/unwind.h>
-/*
- * For dump_trace we have tree different stack to consider:
- * - the panic stack which is used if the kernel stack has overflown
- * - the asynchronous interrupt stack (cpu related)
- * - the synchronous kernel stack (process related)
- * The stack trace can start at any of the three stacks and can potentially
- * touch all of them. The order is: panic stack, async stack, sync stack.
- */
-static unsigned long __no_sanitize_address
-__dump_trace(dump_trace_func_t func, void *data, unsigned long sp,
- unsigned long low, unsigned long high)
+const char *stack_type_name(enum stack_type type)
{
- struct stack_frame *sf;
- struct pt_regs *regs;
-
- while (1) {
- if (sp < low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- if (func(data, sf->gprs[8], 0))
- return sp;
- /* Follow the backchain. */
- while (1) {
- low = sp;
- sp = sf->back_chain;
- if (!sp)
- break;
- if (sp <= low || sp > high - sizeof(*sf))
- return sp;
- sf = (struct stack_frame *) sp;
- if (func(data, sf->gprs[8], 1))
- return sp;
- }
- /* Zero backchain detected, check for interrupt frame. */
- sp = (unsigned long) (sf + 1);
- if (sp <= low || sp > high - sizeof(*regs))
- return sp;
- regs = (struct pt_regs *) sp;
- if (!user_mode(regs)) {
- if (func(data, regs->psw.addr, 1))
- return sp;
- }
- low = sp;
- sp = regs->gprs[15];
+ switch (type) {
+ case STACK_TYPE_TASK:
+ return "task";
+ case STACK_TYPE_IRQ:
+ return "irq";
+ case STACK_TYPE_NODAT:
+ return "nodat";
+ case STACK_TYPE_RESTART:
+ return "restart";
+ default:
+ return "unknown";
}
}
-void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task,
- unsigned long sp)
+static inline bool in_stack(unsigned long sp, struct stack_info *info,
+ enum stack_type type, unsigned long low,
+ unsigned long high)
+{
+ if (sp < low || sp >= high)
+ return false;
+ info->type = type;
+ info->begin = low;
+ info->end = high;
+ return true;
+}
+
+static bool in_task_stack(unsigned long sp, struct task_struct *task,
+ struct stack_info *info)
+{
+ unsigned long stack;
+
+ stack = (unsigned long) task_stack_page(task);
+ return in_stack(sp, info, STACK_TYPE_TASK, stack, stack + THREAD_SIZE);
+}
+
+static bool in_irq_stack(unsigned long sp, struct stack_info *info)
{
- unsigned long frame_size;
+ unsigned long frame_size, top;
frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
-#ifdef CONFIG_CHECK_STACK
- sp = __dump_trace(func, data, sp,
- S390_lowcore.nodat_stack + frame_size - THREAD_SIZE,
- S390_lowcore.nodat_stack + frame_size);
-#endif
- sp = __dump_trace(func, data, sp,
- S390_lowcore.async_stack + frame_size - THREAD_SIZE,
- S390_lowcore.async_stack + frame_size);
- task = task ?: current;
- __dump_trace(func, data, sp,
- (unsigned long)task_stack_page(task),
- (unsigned long)task_stack_page(task) + THREAD_SIZE);
+ top = S390_lowcore.async_stack + frame_size;
+ return in_stack(sp, info, STACK_TYPE_IRQ, top - THREAD_SIZE, top);
+}
+
+static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
+{
+ unsigned long frame_size, top;
+
+ frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ top = S390_lowcore.nodat_stack + frame_size;
+ return in_stack(sp, info, STACK_TYPE_NODAT, top - THREAD_SIZE, top);
}
-EXPORT_SYMBOL_GPL(dump_trace);
-static int show_address(void *data, unsigned long address, int reliable)
+static bool in_restart_stack(unsigned long sp, struct stack_info *info)
{
- if (reliable)
- printk(" [<%016lx>] %pSR \n", address, (void *)address);
- else
- printk("([<%016lx>] %pSR)\n", address, (void *)address);
+ unsigned long frame_size, top;
+
+ frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+ top = S390_lowcore.restart_stack + frame_size;
+ return in_stack(sp, info, STACK_TYPE_RESTART, top - THREAD_SIZE, top);
+}
+
+int get_stack_info(unsigned long sp, struct task_struct *task,
+ struct stack_info *info, unsigned long *visit_mask)
+{
+ if (!sp)
+ goto unknown;
+
+ task = task ? : current;
+
+ /* Check per-task stack */
+ if (in_task_stack(sp, task, info))
+ goto recursion_check;
+
+ if (task != current)
+ goto unknown;
+
+ /* Check per-cpu stacks */
+ if (!in_irq_stack(sp, info) &&
+ !in_nodat_stack(sp, info) &&
+ !in_restart_stack(sp, info))
+ goto unknown;
+
+recursion_check:
+ /*
+ * Make sure we don't iterate through any given stack more than once.
+ * If it comes up a second time then there's something wrong going on:
+ * just break out and report an unknown stack type.
+ */
+ if (*visit_mask & (1UL << info->type)) {
+ printk_deferred_once(KERN_WARNING
+ "WARNING: stack recursion on stack type %d\n",
+ info->type);
+ goto unknown;
+ }
+ *visit_mask |= 1UL << info->type;
return 0;
+unknown:
+ info->type = STACK_TYPE_UNKNOWN;
+ return -EINVAL;
}
void show_stack(struct task_struct *task, unsigned long *stack)
{
- unsigned long sp = (unsigned long) stack;
+ struct unwind_state state;
- if (!sp)
- sp = task ? task->thread.ksp : current_stack_pointer();
printk("Call Trace:\n");
- dump_trace(show_address, NULL, task, sp);
if (!task)
task = current;
- debug_show_held_locks(task);
+ unwind_for_each_frame(&state, task, NULL, (unsigned long) stack)
+ printk(state.reliable ? " [<%016lx>] %pSR \n" :
+ "([<%016lx>] %pSR)\n",
+ state.ip, (void *) state.ip);
+ debug_show_held_locks(task ? : current);
}
static void show_last_breaking_event(struct pt_regs *regs)
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index d6edf45f93b9..629f173f60cd 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -30,6 +30,7 @@
#include <asm/sclp.h>
#include <asm/facility.h>
#include <asm/boot_data.h>
+#include <asm/pci_insn.h>
#include "entry.h"
/*
@@ -138,9 +139,9 @@ static void early_pgm_check_handler(void)
unsigned long addr;
addr = S390_lowcore.program_old_psw.addr;
- fixup = search_exception_tables(addr);
+ fixup = s390_search_extables(addr);
if (!fixup)
- disabled_wait(0);
+ disabled_wait();
/* Disable low address protection before storing into lowcore. */
__ctl_store(cr0, 0, 0);
cr0_new = cr0 & ~(1UL << 28);
@@ -235,6 +236,7 @@ static __init void detect_machine_facilities(void)
clock_comparator_max = -1ULL >> 1;
__ctl_set_bit(0, 53);
}
+ enable_mio_ctl();
}
static inline void save_vector_registers(void)
@@ -296,7 +298,7 @@ static void __init check_image_bootable(void)
sclp_early_printk("Linux kernel boot failure: An attempt to boot a vmlinux ELF image failed.\n");
sclp_early_printk("This image does not contain all parts necessary for starting up. Use\n");
sclp_early_printk("bzImage or arch/s390/boot/compressed/vmlinux instead.\n");
- disabled_wait(0xbadb007);
+ disabled_wait();
}
void __init startup_init(void)
@@ -309,7 +311,6 @@ void __init startup_init(void)
setup_facility_list();
detect_machine_type();
setup_arch_string();
- ipl_store_parameters();
setup_boot_command_line();
detect_diag9c();
detect_diag44();
diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c
index 8d73f7fae16e..52a3ef959341 100644
--- a/arch/s390/kernel/early_nobss.c
+++ b/arch/s390/kernel/early_nobss.c
@@ -25,7 +25,7 @@ static void __init reset_tod_clock(void)
return;
/* TOD clock not running. Set the clock to Unix Epoch. */
if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0)
- disabled_wait(0);
+ disabled_wait();
memset(tod_clock_base, 0, 16);
*(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH;
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 583d65ef5007..3f4d272577d3 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -224,6 +224,7 @@ ENTRY(__bpon)
.globl __bpon
BPON
BR_EX %r14
+ENDPROC(__bpon)
/*
* Scheduler resume function, called by switch_to
@@ -248,6 +249,7 @@ ENTRY(__switch_to)
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40
BR_EX %r14
+ENDPROC(__switch_to)
.L__critical_start:
@@ -324,6 +326,7 @@ sie_exit:
EX_TABLE(.Lrewind_pad4,.Lsie_fault)
EX_TABLE(.Lrewind_pad2,.Lsie_fault)
EX_TABLE(sie_exit,.Lsie_fault)
+ENDPROC(sie64a)
EXPORT_SYMBOL(sie64a)
EXPORT_SYMBOL(sie_exit)
#endif
@@ -358,19 +361,19 @@ ENTRY(system_call)
# load address of system call table
lg %r10,__THREAD_sysc_table(%r13,%r12)
llgh %r8,__PT_INT_CODE+2(%r11)
- slag %r8,%r8,2 # shift and test for svc 0
+ slag %r8,%r8,3 # shift and test for svc 0
jnz .Lsysc_nr_ok
# svc 0: system call number in %r1
llgfr %r1,%r1 # clear high word in r1
cghi %r1,NR_syscalls
jnl .Lsysc_nr_ok
sth %r1,__PT_INT_CODE+2(%r11)
- slag %r8,%r1,2
+ slag %r8,%r1,3
.Lsysc_nr_ok:
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stg %r2,__PT_ORIG_GPR2(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
- lgf %r9,0(%r8,%r10) # get system call add.
+ lg %r9,0(%r8,%r10) # get system call add.
TSTMSK __TI_flags(%r12),_TIF_TRACE
jnz .Lsysc_tracesys
BASR_EX %r14,%r9 # call sys_xxxx
@@ -556,8 +559,8 @@ ENTRY(system_call)
lghi %r0,NR_syscalls
clgr %r0,%r2
jnh .Lsysc_tracenogo
- sllg %r8,%r2,2
- lgf %r9,0(%r8,%r10)
+ sllg %r8,%r2,3
+ lg %r9,0(%r8,%r10)
.Lsysc_tracego:
lmg %r3,%r7,__PT_R3(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
@@ -570,6 +573,7 @@ ENTRY(system_call)
lgr %r2,%r11 # pass pointer to pt_regs
larl %r14,.Lsysc_return
jg do_syscall_trace_exit
+ENDPROC(system_call)
#
# a new process exits the kernel with ret_from_fork
@@ -584,10 +588,16 @@ ENTRY(ret_from_fork)
jne .Lsysc_tracenogo
# it's a kernel thread
lmg %r9,%r10,__PT_R9(%r11) # load gprs
+ la %r2,0(%r10)
+ BASR_EX %r14,%r9
+ j .Lsysc_tracenogo
+ENDPROC(ret_from_fork)
+
ENTRY(kernel_thread_starter)
la %r2,0(%r10)
BASR_EX %r14,%r9
j .Lsysc_tracenogo
+ENDPROC(kernel_thread_starter)
/*
* Program check handler routine
@@ -665,9 +675,9 @@ ENTRY(pgm_check_handler)
larl %r1,pgm_check_table
llgh %r10,__PT_INT_CODE+2(%r11)
nill %r10,0x007f
- sll %r10,2
+ sll %r10,3
je .Lpgm_return
- lgf %r9,0(%r10,%r1) # load address of handler routine
+ lg %r9,0(%r10,%r1) # load address of handler routine
lgr %r2,%r11 # pass pointer to pt_regs
BASR_EX %r14,%r9 # branch to interrupt-handler
.Lpgm_return:
@@ -698,6 +708,7 @@ ENTRY(pgm_check_handler)
stg %r14,__LC_RETURN_PSW+8
lghi %r14,_PIF_SYSCALL | _PIF_PER_TRAP
lpswe __LC_RETURN_PSW # branch to .Lsysc_per and enable irqs
+ENDPROC(pgm_check_handler)
/*
* IO interrupt handler routine
@@ -926,6 +937,7 @@ ENTRY(io_int_handler)
ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
TRACE_IRQS_OFF
j .Lio_return
+ENDPROC(io_int_handler)
/*
* External interrupt handler routine
@@ -965,6 +977,7 @@ ENTRY(ext_int_handler)
lghi %r3,EXT_INTERRUPT
brasl %r14,do_IRQ
j .Lio_return
+ENDPROC(ext_int_handler)
/*
* Load idle PSW. The second "half" of this function is in .Lcleanup_idle.
@@ -989,6 +1002,7 @@ ENTRY(psw_idle)
lpswe __SF_EMPTY(%r15)
BR_EX %r14
.Lpsw_idle_end:
+ENDPROC(psw_idle)
/*
* Store floating-point controls and floating-point or vector register
@@ -1031,6 +1045,7 @@ ENTRY(save_fpu_regs)
.Lsave_fpu_regs_exit:
BR_EX %r14
.Lsave_fpu_regs_end:
+ENDPROC(save_fpu_regs)
EXPORT_SYMBOL(save_fpu_regs)
/*
@@ -1077,6 +1092,7 @@ load_fpu_regs:
.Lload_fpu_regs_exit:
BR_EX %r14
.Lload_fpu_regs_end:
+ENDPROC(load_fpu_regs)
.L__critical_end:
@@ -1206,6 +1222,7 @@ ENTRY(mcck_int_handler)
lg %r15,__LC_NODAT_STACK
la %r11,STACK_FRAME_OVERHEAD(%r15)
j .Lmcck_skip
+ENDPROC(mcck_int_handler)
#
# PSW restart interrupt handler
@@ -1232,6 +1249,7 @@ ENTRY(restart_int_handler)
2: sigp %r4,%r3,SIGP_STOP # sigp stop to current cpu
brc 2,2b
3: j 3b
+ENDPROC(restart_int_handler)
.section .kprobes.text, "ax"
@@ -1241,7 +1259,7 @@ ENTRY(restart_int_handler)
* No need to properly save the registers, we are going to panic anyway.
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
-stack_overflow:
+ENTRY(stack_overflow)
lg %r15,__LC_NODAT_STACK # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@@ -1251,9 +1269,10 @@ stack_overflow:
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lgr %r2,%r11 # pass pointer to pt_regs
jg kernel_stack_overflow
+ENDPROC(stack_overflow)
#endif
-cleanup_critical:
+ENTRY(cleanup_critical)
#if IS_ENABLED(CONFIG_KVM)
clg %r9,BASED(.Lcleanup_table_sie) # .Lsie_gmap
jl 0f
@@ -1289,6 +1308,7 @@ cleanup_critical:
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs
0: BR_EX %r14,%r11
+ENDPROC(cleanup_critical)
.align 8
.Lcleanup_table:
@@ -1512,7 +1532,7 @@ cleanup_critical:
.quad .Lsie_skip - .Lsie_entry
#endif
.section .rodata, "a"
-#define SYSCALL(esame,emu) .long __s390x_ ## esame
+#define SYSCALL(esame,emu) .quad __s390x_ ## esame
.globl sys_call_table
sys_call_table:
#include "asm/syscall_table.h"
@@ -1520,7 +1540,7 @@ sys_call_table:
#ifdef CONFIG_COMPAT
-#define SYSCALL(esame,emu) .long __s390_ ## emu
+#define SYSCALL(esame,emu) .quad __s390_ ## emu
.globl sys_call_table_emu
sys_call_table_emu:
#include "asm/syscall_table.h"
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index c3816ae108b0..20420c2b8a14 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -65,7 +65,7 @@ int setup_profiling_timer(unsigned int multiplier);
void __init time_init(void);
int pfn_is_nosave(unsigned long);
void s390_early_resume(void);
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
+unsigned long prepare_ftrace_return(unsigned long parent, unsigned long sp, unsigned long ip);
struct s390_mmap_arg_struct;
struct fadvise64_64_args;
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 39b13d71a8fe..1bb85f60c0dd 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -201,17 +201,18 @@ device_initcall(ftrace_plt_init);
* Hook the return address and push it in the stack of return addresses
* in current thread info.
*/
-unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip)
+unsigned long prepare_ftrace_return(unsigned long ra, unsigned long sp,
+ unsigned long ip)
{
if (unlikely(ftrace_graph_is_dead()))
goto out;
if (unlikely(atomic_read(&current->tracing_graph_pause)))
goto out;
ip -= MCOUNT_INSN_SIZE;
- if (!function_graph_enter(parent, ip, 0, NULL))
- parent = (unsigned long) return_to_handler;
+ if (!function_graph_enter(ra, ip, 0, (void *) sp))
+ ra = (unsigned long) return_to_handler;
out:
- return parent;
+ return ra;
}
NOKPROBE_SYMBOL(prepare_ftrace_return);
diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S
index 56491e636eab..5aea1a527443 100644
--- a/arch/s390/kernel/head64.S
+++ b/arch/s390/kernel/head64.S
@@ -26,7 +26,6 @@ ENTRY(startup_continue)
0: larl %r1,tod_clock_base
mvc 0(16,%r1),__LC_BOOT_CLOCK
larl %r13,.LPG1 # get base
- lctlg %c0,%c15,.Lctl-.LPG1(%r13) # load control registers
larl %r0,boot_vdso_data
stg %r0,__LC_VDSO_PER_CPU
#
@@ -61,22 +60,6 @@ ENTRY(startup_continue)
.align 16
.LPG1:
-.Lctl: .quad 0x04040000 # cr0: AFP registers & secondary space
- .quad 0 # cr1: primary space segment table
- .quad .Lduct # cr2: dispatchable unit control table
- .quad 0 # cr3: instruction authorization
- .quad 0xffff # cr4: instruction authorization
- .quad .Lduct # cr5: primary-aste origin
- .quad 0 # cr6: I/O interrupts
- .quad 0 # cr7: secondary space segment table
- .quad 0 # cr8: access registers translation
- .quad 0 # cr9: tracing off
- .quad 0 # cr10: tracing off
- .quad 0 # cr11: tracing off
- .quad 0 # cr12: tracing off
- .quad 0 # cr13: home space segment table
- .quad 0xc0000000 # cr14: machine check handling off
- .quad .Llinkage_stack # cr15: linkage stack operations
.Lpcmsk:.quad 0x0000000180000000
.L4malign:.quad 0xffffffffffc00000
.Lscan2g:.quad 0x80000000 + 0x20000 - 8 # 2GB + 128K - 8
@@ -84,14 +67,5 @@ ENTRY(startup_continue)
.Lparmaddr:
.quad PARMAREA
.align 64
-.Lduct: .long 0,.Laste,.Laste,0,.Lduald,0,0,0
- .long 0,0,0,0,0,0,0,0
-.Laste: .quad 0,0xffffffffffffffff,0,0,0,0,0,0
- .align 128
-.Lduald:.rept 8
- .long 0x80000000,0,0,0 # invalid access-list entries
- .endr
-.Llinkage_stack:
- .long 0,0,0x89000000,0,0,0,0x8a000000,0
.Ldw: .quad 0x0002000180000000,0x0000000000000000
.Laregs:.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
diff --git a/arch/s390/kernel/ima_arch.c b/arch/s390/kernel/ima_arch.c
new file mode 100644
index 000000000000..f3c3e6e1c5d3
--- /dev/null
+++ b/arch/s390/kernel/ima_arch.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ima.h>
+#include <asm/boot_data.h>
+
+bool arch_ima_get_secureboot(void)
+{
+ return ipl_secure_flag;
+}
+
+const char * const *arch_get_ima_policy(void)
+{
+ return NULL;
+}
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 18a5d6317acc..d836af3ccc38 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -31,6 +31,7 @@
#include <asm/os_info.h>
#include <asm/sections.h>
#include <asm/boot_data.h>
+#include <asm/uv.h>
#include "entry.h"
#define IPL_PARM_BLOCK_VERSION 0
@@ -119,11 +120,15 @@ static char *dump_type_str(enum dump_type type)
}
}
-struct ipl_parameter_block __bootdata(early_ipl_block);
-int __bootdata(early_ipl_block_valid);
+int __bootdata_preserved(ipl_block_valid);
+struct ipl_parameter_block __bootdata_preserved(ipl_block);
+int __bootdata_preserved(ipl_secure_flag);
-static int ipl_block_valid;
-static struct ipl_parameter_block ipl_block;
+unsigned long __bootdata_preserved(ipl_cert_list_addr);
+unsigned long __bootdata_preserved(ipl_cert_list_size);
+
+unsigned long __bootdata(early_ipl_comp_list_addr);
+unsigned long __bootdata(early_ipl_comp_list_size);
static int reipl_capabilities = IPL_TYPE_UNKNOWN;
@@ -246,11 +251,11 @@ static __init enum ipl_type get_ipl_type(void)
if (!ipl_block_valid)
return IPL_TYPE_UNKNOWN;
- switch (ipl_block.hdr.pbt) {
- case DIAG308_IPL_TYPE_CCW:
+ switch (ipl_block.pb0_hdr.pbt) {
+ case IPL_PBT_CCW:
return IPL_TYPE_CCW;
- case DIAG308_IPL_TYPE_FCP:
- if (ipl_block.ipl_info.fcp.opt == DIAG308_IPL_OPT_DUMP)
+ case IPL_PBT_FCP:
+ if (ipl_block.fcp.opt == IPL_PB0_FCP_OPT_DUMP)
return IPL_TYPE_FCP_DUMP;
else
return IPL_TYPE_FCP;
@@ -269,12 +274,35 @@ static ssize_t ipl_type_show(struct kobject *kobj, struct kobj_attribute *attr,
static struct kobj_attribute sys_ipl_type_attr = __ATTR_RO(ipl_type);
+static ssize_t ipl_secure_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ return sprintf(page, "%i\n", !!ipl_secure_flag);
+}
+
+static struct kobj_attribute sys_ipl_secure_attr =
+ __ATTR(secure, 0444, ipl_secure_show, NULL);
+
+static ssize_t ipl_has_secure_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *page)
+{
+ if (MACHINE_IS_LPAR)
+ return sprintf(page, "%i\n", !!sclp.has_sipl);
+ else if (MACHINE_IS_VM)
+ return sprintf(page, "%i\n", !!sclp.has_sipl_g2);
+ else
+ return sprintf(page, "%i\n", 0);
+}
+
+static struct kobj_attribute sys_ipl_has_secure_attr =
+ __ATTR(has_secure, 0444, ipl_has_secure_show, NULL);
+
static ssize_t ipl_vm_parm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
{
char parm[DIAG308_VMPARM_SIZE + 1] = {};
- if (ipl_block_valid && (ipl_block.hdr.pbt == DIAG308_IPL_TYPE_CCW))
+ if (ipl_block_valid && (ipl_block.pb0_hdr.pbt == IPL_PBT_CCW))
ipl_block_get_ascii_vmparm(parm, sizeof(parm), &ipl_block);
return sprintf(page, "%s\n", parm);
}
@@ -287,12 +315,11 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
{
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- return sprintf(page, "0.%x.%04x\n", ipl_block.ipl_info.ccw.ssid,
- ipl_block.ipl_info.ccw.devno);
+ return sprintf(page, "0.%x.%04x\n", ipl_block.ccw.ssid,
+ ipl_block.ccw.devno);
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
- return sprintf(page, "0.0.%04x\n",
- ipl_block.ipl_info.fcp.devno);
+ return sprintf(page, "0.0.%04x\n", ipl_block.fcp.devno);
default:
return 0;
}
@@ -316,8 +343,8 @@ static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t off, size_t count)
{
- unsigned int size = ipl_block.ipl_info.fcp.scp_data_len;
- void *scp_data = &ipl_block.ipl_info.fcp.scp_data;
+ unsigned int size = ipl_block.fcp.scp_data_len;
+ void *scp_data = &ipl_block.fcp.scp_data;
return memory_read_from_buffer(buf, count, &off, scp_data, size);
}
@@ -333,13 +360,13 @@ static struct bin_attribute *ipl_fcp_bin_attrs[] = {
/* FCP ipl device attributes */
DEFINE_IPL_ATTR_RO(ipl_fcp, wwpn, "0x%016llx\n",
- (unsigned long long)ipl_block.ipl_info.fcp.wwpn);
+ (unsigned long long)ipl_block.fcp.wwpn);
DEFINE_IPL_ATTR_RO(ipl_fcp, lun, "0x%016llx\n",
- (unsigned long long)ipl_block.ipl_info.fcp.lun);
+ (unsigned long long)ipl_block.fcp.lun);
DEFINE_IPL_ATTR_RO(ipl_fcp, bootprog, "%lld\n",
- (unsigned long long)ipl_block.ipl_info.fcp.bootprog);
+ (unsigned long long)ipl_block.fcp.bootprog);
DEFINE_IPL_ATTR_RO(ipl_fcp, br_lba, "%lld\n",
- (unsigned long long)ipl_block.ipl_info.fcp.br_lba);
+ (unsigned long long)ipl_block.fcp.br_lba);
static ssize_t ipl_ccw_loadparm_show(struct kobject *kobj,
struct kobj_attribute *attr, char *page)
@@ -365,6 +392,8 @@ static struct attribute *ipl_fcp_attrs[] = {
&sys_ipl_fcp_bootprog_attr.attr,
&sys_ipl_fcp_br_lba_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
NULL,
};
@@ -380,6 +409,8 @@ static struct attribute *ipl_ccw_attrs_vm[] = {
&sys_ipl_device_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
&sys_ipl_vm_parm_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
NULL,
};
@@ -387,6 +418,8 @@ static struct attribute *ipl_ccw_attrs_lpar[] = {
&sys_ipl_type_attr.attr,
&sys_ipl_device_attr.attr,
&sys_ipl_ccw_loadparm_attr.attr,
+ &sys_ipl_secure_attr.attr,
+ &sys_ipl_has_secure_attr.attr,
NULL,
};
@@ -495,14 +528,14 @@ static ssize_t reipl_generic_vmparm_store(struct ipl_parameter_block *ipb,
if (!(isalnum(buf[i]) || isascii(buf[i]) || isprint(buf[i])))
return -EINVAL;
- memset(ipb->ipl_info.ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
- ipb->ipl_info.ccw.vm_parm_len = ip_len;
+ memset(ipb->ccw.vm_parm, 0, DIAG308_VMPARM_SIZE);
+ ipb->ccw.vm_parm_len = ip_len;
if (ip_len > 0) {
- ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
- memcpy(ipb->ipl_info.ccw.vm_parm, buf, ip_len);
- ASCEBC(ipb->ipl_info.ccw.vm_parm, ip_len);
+ ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
+ memcpy(ipb->ccw.vm_parm, buf, ip_len);
+ ASCEBC(ipb->ccw.vm_parm, ip_len);
} else {
- ipb->ipl_info.ccw.vm_flags &= ~DIAG308_VM_FLAGS_VP_VALID;
+ ipb->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_VP;
}
return len;
@@ -549,8 +582,8 @@ static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr,
char *buf, loff_t off, size_t count)
{
- size_t size = reipl_block_fcp->ipl_info.fcp.scp_data_len;
- void *scp_data = reipl_block_fcp->ipl_info.fcp.scp_data;
+ size_t size = reipl_block_fcp->fcp.scp_data_len;
+ void *scp_data = reipl_block_fcp->fcp.scp_data;
return memory_read_from_buffer(buf, count, &off, scp_data, size);
}
@@ -566,17 +599,17 @@ static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
if (off)
return -EINVAL;
- memcpy(reipl_block_fcp->ipl_info.fcp.scp_data, buf, count);
+ memcpy(reipl_block_fcp->fcp.scp_data, buf, count);
if (scpdata_len % 8) {
padding = 8 - (scpdata_len % 8);
- memset(reipl_block_fcp->ipl_info.fcp.scp_data + scpdata_len,
+ memset(reipl_block_fcp->fcp.scp_data + scpdata_len,
0, padding);
scpdata_len += padding;
}
- reipl_block_fcp->ipl_info.fcp.scp_data_len = scpdata_len;
- reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN + scpdata_len;
- reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN + scpdata_len;
+ reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN + scpdata_len;
+ reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN + scpdata_len;
+ reipl_block_fcp->fcp.scp_data_len = scpdata_len;
return count;
}
@@ -590,20 +623,20 @@ static struct bin_attribute *reipl_fcp_bin_attrs[] = {
};
DEFINE_IPL_ATTR_RW(reipl_fcp, wwpn, "0x%016llx\n", "%llx\n",
- reipl_block_fcp->ipl_info.fcp.wwpn);
+ reipl_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(reipl_fcp, lun, "0x%016llx\n", "%llx\n",
- reipl_block_fcp->ipl_info.fcp.lun);
+ reipl_block_fcp->fcp.lun);
DEFINE_IPL_ATTR_RW(reipl_fcp, bootprog, "%lld\n", "%lld\n",
- reipl_block_fcp->ipl_info.fcp.bootprog);
+ reipl_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(reipl_fcp, br_lba, "%lld\n", "%lld\n",
- reipl_block_fcp->ipl_info.fcp.br_lba);
+ reipl_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(reipl_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
- reipl_block_fcp->ipl_info.fcp.devno);
+ reipl_block_fcp->fcp.devno);
static void reipl_get_ascii_loadparm(char *loadparm,
struct ipl_parameter_block *ibp)
{
- memcpy(loadparm, ibp->hdr.loadparm, LOADPARM_LEN);
+ memcpy(loadparm, ibp->common.loadparm, LOADPARM_LEN);
EBCASC(loadparm, LOADPARM_LEN);
loadparm[LOADPARM_LEN] = 0;
strim(loadparm);
@@ -638,11 +671,11 @@ static ssize_t reipl_generic_loadparm_store(struct ipl_parameter_block *ipb,
return -EINVAL;
}
/* initialize loadparm with blanks */
- memset(ipb->hdr.loadparm, ' ', LOADPARM_LEN);
+ memset(ipb->common.loadparm, ' ', LOADPARM_LEN);
/* copy and convert to ebcdic */
- memcpy(ipb->hdr.loadparm, buf, lp_len);
- ASCEBC(ipb->hdr.loadparm, LOADPARM_LEN);
- ipb->hdr.flags |= DIAG308_FLAGS_LP_VALID;
+ memcpy(ipb->common.loadparm, buf, lp_len);
+ ASCEBC(ipb->common.loadparm, LOADPARM_LEN);
+ ipb->common.flags |= IPL_PB0_FLAG_LOADPARM;
return len;
}
@@ -680,7 +713,7 @@ static struct attribute_group reipl_fcp_attr_group = {
};
/* CCW reipl device attributes */
-DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ipl_info.ccw);
+DEFINE_IPL_CCW_ATTR_RW(reipl_ccw, device, reipl_block_ccw->ccw);
/* NSS wrapper */
static ssize_t reipl_nss_loadparm_show(struct kobject *kobj,
@@ -742,7 +775,7 @@ static struct attribute_group reipl_ccw_attr_group_lpar = {
static void reipl_get_ascii_nss_name(char *dst,
struct ipl_parameter_block *ipb)
{
- memcpy(dst, ipb->ipl_info.ccw.nss_name, NSS_NAME_SIZE);
+ memcpy(dst, ipb->ccw.nss_name, NSS_NAME_SIZE);
EBCASC(dst, NSS_NAME_SIZE);
dst[NSS_NAME_SIZE] = 0;
}
@@ -770,16 +803,14 @@ static ssize_t reipl_nss_name_store(struct kobject *kobj,
if (nss_len > NSS_NAME_SIZE)
return -EINVAL;
- memset(reipl_block_nss->ipl_info.ccw.nss_name, 0x40, NSS_NAME_SIZE);
+ memset(reipl_block_nss->ccw.nss_name, 0x40, NSS_NAME_SIZE);
if (nss_len > 0) {
- reipl_block_nss->ipl_info.ccw.vm_flags |=
- DIAG308_VM_FLAGS_NSS_VALID;
- memcpy(reipl_block_nss->ipl_info.ccw.nss_name, buf, nss_len);
- ASCEBC(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
- EBC_TOUPPER(reipl_block_nss->ipl_info.ccw.nss_name, nss_len);
+ reipl_block_nss->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_NSS;
+ memcpy(reipl_block_nss->ccw.nss_name, buf, nss_len);
+ ASCEBC(reipl_block_nss->ccw.nss_name, nss_len);
+ EBC_TOUPPER(reipl_block_nss->ccw.nss_name, nss_len);
} else {
- reipl_block_nss->ipl_info.ccw.vm_flags &=
- ~DIAG308_VM_FLAGS_NSS_VALID;
+ reipl_block_nss->ccw.vm_flags &= ~IPL_PB0_CCW_VM_FLAG_NSS;
}
return len;
@@ -866,15 +897,21 @@ static void __reipl_run(void *unused)
{
switch (reipl_type) {
case IPL_TYPE_CCW:
+ uv_set_shared(__pa(reipl_block_ccw));
diag308(DIAG308_SET, reipl_block_ccw);
+ uv_remove_shared(__pa(reipl_block_ccw));
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case IPL_TYPE_FCP:
+ uv_set_shared(__pa(reipl_block_fcp));
diag308(DIAG308_SET, reipl_block_fcp);
+ uv_remove_shared(__pa(reipl_block_fcp));
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case IPL_TYPE_NSS:
+ uv_set_shared(__pa(reipl_block_nss));
diag308(DIAG308_SET, reipl_block_nss);
+ uv_remove_shared(__pa(reipl_block_nss));
diag308(DIAG308_LOAD_CLEAR, NULL);
break;
case IPL_TYPE_UNKNOWN:
@@ -883,7 +920,7 @@ static void __reipl_run(void *unused)
case IPL_TYPE_FCP_DUMP:
break;
}
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
}
static void reipl_run(struct shutdown_trigger *trigger)
@@ -893,10 +930,10 @@ static void reipl_run(struct shutdown_trigger *trigger)
static void reipl_block_ccw_init(struct ipl_parameter_block *ipb)
{
- ipb->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ ipb->hdr.len = IPL_BP_CCW_LEN;
ipb->hdr.version = IPL_PARM_BLOCK_VERSION;
- ipb->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
- ipb->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+ ipb->pb0_hdr.len = IPL_BP0_CCW_LEN;
+ ipb->pb0_hdr.pbt = IPL_PBT_CCW;
}
static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
@@ -904,21 +941,20 @@ static void reipl_block_ccw_fill_parms(struct ipl_parameter_block *ipb)
/* LOADPARM */
/* check if read scp info worked and set loadparm */
if (sclp_ipl_info.is_valid)
- memcpy(ipb->hdr.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
+ memcpy(ipb->ccw.loadparm, &sclp_ipl_info.loadparm, LOADPARM_LEN);
else
/* read scp info failed: set empty loadparm (EBCDIC blanks) */
- memset(ipb->hdr.loadparm, 0x40, LOADPARM_LEN);
- ipb->hdr.flags = DIAG308_FLAGS_LP_VALID;
+ memset(ipb->ccw.loadparm, 0x40, LOADPARM_LEN);
+ ipb->ccw.flags = IPL_PB0_FLAG_LOADPARM;
/* VM PARM */
if (MACHINE_IS_VM && ipl_block_valid &&
- (ipl_block.ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID)) {
+ (ipl_block.ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP)) {
- ipb->ipl_info.ccw.vm_flags |= DIAG308_VM_FLAGS_VP_VALID;
- ipb->ipl_info.ccw.vm_parm_len =
- ipl_block.ipl_info.ccw.vm_parm_len;
- memcpy(ipb->ipl_info.ccw.vm_parm,
- ipl_block.ipl_info.ccw.vm_parm, DIAG308_VMPARM_SIZE);
+ ipb->ccw.vm_flags |= IPL_PB0_CCW_VM_FLAG_VP;
+ ipb->ccw.vm_parm_len = ipl_block.ccw.vm_parm_len;
+ memcpy(ipb->ccw.vm_parm,
+ ipl_block.ccw.vm_parm, DIAG308_VMPARM_SIZE);
}
}
@@ -958,8 +994,8 @@ static int __init reipl_ccw_init(void)
reipl_block_ccw_init(reipl_block_ccw);
if (ipl_info.type == IPL_TYPE_CCW) {
- reipl_block_ccw->ipl_info.ccw.ssid = ipl_block.ipl_info.ccw.ssid;
- reipl_block_ccw->ipl_info.ccw.devno = ipl_block.ipl_info.ccw.devno;
+ reipl_block_ccw->ccw.ssid = ipl_block.ccw.ssid;
+ reipl_block_ccw->ccw.devno = ipl_block.ccw.devno;
reipl_block_ccw_fill_parms(reipl_block_ccw);
}
@@ -997,14 +1033,14 @@ static int __init reipl_fcp_init(void)
* is invalid in the SCSI IPL parameter block, so take it
* always from sclp_ipl_info.
*/
- memcpy(reipl_block_fcp->hdr.loadparm, sclp_ipl_info.loadparm,
+ memcpy(reipl_block_fcp->fcp.loadparm, sclp_ipl_info.loadparm,
LOADPARM_LEN);
} else {
- reipl_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ reipl_block_fcp->hdr.len = IPL_BP_FCP_LEN;
reipl_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
- reipl_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
- reipl_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
- reipl_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_IPL;
+ reipl_block_fcp->fcp.len = IPL_BP0_FCP_LEN;
+ reipl_block_fcp->fcp.pbt = IPL_PBT_FCP;
+ reipl_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_IPL;
}
reipl_capabilities |= IPL_TYPE_FCP;
return 0;
@@ -1022,10 +1058,10 @@ static int __init reipl_type_init(void)
/*
* If we have an OS info reipl block, this will be used
*/
- if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_FCP) {
+ if (reipl_block->pb0_hdr.pbt == IPL_PBT_FCP) {
memcpy(reipl_block_fcp, reipl_block, size);
reipl_type = IPL_TYPE_FCP;
- } else if (reipl_block->hdr.pbt == DIAG308_IPL_TYPE_CCW) {
+ } else if (reipl_block->pb0_hdr.pbt == IPL_PBT_CCW) {
memcpy(reipl_block_ccw, reipl_block, size);
reipl_type = IPL_TYPE_CCW;
}
@@ -1070,15 +1106,15 @@ static struct shutdown_action __refdata reipl_action = {
/* FCP dump device attributes */
DEFINE_IPL_ATTR_RW(dump_fcp, wwpn, "0x%016llx\n", "%llx\n",
- dump_block_fcp->ipl_info.fcp.wwpn);
+ dump_block_fcp->fcp.wwpn);
DEFINE_IPL_ATTR_RW(dump_fcp, lun, "0x%016llx\n", "%llx\n",
- dump_block_fcp->ipl_info.fcp.lun);
+ dump_block_fcp->fcp.lun);
DEFINE_IPL_ATTR_RW(dump_fcp, bootprog, "%lld\n", "%lld\n",
- dump_block_fcp->ipl_info.fcp.bootprog);
+ dump_block_fcp->fcp.bootprog);
DEFINE_IPL_ATTR_RW(dump_fcp, br_lba, "%lld\n", "%lld\n",
- dump_block_fcp->ipl_info.fcp.br_lba);
+ dump_block_fcp->fcp.br_lba);
DEFINE_IPL_ATTR_RW(dump_fcp, device, "0.0.%04llx\n", "0.0.%llx\n",
- dump_block_fcp->ipl_info.fcp.devno);
+ dump_block_fcp->fcp.devno);
static struct attribute *dump_fcp_attrs[] = {
&sys_dump_fcp_device_attr.attr,
@@ -1095,7 +1131,7 @@ static struct attribute_group dump_fcp_attr_group = {
};
/* CCW dump device attributes */
-DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ipl_info.ccw);
+DEFINE_IPL_CCW_ATTR_RW(dump_ccw, device, dump_block_ccw->ccw);
static struct attribute *dump_ccw_attrs[] = {
&sys_dump_ccw_device_attr.attr,
@@ -1145,7 +1181,9 @@ static struct kset *dump_kset;
static void diag308_dump(void *dump_block)
{
+ uv_set_shared(__pa(dump_block));
diag308(DIAG308_SET, dump_block);
+ uv_remove_shared(__pa(dump_block));
while (1) {
if (diag308(DIAG308_LOAD_NORMAL_DUMP, NULL) != 0x302)
break;
@@ -1187,10 +1225,10 @@ static int __init dump_ccw_init(void)
free_page((unsigned long)dump_block_ccw);
return rc;
}
- dump_block_ccw->hdr.len = IPL_PARM_BLK_CCW_LEN;
+ dump_block_ccw->hdr.len = IPL_BP_CCW_LEN;
dump_block_ccw->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_ccw->hdr.blk0_len = IPL_PARM_BLK0_CCW_LEN;
- dump_block_ccw->hdr.pbt = DIAG308_IPL_TYPE_CCW;
+ dump_block_ccw->ccw.len = IPL_BP0_CCW_LEN;
+ dump_block_ccw->ccw.pbt = IPL_PBT_CCW;
dump_capabilities |= DUMP_TYPE_CCW;
return 0;
}
@@ -1209,11 +1247,11 @@ static int __init dump_fcp_init(void)
free_page((unsigned long)dump_block_fcp);
return rc;
}
- dump_block_fcp->hdr.len = IPL_PARM_BLK_FCP_LEN;
+ dump_block_fcp->hdr.len = IPL_BP_FCP_LEN;
dump_block_fcp->hdr.version = IPL_PARM_BLOCK_VERSION;
- dump_block_fcp->hdr.blk0_len = IPL_PARM_BLK0_FCP_LEN;
- dump_block_fcp->hdr.pbt = DIAG308_IPL_TYPE_FCP;
- dump_block_fcp->ipl_info.fcp.opt = DIAG308_IPL_OPT_DUMP;
+ dump_block_fcp->fcp.len = IPL_BP0_FCP_LEN;
+ dump_block_fcp->fcp.pbt = IPL_PBT_FCP;
+ dump_block_fcp->fcp.opt = IPL_PB0_FCP_OPT_DUMP;
dump_capabilities |= DUMP_TYPE_FCP;
return 0;
}
@@ -1337,7 +1375,7 @@ static void stop_run(struct shutdown_trigger *trigger)
{
if (strcmp(trigger->name, ON_PANIC_STR) == 0 ||
strcmp(trigger->name, ON_RESTART_STR) == 0)
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
smp_stop_cpu();
}
@@ -1572,7 +1610,7 @@ static int __init s390_ipl_init(void)
* READ SCP info provides the correct value.
*/
if (memcmp(sclp_ipl_info.loadparm, str, sizeof(str)) == 0 && ipl_block_valid)
- memcpy(sclp_ipl_info.loadparm, ipl_block.hdr.loadparm, LOADPARM_LEN);
+ memcpy(sclp_ipl_info.loadparm, ipl_block.ccw.loadparm, LOADPARM_LEN);
shutdown_actions_init();
shutdown_triggers_init();
return 0;
@@ -1657,15 +1695,15 @@ void __init setup_ipl(void)
ipl_info.type = get_ipl_type();
switch (ipl_info.type) {
case IPL_TYPE_CCW:
- ipl_info.data.ccw.dev_id.ssid = ipl_block.ipl_info.ccw.ssid;
- ipl_info.data.ccw.dev_id.devno = ipl_block.ipl_info.ccw.devno;
+ ipl_info.data.ccw.dev_id.ssid = ipl_block.ccw.ssid;
+ ipl_info.data.ccw.dev_id.devno = ipl_block.ccw.devno;
break;
case IPL_TYPE_FCP:
case IPL_TYPE_FCP_DUMP:
ipl_info.data.fcp.dev_id.ssid = 0;
- ipl_info.data.fcp.dev_id.devno = ipl_block.ipl_info.fcp.devno;
- ipl_info.data.fcp.wwpn = ipl_block.ipl_info.fcp.wwpn;
- ipl_info.data.fcp.lun = ipl_block.ipl_info.fcp.lun;
+ ipl_info.data.fcp.dev_id.devno = ipl_block.fcp.devno;
+ ipl_info.data.fcp.wwpn = ipl_block.fcp.wwpn;
+ ipl_info.data.fcp.lun = ipl_block.fcp.lun;
break;
case IPL_TYPE_NSS:
case IPL_TYPE_UNKNOWN:
@@ -1675,14 +1713,6 @@ void __init setup_ipl(void)
atomic_notifier_chain_register(&panic_notifier_list, &on_panic_nb);
}
-void __init ipl_store_parameters(void)
-{
- if (early_ipl_block_valid) {
- memcpy(&ipl_block, &early_ipl_block, sizeof(ipl_block));
- ipl_block_valid = 1;
- }
-}
-
void s390_reset_system(void)
{
/* Disable prefixing */
@@ -1690,5 +1720,139 @@ void s390_reset_system(void)
/* Disable lowcore protection */
__ctl_clear_bit(0, 28);
- diag308_reset();
+ diag_dma_ops.diag308_reset();
+}
+
+#ifdef CONFIG_KEXEC_FILE
+
+int ipl_report_add_component(struct ipl_report *report, struct kexec_buf *kbuf,
+ unsigned char flags, unsigned short cert)
+{
+ struct ipl_report_component *comp;
+
+ comp = vzalloc(sizeof(*comp));
+ if (!comp)
+ return -ENOMEM;
+ list_add_tail(&comp->list, &report->components);
+
+ comp->entry.addr = kbuf->mem;
+ comp->entry.len = kbuf->memsz;
+ comp->entry.flags = flags;
+ comp->entry.certificate_index = cert;
+
+ report->size += sizeof(comp->entry);
+
+ return 0;
+}
+
+int ipl_report_add_certificate(struct ipl_report *report, void *key,
+ unsigned long addr, unsigned long len)
+{
+ struct ipl_report_certificate *cert;
+
+ cert = vzalloc(sizeof(*cert));
+ if (!cert)
+ return -ENOMEM;
+ list_add_tail(&cert->list, &report->certificates);
+
+ cert->entry.addr = addr;
+ cert->entry.len = len;
+ cert->key = key;
+
+ report->size += sizeof(cert->entry);
+ report->size += cert->entry.len;
+
+ return 0;
+}
+
+struct ipl_report *ipl_report_init(struct ipl_parameter_block *ipib)
+{
+ struct ipl_report *report;
+
+ report = vzalloc(sizeof(*report));
+ if (!report)
+ return ERR_PTR(-ENOMEM);
+
+ report->ipib = ipib;
+ INIT_LIST_HEAD(&report->components);
+ INIT_LIST_HEAD(&report->certificates);
+
+ report->size = ALIGN(ipib->hdr.len, 8);
+ report->size += sizeof(struct ipl_rl_hdr);
+ report->size += sizeof(struct ipl_rb_components);
+ report->size += sizeof(struct ipl_rb_certificates);
+
+ return report;
+}
+
+void *ipl_report_finish(struct ipl_report *report)
+{
+ struct ipl_report_certificate *cert;
+ struct ipl_report_component *comp;
+ struct ipl_rb_certificates *certs;
+ struct ipl_parameter_block *ipib;
+ struct ipl_rb_components *comps;
+ struct ipl_rl_hdr *rl_hdr;
+ void *buf, *ptr;
+
+ buf = vzalloc(report->size);
+ if (!buf)
+ return ERR_PTR(-ENOMEM);
+ ptr = buf;
+
+ memcpy(ptr, report->ipib, report->ipib->hdr.len);
+ ipib = ptr;
+ if (ipl_secure_flag)
+ ipib->hdr.flags |= IPL_PL_FLAG_SIPL;
+ ipib->hdr.flags |= IPL_PL_FLAG_IPLSR;
+ ptr += report->ipib->hdr.len;
+ ptr = PTR_ALIGN(ptr, 8);
+
+ rl_hdr = ptr;
+ ptr += sizeof(*rl_hdr);
+
+ comps = ptr;
+ comps->rbt = IPL_RBT_COMPONENTS;
+ ptr += sizeof(*comps);
+ list_for_each_entry(comp, &report->components, list) {
+ memcpy(ptr, &comp->entry, sizeof(comp->entry));
+ ptr += sizeof(comp->entry);
+ }
+ comps->len = ptr - (void *)comps;
+
+ certs = ptr;
+ certs->rbt = IPL_RBT_CERTIFICATES;
+ ptr += sizeof(*certs);
+ list_for_each_entry(cert, &report->certificates, list) {
+ memcpy(ptr, &cert->entry, sizeof(cert->entry));
+ ptr += sizeof(cert->entry);
+ }
+ certs->len = ptr - (void *)certs;
+ rl_hdr->len = ptr - (void *)rl_hdr;
+
+ list_for_each_entry(cert, &report->certificates, list) {
+ memcpy(ptr, cert->key, cert->entry.len);
+ ptr += cert->entry.len;
+ }
+
+ BUG_ON(ptr > buf + report->size);
+ return buf;
+}
+
+int ipl_report_free(struct ipl_report *report)
+{
+ struct ipl_report_component *comp, *ncomp;
+ struct ipl_report_certificate *cert, *ncert;
+
+ list_for_each_entry_safe(comp, ncomp, &report->components, list)
+ vfree(comp);
+
+ list_for_each_entry_safe(cert, ncert, &report->certificates, list)
+ vfree(cert);
+
+ vfree(report);
+
+ return 0;
}
+
+#endif
diff --git a/arch/s390/kernel/ipl_vmparm.c b/arch/s390/kernel/ipl_vmparm.c
index 411838c0a0af..af43535a976d 100644
--- a/arch/s390/kernel/ipl_vmparm.c
+++ b/arch/s390/kernel/ipl_vmparm.c
@@ -11,11 +11,11 @@ size_t ipl_block_get_ascii_vmparm(char *dest, size_t size,
char has_lowercase = 0;
len = 0;
- if ((ipb->ipl_info.ccw.vm_flags & DIAG308_VM_FLAGS_VP_VALID) &&
- (ipb->ipl_info.ccw.vm_parm_len > 0)) {
+ if ((ipb->ccw.vm_flags & IPL_PB0_CCW_VM_FLAG_VP) &&
+ (ipb->ccw.vm_parm_len > 0)) {
- len = min_t(size_t, size - 1, ipb->ipl_info.ccw.vm_parm_len);
- memcpy(dest, ipb->ipl_info.ccw.vm_parm, len);
+ len = min_t(size_t, size - 1, ipb->ccw.vm_parm_len);
+ memcpy(dest, ipb->ccw.vm_parm, len);
/* If at least one character is lowercase, we assume mixed
* case; otherwise we convert everything to lowercase.
*/
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 0cd5a5f96729..8371855042dc 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -26,6 +26,7 @@
#include <asm/lowcore.h>
#include <asm/irq.h>
#include <asm/hw_irq.h>
+#include <asm/stacktrace.h>
#include "entry.h"
DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -73,7 +74,6 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQEXT_CMC, .name = "CMC", .desc = "[EXT] CPU-Measurement: Counter"},
{.irq = IRQEXT_FTP, .name = "FTP", .desc = "[EXT] HMC FTP Service"},
{.irq = IRQIO_CIO, .name = "CIO", .desc = "[I/O] Common I/O Layer Interrupt"},
- {.irq = IRQIO_QAI, .name = "QAI", .desc = "[I/O] QDIO Adapter Interrupt"},
{.irq = IRQIO_DAS, .name = "DAS", .desc = "[I/O] DASD"},
{.irq = IRQIO_C15, .name = "C15", .desc = "[I/O] 3215"},
{.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"},
@@ -81,14 +81,16 @@ static const struct irq_class irqclass_sub_desc[] = {
{.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"},
{.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"},
{.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"},
- {.irq = IRQIO_APB, .name = "APB", .desc = "[I/O] AP Bus"},
{.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"},
{.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"},
- {.irq = IRQIO_PCI, .name = "PCI", .desc = "[I/O] PCI Interrupt" },
- {.irq = IRQIO_MSI, .name = "MSI", .desc = "[I/O] MSI Interrupt" },
{.irq = IRQIO_VIR, .name = "VIR", .desc = "[I/O] Virtual I/O Devices"},
- {.irq = IRQIO_VAI, .name = "VAI", .desc = "[I/O] Virtual I/O Devices AI"},
- {.irq = IRQIO_GAL, .name = "GAL", .desc = "[I/O] GIB Alert"},
+ {.irq = IRQIO_QAI, .name = "QAI", .desc = "[AIO] QDIO Adapter Interrupt"},
+ {.irq = IRQIO_APB, .name = "APB", .desc = "[AIO] AP Bus"},
+ {.irq = IRQIO_PCF, .name = "PCF", .desc = "[AIO] PCI Floating Interrupt"},
+ {.irq = IRQIO_PCD, .name = "PCD", .desc = "[AIO] PCI Directed Interrupt"},
+ {.irq = IRQIO_MSI, .name = "MSI", .desc = "[AIO] MSI Interrupt"},
+ {.irq = IRQIO_VAI, .name = "VAI", .desc = "[AIO] Virtual I/O Devices AI"},
+ {.irq = IRQIO_GAL, .name = "GAL", .desc = "[AIO] GIB Alert"},
{.irq = NMI_NMI, .name = "NMI", .desc = "[NMI] Machine Check"},
{.irq = CPU_RST, .name = "RST", .desc = "[CPU] CPU Restart"},
};
@@ -116,6 +118,34 @@ void do_IRQ(struct pt_regs *regs, int irq)
set_irq_regs(old_regs);
}
+static void show_msi_interrupt(struct seq_file *p, int irq)
+{
+ struct irq_desc *desc;
+ unsigned long flags;
+ int cpu;
+
+ irq_lock_sparse();
+ desc = irq_to_desc(irq);
+ if (!desc)
+ goto out;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ seq_printf(p, "%3d: ", irq);
+ for_each_online_cpu(cpu)
+ seq_printf(p, "%10u ", kstat_irqs_cpu(irq, cpu));
+
+ if (desc->irq_data.chip)
+ seq_printf(p, " %8s", desc->irq_data.chip->name);
+
+ if (desc->action)
+ seq_printf(p, " %s", desc->action->name);
+
+ seq_putc(p, '\n');
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+out:
+ irq_unlock_sparse();
+}
+
/*
* show_interrupts is needed by /proc/interrupts.
*/
@@ -128,7 +158,7 @@ int show_interrupts(struct seq_file *p, void *v)
if (index == 0) {
seq_puts(p, " ");
for_each_online_cpu(cpu)
- seq_printf(p, "CPU%d ", cpu);
+ seq_printf(p, "CPU%-8d", cpu);
seq_putc(p, '\n');
}
if (index < NR_IRQS_BASE) {
@@ -139,9 +169,10 @@ int show_interrupts(struct seq_file *p, void *v)
seq_putc(p, '\n');
goto out;
}
- if (index > NR_IRQS_BASE)
+ if (index < nr_irqs) {
+ show_msi_interrupt(p, index);
goto out;
-
+ }
for (index = 0; index < NR_ARCH_IRQS; index++) {
seq_printf(p, "%s: ", irqclass_sub_desc[index].name);
irq = irqclass_sub_desc[index].irq;
diff --git a/arch/s390/kernel/kexec_elf.c b/arch/s390/kernel/kexec_elf.c
index 5a286b012043..6d0635ceddd0 100644
--- a/arch/s390/kernel/kexec_elf.c
+++ b/arch/s390/kernel/kexec_elf.c
@@ -10,19 +10,26 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
+#include <asm/ipl.h>
#include <asm/setup.h>
-static int kexec_file_add_elf_kernel(struct kimage *image,
- struct s390_load_data *data,
- char *kernel, unsigned long kernel_len)
+static int kexec_file_add_kernel_elf(struct kimage *image,
+ struct s390_load_data *data)
{
struct kexec_buf buf;
const Elf_Ehdr *ehdr;
const Elf_Phdr *phdr;
+ Elf_Addr entry;
+ void *kernel;
int i, ret;
+ kernel = image->kernel_buf;
ehdr = (Elf_Ehdr *)kernel;
buf.image = image;
+ if (image->type == KEXEC_TYPE_CRASH)
+ entry = STARTUP_KDUMP_OFFSET;
+ else
+ entry = ehdr->e_entry;
phdr = (void *)ehdr + ehdr->e_phoff;
for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
@@ -33,30 +40,27 @@ static int kexec_file_add_elf_kernel(struct kimage *image,
buf.bufsz = phdr->p_filesz;
buf.mem = ALIGN(phdr->p_paddr, phdr->p_align);
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
buf.memsz = phdr->p_memsz;
+ data->memsz = ALIGN(data->memsz, phdr->p_align) + buf.memsz;
- if (phdr->p_paddr == 0) {
+ if (entry - phdr->p_paddr < phdr->p_memsz) {
data->kernel_buf = buf.buffer;
- data->memsz += STARTUP_NORMAL_OFFSET;
-
- buf.buffer += STARTUP_NORMAL_OFFSET;
- buf.bufsz -= STARTUP_NORMAL_OFFSET;
-
- buf.mem += STARTUP_NORMAL_OFFSET;
- buf.memsz -= STARTUP_NORMAL_OFFSET;
+ data->kernel_mem = buf.mem;
+ data->parm = buf.buffer + PARMAREA;
}
- if (image->type == KEXEC_TYPE_CRASH)
- buf.mem += crashk_res.start;
-
+ ipl_report_add_component(data->report, &buf,
+ IPL_RB_COMPONENT_FLAG_SIGNED |
+ IPL_RB_COMPONENT_FLAG_VERIFIED,
+ IPL_RB_CERT_UNKNOWN);
ret = kexec_add_buffer(&buf);
if (ret)
return ret;
-
- data->memsz += buf.memsz;
}
- return 0;
+ return data->memsz ? 0 : -EINVAL;
}
static void *s390_elf_load(struct kimage *image,
@@ -64,11 +68,10 @@ static void *s390_elf_load(struct kimage *image,
char *initrd, unsigned long initrd_len,
char *cmdline, unsigned long cmdline_len)
{
- struct s390_load_data data = {0};
const Elf_Ehdr *ehdr;
const Elf_Phdr *phdr;
size_t size;
- int i, ret;
+ int i;
/* image->fobs->probe already checked for valid ELF magic number. */
ehdr = (Elf_Ehdr *)kernel;
@@ -101,24 +104,7 @@ static void *s390_elf_load(struct kimage *image,
if (size > kernel_len)
return ERR_PTR(-EINVAL);
- ret = kexec_file_add_elf_kernel(image, &data, kernel, kernel_len);
- if (ret)
- return ERR_PTR(ret);
-
- if (!data.memsz)
- return ERR_PTR(-EINVAL);
-
- if (initrd) {
- ret = kexec_file_add_initrd(image, &data, initrd, initrd_len);
- if (ret)
- return ERR_PTR(ret);
- }
-
- ret = kexec_file_add_purgatory(image, &data);
- if (ret)
- return ERR_PTR(ret);
-
- return kexec_file_update_kernel(image, &data);
+ return kexec_file_add_components(image, kexec_file_add_kernel_elf);
}
static int s390_elf_probe(const char *buf, unsigned long len)
@@ -144,4 +130,7 @@ static int s390_elf_probe(const char *buf, unsigned long len)
const struct kexec_file_ops s390_kexec_elf_ops = {
.probe = s390_elf_probe,
.load = s390_elf_load,
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+ .verify_sig = s390_verify_sig,
+#endif /* CONFIG_KEXEC_VERIFY_SIG */
};
diff --git a/arch/s390/kernel/kexec_image.c b/arch/s390/kernel/kexec_image.c
index 3800852595e8..58318bf89fd9 100644
--- a/arch/s390/kernel/kexec_image.c
+++ b/arch/s390/kernel/kexec_image.c
@@ -10,31 +10,34 @@
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/kexec.h>
+#include <asm/ipl.h>
#include <asm/setup.h>
-static int kexec_file_add_image_kernel(struct kimage *image,
- struct s390_load_data *data,
- char *kernel, unsigned long kernel_len)
+static int kexec_file_add_kernel_image(struct kimage *image,
+ struct s390_load_data *data)
{
struct kexec_buf buf;
- int ret;
buf.image = image;
- buf.buffer = kernel + STARTUP_NORMAL_OFFSET;
- buf.bufsz = kernel_len - STARTUP_NORMAL_OFFSET;
+ buf.buffer = image->kernel_buf;
+ buf.bufsz = image->kernel_buf_len;
- buf.mem = STARTUP_NORMAL_OFFSET;
+ buf.mem = 0;
if (image->type == KEXEC_TYPE_CRASH)
buf.mem += crashk_res.start;
buf.memsz = buf.bufsz;
- ret = kexec_add_buffer(&buf);
+ data->kernel_buf = image->kernel_buf;
+ data->kernel_mem = buf.mem;
+ data->parm = image->kernel_buf + PARMAREA;
+ data->memsz += buf.memsz;
- data->kernel_buf = kernel;
- data->memsz += buf.memsz + STARTUP_NORMAL_OFFSET;
-
- return ret;
+ ipl_report_add_component(data->report, &buf,
+ IPL_RB_COMPONENT_FLAG_SIGNED |
+ IPL_RB_COMPONENT_FLAG_VERIFIED,
+ IPL_RB_CERT_UNKNOWN);
+ return kexec_add_buffer(&buf);
}
static void *s390_image_load(struct kimage *image,
@@ -42,24 +45,7 @@ static void *s390_image_load(struct kimage *image,
char *initrd, unsigned long initrd_len,
char *cmdline, unsigned long cmdline_len)
{
- struct s390_load_data data = {0};
- int ret;
-
- ret = kexec_file_add_image_kernel(image, &data, kernel, kernel_len);
- if (ret)
- return ERR_PTR(ret);
-
- if (initrd) {
- ret = kexec_file_add_initrd(image, &data, initrd, initrd_len);
- if (ret)
- return ERR_PTR(ret);
- }
-
- ret = kexec_file_add_purgatory(image, &data);
- if (ret)
- return ERR_PTR(ret);
-
- return kexec_file_update_kernel(image, &data);
+ return kexec_file_add_components(image, kexec_file_add_kernel_image);
}
static int s390_image_probe(const char *buf, unsigned long len)
@@ -73,4 +59,7 @@ static int s390_image_probe(const char *buf, unsigned long len)
const struct kexec_file_ops s390_kexec_image_ops = {
.probe = s390_image_probe,
.load = s390_image_load,
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+ .verify_sig = s390_verify_sig,
+#endif /* CONFIG_KEXEC_VERIFY_SIG */
};
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index 7c0a095e9c5f..6f1388391620 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -27,29 +27,30 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = { };
-DEFINE_INSN_CACHE_OPS(dmainsn);
+DEFINE_INSN_CACHE_OPS(s390_insn);
-static void *alloc_dmainsn_page(void)
-{
- void *page;
+static int insn_page_in_use;
+static char insn_page[PAGE_SIZE] __aligned(PAGE_SIZE);
- page = (void *) __get_free_page(GFP_KERNEL | GFP_DMA);
- if (page)
- set_memory_x((unsigned long) page, 1);
- return page;
+static void *alloc_s390_insn_page(void)
+{
+ if (xchg(&insn_page_in_use, 1) == 1)
+ return NULL;
+ set_memory_x((unsigned long) &insn_page, 1);
+ return &insn_page;
}
-static void free_dmainsn_page(void *page)
+static void free_s390_insn_page(void *page)
{
set_memory_nx((unsigned long) page, 1);
- free_page((unsigned long)page);
+ xchg(&insn_page_in_use, 0);
}
-struct kprobe_insn_cache kprobe_dmainsn_slots = {
- .mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
- .alloc = alloc_dmainsn_page,
- .free = free_dmainsn_page,
- .pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
+struct kprobe_insn_cache kprobe_s390_insn_slots = {
+ .mutex = __MUTEX_INITIALIZER(kprobe_s390_insn_slots.mutex),
+ .alloc = alloc_s390_insn_page,
+ .free = free_s390_insn_page,
+ .pages = LIST_HEAD_INIT(kprobe_s390_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
};
@@ -102,7 +103,7 @@ static int s390_get_insn_slot(struct kprobe *p)
*/
p->ainsn.insn = NULL;
if (is_kernel_addr(p->addr))
- p->ainsn.insn = get_dmainsn_slot();
+ p->ainsn.insn = get_s390_insn_slot();
else if (is_module_addr(p->addr))
p->ainsn.insn = get_insn_slot();
return p->ainsn.insn ? 0 : -ENOMEM;
@@ -114,7 +115,7 @@ static void s390_free_insn_slot(struct kprobe *p)
if (!p->ainsn.insn)
return;
if (is_kernel_addr(p->addr))
- free_dmainsn_slot(p->ainsn.insn, 0);
+ free_s390_insn_slot(p->ainsn.insn, 0);
else
free_insn_slot(p->ainsn.insn, 0);
p->ainsn.insn = NULL;
@@ -572,7 +573,7 @@ static int kprobe_trap_handler(struct pt_regs *regs, int trapnr)
* In case the user-specified fault handler returned
* zero, try to fix up.
*/
- entry = search_exception_tables(regs->psw.addr);
+ entry = s390_search_extables(regs->psw.addr);
if (entry) {
regs->psw.addr = extable_fixup(entry);
return 1;
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index cb582649aba6..8a1ae140c5e2 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -27,6 +27,7 @@
#include <asm/cacheflush.h>
#include <asm/os_info.h>
#include <asm/set_memory.h>
+#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/nmi.h>
@@ -95,7 +96,7 @@ static void __do_machine_kdump(void *image)
start_kdump(1);
/* Die if start_kdump returns */
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
}
/*
@@ -253,6 +254,9 @@ void arch_crash_save_vmcoreinfo(void)
VMCOREINFO_SYMBOL(high_memory);
VMCOREINFO_LENGTH(lowcore_ptr, NR_CPUS);
mem_assign_absolute(S390_lowcore.vmcore_info, paddr_vmcoreinfo_note());
+ vmcoreinfo_append_str("SDMA=%lx\n", __sdma);
+ vmcoreinfo_append_str("EDMA=%lx\n", __edma);
+ vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
}
void machine_shutdown(void)
@@ -280,7 +284,7 @@ static void __do_machine_kexec(void *data)
(*data_mover)(&image->head, image->start);
/* Die if kexec returns */
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
}
/*
diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
index 32023b4f9dc0..fbdd3ea73667 100644
--- a/arch/s390/kernel/machine_kexec_file.c
+++ b/arch/s390/kernel/machine_kexec_file.c
@@ -8,7 +8,12 @@
*/
#include <linux/elf.h>
+#include <linux/errno.h>
#include <linux/kexec.h>
+#include <linux/module.h>
+#include <linux/verification.h>
+#include <asm/boot_data.h>
+#include <asm/ipl.h>
#include <asm/setup.h>
const struct kexec_file_ops * const kexec_file_loaders[] = {
@@ -17,38 +22,78 @@ const struct kexec_file_ops * const kexec_file_loaders[] = {
NULL,
};
-int *kexec_file_update_kernel(struct kimage *image,
- struct s390_load_data *data)
-{
- unsigned long *loc;
-
- if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE)
- return ERR_PTR(-EINVAL);
-
- if (image->cmdline_buf_len)
- memcpy(data->kernel_buf + COMMAND_LINE_OFFSET,
- image->cmdline_buf, image->cmdline_buf_len);
-
- if (image->type == KEXEC_TYPE_CRASH) {
- loc = (unsigned long *)(data->kernel_buf + OLDMEM_BASE_OFFSET);
- *loc = crashk_res.start;
-
- loc = (unsigned long *)(data->kernel_buf + OLDMEM_SIZE_OFFSET);
- *loc = crashk_res.end - crashk_res.start + 1;
- }
+#ifdef CONFIG_KEXEC_VERIFY_SIG
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ * - Signer's name
+ * - Key identifier
+ * - Signature data
+ * - Information block
+ */
+struct module_signature {
+ u8 algo; /* Public-key crypto algorithm [0] */
+ u8 hash; /* Digest algorithm [0] */
+ u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
+ u8 signer_len; /* Length of signer's name [0] */
+ u8 key_id_len; /* Length of key identifier [0] */
+ u8 __pad[3];
+ __be32 sig_len; /* Length of signature data */
+};
- if (image->initrd_buf) {
- loc = (unsigned long *)(data->kernel_buf + INITRD_START_OFFSET);
- *loc = data->initrd_load_addr;
+#define PKEY_ID_PKCS7 2
- loc = (unsigned long *)(data->kernel_buf + INITRD_SIZE_OFFSET);
- *loc = image->initrd_buf_len;
+int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+{
+ const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
+ struct module_signature *ms;
+ unsigned long sig_len;
+
+ /* Skip signature verification when not secure IPLed. */
+ if (!ipl_secure_flag)
+ return 0;
+
+ if (marker_len > kernel_len)
+ return -EKEYREJECTED;
+
+ if (memcmp(kernel + kernel_len - marker_len, MODULE_SIG_STRING,
+ marker_len))
+ return -EKEYREJECTED;
+ kernel_len -= marker_len;
+
+ ms = (void *)kernel + kernel_len - sizeof(*ms);
+ kernel_len -= sizeof(*ms);
+
+ sig_len = be32_to_cpu(ms->sig_len);
+ if (sig_len >= kernel_len)
+ return -EKEYREJECTED;
+ kernel_len -= sig_len;
+
+ if (ms->id_type != PKEY_ID_PKCS7)
+ return -EKEYREJECTED;
+
+ if (ms->algo != 0 ||
+ ms->hash != 0 ||
+ ms->signer_len != 0 ||
+ ms->key_id_len != 0 ||
+ ms->__pad[0] != 0 ||
+ ms->__pad[1] != 0 ||
+ ms->__pad[2] != 0) {
+ return -EBADMSG;
}
- return NULL;
+ return verify_pkcs7_signature(kernel, kernel_len,
+ kernel + kernel_len, sig_len,
+ VERIFY_USE_PLATFORM_KEYRING,
+ VERIFYING_MODULE_SIGNATURE,
+ NULL, NULL);
}
+#endif /* CONFIG_KEXEC_VERIFY_SIG */
-static int kexec_file_update_purgatory(struct kimage *image)
+static int kexec_file_update_purgatory(struct kimage *image,
+ struct s390_load_data *data)
{
u64 entry, type;
int ret;
@@ -90,7 +135,8 @@ static int kexec_file_update_purgatory(struct kimage *image)
return ret;
}
-int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data)
+static int kexec_file_add_purgatory(struct kimage *image,
+ struct s390_load_data *data)
{
struct kexec_buf buf;
int ret;
@@ -105,21 +151,21 @@ int kexec_file_add_purgatory(struct kimage *image, struct s390_load_data *data)
ret = kexec_load_purgatory(image, &buf);
if (ret)
return ret;
+ data->memsz += buf.memsz;
- ret = kexec_file_update_purgatory(image);
- return ret;
+ return kexec_file_update_purgatory(image, data);
}
-int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data,
- char *initrd, unsigned long initrd_len)
+static int kexec_file_add_initrd(struct kimage *image,
+ struct s390_load_data *data)
{
struct kexec_buf buf;
int ret;
buf.image = image;
- buf.buffer = initrd;
- buf.bufsz = initrd_len;
+ buf.buffer = image->initrd_buf;
+ buf.bufsz = image->initrd_buf_len;
data->memsz = ALIGN(data->memsz, PAGE_SIZE);
buf.mem = data->memsz;
@@ -127,11 +173,115 @@ int kexec_file_add_initrd(struct kimage *image, struct s390_load_data *data,
buf.mem += crashk_res.start;
buf.memsz = buf.bufsz;
- data->initrd_load_addr = buf.mem;
+ data->parm->initrd_start = buf.mem;
+ data->parm->initrd_size = buf.memsz;
data->memsz += buf.memsz;
ret = kexec_add_buffer(&buf);
- return ret;
+ if (ret)
+ return ret;
+
+ return ipl_report_add_component(data->report, &buf, 0, 0);
+}
+
+static int kexec_file_add_ipl_report(struct kimage *image,
+ struct s390_load_data *data)
+{
+ __u32 *lc_ipl_parmblock_ptr;
+ unsigned int len, ncerts;
+ struct kexec_buf buf;
+ unsigned long addr;
+ void *ptr, *end;
+
+ buf.image = image;
+
+ data->memsz = ALIGN(data->memsz, PAGE_SIZE);
+ buf.mem = data->memsz;
+ if (image->type == KEXEC_TYPE_CRASH)
+ buf.mem += crashk_res.start;
+
+ ptr = (void *)ipl_cert_list_addr;
+ end = ptr + ipl_cert_list_size;
+ ncerts = 0;
+ while (ptr < end) {
+ ncerts++;
+ len = *(unsigned int *)ptr;
+ ptr += sizeof(len);
+ ptr += len;
+ }
+
+ addr = data->memsz + data->report->size;
+ addr += ncerts * sizeof(struct ipl_rb_certificate_entry);
+ ptr = (void *)ipl_cert_list_addr;
+ while (ptr < end) {
+ len = *(unsigned int *)ptr;
+ ptr += sizeof(len);
+ ipl_report_add_certificate(data->report, ptr, addr, len);
+ addr += len;
+ ptr += len;
+ }
+
+ buf.buffer = ipl_report_finish(data->report);
+ buf.bufsz = data->report->size;
+ buf.memsz = buf.bufsz;
+
+ data->memsz += buf.memsz;
+
+ lc_ipl_parmblock_ptr =
+ data->kernel_buf + offsetof(struct lowcore, ipl_parmblock_ptr);
+ *lc_ipl_parmblock_ptr = (__u32)buf.mem;
+
+ return kexec_add_buffer(&buf);
+}
+
+void *kexec_file_add_components(struct kimage *image,
+ int (*add_kernel)(struct kimage *image,
+ struct s390_load_data *data))
+{
+ struct s390_load_data data = {0};
+ int ret;
+
+ data.report = ipl_report_init(&ipl_block);
+ if (IS_ERR(data.report))
+ return data.report;
+
+ ret = add_kernel(image, &data);
+ if (ret)
+ goto out;
+
+ if (image->cmdline_buf_len >= ARCH_COMMAND_LINE_SIZE) {
+ ret = -EINVAL;
+ goto out;
+ }
+ memcpy(data.parm->command_line, image->cmdline_buf,
+ image->cmdline_buf_len);
+
+ if (image->type == KEXEC_TYPE_CRASH) {
+ data.parm->oldmem_base = crashk_res.start;
+ data.parm->oldmem_size = crashk_res.end - crashk_res.start + 1;
+ }
+
+ if (image->initrd_buf) {
+ ret = kexec_file_add_initrd(image, &data);
+ if (ret)
+ goto out;
+ }
+
+ ret = kexec_file_add_purgatory(image, &data);
+ if (ret)
+ goto out;
+
+ if (data.kernel_mem == 0) {
+ unsigned long restart_psw = 0x0008000080000000UL;
+ restart_psw += image->start;
+ memcpy(data.kernel_buf, &restart_psw, sizeof(restart_psw));
+ image->start = 0;
+ }
+
+ ret = kexec_file_add_ipl_report(image, &data);
+out:
+ ipl_report_free(data.report);
+ return ERR_PTR(ret);
}
int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
@@ -140,7 +290,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
const Elf_Shdr *symtab)
{
Elf_Rela *relas;
- int i;
+ int i, r_type;
relas = (void *)pi->ehdr + relsec->sh_offset;
@@ -174,46 +324,8 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
addr = section->sh_addr + relas[i].r_offset;
- switch (ELF64_R_TYPE(relas[i].r_info)) {
- case R_390_8: /* Direct 8 bit. */
- *(u8 *)loc = val;
- break;
- case R_390_12: /* Direct 12 bit. */
- *(u16 *)loc &= 0xf000;
- *(u16 *)loc |= val & 0xfff;
- break;
- case R_390_16: /* Direct 16 bit. */
- *(u16 *)loc = val;
- break;
- case R_390_20: /* Direct 20 bit. */
- *(u32 *)loc &= 0xf00000ff;
- *(u32 *)loc |= (val & 0xfff) << 16; /* DL */
- *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */
- break;
- case R_390_32: /* Direct 32 bit. */
- *(u32 *)loc = val;
- break;
- case R_390_64: /* Direct 64 bit. */
- *(u64 *)loc = val;
- break;
- case R_390_PC16: /* PC relative 16 bit. */
- *(u16 *)loc = (val - addr);
- break;
- case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
- *(u16 *)loc = (val - addr) >> 1;
- break;
- case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */
- *(u32 *)loc = (val - addr) >> 1;
- break;
- case R_390_PC32: /* PC relative 32 bit. */
- *(u32 *)loc = (val - addr);
- break;
- case R_390_PC64: /* PC relative 64 bit. */
- *(u64 *)loc = (val - addr);
- break;
- default:
- break;
- }
+ r_type = ELF64_R_TYPE(relas[i].r_info);
+ arch_kexec_do_relocs(r_type, loc, val, addr);
}
return 0;
}
@@ -225,10 +337,8 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
* load memory in head.S will be accessed, e.g. to register the next
* command line. If the next kernel were smaller the current kernel
* will panic at load.
- *
- * 0x11000 = sizeof(head.S)
*/
- if (buf_len < 0x11000)
+ if (buf_len < HEAD_END)
return -ENOEXEC;
return kexec_image_probe_default(image, buf, buf_len);
diff --git a/arch/s390/kernel/machine_kexec_reloc.c b/arch/s390/kernel/machine_kexec_reloc.c
new file mode 100644
index 000000000000..1dded39239f8
--- /dev/null
+++ b/arch/s390/kernel/machine_kexec_reloc.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/elf.h>
+
+int arch_kexec_do_relocs(int r_type, void *loc, unsigned long val,
+ unsigned long addr)
+{
+ switch (r_type) {
+ case R_390_NONE:
+ break;
+ case R_390_8: /* Direct 8 bit. */
+ *(u8 *)loc = val;
+ break;
+ case R_390_12: /* Direct 12 bit. */
+ *(u16 *)loc &= 0xf000;
+ *(u16 *)loc |= val & 0xfff;
+ break;
+ case R_390_16: /* Direct 16 bit. */
+ *(u16 *)loc = val;
+ break;
+ case R_390_20: /* Direct 20 bit. */
+ *(u32 *)loc &= 0xf00000ff;
+ *(u32 *)loc |= (val & 0xfff) << 16; /* DL */
+ *(u32 *)loc |= (val & 0xff000) >> 4; /* DH */
+ break;
+ case R_390_32: /* Direct 32 bit. */
+ *(u32 *)loc = val;
+ break;
+ case R_390_64: /* Direct 64 bit. */
+ *(u64 *)loc = val;
+ break;
+ case R_390_PC16: /* PC relative 16 bit. */
+ *(u16 *)loc = (val - addr);
+ break;
+ case R_390_PC16DBL: /* PC relative 16 bit shifted by 1. */
+ *(u16 *)loc = (val - addr) >> 1;
+ break;
+ case R_390_PC32DBL: /* PC relative 32 bit shifted by 1. */
+ *(u32 *)loc = (val - addr) >> 1;
+ break;
+ case R_390_PC32: /* PC relative 32 bit. */
+ *(u32 *)loc = (val - addr);
+ break;
+ case R_390_PC64: /* PC relative 64 bit. */
+ *(u64 *)loc = (val - addr);
+ break;
+ case R_390_RELATIVE:
+ *(unsigned long *) loc = val;
+ break;
+ default:
+ return 1;
+ }
+ return 0;
+}
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index e93fbf02490c..9e1660a6b9db 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -20,6 +20,7 @@
ENTRY(ftrace_stub)
BR_EX %r14
+ENDPROC(ftrace_stub)
#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
@@ -28,7 +29,7 @@ ENTRY(ftrace_stub)
ENTRY(_mcount)
BR_EX %r14
-
+ENDPROC(_mcount)
EXPORT_SYMBOL(_mcount)
ENTRY(ftrace_caller)
@@ -61,10 +62,11 @@ ENTRY(ftrace_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
# The j instruction gets runtime patched to a nop instruction.
# See ftrace_enable_ftrace_graph_caller.
-ENTRY(ftrace_graph_caller)
+ .globl ftrace_graph_caller
+ftrace_graph_caller:
j ftrace_graph_caller_end
- lg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
- lg %r3,(STACK_PTREGS_PSW+8)(%r15)
+ lmg %r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15)
+ lg %r4,(STACK_PTREGS_PSW+8)(%r15)
brasl %r14,prepare_ftrace_return
stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15)
ftrace_graph_caller_end:
@@ -73,6 +75,7 @@ ftrace_graph_caller_end:
lg %r1,(STACK_PTREGS_PSW+8)(%r15)
lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
BR_EX %r1
+ENDPROC(ftrace_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -86,5 +89,6 @@ ENTRY(return_to_handler)
lgr %r14,%r2
lmg %r2,%r5,32(%r15)
BR_EX %r14
+ENDPROC(return_to_handler)
#endif
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 8c867b43c8eb..0a487fae763e 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -125,7 +125,7 @@ void nmi_free_per_cpu(struct lowcore *lc)
static notrace void s390_handle_damage(void)
{
smp_emergency_stop();
- disabled_wait((unsigned long) __builtin_return_address(0));
+ disabled_wait();
while (1);
}
NOKPROBE_SYMBOL(s390_handle_damage);
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index bdddaae96559..29e511f5bf06 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/cpu.h>
#include <asm/nospec-branch.h>
static int __init nobp_setup_early(char *str)
@@ -37,7 +38,7 @@ static int __init nospec_report(void)
{
if (test_facility(156))
pr_info("Spectre V2 mitigation: etokens\n");
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable)
pr_info("Spectre V2 mitigation: execute trampolines\n");
if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
pr_info("Spectre V2 mitigation: limited branch prediction\n");
@@ -58,15 +59,15 @@ early_param("nospectre_v2", nospectre_v2_setup_early);
void __init nospec_auto_detect(void)
{
- if (test_facility(156)) {
+ if (test_facility(156) || cpu_mitigations_off()) {
/*
* The machine supports etokens.
* Disable expolines and disable nobp.
*/
- if (IS_ENABLED(CC_USING_EXPOLINE))
+ if (__is_defined(CC_USING_EXPOLINE))
nospec_disable = 1;
__clear_facility(82, S390_lowcore.alt_stfle_fac_list);
- } else if (IS_ENABLED(CC_USING_EXPOLINE)) {
+ } else if (__is_defined(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
index e30e580ae362..48f472bf9290 100644
--- a/arch/s390/kernel/nospec-sysfs.c
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -15,7 +15,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
{
if (test_facility(156))
return sprintf(buf, "Mitigation: etokens\n");
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable)
return sprintf(buf, "Mitigation: execute trampolines\n");
if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
return sprintf(buf, "Mitigation: limited branch prediction\n");
diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c
index e1c54d28713a..48d48b6187c0 100644
--- a/arch/s390/kernel/perf_cpum_cf.c
+++ b/arch/s390/kernel/perf_cpum_cf.c
@@ -2,8 +2,8 @@
/*
* Performance event support for s390x - CPU-measurement Counter Facility
*
- * Copyright IBM Corp. 2012, 2017
- * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
+ * Copyright IBM Corp. 2012, 2019
+ * Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
*/
#define KMSG_COMPONENT "cpum_cf"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
@@ -26,7 +26,7 @@ static enum cpumf_ctr_set get_counter_set(u64 event)
set = CPUMF_CTR_SET_USER;
else if (event < 128)
set = CPUMF_CTR_SET_CRYPTO;
- else if (event < 256)
+ else if (event < 288)
set = CPUMF_CTR_SET_EXT;
else if (event >= 448 && event < 496)
set = CPUMF_CTR_SET_MT_DIAG;
@@ -50,12 +50,19 @@ static int validate_ctr_version(const struct hw_perf_event *hwc)
err = -EOPNOTSUPP;
break;
case CPUMF_CTR_SET_CRYPTO:
+ if ((cpuhw->info.csvn >= 1 && cpuhw->info.csvn <= 5 &&
+ hwc->config > 79) ||
+ (cpuhw->info.csvn >= 6 && hwc->config > 83))
+ err = -EOPNOTSUPP;
+ break;
case CPUMF_CTR_SET_EXT:
if (cpuhw->info.csvn < 1)
err = -EOPNOTSUPP;
if ((cpuhw->info.csvn == 1 && hwc->config > 159) ||
(cpuhw->info.csvn == 2 && hwc->config > 175) ||
- (cpuhw->info.csvn > 2 && hwc->config > 255))
+ (cpuhw->info.csvn >= 3 && cpuhw->info.csvn <= 5
+ && hwc->config > 255) ||
+ (cpuhw->info.csvn >= 6 && hwc->config > 287))
err = -EOPNOTSUPP;
break;
case CPUMF_CTR_SET_MT_DIAG:
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index b6854812d2ed..d4e031f7b9c8 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -306,15 +306,20 @@ static size_t cf_diag_ctrset_size(enum cpumf_ctr_set ctrset,
ctrset_size = 2;
break;
case CPUMF_CTR_SET_CRYPTO:
- ctrset_size = 16;
+ if (info->csvn >= 1 && info->csvn <= 5)
+ ctrset_size = 16;
+ else if (info->csvn == 6)
+ ctrset_size = 20;
break;
case CPUMF_CTR_SET_EXT:
if (info->csvn == 1)
ctrset_size = 32;
else if (info->csvn == 2)
ctrset_size = 48;
- else if (info->csvn >= 3)
+ else if (info->csvn >= 3 && info->csvn <= 5)
ctrset_size = 128;
+ else if (info->csvn == 6)
+ ctrset_size = 160;
break;
case CPUMF_CTR_SET_MT_DIAG:
if (info->csvn > 3)
diff --git a/arch/s390/kernel/perf_cpum_cf_events.c b/arch/s390/kernel/perf_cpum_cf_events.c
index b45238c89728..34cc96449b30 100644
--- a/arch/s390/kernel/perf_cpum_cf_events.c
+++ b/arch/s390/kernel/perf_cpum_cf_events.c
@@ -31,22 +31,26 @@ CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_CPU_CYCLES, 0x0020);
CPUMF_EVENT_ATTR(cf_fvn3, PROBLEM_STATE_INSTRUCTIONS, 0x0021);
CPUMF_EVENT_ATTR(cf_fvn3, L1D_DIR_WRITES, 0x0004);
CPUMF_EVENT_ATTR(cf_fvn3, L1D_PENALTY_CYCLES, 0x0005);
-CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_FUNCTIONS, 0x0040);
-CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_CYCLES, 0x0041);
-CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS, 0x0042);
-CPUMF_EVENT_ATTR(cf_svn_generic, PRNG_BLOCKED_CYCLES, 0x0043);
-CPUMF_EVENT_ATTR(cf_svn_generic, SHA_FUNCTIONS, 0x0044);
-CPUMF_EVENT_ATTR(cf_svn_generic, SHA_CYCLES, 0x0045);
-CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS, 0x0046);
-CPUMF_EVENT_ATTR(cf_svn_generic, SHA_BLOCKED_CYCLES, 0x0047);
-CPUMF_EVENT_ATTR(cf_svn_generic, DEA_FUNCTIONS, 0x0048);
-CPUMF_EVENT_ATTR(cf_svn_generic, DEA_CYCLES, 0x0049);
-CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS, 0x004a);
-CPUMF_EVENT_ATTR(cf_svn_generic, DEA_BLOCKED_CYCLES, 0x004b);
-CPUMF_EVENT_ATTR(cf_svn_generic, AES_FUNCTIONS, 0x004c);
-CPUMF_EVENT_ATTR(cf_svn_generic, AES_CYCLES, 0x004d);
-CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS, 0x004e);
-CPUMF_EVENT_ATTR(cf_svn_generic, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_FUNCTIONS, 0x0040);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_CYCLES, 0x0041);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS, 0x0042);
+CPUMF_EVENT_ATTR(cf_svn_12345, PRNG_BLOCKED_CYCLES, 0x0043);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_FUNCTIONS, 0x0044);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_CYCLES, 0x0045);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS, 0x0046);
+CPUMF_EVENT_ATTR(cf_svn_12345, SHA_BLOCKED_CYCLES, 0x0047);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_FUNCTIONS, 0x0048);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_CYCLES, 0x0049);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS, 0x004a);
+CPUMF_EVENT_ATTR(cf_svn_12345, DEA_BLOCKED_CYCLES, 0x004b);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_FUNCTIONS, 0x004c);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_CYCLES, 0x004d);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS, 0x004e);
+CPUMF_EVENT_ATTR(cf_svn_12345, AES_BLOCKED_CYCLES, 0x004f);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_FUNCTION_COUNT, 0x0050);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_CYCLES_COUNT, 0x0051);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT, 0x0052);
+CPUMF_EVENT_ATTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT, 0x0053);
CPUMF_EVENT_ATTR(cf_z10, L1I_L2_SOURCED_WRITES, 0x0080);
CPUMF_EVENT_ATTR(cf_z10, L1D_L2_SOURCED_WRITES, 0x0081);
CPUMF_EVENT_ATTR(cf_z10, L1I_L3_LOCAL_WRITES, 0x0082);
@@ -262,23 +266,47 @@ static struct attribute *cpumcf_fvn3_pmu_event_attr[] __initdata = {
NULL,
};
-static struct attribute *cpumcf_svn_generic_pmu_event_attr[] __initdata = {
- CPUMF_EVENT_PTR(cf_svn_generic, PRNG_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, PRNG_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, PRNG_BLOCKED_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, SHA_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, SHA_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, SHA_BLOCKED_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, DEA_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, DEA_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, DEA_BLOCKED_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, AES_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, AES_CYCLES),
- CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_FUNCTIONS),
- CPUMF_EVENT_PTR(cf_svn_generic, AES_BLOCKED_CYCLES),
+static struct attribute *cpumcf_svn_12345_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES),
+ NULL,
+};
+
+static struct attribute *cpumcf_svn_6_pmu_event_attr[] __initdata = {
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, PRNG_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, SHA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, DEA_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_FUNCTIONS),
+ CPUMF_EVENT_PTR(cf_svn_12345, AES_BLOCKED_CYCLES),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_FUNCTION_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_CYCLES_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_FUNCTION_COUNT),
+ CPUMF_EVENT_PTR(cf_svn_6, ECC_BLOCKED_CYCLES_COUNT),
NULL,
};
@@ -562,7 +590,18 @@ __init const struct attribute_group **cpumf_cf_event_group(void)
default:
cfvn = none;
}
- csvn = cpumcf_svn_generic_pmu_event_attr;
+
+ /* Determine version specific crypto set */
+ switch (ci.csvn) {
+ case 1 ... 5:
+ csvn = cpumcf_svn_12345_pmu_event_attr;
+ break;
+ case 6:
+ csvn = cpumcf_svn_6_pmu_event_attr;
+ break;
+ default:
+ csvn = none;
+ }
/* Determine model-specific counter set(s) */
get_cpu_id(&cpu_id);
diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c
index 0d770e513abf..fcb6c2e92b07 100644
--- a/arch/s390/kernel/perf_event.c
+++ b/arch/s390/kernel/perf_event.c
@@ -21,6 +21,7 @@
#include <asm/lowcore.h>
#include <asm/processor.h>
#include <asm/sysinfo.h>
+#include <asm/unwind.h>
const char *perf_pmu_name(void)
{
@@ -219,20 +220,13 @@ static int __init service_level_perf_register(void)
}
arch_initcall(service_level_perf_register);
-static int __perf_callchain_kernel(void *data, unsigned long address, int reliable)
-{
- struct perf_callchain_entry_ctx *entry = data;
-
- perf_callchain_store(entry, address);
- return 0;
-}
-
void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
struct pt_regs *regs)
{
- if (user_mode(regs))
- return;
- dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]);
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, current, regs, 0)
+ perf_callchain_store(entry, state.ip);
}
/* Perf definitions for PMU event attributes in sysfs */
diff --git a/arch/s390/kernel/pgm_check.S b/arch/s390/kernel/pgm_check.S
index 3e62aae34ea3..59dee9d3bebf 100644
--- a/arch/s390/kernel/pgm_check.S
+++ b/arch/s390/kernel/pgm_check.S
@@ -7,7 +7,7 @@
#include <linux/linkage.h>
-#define PGM_CHECK(handler) .long handler
+#define PGM_CHECK(handler) .quad handler
#define PGM_CHECK_DEFAULT PGM_CHECK(default_trap_handler)
/*
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 6e758bb6cd29..63873aa6693f 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -37,6 +37,7 @@
#include <asm/irq.h>
#include <asm/nmi.h>
#include <asm/smp.h>
+#include <asm/stacktrace.h>
#include <asm/switch_to.h>
#include <asm/runtime_instr.h>
#include "entry.h"
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 6fe2e1875058..5de13307b703 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -109,7 +109,8 @@ static void show_cpu_summary(struct seq_file *m, void *v)
{
static const char *hwcap_str[] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
+ "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs",
+ "vxe2", "vxp", "sort", "dflt"
};
static const char * const int_hwcap_str[] = {
"sie"
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 7f14adf512c6..4a22163962eb 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -73,6 +73,7 @@ ENTRY(store_status)
lgr %r9,%r2
lgr %r2,%r3
BR_EX %r9
+ENDPROC(store_status)
.section .bss
.align 8
diff --git a/arch/s390/kernel/relocate_kernel.S b/arch/s390/kernel/relocate_kernel.S
index c97c2d40fe15..fe396673e8a6 100644
--- a/arch/s390/kernel/relocate_kernel.S
+++ b/arch/s390/kernel/relocate_kernel.S
@@ -58,11 +58,15 @@ ENTRY(relocate_kernel)
j .base
.done:
sgr %r0,%r0 # clear register r0
+ cghi %r3,0
+ je .diag
la %r4,load_psw-.base(%r13) # load psw-address into the register
o %r3,4(%r4) # or load address into psw
st %r3,4(%r4)
mvc 0(8,%r0),0(%r4) # copy psw to absolute address 0
+ .diag:
diag %r0,%r0,0x308
+ENDPROC(relocate_kernel)
.align 8
load_psw:
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2c642af526ce..f8544d517430 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -50,6 +50,7 @@
#include <linux/compat.h>
#include <linux/start_kernel.h>
+#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
@@ -65,11 +66,13 @@
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
+#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
+#include <asm/uv.h>
#include "entry.h"
/*
@@ -89,12 +92,25 @@ char elf_platform[ELF_PLATFORM_SIZE];
unsigned long int_hwcap = 0;
+#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
+int __bootdata_preserved(prot_virt_guest);
+#endif
+
int __bootdata(noexec_disabled);
int __bootdata(memory_end_set);
unsigned long __bootdata(memory_end);
unsigned long __bootdata(max_physmem_end);
struct mem_detect_info __bootdata(mem_detect);
+struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
+struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
+unsigned long __bootdata_preserved(__swsusp_reset_dma);
+unsigned long __bootdata_preserved(__stext_dma);
+unsigned long __bootdata_preserved(__etext_dma);
+unsigned long __bootdata_preserved(__sdma);
+unsigned long __bootdata_preserved(__edma);
+unsigned long __bootdata_preserved(__kaslr_offset);
+
unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);
@@ -736,6 +752,15 @@ static void __init reserve_initrd(void)
#endif
}
+/*
+ * Reserve the memory area used to pass the certificate lists
+ */
+static void __init reserve_certificate_list(void)
+{
+ if (ipl_cert_list_addr)
+ memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
+}
+
static void __init reserve_mem_detect_info(void)
{
unsigned long start, size;
@@ -814,9 +839,10 @@ static void __init reserve_kernel(void)
{
unsigned long start_pfn = PFN_UP(__pa(_end));
- memblock_reserve(0, PARMAREA_END);
+ memblock_reserve(0, HEAD_END);
memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
- (unsigned long)_stext);
+ memblock_reserve(__sdma, __edma - __sdma);
}
static void __init setup_memory(void)
@@ -914,7 +940,15 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_S390_VXRS_EXT;
if (test_facility(135))
elf_hwcap |= HWCAP_S390_VXRS_BCD;
+ if (test_facility(148))
+ elf_hwcap |= HWCAP_S390_VXRS_EXT2;
+ if (test_facility(152))
+ elf_hwcap |= HWCAP_S390_VXRS_PDE;
}
+ if (test_facility(150))
+ elf_hwcap |= HWCAP_S390_SORT;
+ if (test_facility(151))
+ elf_hwcap |= HWCAP_S390_DFLT;
/*
* Guarded storage support HWCAP_S390_GS is bit 12.
@@ -1023,6 +1057,38 @@ static void __init setup_control_program_code(void)
}
/*
+ * Print the component list from the IPL report
+ */
+static void __init log_component_list(void)
+{
+ struct ipl_rb_component_entry *ptr, *end;
+ char *str;
+
+ if (!early_ipl_comp_list_addr)
+ return;
+ if (ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)
+ pr_info("Linux is running with Secure-IPL enabled\n");
+ else
+ pr_info("Linux is running with Secure-IPL disabled\n");
+ ptr = (void *) early_ipl_comp_list_addr;
+ end = (void *) ptr + early_ipl_comp_list_size;
+ pr_info("The IPL report contains the following components:\n");
+ while (ptr < end) {
+ if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
+ if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
+ str = "signed, verified";
+ else
+ str = "signed, verification failed";
+ } else {
+ str = "not signed";
+ }
+ pr_info("%016llx - %016llx (%s)\n",
+ ptr->addr, ptr->addr + ptr->len, str);
+ ptr++;
+ }
+}
+
+/*
* Setup function called from init/main.c just after the banner
* was printed.
*/
@@ -1042,6 +1108,8 @@ void __init setup_arch(char **cmdline_p)
else
pr_info("Linux is running as a guest in 64-bit mode\n");
+ log_component_list();
+
/* Have one command line that is parsed and saved in /proc/cmdline */
/* boot_command_line has been already set up in early.c */
*cmdline_p = boot_command_line;
@@ -1073,6 +1141,7 @@ void __init setup_arch(char **cmdline_p)
reserve_oldmem();
reserve_kernel();
reserve_initrd();
+ reserve_certificate_list();
reserve_mem_detect_info();
memblock_allow_resize();
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index bd197baf1dc3..35fafa2b91a8 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -53,6 +53,7 @@
#include <asm/sigp.h>
#include <asm/idle.h>
#include <asm/nmi.h>
+#include <asm/stacktrace.h>
#include <asm/topology.h>
#include "entry.h"
@@ -689,7 +690,7 @@ void __init smp_save_dump_cpus(void)
smp_save_cpu_regs(sa, addr, is_boot_cpu, page);
}
memblock_free(page, PAGE_SIZE);
- diag308_reset();
+ diag_dma_ops.diag308_reset();
pcpu_set_smt(0);
}
#endif /* CONFIG_CRASH_DUMP */
diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c
index 460dcfba7d4e..f6a620f854e1 100644
--- a/arch/s390/kernel/stacktrace.c
+++ b/arch/s390/kernel/stacktrace.c
@@ -11,65 +11,52 @@
#include <linux/stacktrace.h>
#include <linux/kallsyms.h>
#include <linux/export.h>
-
-static int __save_address(void *data, unsigned long address, int nosched)
-{
- struct stack_trace *trace = data;
-
- if (nosched && in_sched_functions(address))
- return 0;
- if (trace->skip > 0) {
- trace->skip--;
- return 0;
- }
- if (trace->nr_entries < trace->max_entries) {
- trace->entries[trace->nr_entries++] = address;
- return 0;
- }
- return 1;
-}
-
-static int save_address(void *data, unsigned long address, int reliable)
-{
- return __save_address(data, address, 0);
-}
-
-static int save_address_nosched(void *data, unsigned long address, int reliable)
-{
- return __save_address(data, address, 1);
-}
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
void save_stack_trace(struct stack_trace *trace)
{
- unsigned long sp;
-
- sp = current_stack_pointer();
- dump_trace(save_address, trace, NULL, sp);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, current, NULL, 0) {
+ if (trace->nr_entries >= trace->max_entries)
+ break;
+ if (trace->skip > 0)
+ trace->skip--;
+ else
+ trace->entries[trace->nr_entries++] = state.ip;
+ }
}
EXPORT_SYMBOL_GPL(save_stack_trace);
void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
{
- unsigned long sp;
-
- sp = tsk->thread.ksp;
- if (tsk == current)
- sp = current_stack_pointer();
- dump_trace(save_address_nosched, trace, tsk, sp);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, tsk, NULL, 0) {
+ if (trace->nr_entries >= trace->max_entries)
+ break;
+ if (in_sched_functions(state.ip))
+ continue;
+ if (trace->skip > 0)
+ trace->skip--;
+ else
+ trace->entries[trace->nr_entries++] = state.ip;
+ }
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
{
- unsigned long sp;
-
- sp = kernel_stack_pointer(regs);
- dump_trace(save_address, trace, NULL, sp);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, current, regs, 0) {
+ if (trace->nr_entries >= trace->max_entries)
+ break;
+ if (trace->skip > 0)
+ trace->skip--;
+ else
+ trace->entries[trace->nr_entries++] = state.ip;
+ }
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 993100c31d65..19a3c427801a 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -108,6 +108,7 @@ ENTRY(swsusp_arch_suspend)
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
lghi %r2,0
BR_EX %r14
+ENDPROC(swsusp_arch_suspend)
/*
* Restore saved memory image to correct place and restore register context.
@@ -154,20 +155,13 @@ ENTRY(swsusp_arch_resume)
ptlb /* flush tlb */
/* Reset System */
- larl %r1,restart_entry
- larl %r2,.Lrestart_diag308_psw
- og %r1,0(%r2)
- stg %r1,0(%r0)
larl %r1,.Lnew_pgm_check_psw
epsw %r2,%r3
stm %r2,%r3,0(%r1)
mvc __LC_PGM_NEW_PSW(16,%r0),0(%r1)
- lghi %r0,0
- diag %r0,%r0,0x308
-restart_entry:
- lhi %r1,1
- sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- sam64
+ larl %r1,__swsusp_reset_dma
+ lg %r1,0(%r1)
+ BASR_EX %r14,%r1
#ifdef CONFIG_SMP
larl %r1,smp_cpu_mt_shift
icm %r1,15,0(%r1)
@@ -267,6 +261,7 @@ restore_registers:
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
lghi %r2,0
BR_EX %r14
+ENDPROC(swsusp_arch_resume)
.section .data..nosave,"aw",@progbits
.align 8
@@ -275,8 +270,6 @@ restore_registers:
.Lpanic_string:
.asciz "Resume not possible because suspend CPU is no longer available\n"
.align 8
-.Lrestart_diag308_psw:
- .long 0x00080000,0x80000000
.Lrestart_suspend_psw:
.quad 0x0000000180000000,restart_suspend
.Lnew_pgm_check_psw:
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index 8003b38c1688..82e81a9f7112 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -49,7 +49,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str)
report_user_fault(regs, si_signo, 0);
} else {
const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->psw.addr);
+ fixup = s390_search_extables(regs->psw.addr);
if (fixup)
regs->psw.addr = extable_fixup(fixup);
else {
@@ -263,5 +263,6 @@ NOKPROBE_SYMBOL(kernel_stack_overflow);
void __init trap_init(void)
{
+ sort_extable(__start_dma_ex_table, __stop_dma_ex_table);
local_mcck_enable();
}
diff --git a/arch/s390/kernel/unwind_bc.c b/arch/s390/kernel/unwind_bc.c
new file mode 100644
index 000000000000..57fd4e902f1f
--- /dev/null
+++ b/arch/s390/kernel/unwind_bc.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched.h>
+#include <linux/sched/task.h>
+#include <linux/sched/task_stack.h>
+#include <linux/interrupt.h>
+#include <asm/sections.h>
+#include <asm/ptrace.h>
+#include <asm/bitops.h>
+#include <asm/stacktrace.h>
+#include <asm/unwind.h>
+
+unsigned long unwind_get_return_address(struct unwind_state *state)
+{
+ if (unwind_done(state))
+ return 0;
+ return __kernel_text_address(state->ip) ? state->ip : 0;
+}
+EXPORT_SYMBOL_GPL(unwind_get_return_address);
+
+static bool outside_of_stack(struct unwind_state *state, unsigned long sp)
+{
+ return (sp <= state->sp) ||
+ (sp + sizeof(struct stack_frame) > state->stack_info.end);
+}
+
+static bool update_stack_info(struct unwind_state *state, unsigned long sp)
+{
+ struct stack_info *info = &state->stack_info;
+ unsigned long *mask = &state->stack_mask;
+
+ /* New stack pointer leaves the current stack */
+ if (get_stack_info(sp, state->task, info, mask) != 0 ||
+ !on_stack(info, sp, sizeof(struct stack_frame)))
+ /* 'sp' does not point to a valid stack */
+ return false;
+ return true;
+}
+
+bool unwind_next_frame(struct unwind_state *state)
+{
+ struct stack_info *info = &state->stack_info;
+ struct stack_frame *sf;
+ struct pt_regs *regs;
+ unsigned long sp, ip;
+ bool reliable;
+
+ regs = state->regs;
+ if (unlikely(regs)) {
+ sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]);
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ reliable = false;
+ regs = NULL;
+ } else {
+ sf = (struct stack_frame *) state->sp;
+ sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain);
+ if (likely(sp)) {
+ /* Non-zero back-chain points to the previous frame */
+ if (unlikely(outside_of_stack(state, sp))) {
+ if (!update_stack_info(state, sp))
+ goto out_err;
+ }
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ reliable = true;
+ } else {
+ /* No back-chain, look for a pt_regs structure */
+ sp = state->sp + STACK_FRAME_OVERHEAD;
+ if (!on_stack(info, sp, sizeof(struct pt_regs)))
+ goto out_stop;
+ regs = (struct pt_regs *) sp;
+ if (user_mode(regs))
+ goto out_stop;
+ ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ reliable = true;
+ }
+ }
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* Decode any ftrace redirection */
+ if (ip == (unsigned long) return_to_handler)
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ ip, (void *) sp);
+#endif
+
+ /* Update unwind state */
+ state->sp = sp;
+ state->ip = ip;
+ state->regs = regs;
+ state->reliable = reliable;
+ return true;
+
+out_err:
+ state->error = true;
+out_stop:
+ state->stack_info.type = STACK_TYPE_UNKNOWN;
+ return false;
+}
+EXPORT_SYMBOL_GPL(unwind_next_frame);
+
+void __unwind_start(struct unwind_state *state, struct task_struct *task,
+ struct pt_regs *regs, unsigned long sp)
+{
+ struct stack_info *info = &state->stack_info;
+ unsigned long *mask = &state->stack_mask;
+ struct stack_frame *sf;
+ unsigned long ip;
+ bool reliable;
+
+ memset(state, 0, sizeof(*state));
+ state->task = task;
+ state->regs = regs;
+
+ /* Don't even attempt to start from user mode regs: */
+ if (regs && user_mode(regs)) {
+ info->type = STACK_TYPE_UNKNOWN;
+ return;
+ }
+
+ /* Get current stack pointer and initialize stack info */
+ if (get_stack_info(sp, task, info, mask) != 0 ||
+ !on_stack(info, sp, sizeof(struct stack_frame))) {
+ /* Something is wrong with the stack pointer */
+ info->type = STACK_TYPE_UNKNOWN;
+ state->error = true;
+ return;
+ }
+
+ /* Get the instruction pointer from pt_regs or the stack frame */
+ if (regs) {
+ ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
+ reliable = true;
+ } else {
+ sf = (struct stack_frame *) sp;
+ ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
+ reliable = false;
+ }
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ /* Decode any ftrace redirection */
+ if (ip == (unsigned long) return_to_handler)
+ ip = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+ ip, NULL);
+#endif
+
+ /* Update unwind state */
+ state->sp = sp;
+ state->ip = ip;
+ state->reliable = reliable;
+}
+EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index e7920a68a12e..243d8b1185bf 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -29,7 +29,7 @@
#include <asm/vdso.h>
#include <asm/facility.h>
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
extern char vdso32_start, vdso32_end;
static void *vdso32_kbase = &vdso32_start;
static unsigned int vdso32_pages;
@@ -55,7 +55,7 @@ static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
if (vma->vm_mm->context.compat_mm) {
vdso_pagelist = vdso32_pagelist;
vdso_pages = vdso32_pages;
@@ -76,7 +76,7 @@ static int vdso_mremap(const struct vm_special_mapping *sm,
unsigned long vdso_pages;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
if (vma->vm_mm->context.compat_mm)
vdso_pages = vdso32_pages;
#endif
@@ -223,7 +223,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
return 0;
vdso_pages = vdso64_pages;
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
mm->context.compat_mm = is_compat_task();
if (mm->context.compat_mm)
vdso_pages = vdso32_pages;
@@ -280,7 +280,7 @@ static int __init vdso_init(void)
int i;
vdso_init_data(vdso_data);
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_VDSO
/* Calculate the size of the 32 bit vDSO */
vdso32_pages = ((&vdso32_end - &vdso32_start
+ PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index e76309fbbcb3..aee9ffbccb54 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -19,7 +19,7 @@ KBUILD_AFLAGS_31 += -m31 -s
KBUILD_CFLAGS_31 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_31 += -m31 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_31 += -nostdlib -Wl,-soname=linux-vdso32.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ -Wl,--hash-style=both
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_31)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_31)
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index f849ac61c5da..bec19e7e6e1c 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -19,7 +19,7 @@ KBUILD_AFLAGS_64 += -m64 -s
KBUILD_CFLAGS_64 := $(filter-out -m64,$(KBUILD_CFLAGS))
KBUILD_CFLAGS_64 += -m64 -fPIC -shared -fno-common -fno-builtin
KBUILD_CFLAGS_64 += -nostdlib -Wl,-soname=linux-vdso64.so.1 \
- $(call cc-ldoption, -Wl$(comma)--hash-style=both)
+ -Wl,--hash-style=both
$(targets:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_64)
$(targets:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_64)
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 8429ab079715..49d55327de0b 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -72,6 +72,7 @@ SECTIONS
__end_ro_after_init = .;
RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+ BOOT_DATA_PRESERVED
_edata = .; /* End of data section */
@@ -143,6 +144,18 @@ SECTIONS
INIT_DATA_SECTION(0x100)
PERCPU_SECTION(0x100)
+
+ .dynsym ALIGN(8) : {
+ __dynsym_start = .;
+ *(.dynsym)
+ __dynsym_end = .;
+ }
+ .rela.dyn ALIGN(8) : {
+ __rela_dyn_start = .;
+ *(.rela*)
+ __rela_dyn_end = .;
+ }
+
. = ALIGN(PAGE_SIZE);
__init_end = .; /* freed after init ends here */
@@ -161,6 +174,12 @@ SECTIONS
QUAD(__bss_stop - __bss_start) /* bss_size */
QUAD(__boot_data_start) /* bootdata_off */
QUAD(__boot_data_end - __boot_data_start) /* bootdata_size */
+ QUAD(__boot_data_preserved_start) /* bootdata_preserved_off */
+ QUAD(__boot_data_preserved_end -
+ __boot_data_preserved_start) /* bootdata_preserved_size */
+ QUAD(__dynsym_start) /* dynsym_start */
+ QUAD(__rela_dyn_start) /* rela_dyn_start */
+ QUAD(__rela_dyn_end) /* rela_dyn_end */
} :NONE
/* Debugging sections. */
diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 767453faacfc..1816ee48eadd 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -21,7 +21,6 @@ config KVM
prompt "Kernel-based Virtual Machine (KVM) support"
depends on HAVE_KVM
select PREEMPT_NOTIFIERS
- select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_VCPU_ASYNC_IOCTL
select HAVE_KVM_EVENTFD
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 82162867f378..37503ae62486 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3194,7 +3194,7 @@ out:
}
EXPORT_SYMBOL_GPL(kvm_s390_gisc_unregister);
-static void gib_alert_irq_handler(struct airq_struct *airq)
+static void gib_alert_irq_handler(struct airq_struct *airq, bool floating)
{
inc_irq_stat(IRQIO_GAL);
process_gib_alert_list();
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index 53008da05190..dc0874f2e203 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -178,6 +178,7 @@ ENTRY(__memset\bits)
BR_EX %r14
.L__memset_mvc\bits:
mvc \bytes(1,%r1),0(%r1)
+ENDPROC(__memset\bits)
.endm
__MEMSET 16,2,sth
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index f5880bfd1b0c..3175413186b9 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -4,7 +4,7 @@
#
obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o
-obj-y += page-states.o gup.o pageattr.o pgtable.o pgalloc.o
+obj-y += page-states.o pageattr.o pgtable.o pgalloc.o
obj-$(CONFIG_CMM) += cmm.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 11613362c4e7..c220399ae196 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -247,12 +247,24 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code)
current);
}
+const struct exception_table_entry *s390_search_extables(unsigned long addr)
+{
+ const struct exception_table_entry *fixup;
+
+ fixup = search_extable(__start_dma_ex_table,
+ __stop_dma_ex_table - __start_dma_ex_table,
+ addr);
+ if (!fixup)
+ fixup = search_exception_tables(addr);
+ return fixup;
+}
+
static noinline void do_no_context(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
/* Are we prepared to handle this kernel fault? */
- fixup = search_exception_tables(regs->psw.addr);
+ fixup = s390_search_extables(regs->psw.addr);
if (fixup) {
regs->psw.addr = extable_fixup(fixup);
return;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
deleted file mode 100644
index 2809d11c7a28..000000000000
--- a/arch/s390/mm/gup.c
+++ /dev/null
@@ -1,300 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Lockless get_user_pages_fast for s390
- *
- * Copyright IBM Corp. 2010
- * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
- */
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/hugetlb.h>
-#include <linux/vmstat.h>
-#include <linux/pagemap.h>
-#include <linux/rwsem.h>
-#include <asm/pgtable.h>
-
-/*
- * The performance critical leaf functions are made noinline otherwise gcc
- * inlines everything into a single function which results in too much
- * register pressure.
- */
-static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- unsigned long mask;
- pte_t *ptep, pte;
-
- mask = (write ? _PAGE_PROTECT : 0) | _PAGE_INVALID | _PAGE_SPECIAL;
-
- ptep = ((pte_t *) pmd_deref(pmd)) + pte_index(addr);
- do {
- pte = *ptep;
- barrier();
- /* Similar to the PMD case, NUMA hinting must take slow path */
- if (pte_protnone(pte))
- return 0;
- if ((pte_val(pte) & mask) != 0)
- return 0;
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- page = pte_page(pte);
- head = compound_head(page);
- if (!page_cache_get_speculative(head))
- return 0;
- if (unlikely(pte_val(pte) != pte_val(*ptep))) {
- put_page(head);
- return 0;
- }
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
-
- } while (ptep++, addr += PAGE_SIZE, addr != end);
-
- return 1;
-}
-
-static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- unsigned long mask;
- int refs;
-
- mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
- if ((pmd_val(pmd) & mask) != 0)
- return 0;
- VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
-
- refs = 0;
- head = pmd_page(pmd);
- page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON(compound_head(page) != head);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pmd_val(pmd) != pmd_val(*pmdp))) {
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-
-static inline int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pmd_t *pmdp, pmd;
-
- pmdp = (pmd_t *) pudp;
- if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R3)
- pmdp = (pmd_t *) pud_deref(pud);
- pmdp += pmd_index(addr);
- do {
- pmd = *pmdp;
- barrier();
- next = pmd_addr_end(addr, end);
- if (pmd_none(pmd))
- return 0;
- if (unlikely(pmd_large(pmd))) {
- /*
- * NUMA hinting faults need to be handled in the GUP
- * slowpath for accounting purposes and so that they
- * can be serialised against THP migration.
- */
- if (pmd_protnone(pmd))
- return 0;
- if (!gup_huge_pmd(pmdp, pmd, addr, next,
- write, pages, nr))
- return 0;
- } else if (!gup_pte_range(pmdp, pmd, addr, next,
- write, pages, nr))
- return 0;
- } while (pmdp++, addr = next, addr != end);
-
- return 1;
-}
-
-static int gup_huge_pud(pud_t *pudp, pud_t pud, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- struct page *head, *page;
- unsigned long mask;
- int refs;
-
- mask = (write ? _REGION_ENTRY_PROTECT : 0) | _REGION_ENTRY_INVALID;
- if ((pud_val(pud) & mask) != 0)
- return 0;
- VM_BUG_ON(!pfn_valid(pud_pfn(pud)));
-
- refs = 0;
- head = pud_page(pud);
- page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- do {
- VM_BUG_ON_PAGE(compound_head(page) != head, page);
- pages[*nr] = page;
- (*nr)++;
- page++;
- refs++;
- } while (addr += PAGE_SIZE, addr != end);
-
- if (!page_cache_add_speculative(head, refs)) {
- *nr -= refs;
- return 0;
- }
-
- if (unlikely(pud_val(pud) != pud_val(*pudp))) {
- *nr -= refs;
- while (refs--)
- put_page(head);
- return 0;
- }
-
- return 1;
-}
-
-static inline int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long next;
- pud_t *pudp, pud;
-
- pudp = (pud_t *) p4dp;
- if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R2)
- pudp = (pud_t *) p4d_deref(p4d);
- pudp += pud_index(addr);
- do {
- pud = *pudp;
- barrier();
- next = pud_addr_end(addr, end);
- if (pud_none(pud))
- return 0;
- if (unlikely(pud_large(pud))) {
- if (!gup_huge_pud(pudp, pud, addr, next, write, pages,
- nr))
- return 0;
- } else if (!gup_pmd_range(pudp, pud, addr, next, write, pages,
- nr))
- return 0;
- } while (pudp++, addr = next, addr != end);
-
- return 1;
-}
-
-static inline int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
- unsigned long end, int write, struct page **pages, int *nr)
-{
- unsigned long next;
- p4d_t *p4dp, p4d;
-
- p4dp = (p4d_t *) pgdp;
- if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) == _REGION_ENTRY_TYPE_R1)
- p4dp = (p4d_t *) pgd_deref(pgd);
- p4dp += p4d_index(addr);
- do {
- p4d = *p4dp;
- barrier();
- next = p4d_addr_end(addr, end);
- if (p4d_none(p4d))
- return 0;
- if (!gup_pud_range(p4dp, p4d, addr, next, write, pages, nr))
- return 0;
- } while (p4dp++, addr = next, addr != end);
-
- return 1;
-}
-
-/*
- * Like get_user_pages_fast() except its IRQ-safe in that it won't fall
- * back to the regular GUP.
- * Note a difference with get_user_pages_fast: this always returns the
- * number of pages pinned, 0 if no pages were pinned.
- */
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- struct mm_struct *mm = current->mm;
- unsigned long addr, len, end;
- unsigned long next, flags;
- pgd_t *pgdp, pgd;
- int nr = 0;
-
- start &= PAGE_MASK;
- addr = start;
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- if ((end <= start) || (end > mm->context.asce_limit))
- return 0;
- /*
- * local_irq_save() doesn't prevent pagetable teardown, but does
- * prevent the pagetables from being freed on s390.
- *
- * So long as we atomically load page table pointers versus teardown,
- * we can follow the address down to the the page and take a ref on it.
- */
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd = *pgdp;
- barrier();
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (!gup_p4d_range(pgdp, pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
-
- return nr;
-}
-
-/**
- * get_user_pages_fast() - pin user pages in memory
- * @start: starting user address
- * @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to
- * @pages: array that receives pointers to the pages pinned.
- * Should be at least nr_pages long.
- *
- * Attempt to pin user pages in memory without taking mm->mmap_sem.
- * If not successful, it will fall back to taking the lock and
- * calling get_user_pages().
- *
- * Returns number of pages pinned. This may be fewer than the number
- * requested. If nr_pages is 0 or negative, returns 0. If no pages
- * were pinned, returns -errno.
- */
-int get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages)
-{
- int nr, ret;
-
- might_sleep();
- start &= PAGE_MASK;
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
- if (nr == nr_pages)
- return nr;
-
- /* Try to get the remaining pages with get_user_pages */
- start += nr << PAGE_SHIFT;
- pages += nr;
- ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
- write ? FOLL_WRITE : 0);
- /* Have to be a bit careful with return values */
- if (nr > 0)
- ret = (ret < 0) ? nr : ret + nr;
- return ret;
-}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 3e82f66d5c61..7cf48eefec8f 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -49,6 +49,8 @@ unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
EXPORT_SYMBOL(zero_page_mask);
+bool initmem_freed;
+
static void __init setup_zero_pages(void)
{
unsigned int order;
@@ -148,6 +150,7 @@ void __init mem_init(void)
void free_initmem(void)
{
+ initmem_freed = true;
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RW | SET_MEMORY_NX);
diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c
index 97b3ee53852b..818deeb1ebc3 100644
--- a/arch/s390/mm/maccess.c
+++ b/arch/s390/mm/maccess.c
@@ -16,6 +16,7 @@
#include <linux/cpu.h>
#include <asm/ctl_reg.h>
#include <asm/io.h>
+#include <asm/stacktrace.h>
static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t size)
{
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index db6bb2f97a2c..99e06213a22b 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -290,7 +290,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
tlb_remove_table(tlb, table);
}
-static void __tlb_remove_table(void *_table)
+void __tlb_remove_table(void *_table)
{
unsigned int mask = (unsigned long) _table & 3;
void *table = (void *)((unsigned long) _table ^ mask);
@@ -316,67 +316,6 @@ static void __tlb_remove_table(void *_table)
}
}
-static void tlb_remove_table_smp_sync(void *arg)
-{
- /* Simply deliver the interrupt */
-}
-
-static void tlb_remove_table_one(void *table)
-{
- /*
- * This isn't an RCU grace period and hence the page-tables cannot be
- * assumed to be actually RCU-freed.
- *
- * It is however sufficient for software page-table walkers that rely
- * on IRQ disabling. See the comment near struct mmu_table_batch.
- */
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
- __tlb_remove_table(table);
-}
-
-static void tlb_remove_table_rcu(struct rcu_head *head)
-{
- struct mmu_table_batch *batch;
- int i;
-
- batch = container_of(head, struct mmu_table_batch, rcu);
-
- for (i = 0; i < batch->nr; i++)
- __tlb_remove_table(batch->tables[i]);
-
- free_page((unsigned long)batch);
-}
-
-void tlb_table_flush(struct mmu_gather *tlb)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- if (*batch) {
- call_rcu(&(*batch)->rcu, tlb_remove_table_rcu);
- *batch = NULL;
- }
-}
-
-void tlb_remove_table(struct mmu_gather *tlb, void *table)
-{
- struct mmu_table_batch **batch = &tlb->batch;
-
- tlb->mm->context.flush_mm = 1;
- if (*batch == NULL) {
- *batch = (struct mmu_table_batch *)
- __get_free_page(GFP_NOWAIT | __GFP_NOWARN);
- if (*batch == NULL) {
- __tlb_flush_mm_lazy(tlb->mm);
- tlb_remove_table_one(table);
- return;
- }
- (*batch)->nr = 0;
- }
- (*batch)->tables[(*batch)->nr++] = table;
- if ((*batch)->nr == MAX_TABLE_BATCH)
- tlb_flush_mmu(tlb);
-}
-
/*
* Base infrastructure required to generate basic asces, region, segment,
* and page tables that do not make use of enhanced features like EDAT1.
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 8485d6dc2754..9ebd01219812 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -410,6 +410,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm,
return old;
}
+#ifdef CONFIG_PGSTE
static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
{
pgd_t *pgd;
@@ -427,6 +428,7 @@ static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr)
pmd = pmd_alloc(mm, pud, addr);
return pmd;
}
+#endif
pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
pmd_t *pmdp, pmd_t new)
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 0472e27febdf..b403fa14847d 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -413,6 +413,8 @@ void __init vmem_map_init(void)
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
+ __set_memory(__stext_dma, (__etext_dma - __stext_dma) >> PAGE_SHIFT,
+ SET_MEMORY_RO | SET_MEMORY_X);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
}
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 51dd0267d014..5e7c63033159 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -455,7 +455,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
save_restore_regs(jit, REGS_RESTORE, stack_depth);
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
jit->r14_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r14 thunk */
if (test_facility(35)) {
@@ -473,7 +473,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
/* br %r14 */
_EMIT2(0x07fe);
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable &&
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
(jit->seen & SEEN_FUNC)) {
jit->r1_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r1 thunk */
@@ -999,7 +999,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
/* lg %w1,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
EMIT_CONST_U64(func));
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
/* brasl %r14,__s390_indirect_jump_r1 */
EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
} else {
diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c
index 43d9525c36fc..7441857df51b 100644
--- a/arch/s390/oprofile/init.c
+++ b/arch/s390/oprofile/init.c
@@ -13,23 +13,17 @@
#include <linux/oprofile.h>
#include <linux/init.h>
#include <asm/processor.h>
-
-static int __s390_backtrace(void *data, unsigned long address, int reliable)
-{
- unsigned int *depth = data;
-
- if (*depth == 0)
- return 1;
- (*depth)--;
- oprofile_add_trace(address);
- return 0;
-}
+#include <asm/unwind.h>
static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
{
- if (user_mode(regs))
- return;
- dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
+ struct unwind_state state;
+
+ unwind_for_each_frame(&state, current, regs, 0) {
+ if (depth-- == 0)
+ break;
+ oprofile_add_trace(state.ip);
+ }
}
int __init oprofile_arch_init(struct oprofile_operations *ops)
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 22d0871291ee..748626a33028 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,5 +3,5 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index dc9bc82c072c..0ebb7c405a25 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -24,11 +24,9 @@
#include <linux/err.h>
#include <linux/export.h>
#include <linux/delay.h>
-#include <linux/irq.h>
-#include <linux/kernel_stat.h>
#include <linux/seq_file.h>
+#include <linux/jump_label.h>
#include <linux/pci.h>
-#include <linux/msi.h>
#include <asm/isc.h>
#include <asm/airq.h>
@@ -37,30 +35,13 @@
#include <asm/pci_clp.h>
#include <asm/pci_dma.h>
-#define DEBUG /* enable pr_debug */
-
-#define SIC_IRQ_MODE_ALL 0
-#define SIC_IRQ_MODE_SINGLE 1
-
-#define ZPCI_NR_DMA_SPACES 1
-#define ZPCI_NR_DEVICES CONFIG_PCI_NR_FUNCTIONS
-
/* list of all detected zpci devices */
static LIST_HEAD(zpci_list);
static DEFINE_SPINLOCK(zpci_list_lock);
-static struct irq_chip zpci_irq_chip = {
- .name = "zPCI",
- .irq_unmask = pci_msi_unmask_irq,
- .irq_mask = pci_msi_mask_irq,
-};
-
static DECLARE_BITMAP(zpci_domain, ZPCI_NR_DEVICES);
static DEFINE_SPINLOCK(zpci_domain_lock);
-static struct airq_iv *zpci_aisb_iv;
-static struct airq_iv *zpci_aibv[ZPCI_NR_DEVICES];
-
#define ZPCI_IOMAP_ENTRIES \
min(((unsigned long) ZPCI_NR_DEVICES * PCI_BAR_COUNT / 2), \
ZPCI_IOMAP_MAX_ENTRIES)
@@ -70,6 +51,8 @@ static unsigned long *zpci_iomap_bitmap;
struct zpci_iomap_entry *zpci_iomap_start;
EXPORT_SYMBOL_GPL(zpci_iomap_start);
+DEFINE_STATIC_KEY_FALSE(have_mio);
+
static struct kmem_cache *zdev_fmb_cache;
struct zpci_dev *get_zdev_by_fid(u32 fid)
@@ -123,39 +106,6 @@ int pci_proc_domain(struct pci_bus *bus)
}
EXPORT_SYMBOL_GPL(pci_proc_domain);
-/* Modify PCI: Register adapter interruptions */
-static int zpci_set_airq(struct zpci_dev *zdev)
-{
- u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
- struct zpci_fib fib = {0};
- u8 status;
-
- fib.isc = PCI_ISC;
- fib.sum = 1; /* enable summary notifications */
- fib.noi = airq_iv_end(zdev->aibv);
- fib.aibv = (unsigned long) zdev->aibv->vector;
- fib.aibvo = 0; /* each zdev has its own interrupt vector */
- fib.aisb = (unsigned long) zpci_aisb_iv->vector + (zdev->aisb/64)*8;
- fib.aisbo = zdev->aisb & 63;
-
- return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
-}
-
-/* Modify PCI: Unregister adapter interruptions */
-static int zpci_clear_airq(struct zpci_dev *zdev)
-{
- u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
- struct zpci_fib fib = {0};
- u8 cc, status;
-
- cc = zpci_mod_fc(req, &fib, &status);
- if (cc == 3 || (cc == 1 && status == 24))
- /* Function already gone or IRQs already deregistered. */
- cc = 0;
-
- return cc ? -EIO : 0;
-}
-
/* Modify PCI: Register I/O address translation parameters */
int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
u64 base, u64 limit, u64 iota)
@@ -241,7 +191,7 @@ static int zpci_cfg_load(struct zpci_dev *zdev, int offset, u32 *val, u8 len)
u64 data;
int rc;
- rc = zpci_load(&data, req, offset);
+ rc = __zpci_load(&data, req, offset);
if (!rc) {
data = le64_to_cpu((__force __le64) data);
data >>= (8 - len) * 8;
@@ -259,7 +209,7 @@ static int zpci_cfg_store(struct zpci_dev *zdev, int offset, u32 val, u8 len)
data <<= (8 - len) * 8;
data = (__force u64) cpu_to_le64(data);
- rc = zpci_store(data, req, offset);
+ rc = __zpci_store(data, req, offset);
return rc;
}
@@ -276,18 +226,48 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count)
zpci_memcpy_toio(to, from, count);
}
+void __iomem *ioremap(unsigned long ioaddr, unsigned long size)
+{
+ struct vm_struct *area;
+ unsigned long offset;
+
+ if (!size)
+ return NULL;
+
+ if (!static_branch_unlikely(&have_mio))
+ return (void __iomem *) ioaddr;
+
+ offset = ioaddr & ~PAGE_MASK;
+ ioaddr &= PAGE_MASK;
+ size = PAGE_ALIGN(size + offset);
+ area = get_vm_area(size, VM_IOREMAP);
+ if (!area)
+ return NULL;
+
+ if (ioremap_page_range((unsigned long) area->addr,
+ (unsigned long) area->addr + size,
+ ioaddr, PAGE_KERNEL)) {
+ vunmap(area->addr);
+ return NULL;
+ }
+ return (void __iomem *) ((unsigned long) area->addr + offset);
+}
+EXPORT_SYMBOL(ioremap);
+
+void iounmap(volatile void __iomem *addr)
+{
+ if (static_branch_likely(&have_mio))
+ vunmap((__force void *) ((unsigned long) addr & PAGE_MASK));
+}
+EXPORT_SYMBOL(iounmap);
+
/* Create a virtual mapping cookie for a PCI BAR */
-void __iomem *pci_iomap_range(struct pci_dev *pdev,
- int bar,
- unsigned long offset,
- unsigned long max)
+static void __iomem *pci_iomap_range_fh(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long max)
{
struct zpci_dev *zdev = to_zpci(pdev);
int idx;
- if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
- return NULL;
-
idx = zdev->bars[bar].map_idx;
spin_lock(&zpci_iomap_lock);
/* Detect overrun */
@@ -298,6 +278,30 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
return (void __iomem *) ZPCI_ADDR(idx) + offset;
}
+
+static void __iomem *pci_iomap_range_mio(struct pci_dev *pdev, int bar,
+ unsigned long offset,
+ unsigned long max)
+{
+ unsigned long barsize = pci_resource_len(pdev, bar);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ void __iomem *iova;
+
+ iova = ioremap((unsigned long) zdev->bars[bar].mio_wt, barsize);
+ return iova ? iova + offset : iova;
+}
+
+void __iomem *pci_iomap_range(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long max)
+{
+ if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+ return NULL;
+
+ if (static_branch_likely(&have_mio))
+ return pci_iomap_range_mio(pdev, bar, offset, max);
+ else
+ return pci_iomap_range_fh(pdev, bar, offset, max);
+}
EXPORT_SYMBOL(pci_iomap_range);
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
@@ -306,7 +310,37 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
}
EXPORT_SYMBOL(pci_iomap);
-void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
+static void __iomem *pci_iomap_wc_range_mio(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long max)
+{
+ unsigned long barsize = pci_resource_len(pdev, bar);
+ struct zpci_dev *zdev = to_zpci(pdev);
+ void __iomem *iova;
+
+ iova = ioremap((unsigned long) zdev->bars[bar].mio_wb, barsize);
+ return iova ? iova + offset : iova;
+}
+
+void __iomem *pci_iomap_wc_range(struct pci_dev *pdev, int bar,
+ unsigned long offset, unsigned long max)
+{
+ if (!pci_resource_len(pdev, bar) || bar >= PCI_BAR_COUNT)
+ return NULL;
+
+ if (static_branch_likely(&have_mio))
+ return pci_iomap_wc_range_mio(pdev, bar, offset, max);
+ else
+ return pci_iomap_range_fh(pdev, bar, offset, max);
+}
+EXPORT_SYMBOL(pci_iomap_wc_range);
+
+void __iomem *pci_iomap_wc(struct pci_dev *dev, int bar, unsigned long maxlen)
+{
+ return pci_iomap_wc_range(dev, bar, 0, maxlen);
+}
+EXPORT_SYMBOL(pci_iomap_wc);
+
+static void pci_iounmap_fh(struct pci_dev *pdev, void __iomem *addr)
{
unsigned int idx = ZPCI_IDX(addr);
@@ -319,6 +353,19 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
}
spin_unlock(&zpci_iomap_lock);
}
+
+static void pci_iounmap_mio(struct pci_dev *pdev, void __iomem *addr)
+{
+ iounmap(addr);
+}
+
+void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
+{
+ if (static_branch_likely(&have_mio))
+ pci_iounmap_mio(pdev, addr);
+ else
+ pci_iounmap_fh(pdev, addr);
+}
EXPORT_SYMBOL(pci_iounmap);
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
@@ -354,136 +401,6 @@ static struct pci_ops pci_root_ops = {
.write = pci_write,
};
-static void zpci_irq_handler(struct airq_struct *airq)
-{
- unsigned long si, ai;
- struct airq_iv *aibv;
- int irqs_on = 0;
-
- inc_irq_stat(IRQIO_PCI);
- for (si = 0;;) {
- /* Scan adapter summary indicator bit vector */
- si = airq_iv_scan(zpci_aisb_iv, si, airq_iv_end(zpci_aisb_iv));
- if (si == -1UL) {
- if (irqs_on++)
- /* End of second scan with interrupts on. */
- break;
- /* First scan complete, reenable interrupts. */
- if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC))
- break;
- si = 0;
- continue;
- }
-
- /* Scan the adapter interrupt vector for this device. */
- aibv = zpci_aibv[si];
- for (ai = 0;;) {
- ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
- if (ai == -1UL)
- break;
- inc_irq_stat(IRQIO_MSI);
- airq_iv_lock(aibv, ai);
- generic_handle_irq(airq_iv_get_data(aibv, ai));
- airq_iv_unlock(aibv, ai);
- }
- }
-}
-
-int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
-{
- struct zpci_dev *zdev = to_zpci(pdev);
- unsigned int hwirq, msi_vecs;
- unsigned long aisb;
- struct msi_desc *msi;
- struct msi_msg msg;
- int rc, irq;
-
- zdev->aisb = -1UL;
- if (type == PCI_CAP_ID_MSI && nvec > 1)
- return 1;
- msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
-
- /* Allocate adapter summary indicator bit */
- aisb = airq_iv_alloc_bit(zpci_aisb_iv);
- if (aisb == -1UL)
- return -EIO;
- zdev->aisb = aisb;
-
- /* Create adapter interrupt vector */
- zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
- if (!zdev->aibv)
- return -ENOMEM;
-
- /* Wire up shortcut pointer */
- zpci_aibv[aisb] = zdev->aibv;
-
- /* Request MSI interrupts */
- hwirq = 0;
- for_each_pci_msi_entry(msi, pdev) {
- if (hwirq >= msi_vecs)
- break;
- irq = irq_alloc_desc(0); /* Alloc irq on node 0 */
- if (irq < 0)
- return -ENOMEM;
- rc = irq_set_msi_desc(irq, msi);
- if (rc)
- return rc;
- irq_set_chip_and_handler(irq, &zpci_irq_chip,
- handle_simple_irq);
- msg.data = hwirq;
- msg.address_lo = zdev->msi_addr & 0xffffffff;
- msg.address_hi = zdev->msi_addr >> 32;
- pci_write_msi_msg(irq, &msg);
- airq_iv_set_data(zdev->aibv, hwirq, irq);
- hwirq++;
- }
-
- /* Enable adapter interrupts */
- rc = zpci_set_airq(zdev);
- if (rc)
- return rc;
-
- return (msi_vecs == nvec) ? 0 : msi_vecs;
-}
-
-void arch_teardown_msi_irqs(struct pci_dev *pdev)
-{
- struct zpci_dev *zdev = to_zpci(pdev);
- struct msi_desc *msi;
- int rc;
-
- /* Disable adapter interrupts */
- rc = zpci_clear_airq(zdev);
- if (rc)
- return;
-
- /* Release MSI interrupts */
- for_each_pci_msi_entry(msi, pdev) {
- if (!msi->irq)
- continue;
- if (msi->msi_attrib.is_msix)
- __pci_msix_desc_mask_irq(msi, 1);
- else
- __pci_msi_desc_mask_irq(msi, 1, 1);
- irq_set_msi_desc(msi->irq, NULL);
- irq_free_desc(msi->irq);
- msi->msg.address_lo = 0;
- msi->msg.address_hi = 0;
- msi->msg.data = 0;
- msi->irq = 0;
- }
-
- if (zdev->aisb != -1UL) {
- zpci_aibv[zdev->aisb] = NULL;
- airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
- zdev->aisb = -1UL;
- }
- if (zdev->aibv) {
- airq_iv_release(zdev->aibv);
- zdev->aibv = NULL;
- }
-}
-
#ifdef CONFIG_PCI_IOV
static struct resource iov_res = {
.name = "PCI IOV res",
@@ -495,6 +412,7 @@ static struct resource iov_res = {
static void zpci_map_resources(struct pci_dev *pdev)
{
+ struct zpci_dev *zdev = to_zpci(pdev);
resource_size_t len;
int i;
@@ -502,8 +420,13 @@ static void zpci_map_resources(struct pci_dev *pdev)
len = pci_resource_len(pdev, i);
if (!len)
continue;
- pdev->resource[i].start =
- (resource_size_t __force) pci_iomap(pdev, i, 0);
+
+ if (static_branch_likely(&have_mio))
+ pdev->resource[i].start =
+ (resource_size_t __force) zdev->bars[i].mio_wb;
+ else
+ pdev->resource[i].start =
+ (resource_size_t __force) pci_iomap(pdev, i, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
@@ -524,6 +447,9 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
resource_size_t len;
int i;
+ if (static_branch_likely(&have_mio))
+ return;
+
for (i = 0; i < PCI_BAR_COUNT; i++) {
len = pci_resource_len(pdev, i);
if (!len)
@@ -533,41 +459,6 @@ static void zpci_unmap_resources(struct pci_dev *pdev)
}
}
-static struct airq_struct zpci_airq = {
- .handler = zpci_irq_handler,
- .isc = PCI_ISC,
-};
-
-static int __init zpci_irq_init(void)
-{
- int rc;
-
- rc = register_adapter_interrupt(&zpci_airq);
- if (rc)
- goto out;
- /* Set summary to 1 to be called every time for the ISC. */
- *zpci_airq.lsi_ptr = 1;
-
- rc = -ENOMEM;
- zpci_aisb_iv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
- if (!zpci_aisb_iv)
- goto out_airq;
-
- zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, NULL, PCI_ISC);
- return 0;
-
-out_airq:
- unregister_adapter_interrupt(&zpci_airq);
-out:
- return rc;
-}
-
-static void zpci_irq_exit(void)
-{
- airq_iv_release(zpci_aisb_iv);
- unregister_adapter_interrupt(&zpci_airq);
-}
-
static int zpci_alloc_iomap(struct zpci_dev *zdev)
{
unsigned long entry;
@@ -958,7 +849,9 @@ static void zpci_mem_exit(void)
kmem_cache_destroy(zdev_fmb_cache);
}
-static unsigned int s390_pci_probe = 1;
+static unsigned int s390_pci_probe __initdata = 1;
+static unsigned int s390_pci_no_mio __initdata;
+unsigned int s390_pci_force_floating __initdata;
static unsigned int s390_pci_initialized;
char * __init pcibios_setup(char *str)
@@ -967,6 +860,14 @@ char * __init pcibios_setup(char *str)
s390_pci_probe = 0;
return NULL;
}
+ if (!strcmp(str, "nomio")) {
+ s390_pci_no_mio = 1;
+ return NULL;
+ }
+ if (!strcmp(str, "force_floating")) {
+ s390_pci_force_floating = 1;
+ return NULL;
+ }
return str;
}
@@ -985,6 +886,9 @@ static int __init pci_base_init(void)
if (!test_facility(69) || !test_facility(71))
return 0;
+ if (test_facility(153) && !s390_pci_no_mio)
+ static_branch_enable(&have_mio);
+
rc = zpci_debug_init();
if (rc)
goto out;
diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c
index eeb7450db18c..3a36b07a5571 100644
--- a/arch/s390/pci/pci_clp.c
+++ b/arch/s390/pci/pci_clp.c
@@ -163,7 +163,14 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev,
memcpy(zdev->util_str, response->util_str,
sizeof(zdev->util_str));
}
+ zdev->mio_capable = response->mio_addr_avail;
+ for (i = 0; i < PCI_BAR_COUNT; i++) {
+ if (!(response->mio_valid & (1 << (PCI_BAR_COUNT - i - 1))))
+ continue;
+ zdev->bars[i].mio_wb = (void __iomem *) response->addr[i].wb;
+ zdev->bars[i].mio_wt = (void __iomem *) response->addr[i].wt;
+ }
return 0;
}
@@ -279,11 +286,18 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
int rc;
rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_PCI_FN);
- if (!rc)
- /* Success -> store enabled handle in zdev */
- zdev->fh = fh;
+ zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+ if (rc)
+ goto out;
- zpci_dbg(3, "ena fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
+ zdev->fh = fh;
+ if (zdev->mio_capable) {
+ rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
+ zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
+ if (rc)
+ clp_disable_fh(zdev);
+ }
+out:
return rc;
}
@@ -296,11 +310,10 @@ int clp_disable_fh(struct zpci_dev *zdev)
return 0;
rc = clp_set_pci_fn(&fh, 0, CLP_SET_DISABLE_PCI_FN);
+ zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
if (!rc)
- /* Success -> store disabled handle in zdev */
zdev->fh = fh;
- zpci_dbg(3, "dis fid:%x, fh:%x, rc:%d\n", zdev->fid, zdev->fh, rc);
return rc;
}
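
In clp_store_query_pci_fn() above, mio_valid is a bit mask in which the most significant of the PCI_BAR_COUNT bits describes BAR 0, which is why the shift is (PCI_BAR_COUNT - i - 1) rather than i. A minimal standalone sketch of that mapping (PCI_BAR_COUNT is assumed to be 6, as on s390; the helper name is made up):

	#define PCI_BAR_COUNT	6

	/* Non-zero when the CLP response marks BAR 'bar' as MIO-capable. */
	static int mio_bar_valid(unsigned int mio_valid, int bar)
	{
		return mio_valid & (1 << (PCI_BAR_COUNT - bar - 1));
	}

	/* Example: mio_valid = 0x20 sets only bit 5 of the six bits, i.e. BAR 0. */
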
diff --git a/arch/s390/pci/pci_insn.c b/arch/s390/pci/pci_insn.c
index f069929e8211..02f9505c99a8 100644
--- a/arch/s390/pci/pci_insn.c
+++ b/arch/s390/pci/pci_insn.c
@@ -8,9 +8,11 @@
#include <linux/export.h>
#include <linux/errno.h>
#include <linux/delay.h>
+#include <linux/jump_label.h>
#include <asm/facility.h>
#include <asm/pci_insn.h>
#include <asm/pci_debug.h>
+#include <asm/pci_io.h>
#include <asm/processor.h>
#define ZPCI_INSN_BUSY_DELAY 1 /* 1 microsecond */
@@ -96,13 +98,15 @@ int zpci_refresh_trans(u64 fn, u64 addr, u64 range)
}
/* Set Interruption Controls */
-int zpci_set_irq_ctrl(u16 ctl, char *unused, u8 isc)
+int __zpci_set_irq_ctrl(u16 ctl, u8 isc, union zpci_sic_iib *iib)
{
if (!test_facility(72))
return -EIO;
- asm volatile (
- " .insn rsy,0xeb00000000d1,%[ctl],%[isc],%[u]\n"
- : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [u] "Q" (*unused));
+
+ asm volatile(
+ ".insn rsy,0xeb00000000d1,%[ctl],%[isc],%[iib]\n"
+ : : [ctl] "d" (ctl), [isc] "d" (isc << 27), [iib] "Q" (*iib));
+
return 0;
}
@@ -140,7 +144,7 @@ static inline int __pcilg(u64 *data, u64 req, u64 offset, u8 *status)
return cc;
}
-int zpci_load(u64 *data, u64 req, u64 offset)
+int __zpci_load(u64 *data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -156,6 +160,52 @@ int zpci_load(u64 *data, u64 req, u64 offset)
return (cc > 0) ? -EIO : cc;
}
+EXPORT_SYMBOL_GPL(__zpci_load);
+
+static inline int zpci_load_fh(u64 *data, const volatile void __iomem *addr,
+ unsigned long len)
+{
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
+ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+
+ return __zpci_load(data, req, ZPCI_OFFSET(addr));
+}
+
+static inline int __pcilg_mio(u64 *data, u64 ioaddr, u64 len, u8 *status)
+{
+ register u64 addr asm("2") = ioaddr;
+ register u64 r3 asm("3") = len;
+ int cc = -ENXIO;
+ u64 __data;
+
+ asm volatile (
+ " .insn rre,0xb9d60000,%[data],%[ioaddr]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [data] "=d" (__data), "+d" (r3)
+ : [ioaddr] "d" (addr)
+ : "cc");
+ *status = r3 >> 24 & 0xff;
+ *data = __data;
+ return cc;
+}
+
+int zpci_load(u64 *data, const volatile void __iomem *addr, unsigned long len)
+{
+ u8 status;
+ int cc;
+
+ if (!static_branch_unlikely(&have_mio))
+ return zpci_load_fh(data, addr, len);
+
+ cc = __pcilg_mio(data, (__force u64) addr, len, &status);
+ if (cc)
+ zpci_err_insn(cc, status, 0, (__force u64) addr);
+
+ return (cc > 0) ? -EIO : cc;
+}
EXPORT_SYMBOL_GPL(zpci_load);
/* PCI Store */
@@ -178,7 +228,7 @@ static inline int __pcistg(u64 data, u64 req, u64 offset, u8 *status)
return cc;
}
-int zpci_store(u64 data, u64 req, u64 offset)
+int __zpci_store(u64 data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -194,6 +244,50 @@ int zpci_store(u64 data, u64 req, u64 offset)
return (cc > 0) ? -EIO : cc;
}
+EXPORT_SYMBOL_GPL(__zpci_store);
+
+static inline int zpci_store_fh(const volatile void __iomem *addr, u64 data,
+ unsigned long len)
+{
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(addr)];
+ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+
+ return __zpci_store(data, req, ZPCI_OFFSET(addr));
+}
+
+static inline int __pcistg_mio(u64 data, u64 ioaddr, u64 len, u8 *status)
+{
+ register u64 addr asm("2") = ioaddr;
+ register u64 r3 asm("3") = len;
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rre,0xb9d40000,%[data],%[ioaddr]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), "+d" (r3)
+ : [data] "d" (data), [ioaddr] "d" (addr)
+ : "cc");
+ *status = r3 >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_store(const volatile void __iomem *addr, u64 data, unsigned long len)
+{
+ u8 status;
+ int cc;
+
+ if (!static_branch_unlikely(&have_mio))
+ return zpci_store_fh(addr, data, len);
+
+ cc = __pcistg_mio(data, (__force u64) addr, len, &status);
+ if (cc)
+ zpci_err_insn(cc, status, 0, (__force u64) addr);
+
+ return (cc > 0) ? -EIO : cc;
+}
EXPORT_SYMBOL_GPL(zpci_store);
/* PCI Store Block */
@@ -214,7 +308,7 @@ static inline int __pcistb(const u64 *data, u64 req, u64 offset, u8 *status)
return cc;
}
-int zpci_store_block(const u64 *data, u64 req, u64 offset)
+int __zpci_store_block(const u64 *data, u64 req, u64 offset)
{
u8 status;
int cc;
@@ -230,4 +324,63 @@ int zpci_store_block(const u64 *data, u64 req, u64 offset)
return (cc > 0) ? -EIO : cc;
}
-EXPORT_SYMBOL_GPL(zpci_store_block);
+EXPORT_SYMBOL_GPL(__zpci_store_block);
+
+static inline int zpci_write_block_fh(volatile void __iomem *dst,
+ const void *src, unsigned long len)
+{
+ struct zpci_iomap_entry *entry = &zpci_iomap_start[ZPCI_IDX(dst)];
+ u64 req = ZPCI_CREATE_REQ(entry->fh, entry->bar, len);
+ u64 offset = ZPCI_OFFSET(dst);
+
+ return __zpci_store_block(src, req, offset);
+}
+
+static inline int __pcistb_mio(const u64 *data, u64 ioaddr, u64 len, u8 *status)
+{
+ int cc = -ENXIO;
+
+ asm volatile (
+ " .insn rsy,0xeb00000000d4,%[len],%[ioaddr],%[data]\n"
+ "0: ipm %[cc]\n"
+ " srl %[cc],28\n"
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : [cc] "+d" (cc), [len] "+d" (len)
+ : [ioaddr] "d" (ioaddr), [data] "Q" (*data)
+ : "cc");
+ *status = len >> 24 & 0xff;
+ return cc;
+}
+
+int zpci_write_block(volatile void __iomem *dst,
+ const void *src, unsigned long len)
+{
+ u8 status;
+ int cc;
+
+ if (!static_branch_unlikely(&have_mio))
+ return zpci_write_block_fh(dst, src, len);
+
+ cc = __pcistb_mio(src, (__force u64) dst, len, &status);
+ if (cc)
+ zpci_err_insn(cc, status, 0, (__force u64) dst);
+
+ return (cc > 0) ? -EIO : cc;
+}
+EXPORT_SYMBOL_GPL(zpci_write_block);
+
+static inline void __pciwb_mio(void)
+{
+ unsigned long unused = 0;
+
+ asm volatile (".insn rre,0xb9d50000,%[op],%[op]\n"
+ : [op] "+d" (unused));
+}
+
+void zpci_barrier(void)
+{
+ if (static_branch_likely(&have_mio))
+ __pciwb_mio();
+}
+EXPORT_SYMBOL_GPL(zpci_barrier);
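
zpci_load(), zpci_store() and zpci_write_block() above all follow the same shape: when the have_mio static key is off they fall back to the function-handle based instruction built from the iomap entry, otherwise they issue the MIO form of the instruction and report failures through zpci_err_insn(). A condensed sketch of that dispatch shape (the two low-level helpers are placeholders, not kernel symbols):

	/* Sketch of the common accessor shape; __legacy_op/__mio_op are made up. */
	static int zpci_op(const volatile void __iomem *addr, unsigned long len)
	{
		u8 status;
		int cc;

		if (!static_branch_unlikely(&have_mio))
			return __legacy_op(addr, len);

		cc = __mio_op((__force u64) addr, len, &status);
		if (cc)
			zpci_err_insn(cc, status, 0, (__force u64) addr);

		return (cc > 0) ? -EIO : cc;
	}
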
diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c
new file mode 100644
index 000000000000..d80616ae8dd8
--- /dev/null
+++ b/arch/s390/pci/pci_irq.c
@@ -0,0 +1,486 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KMSG_COMPONENT "zpci"
+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/irq.h>
+#include <linux/kernel_stat.h>
+#include <linux/pci.h>
+#include <linux/msi.h>
+#include <linux/smp.h>
+
+#include <asm/isc.h>
+#include <asm/airq.h>
+
+static enum {FLOATING, DIRECTED} irq_delivery;
+
+#define SIC_IRQ_MODE_ALL 0
+#define SIC_IRQ_MODE_SINGLE 1
+#define SIC_IRQ_MODE_DIRECT 4
+#define SIC_IRQ_MODE_D_ALL 16
+#define SIC_IRQ_MODE_D_SINGLE 17
+#define SIC_IRQ_MODE_SET_CPU 18
+
+/*
+ * summary bit vector
+ * FLOATING - summary bit per function
+ * DIRECTED - summary bit per cpu (only used in fallback path)
+ */
+static struct airq_iv *zpci_sbv;
+
+/*
+ * interrupt bit vectors
+ * FLOATING - interrupt bit vector per function
+ * DIRECTED - interrupt bit vector per cpu
+ */
+static struct airq_iv **zpci_ibv;
+
+/* Modify PCI: Register adapter interruptions */
+static int zpci_set_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
+ struct zpci_fib fib = {0};
+ u8 status;
+
+ fib.fmt0.isc = PCI_ISC;
+ fib.fmt0.sum = 1; /* enable summary notifications */
+ fib.fmt0.noi = airq_iv_end(zdev->aibv);
+ fib.fmt0.aibv = (unsigned long) zdev->aibv->vector;
+ fib.fmt0.aibvo = 0; /* each zdev has its own interrupt vector */
+ fib.fmt0.aisb = (unsigned long) zpci_sbv->vector + (zdev->aisb/64)*8;
+ fib.fmt0.aisbo = zdev->aisb & 63;
+
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister adapter interruptions */
+static int zpci_clear_airq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
+
+ return cc ? -EIO : 0;
+}
+
+/* Modify PCI: Register CPU directed interruptions */
+static int zpci_set_directed_irq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT_D);
+ struct zpci_fib fib = {0};
+ u8 status;
+
+ fib.fmt = 1;
+ fib.fmt1.noi = zdev->msi_nr_irqs;
+ fib.fmt1.dibvo = zdev->msi_first_bit;
+
+ return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
+}
+
+/* Modify PCI: Unregister CPU directed interruptions */
+static int zpci_clear_directed_irq(struct zpci_dev *zdev)
+{
+ u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT_D);
+ struct zpci_fib fib = {0};
+ u8 cc, status;
+
+ fib.fmt = 1;
+ cc = zpci_mod_fc(req, &fib, &status);
+ if (cc == 3 || (cc == 1 && status == 24))
+ /* Function already gone or IRQs already deregistered. */
+ cc = 0;
+
+ return cc ? -EIO : 0;
+}
+
+static int zpci_set_irq_affinity(struct irq_data *data, const struct cpumask *dest,
+ bool force)
+{
+ struct msi_desc *entry = irq_get_msi_desc(data->irq);
+ struct msi_msg msg = entry->msg;
+
+ msg.address_lo &= 0xff0000ff;
+ msg.address_lo |= (cpumask_first(dest) << 8);
+ pci_write_msi_msg(data->irq, &msg);
+
+ return IRQ_SET_MASK_OK;
+}
+
+static struct irq_chip zpci_irq_chip = {
+ .name = "PCI-MSI",
+ .irq_unmask = pci_msi_unmask_irq,
+ .irq_mask = pci_msi_mask_irq,
+ .irq_set_affinity = zpci_set_irq_affinity,
+};
+
+static void zpci_handle_cpu_local_irq(bool rescan)
+{
+ struct airq_iv *dibv = zpci_ibv[smp_processor_id()];
+ unsigned long bit;
+ int irqs_on = 0;
+
+ for (bit = 0;;) {
+ /* Scan the directed IRQ bit vector */
+ bit = airq_iv_scan(dibv, bit, airq_iv_end(dibv));
+ if (bit == -1UL) {
+ if (!rescan || irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC))
+ break;
+ bit = 0;
+ continue;
+ }
+ inc_irq_stat(IRQIO_MSI);
+ generic_handle_irq(airq_iv_get_data(dibv, bit));
+ }
+}
+
+struct cpu_irq_data {
+ call_single_data_t csd;
+ atomic_t scheduled;
+};
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct cpu_irq_data, irq_data);
+
+static void zpci_handle_remote_irq(void *data)
+{
+ atomic_t *scheduled = data;
+
+ do {
+ zpci_handle_cpu_local_irq(false);
+ } while (atomic_dec_return(scheduled));
+}
+
+static void zpci_handle_fallback_irq(void)
+{
+ struct cpu_irq_data *cpu_data;
+ unsigned long cpu;
+ int irqs_on = 0;
+
+ for (cpu = 0;;) {
+ cpu = airq_iv_scan(zpci_sbv, cpu, airq_iv_end(zpci_sbv));
+ if (cpu == -1UL) {
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ break;
+ cpu = 0;
+ continue;
+ }
+ cpu_data = &per_cpu(irq_data, cpu);
+ if (atomic_inc_return(&cpu_data->scheduled) > 1)
+ continue;
+
+ cpu_data->csd.func = zpci_handle_remote_irq;
+ cpu_data->csd.info = &cpu_data->scheduled;
+ cpu_data->csd.flags = 0;
+ smp_call_function_single_async(cpu, &cpu_data->csd);
+ }
+}
+
+static void zpci_directed_irq_handler(struct airq_struct *airq, bool floating)
+{
+ if (floating) {
+ inc_irq_stat(IRQIO_PCF);
+ zpci_handle_fallback_irq();
+ } else {
+ inc_irq_stat(IRQIO_PCD);
+ zpci_handle_cpu_local_irq(true);
+ }
+}
+
+static void zpci_floating_irq_handler(struct airq_struct *airq, bool floating)
+{
+ unsigned long si, ai;
+ struct airq_iv *aibv;
+ int irqs_on = 0;
+
+ inc_irq_stat(IRQIO_PCF);
+ for (si = 0;;) {
+ /* Scan adapter summary indicator bit vector */
+ si = airq_iv_scan(zpci_sbv, si, airq_iv_end(zpci_sbv));
+ if (si == -1UL) {
+ if (irqs_on++)
+ /* End of second scan with interrupts on. */
+ break;
+ /* First scan complete, reenable interrupts. */
+ if (zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC))
+ break;
+ si = 0;
+ continue;
+ }
+
+ /* Scan the adapter interrupt vector for this device. */
+ aibv = zpci_ibv[si];
+ for (ai = 0;;) {
+ ai = airq_iv_scan(aibv, ai, airq_iv_end(aibv));
+ if (ai == -1UL)
+ break;
+ inc_irq_stat(IRQIO_MSI);
+ airq_iv_lock(aibv, ai);
+ generic_handle_irq(airq_iv_get_data(aibv, ai));
+ airq_iv_unlock(aibv, ai);
+ }
+ }
+}
+
+int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ unsigned int hwirq, msi_vecs, cpu;
+ unsigned long bit;
+ struct msi_desc *msi;
+ struct msi_msg msg;
+ int rc, irq;
+
+ zdev->aisb = -1UL;
+ zdev->msi_first_bit = -1U;
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
+ return 1;
+ msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
+
+ if (irq_delivery == DIRECTED) {
+ /* Allocate cpu vector bits */
+ bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
+ if (bit == -1UL)
+ return -EIO;
+ } else {
+ /* Allocate adapter summary indicator bit */
+ bit = airq_iv_alloc_bit(zpci_sbv);
+ if (bit == -1UL)
+ return -EIO;
+ zdev->aisb = bit;
+
+ /* Create adapter interrupt vector */
+ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK);
+ if (!zdev->aibv)
+ return -ENOMEM;
+
+ /* Wire up shortcut pointer */
+ zpci_ibv[bit] = zdev->aibv;
+ /* Each function has its own interrupt vector */
+ bit = 0;
+ }
+
+ /* Request MSI interrupts */
+ hwirq = bit;
+ for_each_pci_msi_entry(msi, pdev) {
+ rc = -EIO;
+ if (hwirq - bit >= msi_vecs)
+ break;
+ irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, msi->affinity);
+ if (irq < 0)
+ return -ENOMEM;
+ rc = irq_set_msi_desc(irq, msi);
+ if (rc)
+ return rc;
+ irq_set_chip_and_handler(irq, &zpci_irq_chip,
+ handle_percpu_irq);
+ msg.data = hwirq;
+ if (irq_delivery == DIRECTED) {
+ msg.address_lo = zdev->msi_addr & 0xff0000ff;
+ msg.address_lo |= msi->affinity ?
+ (cpumask_first(&msi->affinity->mask) << 8) : 0;
+ for_each_possible_cpu(cpu) {
+ airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
+ }
+ } else {
+ msg.address_lo = zdev->msi_addr & 0xffffffff;
+ airq_iv_set_data(zdev->aibv, hwirq, irq);
+ }
+ msg.address_hi = zdev->msi_addr >> 32;
+ pci_write_msi_msg(irq, &msg);
+ hwirq++;
+ }
+
+ zdev->msi_first_bit = bit;
+ zdev->msi_nr_irqs = msi_vecs;
+
+ if (irq_delivery == DIRECTED)
+ rc = zpci_set_directed_irq(zdev);
+ else
+ rc = zpci_set_airq(zdev);
+ if (rc)
+ return rc;
+
+ return (msi_vecs == nvec) ? 0 : msi_vecs;
+}
+
+void arch_teardown_msi_irqs(struct pci_dev *pdev)
+{
+ struct zpci_dev *zdev = to_zpci(pdev);
+ struct msi_desc *msi;
+ int rc;
+
+ /* Disable interrupts */
+ if (irq_delivery == DIRECTED)
+ rc = zpci_clear_directed_irq(zdev);
+ else
+ rc = zpci_clear_airq(zdev);
+ if (rc)
+ return;
+
+ /* Release MSI interrupts */
+ for_each_pci_msi_entry(msi, pdev) {
+ if (!msi->irq)
+ continue;
+ if (msi->msi_attrib.is_msix)
+ __pci_msix_desc_mask_irq(msi, 1);
+ else
+ __pci_msi_desc_mask_irq(msi, 1, 1);
+ irq_set_msi_desc(msi->irq, NULL);
+ irq_free_desc(msi->irq);
+ msi->msg.address_lo = 0;
+ msi->msg.address_hi = 0;
+ msi->msg.data = 0;
+ msi->irq = 0;
+ }
+
+ if (zdev->aisb != -1UL) {
+ zpci_ibv[zdev->aisb] = NULL;
+ airq_iv_free_bit(zpci_sbv, zdev->aisb);
+ zdev->aisb = -1UL;
+ }
+ if (zdev->aibv) {
+ airq_iv_release(zdev->aibv);
+ zdev->aibv = NULL;
+ }
+
+ if ((irq_delivery == DIRECTED) && zdev->msi_first_bit != -1U)
+ airq_iv_free(zpci_ibv[0], zdev->msi_first_bit, zdev->msi_nr_irqs);
+}
+
+static struct airq_struct zpci_airq = {
+ .handler = zpci_floating_irq_handler,
+ .isc = PCI_ISC,
+};
+
+static void __init cpu_enable_directed_irq(void *unused)
+{
+ union zpci_sic_iib iib = {{0}};
+
+ iib.cdiib.dibv_addr = (u64) zpci_ibv[smp_processor_id()]->vector;
+
+ __zpci_set_irq_ctrl(SIC_IRQ_MODE_SET_CPU, 0, &iib);
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_D_SINGLE, PCI_ISC);
+}
+
+static int __init zpci_directed_irq_init(void)
+{
+ union zpci_sic_iib iib = {{0}};
+ unsigned int cpu;
+
+ zpci_sbv = airq_iv_create(num_possible_cpus(), 0);
+ if (!zpci_sbv)
+ return -ENOMEM;
+
+ iib.diib.isc = PCI_ISC;
+ iib.diib.nr_cpus = num_possible_cpus();
+ iib.diib.disb_addr = (u64) zpci_sbv->vector;
+ __zpci_set_irq_ctrl(SIC_IRQ_MODE_DIRECT, 0, &iib);
+
+ zpci_ibv = kcalloc(num_possible_cpus(), sizeof(*zpci_ibv),
+ GFP_KERNEL);
+ if (!zpci_ibv)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ /*
+ * Per CPU IRQ vectors look the same but bit-allocation
+ * is only done on the first vector.
+ */
+ zpci_ibv[cpu] = airq_iv_create(cache_line_size() * BITS_PER_BYTE,
+ AIRQ_IV_DATA |
+ AIRQ_IV_CACHELINE |
+ (!cpu ? AIRQ_IV_ALLOC : 0));
+ if (!zpci_ibv[cpu])
+ return -ENOMEM;
+ }
+ on_each_cpu(cpu_enable_directed_irq, NULL, 1);
+
+ zpci_irq_chip.irq_set_affinity = zpci_set_irq_affinity;
+
+ return 0;
+}
+
+static int __init zpci_floating_irq_init(void)
+{
+ zpci_ibv = kcalloc(ZPCI_NR_DEVICES, sizeof(*zpci_ibv), GFP_KERNEL);
+ if (!zpci_ibv)
+ return -ENOMEM;
+
+ zpci_sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC);
+ if (!zpci_sbv)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ kfree(zpci_ibv);
+ return -ENOMEM;
+}
+
+int __init zpci_irq_init(void)
+{
+ int rc;
+
+ irq_delivery = sclp.has_dirq ? DIRECTED : FLOATING;
+ if (s390_pci_force_floating)
+ irq_delivery = FLOATING;
+
+ if (irq_delivery == DIRECTED)
+ zpci_airq.handler = zpci_directed_irq_handler;
+
+ rc = register_adapter_interrupt(&zpci_airq);
+ if (rc)
+ goto out;
+ /* Set summary to 1 to be called every time for the ISC. */
+ *zpci_airq.lsi_ptr = 1;
+
+ switch (irq_delivery) {
+ case FLOATING:
+ rc = zpci_floating_irq_init();
+ break;
+ case DIRECTED:
+ rc = zpci_directed_irq_init();
+ break;
+ }
+
+ if (rc)
+ goto out_airq;
+
+ /*
+ * Enable floating IRQs (with suppression after one IRQ). When using
+ * directed IRQs this enables the fallback path.
+ */
+ zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, PCI_ISC);
+
+ return 0;
+out_airq:
+ unregister_adapter_interrupt(&zpci_airq);
+out:
+ return rc;
+}
+
+void __init zpci_irq_exit(void)
+{
+ unsigned int cpu;
+
+ if (irq_delivery == DIRECTED) {
+ for_each_possible_cpu(cpu) {
+ airq_iv_release(zpci_ibv[cpu]);
+ }
+ }
+ kfree(zpci_ibv);
+ if (zpci_sbv)
+ airq_iv_release(zpci_sbv);
+ unregister_adapter_interrupt(&zpci_airq);
+}
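
With directed delivery the target CPU is encoded straight into the MSI address: arch_setup_msi_irqs() and zpci_set_irq_affinity() both keep the 0xff0000ff portion of address_lo and place the CPU number into the cleared field starting at bit 8. A minimal sketch of that encoding (the helper name is made up):

	/* Sketch: rebuild the low MSI address word for a new target CPU. */
	static u32 zpci_directed_msi_addr_lo(u32 addr_lo, unsigned int cpu)
	{
		addr_lo &= 0xff0000ff;		/* clear the CPU field */
		return addr_lo | (cpu << 8);	/* insert the new CPU  */
	}
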
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index ce6a3f75065b..dc1ae4ff79d7 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -4,7 +4,7 @@ OBJECT_FILES_NON_STANDARD := y
purgatory-y := head.o purgatory.o string.o sha256.o mem.o
-targets += $(purgatory-y) purgatory.ro kexec-purgatory.c
+targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro
PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
@@ -16,22 +16,26 @@ $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
$(obj)/string.o: $(srctree)/arch/s390/lib/string.c FORCE
$(call if_changed_rule,cc_o_c)
-LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib
-LDFLAGS_purgatory.ro += -z nodefaultlib
KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes
KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare
KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding
KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float -fno-common
+KBUILD_CFLAGS += $(CLANG_FLAGS)
KBUILD_CFLAGS += $(call cc-option,-fno-PIE)
KBUILD_AFLAGS := $(filter-out -DCC_USING_EXPOLINE,$(KBUILD_AFLAGS))
-$(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+LDFLAGS_purgatory := -r --no-undefined -nostdlib -z nodefaultlib -T
+$(obj)/purgatory: $(obj)/purgatory.lds $(PURGATORY_OBJS) FORCE
$(call if_changed,ld)
-quiet_cmd_bin2c = BIN2C $@
- cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@
+OBJCOPYFLAGS_purgatory.ro := -O elf64-s390
+OBJCOPYFLAGS_purgatory.ro += --remove-section='*debug*'
+OBJCOPYFLAGS_purgatory.ro += --remove-section='.comment'
+OBJCOPYFLAGS_purgatory.ro += --remove-section='.note.*'
+$(obj)/purgatory.ro: $(obj)/purgatory FORCE
+ $(call if_changed,objcopy)
-$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
- $(call if_changed,bin2c)
+$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
+ $(call if_changed_rule,as_o_S)
obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o
diff --git a/arch/s390/purgatory/kexec-purgatory.S b/arch/s390/purgatory/kexec-purgatory.S
new file mode 100644
index 000000000000..8293753100ae
--- /dev/null
+++ b/arch/s390/purgatory/kexec-purgatory.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+ .section .rodata, "a"
+
+ .align 8
+kexec_purgatory:
+ .globl kexec_purgatory
+ .incbin "arch/s390/purgatory/purgatory.ro"
+.Lkexec_purgatory_end:
+
+ .align 8
+kexec_purgatory_size:
+ .globl kexec_purgatory_size
+ .quad .Lkexec_purgatory_end - kexec_purgatory
diff --git a/arch/s390/purgatory/purgatory.lds.S b/arch/s390/purgatory/purgatory.lds.S
new file mode 100644
index 000000000000..482eb4fbcef1
--- /dev/null
+++ b/arch/s390/purgatory/purgatory.lds.S
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm-generic/vmlinux.lds.h>
+
+OUTPUT_FORMAT("elf64-s390", "elf64-s390", "elf64-s390")
+OUTPUT_ARCH(s390:64-bit)
+
+ENTRY(purgatory_start)
+
+SECTIONS
+{
+ . = 0;
+ .head.text : {
+ _head = . ;
+ HEAD_TEXT
+ _ehead = . ;
+ }
+ .text : {
+ _text = .; /* Text */
+ *(.text)
+ *(.text.*)
+ _etext = . ;
+ }
+ .rodata : {
+ _rodata = . ;
+ *(.rodata) /* read-only data */
+ *(.rodata.*)
+ _erodata = . ;
+ }
+ .data : {
+ _data = . ;
+ *(.data)
+ *(.data.*)
+ _edata = . ;
+ }
+
+ . = ALIGN(256);
+ .bss : {
+ _bss = . ;
+ *(.bss)
+ *(.bss.*)
+ *(COMMON)
+ . = ALIGN(8); /* For convenience during zeroing */
+ _ebss = .;
+ }
+ _end = .;
+
+ /* Sections to be discarded */
+ /DISCARD/ : {
+ *(.eh_frame)
+ *(*__ksymtab*)
+ *(___kcrctab*)
+ }
+}
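
The Makefile change above drops the generated kexec-purgatory.c (bin2c) in favour of kexec-purgatory.S, which pulls the linked and objcopied purgatory.ro in via .incbin and exports it as the kexec_purgatory and kexec_purgatory_size symbols. A hedged sketch of how those symbols look from the C side (the extern declarations here are illustrative assumptions, not quoted from a kernel header):

	/* Sketch: the embedded blob and its size as seen from C. */
	extern const char kexec_purgatory[];		/* start of the .incbin data */
	extern const unsigned long kexec_purgatory_size;	/* the trailing .quad */

	static void __maybe_unused show_purgatory_size(void)
	{
		pr_info("purgatory image: %lu bytes\n", kexec_purgatory_size);
	}
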
diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss
index cd7e8f4419f5..884a9caff5fb 100644
--- a/arch/s390/scripts/Makefile.chkbss
+++ b/arch/s390/scripts/Makefile.chkbss
@@ -11,7 +11,8 @@ chkbss: $(addprefix $(obj)/, $(chkbss-files))
quiet_cmd_chkbss = CHKBSS $<
cmd_chkbss = \
- if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \
+ if $(OBJDUMP) -h $< | grep -q "\.bss" && \
+ ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \
echo "error: $< .bss section is not empty" >&2; exit 1; \
fi; \
touch $@;
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 1cbed82cd17b..64638b764d1c 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -1,3 +1,5 @@
+0000 illegal E
+0002 brkpt E
0101 pr E
0102 upt E
0104 ptff E
@@ -257,6 +259,7 @@ b258 bsg RRE_RR
b25a bsa RRE_RR
b25d clst RRE_RR
b25e srst RRE_RR
+b25f chsc RRE_R0
b263 cmpsc RRE_RR
b274 siga S_RD
b276 xsch S_00
@@ -277,6 +280,9 @@ b29d lfpc S_RD
b2a5 tre RRE_RR
b2a6 cu21 RRF_U0RR
b2a7 cu12 RRF_U0RR
+b2ad nqap RRE_RR
+b2ae dqap RRE_RR
+b2af pqap RRE_RR
b2b0 stfle S_RD
b2b1 stfl S_RD
b2b2 lpswe S_RD
@@ -290,6 +296,7 @@ b2e5 epctr RRE_RR
b2e8 ppa RRF_U0RR
b2ec etnd RRE_R0
b2ed ecpga RRE_RR
+b2f0 iucv RRE_RR
b2f8 tend S_00
b2fa niai IE_UU
b2fc tabort S_RD
@@ -559,12 +566,15 @@ b998 alcr RRE_RR
b999 slbr RRE_RR
b99a epair RRE_R0
b99b esair RRE_R0
+b99c eqbs RRF_U0RR
b99d esea RRE_R0
b99e pti RRE_RR
b99f ssair RRE_R0
+b9a0 clp RRF_U0RR
b9a1 tpei RRE_RR
b9a2 ptf RRE_R0
b9aa lptea RRF_RURR2
+b9ab essa RRF_U0RR
b9ac irbm RRE_RR
b9ae rrbm RRE_RR
b9af pfmf RRE_RR
@@ -1039,6 +1049,7 @@ eb7a agsi SIY_IRD
eb7e algsi SIY_IRD
eb80 icmh RSY_RURD
eb81 icmy RSY_RURD
+eb8a sqbs RSY_RDRU
eb8e mvclu RSY_RRRD
eb8f clclu RSY_RRRD
eb90 stmy RSY_RRRD
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b1c91ea9a958..0be08d586d40 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -90,12 +90,6 @@ config ARCH_DEFCONFIG
default "arch/sh/configs/shx3_defconfig" if SUPERH32
default "arch/sh/configs/cayman_defconfig" if SUPERH64
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config GENERIC_BUG
def_bool y
depends on BUG && SUPERH32
diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c
index 34e5414c5563..f402aa741bf3 100644
--- a/arch/sh/boards/mach-ecovec24/setup.c
+++ b/arch/sh/boards/mach-ecovec24/setup.c
@@ -806,7 +806,6 @@ static struct spi_board_info spi_bus[] = {
.platform_data = &mmc_spi_info,
.max_speed_hz = 5000000,
.mode = SPI_MODE_0,
- .controller_data = (void *) GPIO_PTM4,
},
};
@@ -838,6 +837,14 @@ static struct platform_device msiof0_device = {
.resource = msiof0_resources,
};
+static struct gpiod_lookup_table msiof_gpio_table = {
+ .dev_id = "spi_sh_msiof.0",
+ .table = {
+ GPIO_LOOKUP("sh7724_pfc", GPIO_PTM4, "cs", GPIO_ACTIVE_HIGH),
+ { },
+ },
+};
+
#endif
/* FSI */
@@ -1296,12 +1303,11 @@ static int __init arch_setup(void)
gpio_request(GPIO_FN_MSIOF0_TXD, NULL);
gpio_request(GPIO_FN_MSIOF0_RXD, NULL);
gpio_request(GPIO_FN_MSIOF0_TSCK, NULL);
- gpio_request(GPIO_PTM4, NULL); /* software CS control of TSYNC pin */
- gpio_direction_output(GPIO_PTM4, 1); /* active low CS */
gpio_request(GPIO_PTB6, NULL); /* 3.3V power control */
gpio_direction_output(GPIO_PTB6, 0); /* disable power by default */
gpiod_add_lookup_table(&mmc_spi_gpio_table);
+ gpiod_add_lookup_table(&msiof_gpio_table);
spi_register_board_info(spi_bus, ARRAY_SIZE(spi_bus));
#endif
diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index 825c641726c4..d0d9ebc7165b 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig
@@ -19,7 +19,6 @@ CONFIG_SLAB=y
CONFIG_PROFILING=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_CPU_SUBTYPE_SH7786=y
diff --git a/arch/sh/configs/ecovec24-romimage_defconfig b/arch/sh/configs/ecovec24-romimage_defconfig
index 0c5dfccbfe37..bdb61d1d0127 100644
--- a/arch/sh/configs/ecovec24-romimage_defconfig
+++ b/arch/sh/configs/ecovec24-romimage_defconfig
@@ -7,7 +7,6 @@ CONFIG_LOG_BUF_SHIFT=14
CONFIG_BLK_DEV_INITRD=y
# CONFIG_KALLSYMS is not set
CONFIG_SLAB=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH7724=y
CONFIG_MEMORY_SIZE=0x10000000
diff --git a/arch/sh/configs/rsk7264_defconfig b/arch/sh/configs/rsk7264_defconfig
index 2b9b731fc86b..ad003ee469ea 100644
--- a/arch/sh/configs/rsk7264_defconfig
+++ b/arch/sh/configs/rsk7264_defconfig
@@ -16,7 +16,6 @@ CONFIG_PERF_COUNTERS=y
CONFIG_SLAB=y
CONFIG_MMAP_ALLOW_UNINITIALIZED=y
CONFIG_PROFILING=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_DEADLINE is not set
diff --git a/arch/sh/configs/rsk7269_defconfig b/arch/sh/configs/rsk7269_defconfig
index d041f7bcb84c..27fc01d58cf8 100644
--- a/arch/sh/configs/rsk7269_defconfig
+++ b/arch/sh/configs/rsk7269_defconfig
@@ -3,7 +3,6 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y
CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
CONFIG_SLAB=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
# CONFIG_IOSCHED_DEADLINE is not set
# CONFIG_IOSCHED_CFQ is not set
diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig
index 2ddf5ca7094e..a89ccc15af23 100644
--- a/arch/sh/configs/sh7785lcr_32bit_defconfig
+++ b/arch/sh/configs/sh7785lcr_32bit_defconfig
@@ -11,7 +11,6 @@ CONFIG_PROFILING=y
CONFIG_GCOV_KERNEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
-# CONFIG_LBDAF is not set
# CONFIG_BLK_DEV_BSG is not set
CONFIG_CPU_SUBTYPE_SH7785=y
CONFIG_MEMORY_START=0x40000000
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 7bf2cb680d32..73fff39a0122 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += mm-arch-hooks.h
generic-y += parport.h
generic-y += percpu.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += serial.h
generic-y += sizes.h
generic-y += trace_clock.h
diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h
index 4f7f235f15f8..c28e37a344ad 100644
--- a/arch/sh/include/asm/io.h
+++ b/arch/sh/include/asm/io.h
@@ -229,9 +229,6 @@ __BUILD_IOPORT_STRING(q, u64)
#define IO_SPACE_LIMIT 0xffffffff
-/* synco on SH-4A, otherwise a nop */
-#define mmiowb() wmb()
-
/* We really want to try and get these to memcpy etc */
void memcpy_fromio(void *, const volatile void __iomem *, unsigned long);
void memcpy_toio(volatile void __iomem *, const void *, unsigned long);
diff --git a/arch/sh/include/asm/mmiowb.h b/arch/sh/include/asm/mmiowb.h
new file mode 100644
index 000000000000..535d59735f1d
--- /dev/null
+++ b/arch/sh/include/asm/mmiowb.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_SH_MMIOWB_H
+#define __ASM_SH_MMIOWB_H
+
+#include <asm/barrier.h>
+
+/* synco on SH-4A, otherwise a nop */
+#define mmiowb() wmb()
+
+#include <asm-generic/mmiowb.h>
+
+#endif /* __ASM_SH_MMIOWB_H */
diff --git a/arch/sh/include/asm/pgalloc.h b/arch/sh/include/asm/pgalloc.h
index 8ad73cb31121..b56f908b1395 100644
--- a/arch/sh/include/asm/pgalloc.h
+++ b/arch/sh/include/asm/pgalloc.h
@@ -70,6 +70,15 @@ do { \
tlb_remove_page((tlb), (pte)); \
} while (0)
+#if CONFIG_PGTABLE_LEVELS > 2
+#define __pmd_free_tlb(tlb, pmdp, addr) \
+do { \
+ struct page *page = virt_to_page(pmdp); \
+ pgtable_pmd_page_dtor(page); \
+ tlb_remove_page((tlb), page); \
+} while (0)
+#endif
+
static inline void check_pgt_cache(void)
{
quicklist_trim(QUICK_PT, NULL, 25, 16);
diff --git a/arch/sh/include/asm/spinlock-llsc.h b/arch/sh/include/asm/spinlock-llsc.h
index 786ee0fde3b0..7fd929cd2e7a 100644
--- a/arch/sh/include/asm/spinlock-llsc.h
+++ b/arch/sh/include/asm/spinlock-llsc.h
@@ -47,6 +47,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
unsigned long tmp;
+ /* This could be optimised with ARCH_HAS_MMIOWB */
+ mmiowb();
__asm__ __volatile__ (
"mov #1, %0 ! arch_spin_unlock \n\t"
"mov.l %0, @%1 \n\t"
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index 8c9d7e5e5dcc..0b5b8e75edac 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -72,7 +72,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->regs[4] = args[0];
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
int arch = AUDIT_ARCH_SH;
diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h
index 22fad97da066..72efcbc76f91 100644
--- a/arch/sh/include/asm/syscall_64.h
+++ b/arch/sh/include/asm/syscall_64.h
@@ -59,7 +59,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->regs[2], args, 6 * sizeof(args[0]));
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
int arch = AUDIT_ARCH_SH;
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 77abe192fb43..bc77f3dd4261 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -11,133 +11,8 @@
#ifdef CONFIG_MMU
#include <linux/swap.h>
-#include <asm/pgalloc.h>
-#include <asm/tlbflush.h>
-#include <asm/mmu_context.h>
-/*
- * TLB handling. This allows us to remove pages from the page
- * tables, and efficiently handle the TLB issues.
- */
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int fullmm;
- unsigned long start, end;
-};
-
-static inline void init_tlb_gather(struct mmu_gather *tlb)
-{
- tlb->start = TASK_SIZE;
- tlb->end = 0;
-
- if (tlb->fullmm) {
- tlb->start = 0;
- tlb->end = TASK_SIZE;
- }
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
-
- init_tlb_gather(tlb);
-}
-
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (tlb->fullmm || force)
- flush_tlb_mm(tlb->mm);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-}
-
-static inline void
-tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long address)
-{
- if (tlb->start > address)
- tlb->start = address;
- if (tlb->end < address + PAGE_SIZE)
- tlb->end = address + PAGE_SIZE;
-}
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-/*
- * In the case of tlb vma handling, we can optimise these away in the
- * case where we're doing a full MM flush. When we're doing a munmap,
- * the vmas are adjusted to only cover the region to be torn down.
- */
-static inline void
-tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm)
- flush_cache_range(vma, vma->vm_start, vma->vm_end);
-}
-
-static inline void
-tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
-{
- if (!tlb->fullmm && tlb->end) {
- flush_tlb_range(vma, tlb->start, tlb->end);
- init_tlb_gather(tlb);
- }
-}
-
-static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
-}
-
-static inline void tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
-}
-
-static inline void tlb_flush_mmu(struct mmu_gather *tlb)
-{
-}
-
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- __tlb_remove_page(tlb, page);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, addr) pte_free((tlb)->mm, ptep)
-#define pmd_free_tlb(tlb, pmdp, addr) pmd_free((tlb)->mm, pmdp)
-#define pud_free_tlb(tlb, pudp, addr) pud_free((tlb)->mm, pudp)
-
-#define tlb_migrate_finish(mm) do { } while (0)
+#include <asm-generic/tlb.h>
#if defined(CONFIG_CPU_SH4) || defined(CONFIG_SUPERH64)
extern void tlb_wire_entry(struct vm_area_struct *, unsigned long, pte_t);
@@ -157,11 +32,6 @@ static inline void tlb_unwire_entry(void)
#else /* CONFIG_MMU */
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0)
-#define tlb_flush(tlb) do { } while (0)
-
#include <asm-generic/tlb.h>
#endif /* CONFIG_MMU */
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7786.h b/arch/sh/include/cpu-sh4/cpu/sh7786.h
index 8f9bfbf3cdb1..d6cce65b4871 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7786.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7786.h
@@ -132,7 +132,7 @@ enum {
static inline u32 sh7786_mm_sel(void)
{
- return __raw_readl(0xFC400020) & 0x7;
+ return __raw_readl((const volatile void __iomem *)0xFC400020) & 0x7;
}
#endif /* __CPU_SH7786_H__ */
diff --git a/arch/sh/include/uapi/asm/sockios.h b/arch/sh/include/uapi/asm/sockios.h
index 17313d2c3527..ef18a668456d 100644
--- a/arch/sh/include/uapi/asm/sockios.h
+++ b/arch/sh/include/uapi/asm/sockios.h
@@ -10,6 +10,7 @@
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
-#define SIOCGSTAMPNS _IOR('s', 101, struct timespec) /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD _IOR('s', 100, struct timeval) /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD _IOR('s', 101, struct timespec) /* Get stamp (timespec) */
+
#endif /* __ASM_SH_SOCKIOS_H */
diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c
index f3cb2cccb262..2950b19ad077 100644
--- a/arch/sh/kernel/stacktrace.c
+++ b/arch/sh/kernel/stacktrace.c
@@ -49,8 +49,6 @@ void save_stack_trace(struct stack_trace *trace)
unsigned long *sp = (unsigned long *)current_stack_pointer;
unwind_stack(current, NULL, sp, &save_stack_ops, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace);
@@ -84,7 +82,5 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
unsigned long *sp = (unsigned long *)tsk->thread.sp;
unwind_stack(current, NULL, sp, &save_stack_ops_nosched, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 40f8f4f73fe8..f6421c9ce5d3 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -63,6 +63,7 @@ config SPARC64
select HAVE_KRETPROBES
select HAVE_KPROBES
select HAVE_RCU_TABLE_FREE if SMP
+ select HAVE_RCU_TABLE_NO_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_MEMBLOCK_NODE_MAP
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
@@ -191,14 +192,6 @@ config NR_CPUS
source "kernel/Kconfig.hz"
-config RWSEM_GENERIC_SPINLOCK
- bool
- default y if SPARC32
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- default y if SPARC64
-
config GENERIC_HWEIGHT
bool
default y
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index 4884315daff4..453a4cf5492a 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -201,18 +201,15 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen)
{
struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
- const u32 *K = (const u32 *)key;
u32 *flags = &tfm->crt_flags;
u64 k1[DES_EXPKEY_WORDS / 2];
u64 k2[DES_EXPKEY_WORDS / 2];
u64 k3[DES_EXPKEY_WORDS / 2];
+ int err;
- if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
- !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
- (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
- *flags |= CRYPTO_TFM_RES_WEAK_KEY;
- return -EINVAL;
- }
+ err = __des3_verify_key(flags, key);
+ if (unlikely(err))
+ return err;
des_sparc64_key_expand((const u32 *)key, k1);
key += DES_KEY_SIZE;
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a22cfd5c0ee8..95c44380b1d6 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -15,10 +15,10 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += module.h
generic-y += msi.h
generic-y += preempt.h
-generic-y += rwsem.h
generic-y += serial.h
generic-y += trace_clock.h
generic-y += word-at-a-time.h
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index b162c23ae8c2..688911051b44 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -396,8 +396,6 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src,
}
}
-#define mmiowb()
-
#ifdef __KERNEL__
/* On sparc64 we have the whole physical IO address space accessible
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 4d075434e816..62a5a78804c4 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -127,10 +127,11 @@ static inline void syscall_set_arguments(struct task_struct *task,
regs->u_regs[UREG_I0 + i] = args[i];
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
#if defined(CONFIG_SPARC64) && defined(CONFIG_COMPAT)
- return in_compat_syscall() ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64;
+ return test_tsk_thread_flag(task, TIF_32BIT)
+ ? AUDIT_ARCH_SPARC : AUDIT_ARCH_SPARC64;
#elif defined(CONFIG_SPARC64)
return AUDIT_ARCH_SPARC64;
#else
diff --git a/arch/sparc/include/asm/tlb_32.h b/arch/sparc/include/asm/tlb_32.h
index 343cea19e573..5cd28a8793e3 100644
--- a/arch/sparc/include/asm/tlb_32.h
+++ b/arch/sparc/include/asm/tlb_32.h
@@ -2,24 +2,6 @@
#ifndef _SPARC_TLB_H
#define _SPARC_TLB_H
-#define tlb_start_vma(tlb, vma) \
-do { \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define tlb_end_vma(tlb, vma) \
-do { \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
-} while (0)
-
-#define __tlb_remove_tlb_entry(tlb, pte, address) \
- do { } while (0)
-
-#define tlb_flush(tlb) \
-do { \
- flush_tlb_mm((tlb)->mm); \
-} while (0)
-
#include <asm-generic/tlb.h>
#endif /* _SPARC_TLB_H */
diff --git a/arch/sparc/include/uapi/asm/sockios.h b/arch/sparc/include/uapi/asm/sockios.h
deleted file mode 100644
index 18a3ec14a847..000000000000
--- a/arch/sparc/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_SPARC_SOCKIOS_H
-#define _ASM_SPARC_SOCKIOS_H
-
-/* Socket-level I/O control calls. */
-#define FIOSETOWN 0x8901
-#define SIOCSPGRP 0x8902
-#define FIOGETOWN 0x8903
-#define SIOCGPGRP 0x8904
-#define SIOCATMARK 0x8905
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
-
-#endif /* !(_ASM_SPARC_SOCKIOS_H) */
-
diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c
index d1d52822603d..1cb62bfeaa1f 100644
--- a/arch/sparc/kernel/cpumap.c
+++ b/arch/sparc/kernel/cpumap.c
@@ -194,8 +194,7 @@ static struct cpuinfo_tree *build_cpuinfo_tree(void)
n = enumerate_cpuinfo_nodes(tmp_level);
- new_tree = kzalloc(sizeof(struct cpuinfo_tree) +
- (sizeof(struct cpuinfo_node) * n), GFP_ATOMIC);
+ new_tree = kzalloc(struct_size(new_tree, nodes, n), GFP_ATOMIC);
if (!new_tree)
return NULL;
diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c
index f87265afb175..cad08ccce625 100644
--- a/arch/sparc/kernel/ds.c
+++ b/arch/sparc/kernel/ds.c
@@ -876,7 +876,7 @@ void ldom_power_off(void)
static void ds_conn_reset(struct ds_info *dp)
{
- printk(KERN_ERR "ds-%llu: ds_conn_reset() from %pf\n",
+ printk(KERN_ERR "ds-%llu: ds_conn_reset() from %ps\n",
dp->id, __builtin_return_address(0));
}
diff --git a/arch/sparc/kernel/uprobes.c b/arch/sparc/kernel/uprobes.c
index d852ae56ddc1..c44bf5b85de8 100644
--- a/arch/sparc/kernel/uprobes.c
+++ b/arch/sparc/kernel/uprobes.c
@@ -29,7 +29,6 @@
#include <linux/kdebug.h>
#include <asm/cacheflush.h>
-#include <linux/uaccess.h>
/* Compute the address of the breakpoint instruction and return it.
*
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index f2d70ff7a284..bc2aaa47bc8a 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2269,19 +2269,6 @@ static unsigned long last_valid_pfn;
static void sun4u_pgprot_init(void);
static void sun4v_pgprot_init(void);
-static phys_addr_t __init available_memory(void)
-{
- phys_addr_t available = 0ULL;
- phys_addr_t pa_start, pa_end;
- u64 i;
-
- for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
- &pa_end, NULL)
- available = available + (pa_end - pa_start);
-
- return available;
-}
-
#define _PAGE_CACHE_4U (_PAGE_CP_4U | _PAGE_CV_4U)
#define _PAGE_CACHE_4V (_PAGE_CP_4V | _PAGE_CV_4V)
#define __DIRTY_BITS_4U (_PAGE_MODIFIED_4U | _PAGE_WRITE_4U | _PAGE_W_4U)
@@ -2295,33 +2282,8 @@ static phys_addr_t __init available_memory(void)
*/
static void __init reduce_memory(phys_addr_t limit_ram)
{
- phys_addr_t avail_ram = available_memory();
- phys_addr_t pa_start, pa_end;
- u64 i;
-
- if (limit_ram >= avail_ram)
- return;
-
- for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &pa_start,
- &pa_end, NULL) {
- phys_addr_t region_size = pa_end - pa_start;
- phys_addr_t clip_start = pa_start;
-
- avail_ram = avail_ram - region_size;
- /* Are we consuming too much? */
- if (avail_ram < limit_ram) {
- phys_addr_t give_back = limit_ram - avail_ram;
-
- region_size = region_size - give_back;
- clip_start = clip_start + give_back;
- }
-
- memblock_remove(clip_start, region_size);
-
- if (avail_ram <= limit_ram)
- break;
- i = 0UL;
- }
+ limit_ram += memblock_reserved_size();
+ memblock_enforce_memory_limit(limit_ram);
}
void __init paging_init(void)
diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c
index e8d5d73ca40d..71ac353032b6 100644
--- a/arch/sparc/mm/iommu.c
+++ b/arch/sparc/mm/iommu.c
@@ -175,16 +175,37 @@ static void iommu_flush_iotlb(iopte_t *iopte, unsigned int niopte)
}
}
-static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
+static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t len, bool per_page_flush)
{
struct iommu_struct *iommu = dev->archdata.iommu;
- int ioptex;
- iopte_t *iopte, *iopte0;
+ phys_addr_t paddr = page_to_phys(page) + offset;
+ unsigned long off = paddr & ~PAGE_MASK;
+ unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned long pfn = __phys_to_pfn(paddr);
unsigned int busa, busa0;
- int i;
+ iopte_t *iopte, *iopte0;
+ int ioptex, i;
+
+ /* XXX So what is maxphys for us and how do drivers know it? */
+ if (!len || len > 256 * 1024)
+ return DMA_MAPPING_ERROR;
+
+ /*
+ * We expect unmapped highmem pages to be not in the cache.
+ * XXX Is this a good assumption?
+ * XXX What if someone else unmaps it here and races us?
+ */
+ if (per_page_flush && !PageHighMem(page)) {
+ unsigned long vaddr, p;
+
+ vaddr = (unsigned long)page_address(page) + offset;
+ for (p = vaddr & PAGE_MASK; p < vaddr + len; p += PAGE_SIZE)
+ flush_page_for_dma(p);
+ }
/* page color = pfn of page */
- ioptex = bit_map_string_get(&iommu->usemap, npages, page_to_pfn(page));
+ ioptex = bit_map_string_get(&iommu->usemap, npages, pfn);
if (ioptex < 0)
panic("iommu out");
busa0 = iommu->start + (ioptex << PAGE_SHIFT);
@@ -193,29 +214,15 @@ static u32 iommu_get_one(struct device *dev, struct page *page, int npages)
busa = busa0;
iopte = iopte0;
for (i = 0; i < npages; i++) {
- iopte_val(*iopte) = MKIOPTE(page_to_pfn(page), IOPERM);
+ iopte_val(*iopte) = MKIOPTE(pfn, IOPERM);
iommu_invalidate_page(iommu->regs, busa);
busa += PAGE_SIZE;
iopte++;
- page++;
+ pfn++;
}
iommu_flush_iotlb(iopte0, npages);
-
- return busa0;
-}
-
-static dma_addr_t __sbus_iommu_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t len)
-{
- void *vaddr = page_address(page) + offset;
- unsigned long off = (unsigned long)vaddr & ~PAGE_MASK;
- unsigned long npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
-
- /* XXX So what is maxphys for us and how do drivers know it? */
- if (!len || len > 256 * 1024)
- return DMA_MAPPING_ERROR;
- return iommu_get_one(dev, virt_to_page(vaddr), npages) + off;
+ return busa0 + off;
}
static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
@@ -223,81 +230,58 @@ static dma_addr_t sbus_iommu_map_page_gflush(struct device *dev,
enum dma_data_direction dir, unsigned long attrs)
{
flush_page_for_dma(0);
- return __sbus_iommu_map_page(dev, page, offset, len);
+ return __sbus_iommu_map_page(dev, page, offset, len, false);
}
static dma_addr_t sbus_iommu_map_page_pflush(struct device *dev,
struct page *page, unsigned long offset, size_t len,
enum dma_data_direction dir, unsigned long attrs)
{
- void *vaddr = page_address(page) + offset;
- unsigned long p = ((unsigned long)vaddr) & PAGE_MASK;
-
- while (p < (unsigned long)vaddr + len) {
- flush_page_for_dma(p);
- p += PAGE_SIZE;
- }
-
- return __sbus_iommu_map_page(dev, page, offset, len);
+ return __sbus_iommu_map_page(dev, page, offset, len, true);
}
-static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir, unsigned long attrs)
+static int __sbus_iommu_map_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs,
+ bool per_page_flush)
{
struct scatterlist *sg;
- int i, n;
-
- flush_page_for_dma(0);
+ int j;
- for_each_sg(sgl, sg, nents, i) {
- n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
- sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
+ for_each_sg(sgl, sg, nents, j) {
+ sg->dma_address = __sbus_iommu_map_page(dev, sg_page(sg),
+ sg->offset, sg->length, per_page_flush);
+ if (sg->dma_address == DMA_MAPPING_ERROR)
+ return 0;
sg->dma_length = sg->length;
}
return nents;
}
-static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
+static int sbus_iommu_map_sg_gflush(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir, unsigned long attrs)
{
- unsigned long page, oldpage = 0;
- struct scatterlist *sg;
- int i, j, n;
-
- for_each_sg(sgl, sg, nents, j) {
- n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
-
- /*
- * We expect unmapped highmem pages to be not in the cache.
- * XXX Is this a good assumption?
- * XXX What if someone else unmaps it here and races us?
- */
- if ((page = (unsigned long) page_address(sg_page(sg))) != 0) {
- for (i = 0; i < n; i++) {
- if (page != oldpage) { /* Already flushed? */
- flush_page_for_dma(page);
- oldpage = page;
- }
- page += PAGE_SIZE;
- }
- }
-
- sg->dma_address = iommu_get_one(dev, sg_page(sg), n) + sg->offset;
- sg->dma_length = sg->length;
- }
+ flush_page_for_dma(0);
+ return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, false);
+}
- return nents;
+static int sbus_iommu_map_sg_pflush(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+ return __sbus_iommu_map_sg(dev, sgl, nents, dir, attrs, true);
}
-static void iommu_release_one(struct device *dev, u32 busa, int npages)
+static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
+ size_t len, enum dma_data_direction dir, unsigned long attrs)
{
struct iommu_struct *iommu = dev->archdata.iommu;
- int ioptex;
- int i;
+ unsigned int busa = dma_addr & PAGE_MASK;
+ unsigned long off = dma_addr & ~PAGE_MASK;
+ unsigned int npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
+ unsigned int ioptex = (busa - iommu->start) >> PAGE_SHIFT;
+ unsigned int i;
BUG_ON(busa < iommu->start);
- ioptex = (busa - iommu->start) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
iopte_val(iommu->page_table[ioptex + i]) = 0;
iommu_invalidate_page(iommu->regs, busa);
@@ -306,25 +290,15 @@ static void iommu_release_one(struct device *dev, u32 busa, int npages)
bit_map_clear(&iommu->usemap, ioptex, npages);
}
-static void sbus_iommu_unmap_page(struct device *dev, dma_addr_t dma_addr,
- size_t len, enum dma_data_direction dir, unsigned long attrs)
-{
- unsigned long off = dma_addr & ~PAGE_MASK;
- int npages;
-
- npages = (off + len + PAGE_SIZE-1) >> PAGE_SHIFT;
- iommu_release_one(dev, dma_addr & PAGE_MASK, npages);
-}
-
static void sbus_iommu_unmap_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *sg;
- int i, n;
+ int i;
for_each_sg(sgl, sg, nents, i) {
- n = (sg->length + sg->offset + PAGE_SIZE-1) >> PAGE_SHIFT;
- iommu_release_one(dev, sg->dma_address & PAGE_MASK, n);
+ sbus_iommu_unmap_page(dev, sg->dma_address, sg->length, dir,
+ attrs);
sg->dma_address = 0x21212121;
}
}
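
Both the map and unmap paths in the reworked iommu.c derive their page count the same way: keep the sub-page offset of the buffer and round the byte length up to whole pages. A tiny worked sketch of that arithmetic (assuming the usual 4 KiB PAGE_SIZE with PAGE_SHIFT = 12; the helper name is made up):

	/* Sketch: pages spanned by a buffer starting 'off' bytes into a page. */
	static unsigned long span_pages(unsigned long off, unsigned long len)
	{
		return (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	}

	/* e.g. off = 4000, len = 200: (4000 + 200 + 4095) >> 12 = 2 pages */
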
diff --git a/arch/sparc/vdso/Makefile b/arch/sparc/vdso/Makefile
index 74e97f77e23b..83c4b463cb3d 100644
--- a/arch/sparc/vdso/Makefile
+++ b/arch/sparc/vdso/Makefile
@@ -68,7 +68,7 @@ CFLAGS_REMOVE_vdso-note.o = -pg
CFLAGS_REMOVE_vclock_gettime.o = -pg
$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
diff --git a/arch/um/Kconfig b/arch/um/Kconfig
index ec9711d068b7..6b6eb938fcc1 100644
--- a/arch/um/Kconfig
+++ b/arch/um/Kconfig
@@ -80,46 +80,46 @@ config LD_SCRIPT_DYN
bool
default y
depends on !LD_SCRIPT_STATIC
- select MODULE_REL_CRCS if MODVERSIONS
+ select MODULE_REL_CRCS if MODVERSIONS
config HOSTFS
tristate "Host filesystem"
help
- While the User-Mode Linux port uses its own root file system for
- booting and normal file access, this module lets the UML user
- access files stored on the host. It does not require any
- network connection between the Host and UML. An example use of
- this might be:
+ While the User-Mode Linux port uses its own root file system for
+ booting and normal file access, this module lets the UML user
+ access files stored on the host. It does not require any
+ network connection between the Host and UML. An example use of
+ this might be:
- mount none /tmp/fromhost -t hostfs -o /tmp/umlshare
+ mount none /tmp/fromhost -t hostfs -o /tmp/umlshare
- where /tmp/fromhost is an empty directory inside UML and
- /tmp/umlshare is a directory on the host with files the UML user
- wishes to access.
+ where /tmp/fromhost is an empty directory inside UML and
+ /tmp/umlshare is a directory on the host with files the UML user
+ wishes to access.
- For more information, see
- <http://user-mode-linux.sourceforge.net/hostfs.html>.
+ For more information, see
+ <http://user-mode-linux.sourceforge.net/hostfs.html>.
- If you'd like to be able to work with files stored on the host,
- say Y or M here; otherwise say N.
+ If you'd like to be able to work with files stored on the host,
+ say Y or M here; otherwise say N.
config MCONSOLE
bool "Management console"
depends on PROC_FS
default y
help
- The user mode linux management console is a low-level interface to
- the kernel, somewhat like the i386 SysRq interface. Since there is
- a full-blown operating system running under every user mode linux
- instance, there is much greater flexibility possible than with the
- SysRq mechanism.
+ The user mode linux management console is a low-level interface to
+ the kernel, somewhat like the i386 SysRq interface. Since there is
+ a full-blown operating system running under every user mode linux
+ instance, there is much greater flexibility possible than with the
+ SysRq mechanism.
- If you answer 'Y' to this option, to use this feature, you need the
- mconsole client (called uml_mconsole) which is present in CVS in
- 2.4.5-9um and later (path /tools/mconsole), and is also in the
- distribution RPM package in 2.4.6 and later.
+ If you answer 'Y' to this option, to use this feature, you need the
+ mconsole client (called uml_mconsole) which is present in CVS in
+ 2.4.5-9um and later (path /tools/mconsole), and is also in the
+ distribution RPM package in 2.4.6 and later.
- It is safe to say 'Y' here.
+ It is safe to say 'Y' here.
config MAGIC_SYSRQ
bool "Magic SysRq key"
@@ -142,13 +142,17 @@ config MAGIC_SYSRQ
config KERNEL_STACK_ORDER
int "Kernel stack size order"
- default 1 if 64BIT
- range 1 10 if 64BIT
- default 0 if !64BIT
+ default 2 if 64BIT
+ range 2 10 if 64BIT
+ default 1 if !64BIT
help
This option determines the size of UML kernel stacks. They will
be 1 << order pages. The default is OK unless you're running Valgrind
on UML, in which case, set this to 3.
+ It is possible to reduce the stack to 1 for 64BIT and 0 for 32BIT on
+ older (pre-2017) CPUs. It is not recommended on newer CPUs due to the
+ increase in the size of the state which needs to be saved when handling
+ signals.
config MMAPPER
tristate "iomem emulation driver"
diff --git a/arch/um/drivers/Kconfig b/arch/um/drivers/Kconfig
index 2b1aaf7755aa..2638e46f50cc 100644
--- a/arch/um/drivers/Kconfig
+++ b/arch/um/drivers/Kconfig
@@ -11,58 +11,58 @@ config STDERR_CONSOLE
config SSL
bool "Virtual serial line"
help
- The User-Mode Linux environment allows you to create virtual serial
- lines on the UML that are usually made to show up on the host as
- ttys or ptys.
+ The User-Mode Linux environment allows you to create virtual serial
+ lines on the UML that are usually made to show up on the host as
+ ttys or ptys.
- See <http://user-mode-linux.sourceforge.net/old/input.html> for more
- information and command line examples of how to use this facility.
+ See <http://user-mode-linux.sourceforge.net/old/input.html> for more
+ information and command line examples of how to use this facility.
- Unless you have a specific reason for disabling this, say Y.
+ Unless you have a specific reason for disabling this, say Y.
config NULL_CHAN
bool "null channel support"
help
- This option enables support for attaching UML consoles and serial
- lines to a device similar to /dev/null. Data written to it disappears
- and there is never any data to be read.
+ This option enables support for attaching UML consoles and serial
+ lines to a device similar to /dev/null. Data written to it disappears
+ and there is never any data to be read.
config PORT_CHAN
bool "port channel support"
help
- This option enables support for attaching UML consoles and serial
- lines to host portals. They may be accessed with 'telnet <host>
- <port number>'. Any number of consoles and serial lines may be
- attached to a single portal, although what UML device you get when
- you telnet to that portal will be unpredictable.
- It is safe to say 'Y' here.
+ This option enables support for attaching UML consoles and serial
+ lines to host portals. They may be accessed with 'telnet <host>
+ <port number>'. Any number of consoles and serial lines may be
+ attached to a single portal, although what UML device you get when
+ you telnet to that portal will be unpredictable.
+ It is safe to say 'Y' here.
config PTY_CHAN
bool "pty channel support"
help
- This option enables support for attaching UML consoles and serial
- lines to host pseudo-terminals. Access to both traditional
- pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled
- with this option. The assignment of UML devices to host devices
- will be announced in the kernel message log.
- It is safe to say 'Y' here.
+ This option enables support for attaching UML consoles and serial
+ lines to host pseudo-terminals. Access to both traditional
+ pseudo-terminals (/dev/pty*) and pts pseudo-terminals are controlled
+ with this option. The assignment of UML devices to host devices
+ will be announced in the kernel message log.
+ It is safe to say 'Y' here.
config TTY_CHAN
bool "tty channel support"
help
- This option enables support for attaching UML consoles and serial
- lines to host terminals. Access to both virtual consoles
- (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and
- /dev/pts/*) are controlled by this option.
- It is safe to say 'Y' here.
+ This option enables support for attaching UML consoles and serial
+ lines to host terminals. Access to both virtual consoles
+ (/dev/tty*) and the slave side of pseudo-terminals (/dev/ttyp* and
+ /dev/pts/*) are controlled by this option.
+ It is safe to say 'Y' here.
config XTERM_CHAN
bool "xterm channel support"
help
- This option enables support for attaching UML consoles and serial
- lines to xterms. Each UML device so assigned will be brought up in
- its own xterm.
- It is safe to say 'Y' here.
+ This option enables support for attaching UML consoles and serial
+ lines to xterms. Each UML device so assigned will be brought up in
+ its own xterm.
+ It is safe to say 'Y' here.
config NOCONFIG_CHAN
bool
@@ -72,43 +72,43 @@ config CON_ZERO_CHAN
string "Default main console channel initialization"
default "fd:0,fd:1"
help
- This is the string describing the channel to which the main console
- will be attached by default. This value can be overridden from the
- command line. The default value is "fd:0,fd:1", which attaches the
- main console to stdin and stdout.
- It is safe to leave this unchanged.
+ This is the string describing the channel to which the main console
+ will be attached by default. This value can be overridden from the
+ command line. The default value is "fd:0,fd:1", which attaches the
+ main console to stdin and stdout.
+ It is safe to leave this unchanged.
config CON_CHAN
string "Default console channel initialization"
default "xterm"
help
- This is the string describing the channel to which all consoles
- except the main console will be attached by default. This value can
- be overridden from the command line. The default value is "xterm",
- which brings them up in xterms.
- It is safe to leave this unchanged, although you may wish to change
- this if you expect the UML that you build to be run in environments
- which don't have X or xterm available.
+ This is the string describing the channel to which all consoles
+ except the main console will be attached by default. This value can
+ be overridden from the command line. The default value is "xterm",
+ which brings them up in xterms.
+ It is safe to leave this unchanged, although you may wish to change
+ this if you expect the UML that you build to be run in environments
+ which don't have X or xterm available.
config SSL_CHAN
string "Default serial line channel initialization"
default "pty"
help
- This is the string describing the channel to which the serial lines
- will be attached by default. This value can be overridden from the
- command line. The default value is "pty", which attaches them to
- traditional pseudo-terminals.
- It is safe to leave this unchanged, although you may wish to change
- this if you expect the UML that you build to be run in environments
- which don't have a set of /dev/pty* devices.
+ This is the string describing the channel to which the serial lines
+ will be attached by default. This value can be overridden from the
+ command line. The default value is "pty", which attaches them to
+ traditional pseudo-terminals.
+ It is safe to leave this unchanged, although you may wish to change
+ this if you expect the UML that you build to be run in environments
+ which don't have a set of /dev/pty* devices.
config UML_SOUND
tristate "Sound support"
help
- This option enables UML sound support. If enabled, it will pull in
- soundcore and the UML hostaudio relay, which acts as a intermediary
- between the host's dsp and mixer devices and the UML sound system.
- It is safe to say 'Y' here.
+ This option enables UML sound support. If enabled, it will pull in
+ soundcore and the UML hostaudio relay, which acts as an intermediary
+ between the host's dsp and mixer devices and the UML sound system.
+ It is safe to say 'Y' here.
config SOUND
tristate
@@ -131,107 +131,107 @@ menu "UML Network Devices"
config UML_NET
bool "Virtual network device"
help
- While the User-Mode port cannot directly talk to any physical
- hardware devices, this choice and the following transport options
- provide one or more virtual network devices through which the UML
- kernels can talk to each other, the host, and with the host's help,
- machines on the outside world.
+ While the User-Mode port cannot directly talk to any physical
+ hardware devices, this choice and the following transport options
+ provide one or more virtual network devices through which the UML
+ kernels can talk to each other, the host, and with the host's help,
+ machines on the outside world.
- For more information, including explanations of the networking and
- sample configurations, see
- <http://user-mode-linux.sourceforge.net/old/networking.html>.
+ For more information, including explanations of the networking and
+ sample configurations, see
+ <http://user-mode-linux.sourceforge.net/old/networking.html>.
- If you'd like to be able to enable networking in the User-Mode
- linux environment, say Y; otherwise say N. Note that you must
- enable at least one of the following transport options to actually
- make use of UML networking.
+ If you'd like to be able to enable networking in the User-Mode
+ Linux environment, say Y; otherwise say N. Note that you must
+ enable at least one of the following transport options to actually
+ make use of UML networking.
config UML_NET_ETHERTAP
bool "Ethertap transport"
depends on UML_NET
help
- The Ethertap User-Mode Linux network transport allows a single
- running UML to exchange packets with its host over one of the
- host's Ethertap devices, such as /dev/tap0. Additional running
- UMLs can use additional Ethertap devices, one per running UML.
- While the UML believes it's on a (multi-device, broadcast) virtual
- Ethernet network, it's in fact communicating over a point-to-point
- link with the host.
-
- To use this, your host kernel must have support for Ethertap
- devices. Also, if your host kernel is 2.4.x, it must have
- CONFIG_NETLINK_DEV configured as Y or M.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Ethertap
- networking.
-
- If you'd like to set up an IP network with the host and/or the
- outside world, say Y to this, the Daemon Transport and/or the
- Slip Transport. You'll need at least one of them, but may choose
- more than one without conflict. If you don't need UML networking,
- say N.
+ The Ethertap User-Mode Linux network transport allows a single
+ running UML to exchange packets with its host over one of the
+ host's Ethertap devices, such as /dev/tap0. Additional running
+ UMLs can use additional Ethertap devices, one per running UML.
+ While the UML believes it's on a (multi-device, broadcast) virtual
+ Ethernet network, it's in fact communicating over a point-to-point
+ link with the host.
+
+ To use this, your host kernel must have support for Ethertap
+ devices. Also, if your host kernel is 2.4.x, it must have
+ CONFIG_NETLINK_DEV configured as Y or M.
+
+ For more information, see
+ <http://user-mode-linux.sourceforge.net/old/networking.html>. That site
+ has examples of the UML command line to use to enable Ethertap
+ networking.
+
+ If you'd like to set up an IP network with the host and/or the
+ outside world, say Y to this, the Daemon Transport and/or the
+ Slip Transport. You'll need at least one of them, but may choose
+ more than one without conflict. If you don't need UML networking,
+ say N.
config UML_NET_TUNTAP
bool "TUN/TAP transport"
depends on UML_NET
help
- The UML TUN/TAP network transport allows a UML instance to exchange
- packets with the host over a TUN/TAP device. This option will only
- work with a 2.4 host, unless you've applied the TUN/TAP patch to
- your 2.2 host kernel.
+ The UML TUN/TAP network transport allows a UML instance to exchange
+ packets with the host over a TUN/TAP device. This option will only
+ work with a 2.4 host, unless you've applied the TUN/TAP patch to
+ your 2.2 host kernel.
- To use this transport, your host kernel must have support for TUN/TAP
- devices, either built-in or as a module.
+ To use this transport, your host kernel must have support for TUN/TAP
+ devices, either built-in or as a module.
config UML_NET_SLIP
bool "SLIP transport"
depends on UML_NET
help
- The slip User-Mode Linux network transport allows a running UML to
- network with its host over a point-to-point link. Unlike Ethertap,
- which can carry any Ethernet frame (and hence even non-IP packets),
- the slip transport can only carry IP packets.
-
- To use this, your host must support slip devices.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html>.
- has examples of the UML command line to use to enable slip
- networking, and details of a few quirks with it.
-
- The Ethertap Transport is preferred over slip because of its
- limitations. If you prefer slip, however, say Y here. Otherwise
- choose the Multicast transport (to network multiple UMLs on
- multiple hosts), Ethertap (to network with the host and the
- outside world), and/or the Daemon transport (to network multiple
- UMLs on a single host). You may choose more than one without
- conflict. If you don't need UML networking, say N.
+ The slip User-Mode Linux network transport allows a running UML to
+ network with its host over a point-to-point link. Unlike Ethertap,
+ which can carry any Ethernet frame (and hence even non-IP packets),
+ the slip transport can only carry IP packets.
+
+ To use this, your host must support slip devices.
+
+ For more information, see
+ <http://user-mode-linux.sourceforge.net/old/networking.html>. That site
+ has examples of the UML command line to use to enable slip
+ networking, and details of a few quirks with it.
+
+ The Ethertap Transport is preferred over slip because of slip's
+ limitations. If you prefer slip, however, say Y here. Otherwise
+ choose the Multicast transport (to network multiple UMLs on
+ multiple hosts), Ethertap (to network with the host and the
+ outside world), and/or the Daemon transport (to network multiple
+ UMLs on a single host). You may choose more than one without
+ conflict. If you don't need UML networking, say N.
config UML_NET_DAEMON
bool "Daemon transport"
depends on UML_NET
help
- This User-Mode Linux network transport allows one or more running
- UMLs on a single host to communicate with each other, but not to
- the host.
-
- To use this form of networking, you'll need to run the UML
- networking daemon on the host.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Daemon
- networking.
-
- If you'd like to set up a network with other UMLs on a single host,
- say Y. If you need a network between UMLs on multiple physical
- hosts, choose the Multicast Transport. To set up a network with
- the host and/or other IP machines, say Y to the Ethertap or Slip
- transports. You'll need at least one of them, but may choose
- more than one without conflict. If you don't need UML networking,
- say N.
+ This User-Mode Linux network transport allows one or more running
+ UMLs on a single host to communicate with each other, but not to
+ the host.
+
+ To use this form of networking, you'll need to run the UML
+ networking daemon on the host.
+
+ For more information, see
+ <http://user-mode-linux.sourceforge.net/old/networking.html>. That site
+ has examples of the UML command line to use to enable Daemon
+ networking.
+
+ If you'd like to set up a network with other UMLs on a single host,
+ say Y. If you need a network between UMLs on multiple physical
+ hosts, choose the Multicast Transport. To set up a network with
+ the host and/or other IP machines, say Y to the Ethertap or Slip
+ transports. You'll need at least one of them, but may choose
+ more than one without conflict. If you don't need UML networking,
+ say N.
config UML_NET_VECTOR
bool "Vector I/O high performance network devices"
@@ -270,26 +270,26 @@ config UML_NET_MCAST
bool "Multicast transport"
depends on UML_NET
help
- This Multicast User-Mode Linux network transport allows multiple
- UMLs (even ones running on different host machines!) to talk to
- each other over a virtual ethernet network. However, it requires
- at least one UML with one of the other transports to act as a
- bridge if any of them need to be able to talk to their hosts or any
- other IP machines.
-
- To use this, your host kernel(s) must support IP Multicasting.
-
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable Multicast
- networking, and notes about the security of this approach.
-
- If you need UMLs on multiple physical hosts to communicate as if
- they shared an Ethernet network, say Y. If you need to communicate
- with other IP machines, make sure you select one of the other
- transports (possibly in addition to Multicast; they're not
- exclusive). If you don't need to network UMLs say N to each of
- the transports.
+ This Multicast User-Mode Linux network transport allows multiple
+ UMLs (even ones running on different host machines!) to talk to
+ each other over a virtual ethernet network. However, it requires
+ at least one UML with one of the other transports to act as a
+ bridge if any of them need to be able to talk to their hosts or any
+ other IP machines.
+
+ To use this, your host kernel(s) must support IP Multicasting.
+
+ For more information, see
+ <http://user-mode-linux.sourceforge.net/old/networking.html>. That site
+ has examples of the UML command line to use to enable Multicast
+ networking, and notes about the security of this approach.
+
+ If you need UMLs on multiple physical hosts to communicate as if
+ they shared an Ethernet network, say Y. If you need to communicate
+ with other IP machines, make sure you select one of the other
+ transports (possibly in addition to Multicast; they're not
+ exclusive). If you don't need to network UMLs say N to each of
+ the transports.
config UML_NET_PCAP
bool "pcap transport"
@@ -300,9 +300,9 @@ config UML_NET_PCAP
UML act as a network monitor for the host. You must have libpcap
installed in order to build the pcap transport into UML.
- For more information, see
- <http://user-mode-linux.sourceforge.net/old/networking.html> That site
- has examples of the UML command line to use to enable this option.
+ For more information, see
+ <http://user-mode-linux.sourceforge.net/old/networking.html>. That site
+ has examples of the UML command line to use to enable this option.
If you intend to use UML as a network monitor for the host, say
Y here. Otherwise, say N.
@@ -311,27 +311,27 @@ config UML_NET_SLIRP
bool "SLiRP transport"
depends on UML_NET
help
- The SLiRP User-Mode Linux network transport allows a running UML
- to network by invoking a program that can handle SLIP encapsulated
- packets. This is commonly (but not limited to) the application
- known as SLiRP, a program that can re-socket IP packets back onto
- the host on which it is run. Only IP packets are supported,
- unlike other network transports that can handle all Ethernet
- frames. In general, slirp allows the UML the same IP connectivity
- to the outside world that the host user is permitted, and unlike
- other transports, SLiRP works without the need of root level
- privleges, setuid binaries, or SLIP devices on the host. This
- also means not every type of connection is possible, but most
- situations can be accommodated with carefully crafted slirp
- commands that can be passed along as part of the network device's
- setup string. The effect of this transport on the UML is similar
- that of a host behind a firewall that masquerades all network
- connections passing through it (but is less secure).
-
- To use this you should first have slirp compiled somewhere
- accessible on the host, and have read its documentation. If you
- don't need UML networking, say N.
-
- Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
+ The SLiRP User-Mode Linux network transport allows a running UML
+ to network by invoking a program that can handle SLIP encapsulated
+ packets. This is commonly (but not limited to) the application
+ known as SLiRP, a program that can re-socket IP packets back onto
+ the host on which it is run. Only IP packets are supported,
+ unlike other network transports that can handle all Ethernet
+ frames. In general, slirp allows the UML the same IP connectivity
+ to the outside world that the host user is permitted, and unlike
+ other transports, SLiRP works without the need of root level
+ privileges, setuid binaries, or SLIP devices on the host. This
+ also means not every type of connection is possible, but most
+ situations can be accommodated with carefully crafted slirp
+ commands that can be passed along as part of the network device's
+ setup string. The effect of this transport on the UML is similar to
+ that of a host behind a firewall that masquerades all network
+ connections passing through it (but is less secure).
+
+ To use this you should first have slirp compiled somewhere
+ accessible on the host, and have read its documentation. If you
+ don't need UML networking, say N.
+
+ Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
endmenu
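To make the startup string above concrete: with slirp installed on the host, a UML instance is typically launched along these lines (the filesystem image name and slirp path are illustrative, not taken from the patch):

    ./linux ubd0=root_fs eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp

Inside the guest, eth0 then behaves like a masqueraded interface, as the help text describes.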
diff --git a/arch/um/drivers/harddog_kern.c b/arch/um/drivers/harddog_kern.c
index 6d381279b362..000cb69ba0bc 100644
--- a/arch/um/drivers/harddog_kern.c
+++ b/arch/um/drivers/harddog_kern.c
@@ -85,7 +85,7 @@ static int harddog_open(struct inode *inode, struct file *file)
timer_alive = 1;
spin_unlock(&lock);
mutex_unlock(&harddog_mutex);
- return nonseekable_open(inode, file);
+ return stream_open(inode, file);
err:
spin_unlock(&lock);
mutex_unlock(&harddog_mutex);
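The nonseekable_open() to stream_open() conversion marks the watchdog node as stream-like: the VFS stops maintaining f_pos for it and no longer serializes reads against writes on that position. A minimal sketch of the same pattern in a hypothetical character device (kernel API as of this series):

    #include <linux/fs.h>

    /* Hypothetical open handler: declare the file a stream, since a
     * watchdog-style device has no meaningful seek position. */
    static int demo_open(struct inode *inode, struct file *file)
    {
            return stream_open(inode, file);
    }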
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index aca09be2373e..33c1cd6a12ac 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -276,14 +276,14 @@ static int ubd_setup_common(char *str, int *index_out, char **error_out)
str++;
if(!strcmp(str, "sync")){
global_openflags = of_sync(global_openflags);
- goto out1;
+ return err;
}
err = -EINVAL;
major = simple_strtoul(str, &end, 0);
if((*end != '\0') || (end == str)){
*error_out = "Didn't parse major number";
- goto out1;
+ return err;
}
mutex_lock(&ubd_lock);
diff --git a/arch/um/drivers/vector_kern.c b/arch/um/drivers/vector_kern.c
index 596e7056f376..e190e4ca52e1 100644
--- a/arch/um/drivers/vector_kern.c
+++ b/arch/um/drivers/vector_kern.c
@@ -1043,7 +1043,7 @@ static int vector_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
vector_send(vp->tx_queue);
return NETDEV_TX_OK;
}
- if (skb->xmit_more) {
+ if (netdev_xmit_more()) {
mod_timer(&vp->tl, vp->coalesce);
return NETDEV_TX_OK;
}
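skb->xmit_more has moved into per-CPU softnet state, so drivers now ask netdev_xmit_more() whether the stack intends to hand over further frames before kicking the queue. The usual batching shape of an ndo_start_xmit, sketched with hypothetical helpers:

    #include <linux/netdevice.h>

    static netdev_tx_t demo_start_xmit(struct sk_buff *skb, struct net_device *dev)
    {
            demo_enqueue(dev, skb);                 /* hypothetical: queue frame for TX */

            /* Only ring the doorbell when no more frames are coming,
             * or when the queue cannot absorb another one. */
            if (!netdev_xmit_more() || demo_queue_full(dev))
                    demo_kick_hw(dev);              /* hypothetical doorbell write */

            return NETDEV_TX_OK;
    }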
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 00bcbe2326d9..b506ad06aefc 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += irq_work.h
generic-y += kdebug.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += param.h
generic-y += pci.h
generic-y += percpu.h
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 9c04562310b3..b377df76cc28 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -263,7 +263,12 @@ static inline void set_pte(pte_t *pteptr, pte_t pteval)
*pteptr = pte_mknewpage(*pteptr);
if(pte_present(*pteptr)) *pteptr = pte_mknewprot(*pteptr);
}
-#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
+
+static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
+ pte_t *pteptr, pte_t pteval)
+{
+ set_pte(pteptr, pteval);
+}
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t pte_a, pte_t pte_b)
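Replacing the set_pte_at() macro with a static inline means mm and addr are now evaluated and type-checked rather than silently dropped; the usual motivation is to avoid unused-variable warnings in callers that compute an address only for this call. A stand-alone illustration of the difference (hypothetical names, not from the patch):

    #include <stdio.h>

    /* Macro form: 'ctx' vanishes from the expansion, so it is neither
     * evaluated nor type-checked. */
    #define log_event_macro(ctx, msg)   puts(msg)

    /* Inline form: 'ctx' is evaluated and type-checked, mirroring the
     * set_pte_at() change above. */
    static inline void log_event(void *ctx, const char *msg)
    {
            (void)ctx;
            puts(msg);
    }

    int main(void)
    {
            int ctx = 42;

            log_event(&ctx, "inline form");
            log_event_macro(&ctx, "macro form");
            return 0;
    }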
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index dce6db147f24..70ee60383900 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -2,162 +2,8 @@
#ifndef __UM_TLB_H
#define __UM_TLB_H
-#include <linux/pagemap.h>
-#include <linux/swap.h>
-#include <asm/percpu.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
-
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
-/* struct mmu_gather is an opaque type used by the mm code for passing around
- * any data needed by arch specific code for tlb_remove_page.
- */
-struct mmu_gather {
- struct mm_struct *mm;
- unsigned int need_flush; /* Really unmapped some ptes? */
- unsigned long start;
- unsigned long end;
- unsigned int fullmm; /* non-zero means full mm flush */
-};
-
-static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
- unsigned long address)
-{
- if (tlb->start > address)
- tlb->start = address;
- if (tlb->end < address + PAGE_SIZE)
- tlb->end = address + PAGE_SIZE;
-}
-
-static inline void init_tlb_gather(struct mmu_gather *tlb)
-{
- tlb->need_flush = 0;
-
- tlb->start = TASK_SIZE;
- tlb->end = 0;
-
- if (tlb->fullmm) {
- tlb->start = 0;
- tlb->end = TASK_SIZE;
- }
-}
-
-static inline void
-arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- tlb->mm = mm;
- tlb->start = start;
- tlb->end = end;
- tlb->fullmm = !(start | (end+1));
-
- init_tlb_gather(tlb);
-}
-
-extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
- unsigned long end);
-
-static inline void
-tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
-{
- flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end);
-}
-
-static inline void
-tlb_flush_mmu_free(struct mmu_gather *tlb)
-{
- init_tlb_gather(tlb);
-}
-
-static inline void
-tlb_flush_mmu(struct mmu_gather *tlb)
-{
- if (!tlb->need_flush)
- return;
-
- tlb_flush_mmu_tlbonly(tlb);
- tlb_flush_mmu_free(tlb);
-}
-
-/* arch_tlb_finish_mmu
- * Called at the end of the shootdown operation to free up any resources
- * that were required.
- */
-static inline void
-arch_tlb_finish_mmu(struct mmu_gather *tlb,
- unsigned long start, unsigned long end, bool force)
-{
- if (force) {
- tlb->start = start;
- tlb->end = end;
- tlb->need_flush = 1;
- }
- tlb_flush_mmu(tlb);
-
- /* keep the page table cache within bounds */
- check_pgt_cache();
-}
-
-/* tlb_remove_page
- * Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)),
- * while handling the additional races in SMP caused by other CPUs
- * caching valid mappings in their TLBs.
- */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- tlb->need_flush = 1;
- free_page_and_swap_cache(page);
- return false; /* avoid calling tlb_flush_mmu */
-}
-
-static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
-{
- __tlb_remove_page(tlb, page);
-}
-
-static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return __tlb_remove_page(tlb, page);
-}
-
-static inline void tlb_remove_page_size(struct mmu_gather *tlb,
- struct page *page, int page_size)
-{
- return tlb_remove_page(tlb, page);
-}
-
-/**
- * tlb_remove_tlb_entry - remember a pte unmapping for later tlb invalidation.
- *
- * Record the fact that pte's were really umapped in ->need_flush, so we can
- * later optimise away the tlb invalidate. This helps when userspace is
- * unmapping already-unmapped pages, which happens quite a lot.
- */
-#define tlb_remove_tlb_entry(tlb, ptep, address) \
- do { \
- tlb->need_flush = 1; \
- __tlb_remove_tlb_entry(tlb, ptep, address); \
- } while (0)
-
-#define tlb_remove_huge_tlb_entry(h, tlb, ptep, address) \
- tlb_remove_tlb_entry(tlb, ptep, address)
-
-#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
-static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
- unsigned int page_size)
-{
-}
-
-#define pte_free_tlb(tlb, ptep, addr) __pte_free_tlb(tlb, ptep, addr)
-
-#define pud_free_tlb(tlb, pudp, addr) __pud_free_tlb(tlb, pudp, addr)
-
-#define pmd_free_tlb(tlb, pmdp, addr) __pmd_free_tlb(tlb, pmdp, addr)
-
-#define tlb_migrate_finish(mm) do {} while (0)
+#include <asm-generic/cacheflush.h>
+#include <asm-generic/tlb.h>
#endif
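With its private mmu_gather gone, UML now relies on the batching implementation in asm-generic/tlb.h. Roughly, core mm drives that machinery as follows (a sketch against the v5.1-era API, not code from this patch):

    #include <linux/mm.h>
    #include <asm/tlb.h>

    static void demo_zap_range(struct mm_struct *mm, struct vm_area_struct *vma,
                               unsigned long start, unsigned long end)
    {
            struct mmu_gather tlb;

            tlb_gather_mmu(&tlb, mm, start, end);           /* begin batching */
            unmap_page_range(&tlb, vma, start, end, NULL);  /* clear PTEs, queue pages */
            tlb_finish_mmu(&tlb, start, end);               /* flush TLB, free queued pages */
    }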
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index f4874b7ec503..598d7b3d9355 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -479,7 +479,7 @@ void __init init_IRQ(void)
irq_set_chip_and_handler(TIMER_IRQ, &SIGVTALRM_irq_type, handle_edge_irq);
- for (i = 1; i < NR_IRQS; i++)
+ for (i = 1; i < LAST_IRQ; i++)
irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
/* Initialize EPOLL Loop */
os_setup_epoll();
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 7f06fdbc7ee1..bd3cb694322c 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -59,7 +59,6 @@ static pte_t *maybe_map(unsigned long virt, int is_write)
static int do_op_one_page(unsigned long addr, int len, int is_write,
int (*op)(unsigned long addr, int len, void *arg), void *arg)
{
- jmp_buf buf;
struct page *page;
pte_t *pte;
int n;
diff --git a/arch/um/kernel/stacktrace.c b/arch/um/kernel/stacktrace.c
index ebe7bcf62684..bd95e020d509 100644
--- a/arch/um/kernel/stacktrace.c
+++ b/arch/um/kernel/stacktrace.c
@@ -63,8 +63,6 @@ static const struct stacktrace_ops dump_ops = {
static void __save_stack_trace(struct task_struct *tsk, struct stack_trace *trace)
{
dump_trace(tsk, &dump_ops, trace);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace(struct stack_trace *trace)
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 6b995e870d55..05585eef11d9 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -20,7 +20,7 @@
static void _print_addr(void *data, unsigned long address, int reliable)
{
- pr_info(" [<%08lx>] %s%pF\n", address, reliable ? "" : "? ",
+ pr_info(" [<%08lx>] %s%pS\n", address, reliable ? "" : "? ",
(void *)address);
}
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 052de4c8acb2..0c572a48158e 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -56,7 +56,7 @@ static int itimer_one_shot(struct clock_event_device *evt)
static struct clock_event_device timer_clockevent = {
.name = "posix-timer",
.rating = 250,
- .cpumask = cpu_all_mask,
+ .cpumask = cpu_possible_mask,
.features = CLOCK_EVT_FEAT_PERIODIC |
CLOCK_EVT_FEAT_ONESHOT,
.set_state_shutdown = itimer_shutdown,
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index bf0acb8aad8b..75b10235d369 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -31,29 +31,23 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *) = {
static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
{
- struct uml_pt_regs *r;
+ struct uml_pt_regs r;
int save_errno = errno;
- r = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC);
- if (!r)
- panic("out of memory");
-
- r->is_user = 0;
+ r.is_user = 0;
if (sig == SIGSEGV) {
/* For segfaults, we want the data from the sigcontext. */
- get_regs_from_mc(r, mc);
- GET_FAULTINFO_FROM_MC(r->faultinfo, mc);
+ get_regs_from_mc(&r, mc);
+ GET_FAULTINFO_FROM_MC(r.faultinfo, mc);
}
/* enable signals if sig isn't IRQ signal */
if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGALRM))
unblock_signals();
- (*sig_info[sig])(sig, si, r);
+ (*sig_info[sig])(sig, si, &r);
errno = save_errno;
-
- free(r);
}
/*
@@ -91,17 +85,11 @@ void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
static void timer_real_alarm_handler(mcontext_t *mc)
{
- struct uml_pt_regs *regs;
-
- regs = uml_kmalloc(sizeof(struct uml_pt_regs), UM_GFP_ATOMIC);
- if (!regs)
- panic("out of memory");
+ struct uml_pt_regs regs;
if (mc != NULL)
- get_regs_from_mc(regs, mc);
- timer_handler(SIGALRM, NULL, regs);
-
- free(regs);
+ get_regs_from_mc(&regs, mc);
+ timer_handler(SIGALRM, NULL, &regs);
}
void timer_alarm_handler(int sig, struct siginfo *unused_si, mcontext_t *mc)
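Allocating uml_pt_regs on the handler's stack removes a uml_kmalloc()/free() pair, and the out-of-memory panic() that went with it, from every signal the UML process handles; in ordinary userspace code the same move also avoids touching the (non-async-signal-safe) allocator inside a handler. The general pattern, as a plain userspace sketch with hypothetical names:

    #include <signal.h>
    #include <string.h>
    #include <unistd.h>

    struct demo_regs { long gp[32]; };

    /* The register snapshot lives in the handler's stack frame: nothing
     * to allocate, nothing to free, no failure path. */
    static void demo_handler(int sig)
    {
            struct demo_regs regs;

            memset(&regs, 0, sizeof(regs));
            (void)sig;
    }

    int main(void)
    {
            signal(SIGALRM, demo_handler);
            alarm(1);
            pause();
            return 0;
    }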
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 998fbb445458..e261656fe9d7 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -135,12 +135,18 @@ out:
*/
static inline int is_umdir_used(char *dir)
{
- char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
- char pid[sizeof("nnnnn\0")], *end;
+ char pid[sizeof("nnnnn\0")], *end, *file;
int dead, fd, p, n, err;
+ size_t filelen;
- n = snprintf(file, sizeof(file), "%s/pid", dir);
- if (n >= sizeof(file)) {
+ err = asprintf(&file, "%s/pid", dir);
+ if (err < 0)
+ return 0;
+
+ filelen = strlen(file);
+
+ n = snprintf(file, filelen, "%s/pid", dir);
+ if (n >= filelen) {
printk(UM_KERN_ERR "is_umdir_used - pid filename too long\n");
err = -E2BIG;
goto out;
@@ -185,6 +191,7 @@ static inline int is_umdir_used(char *dir)
out_close:
close(fd);
out:
+ free(file);
return 0;
}
@@ -210,18 +217,21 @@ static int umdir_take_if_dead(char *dir)
static void __init create_pid_file(void)
{
- char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
- char pid[sizeof("nnnnn\0")];
+ char pid[sizeof("nnnnn\0")], *file;
int fd, n;
- if (umid_file_name("pid", file, sizeof(file)))
+ file = malloc(strlen(uml_dir) + UMID_LEN + sizeof("/pid\0"));
+ if (!file)
return;
+ if (umid_file_name("pid", file, sizeof(file)))
+ goto out;
+
fd = open(file, O_RDWR | O_CREAT | O_EXCL, 0644);
if (fd < 0) {
printk(UM_KERN_ERR "Open of machine pid file \"%s\" failed: "
"%s\n", file, strerror(errno));
- return;
+ goto out;
}
snprintf(pid, sizeof(pid), "%d\n", getpid());
@@ -231,6 +241,8 @@ static void __init create_pid_file(void)
errno);
close(fd);
+out:
+ free(file);
}
int __init set_umid(char *name)
@@ -385,13 +397,19 @@ __uml_setup("uml_dir=", set_uml_dir,
static void remove_umid_dir(void)
{
- char dir[strlen(uml_dir) + UMID_LEN + 1], err;
+ char *dir, err;
+
+ dir = malloc(strlen(uml_dir) + UMID_LEN + 1);
+ if (!dir)
+ return;
sprintf(dir, "%s%s", uml_dir, umid);
err = remove_files_and_dir(dir);
if (err)
os_warn("%s - remove_files_and_dir failed with err = %d\n",
__func__, err);
+
+ free(dir);
}
__uml_exitcall(remove_umid_dir);
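Both helpers in umid.c trade on-stack variable-length arrays for heap buffers, in line with the kernel-wide removal of VLAs. The asprintf() idiom used for the pid path, reduced to a stand-alone userspace sketch (hypothetical names):

    #define _GNU_SOURCE
    #include <stdio.h>
    #include <stdlib.h>

    /* Build "<dir>/pid" on the heap; the caller owns and frees the buffer. */
    static char *pid_path(const char *dir)
    {
            char *file;

            if (asprintf(&file, "%s/pid", dir) < 0)
                    return NULL;
            return file;
    }

    int main(void)
    {
            char *p = pid_path("/tmp/uml-demo");

            if (p) {
                    puts(p);
                    free(p);
            }
            return 0;
    }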
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index 817d82608712..2445dfcf6444 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -20,6 +20,7 @@ config UNICORE32
select GENERIC_IOMAP
select MODULES_USE_ELF_REL
select NEED_DMA_MAP_STATE
+ select MMU_GATHER_NO_RANGE if MMU
help
UniCore-32 is 32-bit Instruction Set Architecture,
including a series of low-power-consumption RISC chip
@@ -38,12 +39,6 @@ config STACKTRACE_SUPPORT
config LOCKDEP_SUPPORT
def_bool y
-config RWSEM_GENERIC_SPINLOCK
- def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
-
config ARCH_HAS_ILOG2_U32
bool
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index d77d953c04c1..b301a0b3c0b2 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -22,6 +22,7 @@ generic-y += kvm_para.h
generic-y += local.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += module.h
generic-y += parport.h
generic-y += percpu.h
diff --git a/arch/unicore32/include/asm/elf.h b/arch/unicore32/include/asm/elf.h
index 829042d07722..ae66dc1be49e 100644
--- a/arch/unicore32/include/asm/elf.h
+++ b/arch/unicore32/include/asm/elf.h
@@ -19,6 +19,7 @@
* ELF register definitions..
*/
#include <asm/ptrace.h>
+#include <linux/elf-em.h>
typedef unsigned long elf_greg_t;
typedef unsigned long elf_freg_t[3];
@@ -28,8 +29,6 @@ typedef elf_greg_t elf_gregset_t[ELF_NGREG];
typedef struct fp_state elf_fpregset_t;
-#define EM_UNICORE 110
-
#define R_UNICORE_NONE 0
#define R_UNICORE_PC24 1
#define R_UNICORE_ABS32 2
diff --git a/arch/unicore32/include/asm/syscall.h b/arch/unicore32/include/asm/syscall.h
new file mode 100644
index 000000000000..607961797fff
--- /dev/null
+++ b/arch/unicore32/include/asm/syscall.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_UNICORE_SYSCALL_H
+#define _ASM_UNICORE_SYSCALL_H
+
+#include <uapi/linux/audit.h>
+
+static inline int syscall_get_arch(struct task_struct *task)
+{
+ return AUDIT_ARCH_UNICORE;
+}
+
+#endif /* _ASM_UNICORE_SYSCALL_H */
diff --git a/arch/unicore32/include/asm/tlb.h b/arch/unicore32/include/asm/tlb.h
index 9cca15cdae94..00a8477333f6 100644
--- a/arch/unicore32/include/asm/tlb.h
+++ b/arch/unicore32/include/asm/tlb.h
@@ -12,10 +12,9 @@
#ifndef __UNICORE_TLB_H__
#define __UNICORE_TLB_H__
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
+/*
+ * unicore32 lacks an efficient flush_tlb_range(), use flush_tlb_mm().
+ */
#define __pte_free_tlb(tlb, pte, addr) \
do { \
diff --git a/arch/unicore32/kernel/stacktrace.c b/arch/unicore32/kernel/stacktrace.c
index 9976e767d51c..e37da8c6837b 100644
--- a/arch/unicore32/kernel/stacktrace.c
+++ b/arch/unicore32/kernel/stacktrace.c
@@ -120,8 +120,6 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
}
walk_stackframe(&frame, save_trace, &data);
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
void save_stack_trace(struct stack_trace *trace)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 62fc3fda1a05..e7212731cffb 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -14,6 +14,7 @@ config X86_32
select ARCH_WANT_IPC_PARSE_VERSION
select CLKSRC_I8253
select CLONE_BACKWARDS
+ select HAVE_DEBUG_STACKOVERFLOW
select MODULES_USE_ELF_REL
select OLD_SIGACTION
@@ -28,7 +29,6 @@ config X86_64
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE
select SWIOTLB
- select X86_DEV_DMA_OPS
select ARCH_HAS_SYSCALL_WRAPPER
#
@@ -44,7 +44,6 @@ config X86
#
select ACPI_LEGACY_TABLES_LOOKUP if ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
- select ANON_INODES
select ARCH_32BIT_OFF_T if X86_32
select ARCH_CLOCKSOURCE_DATA
select ARCH_CLOCKSOURCE_INIT
@@ -65,6 +64,7 @@ config X86
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_SET_MEMORY
+ select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_SYNC_CORE_BEFORE_USERMODE
@@ -74,6 +74,7 @@ config X86
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select ARCH_MIGHT_HAVE_PC_PARPORT
select ARCH_MIGHT_HAVE_PC_SERIO
+ select ARCH_STACKWALK
select ARCH_SUPPORTS_ACPI
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_NUMA_BALANCING if X86_64
@@ -138,7 +139,6 @@ config X86
select HAVE_COPY_THREAD_TLS
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
- select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
@@ -183,7 +183,6 @@ config X86
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_RCU_TABLE_FREE if PARAVIRT
- select HAVE_RCU_TABLE_INVALIDATE if HAVE_RCU_TABLE_FREE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
select HAVE_FUNCTION_ARG_ACCESS_API
@@ -268,9 +267,6 @@ config ARCH_MAY_HAVE_PC_FDC
def_bool y
depends on ISA_DMA_API
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_CALIBRATE_DELAY
def_bool y
@@ -703,8 +699,6 @@ config STA2X11
bool "STA2X11 Companion Chip Support"
depends on X86_32_NON_STANDARD && PCI
select ARCH_HAS_PHYS_TO_DMA
- select X86_DEV_DMA_OPS
- select X86_DMA_REMAP
select SWIOTLB
select MFD_STA2X11
select GPIOLIB
@@ -783,14 +777,6 @@ config PARAVIRT_SPINLOCKS
If you are unsure how to answer this question, answer Y.
-config QUEUED_LOCK_STAT
- bool "Paravirt queued spinlock statistics"
- depends on PARAVIRT_SPINLOCKS && DEBUG_FS
- ---help---
- Enable the collection of statistical data on the slowpath
- behavior of paravirtualized queued spinlocks and report
- them on debugfs.
-
source "arch/x86/xen/Kconfig"
config KVM_GUEST
@@ -1330,8 +1316,16 @@ config MICROCODE_AMD
processors will be enabled.
config MICROCODE_OLD_INTERFACE
- def_bool y
+ bool "Ancient loading interface (DEPRECATED)"
+ default n
depends on MICROCODE
+ ---help---
+ DO NOT USE THIS! This is the ancient /dev/cpu/microcode interface
+ which was used by userspace tools like iucode_tool and microcode.ctl.
+ It is inadequate because it runs too late to be able to properly
+ load microcode on a machine and it needs special tools. Instead, you
+ should've switched to the early loading method with the initrd or
+ builtin microcode by now: Documentation/x86/microcode.txt
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
@@ -1606,12 +1600,9 @@ config ARCH_FLATMEM_ENABLE
depends on X86_32 && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
- def_bool y
- depends on NUMA && X86_32
-
-config ARCH_DISCONTIGMEM_DEFAULT
- def_bool y
+ def_bool n
depends on NUMA && X86_32
+ depends on BROKEN
config ARCH_SPARSEMEM_ENABLE
def_bool y
@@ -1620,8 +1611,7 @@ config ARCH_SPARSEMEM_ENABLE
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
config ARCH_SPARSEMEM_DEFAULT
- def_bool y
- depends on X86_64
+ def_bool X86_64 || (NUMA && X86_32)
config ARCH_SELECT_MEMORY_MODEL
def_bool y
@@ -2878,11 +2868,6 @@ config HAVE_ATOMIC_IOMAP
config X86_DEV_DMA_OPS
bool
- depends on X86_64 || STA2X11
-
-config X86_DMA_REMAP
- bool
- depends on STA2X11
config HAVE_GENERIC_GUP
def_bool y
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index a587805c6687..56e748a7679f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -47,7 +47,7 @@ export REALMODE_CFLAGS
export BITS
ifdef CONFIG_X86_NEED_RELOCS
- LDFLAGS_vmlinux := --emit-relocs
+ LDFLAGS_vmlinux := --emit-relocs --discard-none
endif
#
diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index 0ef4ad55b29b..ad84239e595e 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -276,7 +276,7 @@ static unsigned long get_acpi_srat_table(void)
if (acpi_table) {
header = (struct acpi_table_header *)acpi_table;
- if (ACPI_COMPARE_NAME(header->signature, ACPI_SIG_SRAT))
+ if (ACPI_COMPARE_NAMESEG(header->signature, ACPI_SIG_SRAT))
return acpi_table;
}
entry += size;
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 9f908112bbb9..2b2481acc661 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -25,18 +25,6 @@ CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_AMIGA_PARTITION=y
-CONFIG_MAC_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_SMP=y
CONFIG_X86_GENERIC=y
CONFIG_HPET_TIMER=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 1d3badfda09e..e8829abf063a 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -24,18 +24,6 @@ CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_PARTITION_ADVANCED=y
-CONFIG_OSF_PARTITION=y
-CONFIG_AMIGA_PARTITION=y
-CONFIG_MAC_PARTITION=y
-CONFIG_BSD_DISKLABEL=y
-CONFIG_MINIX_SUBPARTITION=y
-CONFIG_SOLARIS_X86_PARTITION=y
-CONFIG_UNIXWARE_DISKLABEL=y
-CONFIG_SGI_PARTITION=y
-CONFIG_SUN_PARTITION=y
-CONFIG_KARMA_PARTITION=y
-CONFIG_EFI_PARTITION=y
CONFIG_SMP=y
CONFIG_CALGARY_IOMMU=y
CONFIG_NR_CPUS=64
diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c
index 3ea71b871813..bdeee1b830be 100644
--- a/arch/x86/crypto/aegis128-aesni-glue.c
+++ b/arch/x86/crypto/aegis128-aesni-glue.c
@@ -11,8 +11,8 @@
* any later version.
*/
-#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
@@ -242,131 +242,35 @@ static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
{
}
-static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead,
- const u8 *key, unsigned int keylen)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
-}
-
-static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead,
- unsigned int authsize)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
-}
-
-static int cryptd_aegis128_aesni_encrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_encrypt(req);
-}
-
-static int cryptd_aegis128_aesni_decrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_decrypt(req);
-}
-
-static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead *cryptd_tfm;
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- *ctx = cryptd_tfm;
- crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
- return 0;
-}
-
-static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_free_aead(*ctx);
-}
-
-static struct aead_alg crypto_aegis128_aesni_alg[] = {
- {
- .setkey = crypto_aegis128_aesni_setkey,
- .setauthsize = crypto_aegis128_aesni_setauthsize,
- .encrypt = crypto_aegis128_aesni_encrypt,
- .decrypt = crypto_aegis128_aesni_decrypt,
- .init = crypto_aegis128_aesni_init_tfm,
- .exit = crypto_aegis128_aesni_exit_tfm,
-
- .ivsize = AEGIS128_NONCE_SIZE,
- .maxauthsize = AEGIS128_MAX_AUTH_SIZE,
- .chunksize = AEGIS128_BLOCK_SIZE,
-
- .base = {
- .cra_flags = CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct aegis_ctx) +
- __alignof__(struct aegis_ctx),
- .cra_alignmask = 0,
-
- .cra_name = "__aegis128",
- .cra_driver_name = "__aegis128-aesni",
-
- .cra_module = THIS_MODULE,
- }
- }, {
- .setkey = cryptd_aegis128_aesni_setkey,
- .setauthsize = cryptd_aegis128_aesni_setauthsize,
- .encrypt = cryptd_aegis128_aesni_encrypt,
- .decrypt = cryptd_aegis128_aesni_decrypt,
- .init = cryptd_aegis128_aesni_init_tfm,
- .exit = cryptd_aegis128_aesni_exit_tfm,
-
- .ivsize = AEGIS128_NONCE_SIZE,
- .maxauthsize = AEGIS128_MAX_AUTH_SIZE,
- .chunksize = AEGIS128_BLOCK_SIZE,
-
- .base = {
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct cryptd_aead *),
- .cra_alignmask = 0,
-
- .cra_priority = 400,
-
- .cra_name = "aegis128",
- .cra_driver_name = "aegis128-aesni",
-
- .cra_module = THIS_MODULE,
- }
+static struct aead_alg crypto_aegis128_aesni_alg = {
+ .setkey = crypto_aegis128_aesni_setkey,
+ .setauthsize = crypto_aegis128_aesni_setauthsize,
+ .encrypt = crypto_aegis128_aesni_encrypt,
+ .decrypt = crypto_aegis128_aesni_decrypt,
+ .init = crypto_aegis128_aesni_init_tfm,
+ .exit = crypto_aegis128_aesni_exit_tfm,
+
+ .ivsize = AEGIS128_NONCE_SIZE,
+ .maxauthsize = AEGIS128_MAX_AUTH_SIZE,
+ .chunksize = AEGIS128_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aegis_ctx) +
+ __alignof__(struct aegis_ctx),
+ .cra_alignmask = 0,
+ .cra_priority = 400,
+
+ .cra_name = "__aegis128",
+ .cra_driver_name = "__aegis128-aesni",
+
+ .cra_module = THIS_MODULE,
}
};
+static struct simd_aead_alg *simd_alg;
+
static int __init crypto_aegis128_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
@@ -374,14 +278,13 @@ static int __init crypto_aegis128_aesni_module_init(void)
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
- return crypto_register_aeads(crypto_aegis128_aesni_alg,
- ARRAY_SIZE(crypto_aegis128_aesni_alg));
+ return simd_register_aeads_compat(&crypto_aegis128_aesni_alg, 1,
+ &simd_alg);
}
static void __exit crypto_aegis128_aesni_module_exit(void)
{
- crypto_unregister_aeads(crypto_aegis128_aesni_alg,
- ARRAY_SIZE(crypto_aegis128_aesni_alg));
+ simd_unregister_aeads(&crypto_aegis128_aesni_alg, 1, &simd_alg);
}
module_init(crypto_aegis128_aesni_module_init);
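All three AEGIS glue files follow the same conversion: the hand-rolled cryptd wrapper algorithm is dropped and the crypto_simd helpers synthesize it instead. The registration half of that pattern, sketched for a hypothetical internal AEAD (fields elided):

    static struct aead_alg demo_alg = {
            /* .setkey/.encrypt/... plus CRYPTO_ALG_INTERNAL and a "__demo"
             * cra_name, as in the __aegis128 definition above */
    };
    static struct simd_aead_alg *demo_simd;

    static int __init demo_init(void)
    {
            /* Registers "__demo" and a matching "demo" wrapper that defers
             * to cryptd whenever SIMD cannot be used in the current context. */
            return simd_register_aeads_compat(&demo_alg, 1, &demo_simd);
    }

    static void __exit demo_exit(void)
    {
            simd_unregister_aeads(&demo_alg, 1, &demo_simd);
    }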
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
index 1b1b39c66c5e..80d917f7e467 100644
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ b/arch/x86/crypto/aegis128l-aesni-glue.c
@@ -11,8 +11,8 @@
* any later version.
*/
-#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
@@ -242,131 +242,35 @@ static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
{
}
-static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead,
- const u8 *key, unsigned int keylen)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
-}
-
-static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead,
- unsigned int authsize)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
-}
-
-static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_encrypt(req);
-}
-
-static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_decrypt(req);
-}
-
-static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead *cryptd_tfm;
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- *ctx = cryptd_tfm;
- crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
- return 0;
-}
-
-static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_free_aead(*ctx);
-}
-
-static struct aead_alg crypto_aegis128l_aesni_alg[] = {
- {
- .setkey = crypto_aegis128l_aesni_setkey,
- .setauthsize = crypto_aegis128l_aesni_setauthsize,
- .encrypt = crypto_aegis128l_aesni_encrypt,
- .decrypt = crypto_aegis128l_aesni_decrypt,
- .init = crypto_aegis128l_aesni_init_tfm,
- .exit = crypto_aegis128l_aesni_exit_tfm,
-
- .ivsize = AEGIS128L_NONCE_SIZE,
- .maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
- .chunksize = AEGIS128L_BLOCK_SIZE,
-
- .base = {
- .cra_flags = CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct aegis_ctx) +
- __alignof__(struct aegis_ctx),
- .cra_alignmask = 0,
-
- .cra_name = "__aegis128l",
- .cra_driver_name = "__aegis128l-aesni",
-
- .cra_module = THIS_MODULE,
- }
- }, {
- .setkey = cryptd_aegis128l_aesni_setkey,
- .setauthsize = cryptd_aegis128l_aesni_setauthsize,
- .encrypt = cryptd_aegis128l_aesni_encrypt,
- .decrypt = cryptd_aegis128l_aesni_decrypt,
- .init = cryptd_aegis128l_aesni_init_tfm,
- .exit = cryptd_aegis128l_aesni_exit_tfm,
-
- .ivsize = AEGIS128L_NONCE_SIZE,
- .maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
- .chunksize = AEGIS128L_BLOCK_SIZE,
-
- .base = {
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct cryptd_aead *),
- .cra_alignmask = 0,
-
- .cra_priority = 400,
-
- .cra_name = "aegis128l",
- .cra_driver_name = "aegis128l-aesni",
-
- .cra_module = THIS_MODULE,
- }
+static struct aead_alg crypto_aegis128l_aesni_alg = {
+ .setkey = crypto_aegis128l_aesni_setkey,
+ .setauthsize = crypto_aegis128l_aesni_setauthsize,
+ .encrypt = crypto_aegis128l_aesni_encrypt,
+ .decrypt = crypto_aegis128l_aesni_decrypt,
+ .init = crypto_aegis128l_aesni_init_tfm,
+ .exit = crypto_aegis128l_aesni_exit_tfm,
+
+ .ivsize = AEGIS128L_NONCE_SIZE,
+ .maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
+ .chunksize = AEGIS128L_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aegis_ctx) +
+ __alignof__(struct aegis_ctx),
+ .cra_alignmask = 0,
+ .cra_priority = 400,
+
+ .cra_name = "__aegis128l",
+ .cra_driver_name = "__aegis128l-aesni",
+
+ .cra_module = THIS_MODULE,
}
};
+static struct simd_aead_alg *simd_alg;
+
static int __init crypto_aegis128l_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
@@ -374,14 +278,13 @@ static int __init crypto_aegis128l_aesni_module_init(void)
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
- return crypto_register_aeads(crypto_aegis128l_aesni_alg,
- ARRAY_SIZE(crypto_aegis128l_aesni_alg));
+ return simd_register_aeads_compat(&crypto_aegis128l_aesni_alg, 1,
+ &simd_alg);
}
static void __exit crypto_aegis128l_aesni_module_exit(void)
{
- crypto_unregister_aeads(crypto_aegis128l_aesni_alg,
- ARRAY_SIZE(crypto_aegis128l_aesni_alg));
+ simd_unregister_aeads(&crypto_aegis128l_aesni_alg, 1, &simd_alg);
}
module_init(crypto_aegis128l_aesni_module_init);
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
index 6227ca3220a0..716eecb66bd5 100644
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ b/arch/x86/crypto/aegis256-aesni-glue.c
@@ -11,8 +11,8 @@
* any later version.
*/
-#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
@@ -242,131 +242,35 @@ static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
{
}
-static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead,
- const u8 *key, unsigned int keylen)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
-}
-
-static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead,
- unsigned int authsize)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
-}
-
-static int cryptd_aegis256_aesni_encrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_encrypt(req);
-}
-
-static int cryptd_aegis256_aesni_decrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_decrypt(req);
-}
-
-static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead *cryptd_tfm;
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- *ctx = cryptd_tfm;
- crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
- return 0;
-}
-
-static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_free_aead(*ctx);
-}
-
-static struct aead_alg crypto_aegis256_aesni_alg[] = {
- {
- .setkey = crypto_aegis256_aesni_setkey,
- .setauthsize = crypto_aegis256_aesni_setauthsize,
- .encrypt = crypto_aegis256_aesni_encrypt,
- .decrypt = crypto_aegis256_aesni_decrypt,
- .init = crypto_aegis256_aesni_init_tfm,
- .exit = crypto_aegis256_aesni_exit_tfm,
-
- .ivsize = AEGIS256_NONCE_SIZE,
- .maxauthsize = AEGIS256_MAX_AUTH_SIZE,
- .chunksize = AEGIS256_BLOCK_SIZE,
-
- .base = {
- .cra_flags = CRYPTO_ALG_INTERNAL,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct aegis_ctx) +
- __alignof__(struct aegis_ctx),
- .cra_alignmask = 0,
-
- .cra_name = "__aegis256",
- .cra_driver_name = "__aegis256-aesni",
-
- .cra_module = THIS_MODULE,
- }
- }, {
- .setkey = cryptd_aegis256_aesni_setkey,
- .setauthsize = cryptd_aegis256_aesni_setauthsize,
- .encrypt = cryptd_aegis256_aesni_encrypt,
- .decrypt = cryptd_aegis256_aesni_decrypt,
- .init = cryptd_aegis256_aesni_init_tfm,
- .exit = cryptd_aegis256_aesni_exit_tfm,
-
- .ivsize = AEGIS256_NONCE_SIZE,
- .maxauthsize = AEGIS256_MAX_AUTH_SIZE,
- .chunksize = AEGIS256_BLOCK_SIZE,
-
- .base = {
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct cryptd_aead *),
- .cra_alignmask = 0,
-
- .cra_priority = 400,
-
- .cra_name = "aegis256",
- .cra_driver_name = "aegis256-aesni",
-
- .cra_module = THIS_MODULE,
- }
+static struct aead_alg crypto_aegis256_aesni_alg = {
+ .setkey = crypto_aegis256_aesni_setkey,
+ .setauthsize = crypto_aegis256_aesni_setauthsize,
+ .encrypt = crypto_aegis256_aesni_encrypt,
+ .decrypt = crypto_aegis256_aesni_decrypt,
+ .init = crypto_aegis256_aesni_init_tfm,
+ .exit = crypto_aegis256_aesni_exit_tfm,
+
+ .ivsize = AEGIS256_NONCE_SIZE,
+ .maxauthsize = AEGIS256_MAX_AUTH_SIZE,
+ .chunksize = AEGIS256_BLOCK_SIZE,
+
+ .base = {
+ .cra_flags = CRYPTO_ALG_INTERNAL,
+ .cra_blocksize = 1,
+ .cra_ctxsize = sizeof(struct aegis_ctx) +
+ __alignof__(struct aegis_ctx),
+ .cra_alignmask = 0,
+ .cra_priority = 400,
+
+ .cra_name = "__aegis256",
+ .cra_driver_name = "__aegis256-aesni",
+
+ .cra_module = THIS_MODULE,
}
};
+static struct simd_aead_alg *simd_alg;
+
static int __init crypto_aegis256_aesni_module_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2) ||
@@ -374,14 +278,13 @@ static int __init crypto_aegis256_aesni_module_init(void)
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
- return crypto_register_aeads(crypto_aegis256_aesni_alg,
- ARRAY_SIZE(crypto_aegis256_aesni_alg));
+ return simd_register_aeads_compat(&crypto_aegis256_aesni_alg, 1,
+ &simd_alg);
}
static void __exit crypto_aegis256_aesni_module_exit(void)
{
- crypto_unregister_aeads(crypto_aegis256_aesni_alg,
- ARRAY_SIZE(crypto_aegis256_aesni_alg));
+ simd_unregister_aeads(&crypto_aegis256_aesni_alg, 1, &simd_alg);
}
module_init(crypto_aegis256_aesni_module_init);
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 1e3d2102033a..21c246799aa5 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -25,14 +25,13 @@
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
-#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <crypto/b128ops.h>
#include <crypto/gcm.h>
#include <crypto/xts.h>
#include <asm/cpu_device_id.h>
-#include <asm/fpu/api.h>
#include <asm/crypto/aes.h>
+#include <asm/simd.h>
#include <crypto/scatterwalk.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/simd.h>
@@ -333,7 +332,7 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx,
return -EINVAL;
}
- if (!irq_fpu_usable())
+ if (!crypto_simd_usable())
err = crypto_aes_expand_key(ctx, in_key, key_len);
else {
kernel_fpu_begin();
@@ -354,7 +353,7 @@ static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
- if (!irq_fpu_usable())
+ if (!crypto_simd_usable())
crypto_aes_encrypt_x86(ctx, dst, src);
else {
kernel_fpu_begin();
@@ -367,7 +366,7 @@ static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
- if (!irq_fpu_usable())
+ if (!crypto_simd_usable())
crypto_aes_decrypt_x86(ctx, dst, src);
else {
kernel_fpu_begin();
@@ -643,29 +642,6 @@ static int xts_decrypt(struct skcipher_request *req)
aes_ctx(ctx->raw_crypt_ctx));
}
-static int rfc4106_init(struct crypto_aead *aead)
-{
- struct cryptd_aead *cryptd_tfm;
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- *ctx = cryptd_tfm;
- crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
- return 0;
-}
-
-static void rfc4106_exit(struct crypto_aead *aead)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_free_aead(*ctx);
-}
-
static int
rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
{
@@ -710,15 +686,8 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
}
-static int gcmaes_wrapper_set_key(struct crypto_aead *parent, const u8 *key,
- unsigned int key_len)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(parent);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setkey(&cryptd_tfm->base, key, key_len);
-}
-
+/* This is the Integrity Check Value (aka the authentication tag) length and can
+ * be 8, 12 or 16 bytes long. */
static int common_rfc4106_set_authsize(struct crypto_aead *aead,
unsigned int authsize)
{
@@ -734,17 +703,6 @@ static int common_rfc4106_set_authsize(struct crypto_aead *aead,
return 0;
}
-/* This is the Integrity Check Value (aka the authentication tag length and can
- * be 8, 12 or 16 bytes long. */
-static int gcmaes_wrapper_set_authsize(struct crypto_aead *parent,
- unsigned int authsize)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(parent);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
-}
-
static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
unsigned int authsize)
{
@@ -964,38 +922,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
aes_ctx);
}
-
-static int gcmaes_wrapper_encrypt(struct aead_request *req)
-{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- tfm = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- tfm = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, tfm);
-
- return crypto_aead_encrypt(req);
-}
-
-static int gcmaes_wrapper_decrypt(struct aead_request *req)
-{
- struct crypto_aead *tfm = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(tfm);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- tfm = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- tfm = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, tfm);
-
- return crypto_aead_decrypt(req);
-}
#endif
static struct crypto_alg aesni_algs[] = { {
@@ -1148,31 +1074,7 @@ static int generic_gcmaes_decrypt(struct aead_request *req)
aes_ctx);
}
-static int generic_gcmaes_init(struct crypto_aead *aead)
-{
- struct cryptd_aead *cryptd_tfm;
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_tfm = cryptd_alloc_aead("__driver-generic-gcm-aes-aesni",
- CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- *ctx = cryptd_tfm;
- crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
-
- return 0;
-}
-
-static void generic_gcmaes_exit(struct crypto_aead *aead)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_free_aead(*ctx);
-}
-
-static struct aead_alg aesni_aead_algs[] = { {
+static struct aead_alg aesni_aeads[] = { {
.setkey = common_rfc4106_set_key,
.setauthsize = common_rfc4106_set_authsize,
.encrypt = helper_rfc4106_encrypt,
@@ -1180,8 +1082,9 @@ static struct aead_alg aesni_aead_algs[] = { {
.ivsize = GCM_RFC4106_IV_SIZE,
.maxauthsize = 16,
.base = {
- .cra_name = "__gcm-aes-aesni",
- .cra_driver_name = "__driver-gcm-aes-aesni",
+ .cra_name = "__rfc4106(gcm(aes))",
+ .cra_driver_name = "__rfc4106-gcm-aesni",
+ .cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx),
@@ -1189,24 +1092,6 @@ static struct aead_alg aesni_aead_algs[] = { {
.cra_module = THIS_MODULE,
},
}, {
- .init = rfc4106_init,
- .exit = rfc4106_exit,
- .setkey = gcmaes_wrapper_set_key,
- .setauthsize = gcmaes_wrapper_set_authsize,
- .encrypt = gcmaes_wrapper_encrypt,
- .decrypt = gcmaes_wrapper_decrypt,
- .ivsize = GCM_RFC4106_IV_SIZE,
- .maxauthsize = 16,
- .base = {
- .cra_name = "rfc4106(gcm(aes))",
- .cra_driver_name = "rfc4106-gcm-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct cryptd_aead *),
- .cra_module = THIS_MODULE,
- },
-}, {
.setkey = generic_gcmaes_set_key,
.setauthsize = generic_gcmaes_set_authsize,
.encrypt = generic_gcmaes_encrypt,
@@ -1214,38 +1099,21 @@ static struct aead_alg aesni_aead_algs[] = { {
.ivsize = GCM_AES_IV_SIZE,
.maxauthsize = 16,
.base = {
- .cra_name = "__generic-gcm-aes-aesni",
- .cra_driver_name = "__driver-generic-gcm-aes-aesni",
- .cra_priority = 0,
+ .cra_name = "__gcm(aes)",
+ .cra_driver_name = "__generic-gcm-aesni",
+ .cra_priority = 400,
.cra_flags = CRYPTO_ALG_INTERNAL,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct generic_gcmaes_ctx),
.cra_alignmask = AESNI_ALIGN - 1,
.cra_module = THIS_MODULE,
},
-}, {
- .init = generic_gcmaes_init,
- .exit = generic_gcmaes_exit,
- .setkey = gcmaes_wrapper_set_key,
- .setauthsize = gcmaes_wrapper_set_authsize,
- .encrypt = gcmaes_wrapper_encrypt,
- .decrypt = gcmaes_wrapper_decrypt,
- .ivsize = GCM_AES_IV_SIZE,
- .maxauthsize = 16,
- .base = {
- .cra_name = "gcm(aes)",
- .cra_driver_name = "generic-gcm-aesni",
- .cra_priority = 400,
- .cra_flags = CRYPTO_ALG_ASYNC,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct cryptd_aead *),
- .cra_module = THIS_MODULE,
- },
} };
#else
-static struct aead_alg aesni_aead_algs[0];
+static struct aead_alg aesni_aeads[0];
#endif
+static struct simd_aead_alg *aesni_simd_aeads[ARRAY_SIZE(aesni_aeads)];
static const struct x86_cpu_id aesni_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_AES),
@@ -1253,23 +1121,9 @@ static const struct x86_cpu_id aesni_cpu_id[] = {
};
MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
-static void aesni_free_simds(void)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers) &&
- aesni_simd_skciphers[i]; i++)
- simd_skcipher_free(aesni_simd_skciphers[i]);
-}
-
static int __init aesni_init(void)
{
- struct simd_skcipher_alg *simd;
- const char *basename;
- const char *algname;
- const char *drvname;
int err;
- int i;
if (!x86_match_cpu(aesni_cpu_id))
return -ENODEV;
@@ -1304,36 +1158,22 @@ static int __init aesni_init(void)
if (err)
return err;
- err = crypto_register_skciphers(aesni_skciphers,
- ARRAY_SIZE(aesni_skciphers));
+ err = simd_register_skciphers_compat(aesni_skciphers,
+ ARRAY_SIZE(aesni_skciphers),
+ aesni_simd_skciphers);
if (err)
goto unregister_algs;
- err = crypto_register_aeads(aesni_aead_algs,
- ARRAY_SIZE(aesni_aead_algs));
+ err = simd_register_aeads_compat(aesni_aeads, ARRAY_SIZE(aesni_aeads),
+ aesni_simd_aeads);
if (err)
goto unregister_skciphers;
- for (i = 0; i < ARRAY_SIZE(aesni_skciphers); i++) {
- algname = aesni_skciphers[i].base.cra_name + 2;
- drvname = aesni_skciphers[i].base.cra_driver_name + 2;
- basename = aesni_skciphers[i].base.cra_driver_name;
- simd = simd_skcipher_create_compat(algname, drvname, basename);
- err = PTR_ERR(simd);
- if (IS_ERR(simd))
- goto unregister_simds;
-
- aesni_simd_skciphers[i] = simd;
- }
-
return 0;
-unregister_simds:
- aesni_free_simds();
- crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
unregister_skciphers:
- crypto_unregister_skciphers(aesni_skciphers,
- ARRAY_SIZE(aesni_skciphers));
+ simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
+ aesni_simd_skciphers);
unregister_algs:
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
return err;
@@ -1341,10 +1181,10 @@ unregister_algs:
static void __exit aesni_exit(void)
{
- aesni_free_simds();
- crypto_unregister_aeads(aesni_aead_algs, ARRAY_SIZE(aesni_aead_algs));
- crypto_unregister_skciphers(aesni_skciphers,
- ARRAY_SIZE(aesni_skciphers));
+ simd_unregister_aeads(aesni_aeads, ARRAY_SIZE(aesni_aeads),
+ aesni_simd_aeads);
+ simd_unregister_skciphers(aesni_skciphers, ARRAY_SIZE(aesni_skciphers),
+ aesni_simd_skciphers);
crypto_unregister_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
}
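
Throughout the glue files below, the irq_fpu_usable() test becomes crypto_simd_usable(), while the fall-back structure around it stays intact. A hedged sketch of that guard, modelled on the sha1/sha256/crc hunks further down (all example_* function names are placeholders, not kernel symbols):

	#include <crypto/internal/simd.h>
	#include <asm/simd.h>

	static int example_update(struct shash_desc *desc, const u8 *data,
				  unsigned int len)
	{
		/* Use the generic C code when SIMD registers must not be
		 * touched in the current context. */
		if (!crypto_simd_usable())
			return example_generic_update(desc, data, len);

		kernel_fpu_begin();
		example_simd_update(desc, data, len);	/* hypothetical helper */
		kernel_fpu_end();
		return 0;
	}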
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 45c1c4143176..4967ad620775 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -12,10 +12,10 @@
#include <crypto/algapi.h>
#include <crypto/chacha.h>
+#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/fpu/api.h>
#include <asm/simd.h>
#define CHACHA_STATE_ALIGN 16
@@ -170,7 +170,7 @@ static int chacha_simd(struct skcipher_request *req)
struct skcipher_walk walk;
int err;
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
return crypto_chacha_crypt(req);
err = skcipher_walk_virt(&walk, req, true);
@@ -193,7 +193,7 @@ static int xchacha_simd(struct skcipher_request *req)
u8 real_iv[16];
int err;
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !irq_fpu_usable())
+ if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
return crypto_xchacha_crypt(req);
err = skcipher_walk_virt(&walk, req, true);
diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pclmul_glue.c
index c8d9cdacbf10..cb4ab6645106 100644
--- a/arch/x86/crypto/crc32-pclmul_glue.c
+++ b/arch/x86/crypto/crc32-pclmul_glue.c
@@ -32,10 +32,11 @@
#include <linux/kernel.h>
#include <linux/crc32.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
-#include <asm/fpu/api.h>
+#include <asm/simd.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
@@ -54,7 +55,7 @@ static u32 __attribute__((pure))
unsigned int iremainder;
unsigned int prealign;
- if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !irq_fpu_usable())
+ if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable())
return crc32_le(crc, p, len);
if ((long)p & SCALE_F_MASK) {
diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-intel_glue.c
index 5773e1161072..a58fe217c856 100644
--- a/arch/x86/crypto/crc32c-intel_glue.c
+++ b/arch/x86/crypto/crc32c-intel_glue.c
@@ -29,10 +29,11 @@
#include <linux/string.h>
#include <linux/kernel.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
-#include <asm/fpu/internal.h>
+#include <asm/simd.h>
#define CHKSUM_BLOCK_SIZE 1
#define CHKSUM_DIGEST_SIZE 4
@@ -177,7 +178,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
* use faster PCL version if datasize is large enough to
* overcome kernel fpu state save/restore overhead
*/
- if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
+ if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
kernel_fpu_begin();
*crcp = crc_pcl(data, len, *crcp);
kernel_fpu_end();
@@ -189,7 +190,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
- if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
+ if (len >= CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) {
kernel_fpu_begin();
*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
kernel_fpu_end();
diff --git a/arch/x86/crypto/crct10dif-pclmul_glue.c b/arch/x86/crypto/crct10dif-pclmul_glue.c
index 0e785c0b2354..3c81e15b0873 100644
--- a/arch/x86/crypto/crct10dif-pclmul_glue.c
+++ b/arch/x86/crypto/crct10dif-pclmul_glue.c
@@ -26,12 +26,13 @@
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/kernel.h>
-#include <asm/fpu/api.h>
#include <asm/cpufeatures.h>
#include <asm/cpu_device_id.h>
+#include <asm/simd.h>
asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
@@ -53,7 +54,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data,
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
- if (length >= 16 && irq_fpu_usable()) {
+ if (length >= 16 && crypto_simd_usable()) {
kernel_fpu_begin();
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
kernel_fpu_end();
@@ -70,15 +71,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
return 0;
}
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
- u8 *out)
+static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out)
{
- if (len >= 16 && irq_fpu_usable()) {
+ if (len >= 16 && crypto_simd_usable()) {
kernel_fpu_begin();
- *(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
+ *(__u16 *)out = crc_t10dif_pcl(crc, data, len);
kernel_fpu_end();
} else
- *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
+ *(__u16 *)out = crc_t10dif_generic(crc, data, len);
return 0;
}
@@ -87,15 +87,13 @@ static int chksum_finup(struct shash_desc *desc, const u8 *data,
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
- return __chksum_finup(&ctx->crc, data, len, out);
+ return __chksum_finup(ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
- struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
- return __chksum_finup(&ctx->crc, data, length, out);
+ return __chksum_finup(0, data, length, out);
}
static struct shash_alg alg = {
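
One detail worth noting in the crct10dif hunk above: __chksum_finup() now takes the starting CRC by value, so chksum_digest() can simply pass the CRC-T10DIF initial value of 0 rather than dereferencing an unused descriptor context. In effect the two callers reduce to:

	__chksum_finup(0, data, length, out);		/* chksum_digest(): one-shot */
	__chksum_finup(ctx->crc, data, len, out);	/* chksum_finup(): continuation */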
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index 3582ae885ee1..e3f3e6fd9d65 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -19,8 +19,9 @@
#include <crypto/cryptd.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/hash.h>
-#include <asm/fpu/api.h>
+#include <crypto/internal/simd.h>
#include <asm/cpu_device_id.h>
+#include <asm/simd.h>
#define GHASH_BLOCK_SIZE 16
#define GHASH_DIGEST_SIZE 16
@@ -171,7 +172,6 @@ static int ghash_async_init(struct ahash_request *req)
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
- desc->flags = req->base.flags;
return crypto_shash_init(desc);
}
@@ -182,7 +182,7 @@ static int ghash_async_update(struct ahash_request *req)
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!irq_fpu_usable() ||
+ if (!crypto_simd_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -200,7 +200,7 @@ static int ghash_async_final(struct ahash_request *req)
struct ghash_async_ctx *ctx = crypto_ahash_ctx(tfm);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!irq_fpu_usable() ||
+ if (!crypto_simd_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -241,7 +241,7 @@ static int ghash_async_digest(struct ahash_request *req)
struct ahash_request *cryptd_req = ahash_request_ctx(req);
struct cryptd_ahash *cryptd_tfm = ctx->cryptd_tfm;
- if (!irq_fpu_usable() ||
+ if (!crypto_simd_usable() ||
(in_atomic() && cryptd_ahash_queued(cryptd_tfm))) {
memcpy(cryptd_req, req, sizeof(*req));
ahash_request_set_tfm(cryptd_req, &cryptd_tfm->base);
@@ -251,7 +251,6 @@ static int ghash_async_digest(struct ahash_request *req)
struct crypto_shash *child = cryptd_ahash_child(cryptd_tfm);
desc->tfm = child;
- desc->flags = req->base.flags;
return shash_ahash_digest(req, desc);
}
}
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
index 6634907d6ccd..679627a2a824 100644
--- a/arch/x86/crypto/morus1280-avx2-glue.c
+++ b/arch/x86/crypto/morus1280-avx2-glue.c
@@ -12,6 +12,7 @@
*/
#include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
#include <crypto/morus1280_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
@@ -35,7 +36,9 @@ asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
-MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400);
+MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400);
+
+static struct simd_aead_alg *simd_alg;
static int __init crypto_morus1280_avx2_module_init(void)
{
@@ -44,14 +47,13 @@ static int __init crypto_morus1280_avx2_module_init(void)
!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
return -ENODEV;
- return crypto_register_aeads(crypto_morus1280_avx2_algs,
- ARRAY_SIZE(crypto_morus1280_avx2_algs));
+ return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1,
+ &simd_alg);
}
static void __exit crypto_morus1280_avx2_module_exit(void)
{
- crypto_unregister_aeads(crypto_morus1280_avx2_algs,
- ARRAY_SIZE(crypto_morus1280_avx2_algs));
+ simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg);
}
module_init(crypto_morus1280_avx2_module_init);
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
index f40244eaf14d..c35c0638d0bb 100644
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ b/arch/x86/crypto/morus1280-sse2-glue.c
@@ -12,6 +12,7 @@
*/
#include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
#include <crypto/morus1280_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
@@ -35,7 +36,9 @@ asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
-MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350);
+MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350);
+
+static struct simd_aead_alg *simd_alg;
static int __init crypto_morus1280_sse2_module_init(void)
{
@@ -43,14 +46,13 @@ static int __init crypto_morus1280_sse2_module_init(void)
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
- return crypto_register_aeads(crypto_morus1280_sse2_algs,
- ARRAY_SIZE(crypto_morus1280_sse2_algs));
+ return simd_register_aeads_compat(&crypto_morus1280_sse2_alg, 1,
+ &simd_alg);
}
static void __exit crypto_morus1280_sse2_module_exit(void)
{
- crypto_unregister_aeads(crypto_morus1280_sse2_algs,
- ARRAY_SIZE(crypto_morus1280_sse2_algs));
+ simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg);
}
module_init(crypto_morus1280_sse2_module_init);
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
index 7e600f8bcdad..30fc1bd98ec3 100644
--- a/arch/x86/crypto/morus1280_glue.c
+++ b/arch/x86/crypto/morus1280_glue.c
@@ -11,7 +11,6 @@
* any later version.
*/
-#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/morus1280_glue.h>
@@ -205,90 +204,6 @@ void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
}
EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
-int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
- unsigned int keylen)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey);
-
-int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead,
- unsigned int authsize)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize);
-
-int cryptd_morus1280_glue_encrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_encrypt(req);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt);
-
-int cryptd_morus1280_glue_decrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_decrypt(req);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt);
-
-int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead *cryptd_tfm;
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
- char internal_name[CRYPTO_MAX_ALG_NAME];
-
- if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
- >= CRYPTO_MAX_ALG_NAME)
- return -ENAMETOOLONG;
-
- cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- *ctx = cryptd_tfm;
- crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
- return 0;
-}
-EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm);
-
-void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_free_aead(*ctx);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm);
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
index 9afaf8f8565a..32da56b3bdad 100644
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ b/arch/x86/crypto/morus640-sse2-glue.c
@@ -12,6 +12,7 @@
*/
#include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
#include <crypto/morus640_glue.h>
#include <linux/module.h>
#include <asm/fpu/api.h>
@@ -35,7 +36,9 @@ asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
u64 assoclen, u64 cryptlen);
-MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400);
+MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400);
+
+static struct simd_aead_alg *simd_alg;
static int __init crypto_morus640_sse2_module_init(void)
{
@@ -43,14 +46,13 @@ static int __init crypto_morus640_sse2_module_init(void)
!cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
return -ENODEV;
- return crypto_register_aeads(crypto_morus640_sse2_algs,
- ARRAY_SIZE(crypto_morus640_sse2_algs));
+ return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1,
+ &simd_alg);
}
static void __exit crypto_morus640_sse2_module_exit(void)
{
- crypto_unregister_aeads(crypto_morus640_sse2_algs,
- ARRAY_SIZE(crypto_morus640_sse2_algs));
+ simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg);
}
module_init(crypto_morus640_sse2_module_init);
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c
index cb3a81732016..1dea33d84426 100644
--- a/arch/x86/crypto/morus640_glue.c
+++ b/arch/x86/crypto/morus640_glue.c
@@ -11,7 +11,6 @@
* any later version.
*/
-#include <crypto/cryptd.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/skcipher.h>
#include <crypto/morus640_glue.h>
@@ -200,90 +199,6 @@ void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
}
EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
-int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
- unsigned int keylen)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setkey(&cryptd_tfm->base, key, keylen);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey);
-
-int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead,
- unsigned int authsize)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize);
-
-int cryptd_morus640_glue_encrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_encrypt(req);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt);
-
-int cryptd_morus640_glue_decrypt(struct aead_request *req)
-{
- struct crypto_aead *aead = crypto_aead_reqtfm(req);
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- struct cryptd_aead *cryptd_tfm = *ctx;
-
- aead = &cryptd_tfm->base;
- if (irq_fpu_usable() && (!in_atomic() ||
- !cryptd_aead_queued(cryptd_tfm)))
- aead = cryptd_aead_child(cryptd_tfm);
-
- aead_request_set_tfm(req, aead);
-
- return crypto_aead_decrypt(req);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt);
-
-int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead *cryptd_tfm;
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
- const char *name = crypto_aead_alg(aead)->base.cra_driver_name;
- char internal_name[CRYPTO_MAX_ALG_NAME];
-
- if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name)
- >= CRYPTO_MAX_ALG_NAME)
- return -ENAMETOOLONG;
-
- cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL,
- CRYPTO_ALG_INTERNAL);
- if (IS_ERR(cryptd_tfm))
- return PTR_ERR(cryptd_tfm);
-
- *ctx = cryptd_tfm;
- crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base));
- return 0;
-}
-EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm);
-
-void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead)
-{
- struct cryptd_aead **ctx = crypto_aead_ctx(aead);
-
- cryptd_free_aead(*ctx);
-}
-EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm);
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index 20d815ea4b6a..f7567cbd35b6 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -7,9 +7,10 @@
*/
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
-#include <asm/fpu/api.h>
+#include <asm/simd.h>
asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
u8 hash[NH_HASH_BYTES]);
@@ -24,7 +25,7 @@ static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len,
static int nhpoly1305_avx2_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
- if (srclen < 64 || !irq_fpu_usable())
+ if (srclen < 64 || !crypto_simd_usable())
return crypto_nhpoly1305_update(desc, src, srclen);
do {
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index ed68d164ce14..a661ede3b5cf 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -7,9 +7,10 @@
*/
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/nhpoly1305.h>
#include <linux/module.h>
-#include <asm/fpu/api.h>
+#include <asm/simd.h>
asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
u8 hash[NH_HASH_BYTES]);
@@ -24,7 +25,7 @@ static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len,
static int nhpoly1305_sse2_update(struct shash_desc *desc,
const u8 *src, unsigned int srclen)
{
- if (srclen < 64 || !irq_fpu_usable())
+ if (srclen < 64 || !crypto_simd_usable())
return crypto_nhpoly1305_update(desc, src, srclen);
do {
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 88cc01506c84..6eb65b237b3c 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -11,11 +11,11 @@
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <crypto/poly1305.h>
#include <linux/crypto.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <asm/fpu/api.h>
#include <asm/simd.h>
struct poly1305_simd_desc_ctx {
@@ -126,7 +126,7 @@ static int poly1305_simd_update(struct shash_desc *desc,
unsigned int bytes;
/* kernel_fpu_begin/end is costly, use fallback for small updates */
- if (srclen <= 288 || !may_use_simd())
+ if (srclen <= 288 || !crypto_simd_usable())
return crypto_poly1305_update(desc, src, srclen);
kernel_fpu_begin();
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c
index 7391c7de72c7..42f177afc33a 100644
--- a/arch/x86/crypto/sha1_ssse3_glue.c
+++ b/arch/x86/crypto/sha1_ssse3_glue.c
@@ -22,6 +22,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -29,7 +30,7 @@
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
-#include <asm/fpu/api.h>
+#include <asm/simd.h>
typedef void (sha1_transform_fn)(u32 *digest, const char *data,
unsigned int rounds);
@@ -39,7 +40,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
{
struct sha1_state *sctx = shash_desc_ctx(desc);
- if (!irq_fpu_usable() ||
+ if (!crypto_simd_usable() ||
(sctx->count % SHA1_BLOCK_SIZE) + len < SHA1_BLOCK_SIZE)
return crypto_sha1_update(desc, data, len);
@@ -57,7 +58,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data,
static int sha1_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out, sha1_transform_fn *sha1_xform)
{
- if (!irq_fpu_usable())
+ if (!crypto_simd_usable())
return crypto_sha1_finup(desc, data, len, out);
kernel_fpu_begin();
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 773a873d2b28..73867da3cbee 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -30,6 +30,7 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
@@ -37,8 +38,8 @@
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
-#include <asm/fpu/api.h>
#include <linux/string.h>
+#include <asm/simd.h>
asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data,
u64 rounds);
@@ -49,7 +50,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
{
struct sha256_state *sctx = shash_desc_ctx(desc);
- if (!irq_fpu_usable() ||
+ if (!crypto_simd_usable() ||
(sctx->count % SHA256_BLOCK_SIZE) + len < SHA256_BLOCK_SIZE)
return crypto_sha256_update(desc, data, len);
@@ -67,7 +68,7 @@ static int sha256_update(struct shash_desc *desc, const u8 *data,
static int sha256_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out, sha256_transform_fn *sha256_xform)
{
- if (!irq_fpu_usable())
+ if (!crypto_simd_usable())
return crypto_sha256_finup(desc, data, len, out);
kernel_fpu_begin();
diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c
index f1b811b60ba6..458356a3f124 100644
--- a/arch/x86/crypto/sha512_ssse3_glue.c
+++ b/arch/x86/crypto/sha512_ssse3_glue.c
@@ -28,16 +28,16 @@
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
+#include <linux/string.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
-#include <asm/fpu/api.h>
-
-#include <linux/string.h>
+#include <asm/simd.h>
asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data,
u64 rounds);
@@ -49,7 +49,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data,
{
struct sha512_state *sctx = shash_desc_ctx(desc);
- if (!irq_fpu_usable() ||
+ if (!crypto_simd_usable() ||
(sctx->count[0] % SHA512_BLOCK_SIZE) + len < SHA512_BLOCK_SIZE)
return crypto_sha512_update(desc, data, len);
@@ -67,7 +67,7 @@ static int sha512_update(struct shash_desc *desc, const u8 *data,
static int sha512_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out, sha512_transform_fn *sha512_xform)
{
- if (!irq_fpu_usable())
+ if (!crypto_simd_usable())
return crypto_sha512_finup(desc, data, len, out);
kernel_fpu_begin();
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 7bc105f47d21..51beb8d29123 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -25,12 +25,13 @@
#include <linux/uprobes.h>
#include <linux/livepatch.h>
#include <linux/syscalls.h>
+#include <linux/uaccess.h>
#include <asm/desc.h>
#include <asm/traps.h>
#include <asm/vdso.h>
-#include <linux/uaccess.h>
#include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -196,6 +197,13 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
if (unlikely(cached_flags & EXIT_TO_USERMODE_LOOP_FLAGS))
exit_to_usermode_loop(regs, cached_flags);
+ /* Reload ti->flags; we may have rescheduled above. */
+ cached_flags = READ_ONCE(ti->flags);
+
+ fpregs_assert_state_consistent();
+ if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+
#ifdef CONFIG_COMPAT
/*
* Compat syscalls set TS_COMPAT. Make sure we clear it before
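diff commentary: the prepare_exit_to_usermode() hunk above belongs to the deferred-FPU-restore work in this series: exit_to_usermode_loop() may have rescheduled, so the TIF flags are re-read, and when _TIF_NEED_FPU_LOAD is set the task's FPU registers are restored before returning to user space. A condensed reading of the added tail, with the interpretation in comments (the flag semantics are inferred from the series, not wording taken from the patch):

	/* ti->flags may have changed while rescheduling inside
	 * exit_to_usermode_loop(), so sample it again. */
	cached_flags = READ_ONCE(ti->flags);

	fpregs_assert_state_consistent();

	/* _TIF_NEED_FPU_LOAD: the task's FPU state lives only in memory;
	 * load it back into the registers before returning to user space. */
	if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
		switch_fpu_return();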
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index d309f30cf7af..7b23431be5cb 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -650,6 +650,7 @@ ENTRY(__switch_to_asm)
pushl %ebx
pushl %edi
pushl %esi
+ pushfl
/* switch stack */
movl %esp, TASK_threadsp(%eax)
@@ -672,6 +673,7 @@ ENTRY(__switch_to_asm)
#endif
/* restore callee-saved registers */
+ popfl
popl %esi
popl %edi
popl %ebx
@@ -766,13 +768,12 @@ END(ret_from_exception)
#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
-.Lneed_resched:
cmpl $0, PER_CPU_VAR(__preempt_count)
jnz restore_all_kernel
testl $X86_EFLAGS_IF, PT_EFLAGS(%esp) # interrupts off (exception path) ?
jz restore_all_kernel
call preempt_schedule_irq
- jmp .Lneed_resched
+ jmp restore_all_kernel
END(resume_kernel)
#endif
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 1f0efdb7b629..20e45d9b4e15 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -298,7 +298,7 @@ ENTRY(__switch_to_asm)
#ifdef CONFIG_STACKPROTECTOR
movq TASK_stack_canary(%rsi), %rbx
- movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
+ movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif
#ifdef CONFIG_RETPOLINE
@@ -430,8 +430,8 @@ END(irq_entries_start)
* it before we actually move ourselves to the IRQ stack.
*/
- movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
- movq PER_CPU_VAR(irq_stack_ptr), %rsp
+ movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
+ movq PER_CPU_VAR(hardirq_stack_ptr), %rsp
#ifdef CONFIG_DEBUG_ENTRY
/*
@@ -645,10 +645,9 @@ retint_kernel:
/* Check if we need preemption */
btl $9, EFLAGS(%rsp) /* were interrupts off? */
jnc 1f
-0: cmpl $0, PER_CPU_VAR(__preempt_count)
+ cmpl $0, PER_CPU_VAR(__preempt_count)
jnz 1f
call preempt_schedule_irq
- jmp 0b
1:
#endif
/*
@@ -841,7 +840,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
/*
* Exception entry points.
*/
-#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
/**
* idtentry - Generate an IDT entry stub
@@ -879,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* @paranoid == 2 is special: the stub will never switch stacks. This is for
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
ENTRY(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
@@ -925,13 +924,13 @@ ENTRY(\sym)
.endif
.if \shift_ist != -1
- subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
+ subq $\ist_offset, CPU_TSS_IST(\shift_ist)
.endif
call \do_sym
.if \shift_ist != -1
- addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
+ addq $\ist_offset, CPU_TSS_IST(\shift_ist)
.endif
/* these procedures expect "no swapgs" flag in ebx */
@@ -1129,7 +1128,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
hv_stimer0_callback_vector hv_stimer0_vector_handler
#endif /* CONFIG_HYPERV */
-idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
+idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
idtentry int3 do_int3 has_error_code=0
idtentry stack_segment do_stack_segment has_error_code=1
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 1f9607ed087c..4cd5f982b1e5 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -398,7 +398,12 @@
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
385 i386 io_pgetevents sys_io_pgetevents_time32 __ia32_compat_sys_io_pgetevents
386 i386 rseq sys_rseq __ia32_sys_rseq
-# don't use numbers 387 through 392, add new calls at the end
+387 i386 open_tree sys_open_tree __ia32_sys_open_tree
+388 i386 move_mount sys_move_mount __ia32_sys_move_mount
+389 i386 fsopen sys_fsopen __ia32_sys_fsopen
+390 i386 fsconfig sys_fsconfig __ia32_sys_fsconfig
+391 i386 fsmount sys_fsmount __ia32_sys_fsmount
+392 i386 fspick sys_fspick __ia32_sys_fspick
393 i386 semget sys_semget __ia32_sys_semget
394 i386 semctl sys_semctl __ia32_compat_sys_semctl
395 i386 shmget sys_shmget __ia32_sys_shmget
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 92ee0b4378d4..64ca0d06259a 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -343,6 +343,12 @@
332 common statx __x64_sys_statx
333 common io_pgetevents __x64_sys_io_pgetevents
334 common rseq __x64_sys_rseq
+335 common open_tree __x64_sys_open_tree
+336 common move_mount __x64_sys_move_mount
+337 common fsopen __x64_sys_fsopen
+338 common fsconfig __x64_sys_fsconfig
+339 common fsmount __x64_sys_fsmount
+340 common fspick __x64_sys_fspick
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal __x64_sys_pidfd_send_signal
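
The new table entries above wire up the mount API syscalls. A hedged user-space sketch of how they chain together, assuming the FSCONFIG_*/MOVE_MOUNT_* constants and __NR_* numbers come from the uapi headers introduced elsewhere in this series (the filesystem and mount point are arbitrary examples):

	#include <unistd.h>
	#include <fcntl.h>
	#include <sys/syscall.h>
	#include <linux/mount.h>

	static int mount_tmpfs(const char *where)
	{
		int fsfd, mntfd;

		fsfd = syscall(__NR_fsopen, "tmpfs", 0);
		if (fsfd < 0)
			return -1;

		/* Configure the new filesystem context, then create it. */
		syscall(__NR_fsconfig, fsfd, FSCONFIG_SET_STRING, "size", "16M", 0);
		syscall(__NR_fsconfig, fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);

		/* Turn the context into a detached mount and attach it. */
		mntfd = syscall(__NR_fsmount, fsfd, 0, 0);
		if (mntfd < 0)
			return -1;

		return syscall(__NR_move_mount, mntfd, "", AT_FDCWD, where,
			       MOVE_MOUNT_F_EMPTY_PATH);
	}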
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 5bfe2243a08f..42fe42e82baf 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -116,7 +116,7 @@ $(obj)/%-x32.o: $(obj)/%.o FORCE
targets += vdsox32.lds $(vobjx32s-y)
$(obj)/%.so: OBJCOPYFLAGS := -S
-$(obj)/%.so: $(obj)/%.so.dbg
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
$(call if_changed,objcopy)
$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 007b3fe9d727..98c7d12b945c 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -29,12 +29,12 @@ extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz);
extern time_t __vdso_time(time_t *t);
#ifdef CONFIG_PARAVIRT_CLOCK
-extern u8 pvclock_page
+extern u8 pvclock_page[PAGE_SIZE]
__attribute__((visibility("hidden")));
#endif
#ifdef CONFIG_HYPERV_TSCPAGE
-extern u8 hvclock_page
+extern u8 hvclock_page[PAGE_SIZE]
__attribute__((visibility("hidden")));
#endif
diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h
index fa847a620f40..a20b134de2a8 100644
--- a/arch/x86/entry/vdso/vdso2c.h
+++ b/arch/x86/entry/vdso/vdso2c.h
@@ -7,7 +7,7 @@
static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
void *stripped_addr, size_t stripped_len,
- FILE *outfile, const char *name)
+ FILE *outfile, const char *image_name)
{
int found_load = 0;
unsigned long load_size = -1; /* Work around bogus warning */
@@ -93,11 +93,12 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
int k;
ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) +
GET_LE(&symtab_hdr->sh_entsize) * i;
- const char *name = raw_addr + GET_LE(&strtab_hdr->sh_offset) +
- GET_LE(&sym->st_name);
+ const char *sym_name = raw_addr +
+ GET_LE(&strtab_hdr->sh_offset) +
+ GET_LE(&sym->st_name);
for (k = 0; k < NSYMS; k++) {
- if (!strcmp(name, required_syms[k].name)) {
+ if (!strcmp(sym_name, required_syms[k].name)) {
if (syms[k]) {
fail("duplicate symbol %s\n",
required_syms[k].name);
@@ -134,7 +135,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
if (syms[sym_vvar_start] % 4096)
fail("vvar_begin must be a multiple of 4096\n");
- if (!name) {
+ if (!image_name) {
fwrite(stripped_addr, stripped_len, 1, outfile);
return;
}
@@ -157,7 +158,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len,
}
fprintf(outfile, "\n};\n\n");
- fprintf(outfile, "const struct vdso_image %s = {\n", name);
+ fprintf(outfile, "const struct vdso_image %s = {\n", image_name);
fprintf(outfile, "\t.data = raw_data,\n");
fprintf(outfile, "\t.size = %lu,\n", mapping_size);
if (alt_sec) {
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index d45f3fbd232e..f15441b07dad 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -116,6 +116,110 @@ static __initconst const u64 amd_hw_cache_event_ids
},
};
+static __initconst const u64 amd_hw_cache_event_ids_f17h
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+[C(L1D)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0040, /* Data Cache Accesses */
+ [C(RESULT_MISS)] = 0xc860, /* L2$ access from DC Miss */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0xff5a, /* h/w prefetch DC Fills */
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(L1I)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0080, /* Instruction cache fetches */
+ [C(RESULT_MISS)] = 0x0081, /* Instruction cache misses */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(DTLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0xff45, /* All L2 DTLB accesses */
+ [C(RESULT_MISS)] = 0xf045, /* L2 DTLB misses (PT walks) */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+},
+[C(ITLB)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x0084, /* L1 ITLB misses, L2 ITLB hits */
+ [C(RESULT_MISS)] = 0xff85, /* L1 ITLB misses, L2 misses */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(BPU)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0x00c2, /* Retired Branch Instr. */
+ [C(RESULT_MISS)] = 0x00c3, /* Retired Mispredicted BI */
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+[C(NODE)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = 0,
+ [C(RESULT_MISS)] = 0,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = -1,
+ [C(RESULT_MISS)] = -1,
+ },
+},
+};
+
/*
* AMD Performance Monitor K7 and later, up to and including Family 16h:
*/
@@ -865,9 +969,10 @@ __init int amd_pmu_init(void)
x86_pmu.amd_nb_constraints = 0;
}
- /* Events are common for all AMDs */
- memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
- sizeof(hw_cache_event_ids));
+ if (boot_cpu_data.x86 >= 0x17)
+ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids_f17h, sizeof(hw_cache_event_ids));
+ else
+ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids));
return 0;
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 81911e11a15d..f315425d8468 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -560,6 +560,21 @@ int x86_pmu_hw_config(struct perf_event *event)
return -EINVAL;
}
+ /* sample_regs_user never supports XMM registers */
+ if (unlikely(event->attr.sample_regs_user & PEBS_XMM_REGS))
+ return -EINVAL;
+ /*
+ * Besides the general purpose registers, XMM registers may
+ * be collected in PEBS on some platforms, e.g. Icelake
+ */
+ if (unlikely(event->attr.sample_regs_intr & PEBS_XMM_REGS)) {
+ if (x86_pmu.pebs_no_xmm_regs)
+ return -EINVAL;
+
+ if (!event->attr.precise_ip)
+ return -EINVAL;
+ }
+
return x86_setup_perfctr(event);
}
@@ -661,6 +676,10 @@ static inline int is_x86_event(struct perf_event *event)
return event->pmu == &pmu;
}
+struct pmu *x86_get_pmu(void)
+{
+ return &pmu;
+}
/*
* Event scheduler state:
*
@@ -849,18 +868,43 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
struct event_constraint *c;
unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
struct perf_event *e;
- int i, wmin, wmax, unsched = 0;
+ int n0, i, wmin, wmax, unsched = 0;
struct hw_perf_event *hwc;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
+ /*
+ * Compute the number of events already present; see x86_pmu_add(),
+ * validate_group() and x86_pmu_commit_txn(). For the former two
+ * cpuc->n_events hasn't been updated yet, while for the latter
+ * cpuc->n_txn contains the number of events added in the current
+ * transaction.
+ */
+ n0 = cpuc->n_events;
+ if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
+ n0 -= cpuc->n_txn;
+
if (x86_pmu.start_scheduling)
x86_pmu.start_scheduling(cpuc);
for (i = 0, wmin = X86_PMC_IDX_MAX, wmax = 0; i < n; i++) {
- cpuc->event_constraint[i] = NULL;
- c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
- cpuc->event_constraint[i] = c;
+ c = cpuc->event_constraint[i];
+
+ /*
+ * Previously scheduled events should have a cached constraint,
+ * while new events should not have one.
+ */
+ WARN_ON_ONCE((c && i >= n0) || (!c && i < n0));
+
+ /*
+ * Request constraints for new events; or for those events that
+ * have a dynamic constraint -- for those the constraint can
+ * change due to external factors (sibling state, allow_tfa).
+ */
+ if (!c || (c->flags & PERF_X86_EVENT_DYNAMIC)) {
+ c = x86_pmu.get_event_constraints(cpuc, i, cpuc->event_list[i]);
+ cpuc->event_constraint[i] = c;
+ }
wmin = min(wmin, c->weight);
wmax = max(wmax, c->weight);
@@ -925,25 +969,20 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
if (!unsched && assign) {
for (i = 0; i < n; i++) {
e = cpuc->event_list[i];
- e->hw.flags |= PERF_X86_EVENT_COMMITTED;
if (x86_pmu.commit_scheduling)
x86_pmu.commit_scheduling(cpuc, i, assign[i]);
}
} else {
- for (i = 0; i < n; i++) {
+ for (i = n0; i < n; i++) {
e = cpuc->event_list[i];
- /*
- * do not put_constraint() on comitted events,
- * because they are good to go
- */
- if ((e->hw.flags & PERF_X86_EVENT_COMMITTED))
- continue;
/*
* release events that failed scheduling
*/
if (x86_pmu.put_event_constraints)
x86_pmu.put_event_constraints(cpuc, e);
+
+ cpuc->event_constraint[i] = NULL;
}
}
@@ -1373,11 +1412,6 @@ static void x86_pmu_del(struct perf_event *event, int flags)
int i;
/*
- * event is descheduled
- */
- event->hw.flags &= ~PERF_X86_EVENT_COMMITTED;
-
- /*
* If we're called during a txn, we only need to undo x86_pmu.add.
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
@@ -1413,6 +1447,7 @@ static void x86_pmu_del(struct perf_event *event, int flags)
cpuc->event_list[i-1] = cpuc->event_list[i];
cpuc->event_constraint[i-1] = cpuc->event_constraint[i];
}
+ cpuc->event_constraint[i-1] = NULL;
--cpuc->n_events;
perf_event_update_userpage(event);
@@ -2024,7 +2059,7 @@ static int validate_event(struct perf_event *event)
if (IS_ERR(fake_cpuc))
return PTR_ERR(fake_cpuc);
- c = x86_pmu.get_event_constraints(fake_cpuc, -1, event);
+ c = x86_pmu.get_event_constraints(fake_cpuc, 0, event);
if (!c || !c->weight)
ret = -EINVAL;
@@ -2072,8 +2107,7 @@ static int validate_group(struct perf_event *event)
if (n < 0)
goto out;
- fake_cpuc->n_events = n;
-
+ fake_cpuc->n_events = 0;
ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
out:
@@ -2348,6 +2382,15 @@ void arch_perf_update_userpage(struct perf_event *event,
cyc2ns_read_end();
}
+/*
+ * Determine whether the regs were taken from an irq/exception handler rather
+ * than from perf_arch_fetch_caller_regs().
+ */
+static bool perf_hw_regs(struct pt_regs *regs)
+{
+ return regs->flags & X86_EFLAGS_FIXED;
+}
+
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
{
@@ -2359,11 +2402,15 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
return;
}
- if (perf_callchain_store(entry, regs->ip))
- return;
+ if (perf_hw_regs(regs)) {
+ if (perf_callchain_store(entry, regs->ip))
+ return;
+ unwind_start(&state, current, regs, NULL);
+ } else {
+ unwind_start(&state, current, NULL, (void *)regs->sp);
+ }
- for (unwind_start(&state, current, regs, NULL); !unwind_done(&state);
- unwind_next_frame(&state)) {
+ for (; !unwind_done(&state); unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
if (!addr || perf_callchain_store(entry, addr))
return;
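
The perf_callchain_kernel() hunk above needs a way to tell real pt_regs (captured by an IRQ or exception) apart from the synthetic set built by perf_arch_fetch_caller_regs(); X86_EFLAGS_FIXED (bit 1, always set in a genuine EFLAGS image) serves as that marker here. Condensed from the hunk, with the two unwind start points annotated:

	if (perf_hw_regs(regs)) {
		/* Real register set: the IP is meaningful and the unwinder
		 * can start straight from the regs. */
		if (perf_callchain_store(entry, regs->ip))
			return;
		unwind_start(&state, current, regs, NULL);
	} else {
		/* Synthetic regs: only the stack pointer is usable, so the
		 * unwinder starts from there instead. */
		unwind_start(&state, current, NULL, (void *)regs->sp);
	}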
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f9451566cd9b..ef763f535e3a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -239,6 +239,35 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
EVENT_EXTRA_END
};
+static struct event_constraint intel_icl_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ INTEL_UEVENT_CONSTRAINT(0x1c0, 0), /* INST_RETIRED.PREC_DIST */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */
+ INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
+ INTEL_EVENT_CONSTRAINT(0x32, 0xf), /* SW_PREFETCH_ACCESS.* */
+ INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff), /* CYCLE_ACTIVITY.STALLS_TOTAL */
+ INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff), /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
+ INTEL_EVENT_CONSTRAINT(0xa3, 0xf), /* CYCLE_ACTIVITY.* */
+ INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
+ INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
+ EVENT_CONSTRAINT_END
+};
+
+static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
+ INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
+ INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
+ EVENT_EXTRA_END
+};
+
EVENT_ATTR_STR(mem-loads, mem_ld_nhm, "event=0x0b,umask=0x10,ldlat=3");
EVENT_ATTR_STR(mem-loads, mem_ld_snb, "event=0xcd,umask=0x1,ldlat=3");
EVENT_ATTR_STR(mem-stores, mem_st_snb, "event=0xcd,umask=0x2");
@@ -1827,6 +1856,45 @@ static __initconst const u64 glp_hw_cache_extra_regs
},
};
+#define TNT_LOCAL_DRAM BIT_ULL(26)
+#define TNT_DEMAND_READ GLM_DEMAND_DATA_RD
+#define TNT_DEMAND_WRITE GLM_DEMAND_RFO
+#define TNT_LLC_ACCESS GLM_ANY_RESPONSE
+#define TNT_SNP_ANY (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
+ SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
+#define TNT_LLC_MISS (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)
+
+static __initconst const u64 tnt_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
+ [C(LL)] = {
+ [C(OP_READ)] = {
+ [C(RESULT_ACCESS)] = TNT_DEMAND_READ|
+ TNT_LLC_ACCESS,
+ [C(RESULT_MISS)] = TNT_DEMAND_READ|
+ TNT_LLC_MISS,
+ },
+ [C(OP_WRITE)] = {
+ [C(RESULT_ACCESS)] = TNT_DEMAND_WRITE|
+ TNT_LLC_ACCESS,
+ [C(RESULT_MISS)] = TNT_DEMAND_WRITE|
+ TNT_LLC_MISS,
+ },
+ [C(OP_PREFETCH)] = {
+ [C(RESULT_ACCESS)] = 0x0,
+ [C(RESULT_MISS)] = 0x0,
+ },
+ },
+};
+
+static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
+ /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1),
+ EVENT_EXTRA_END
+};
+
#define KNL_OT_L2_HITE BIT_ULL(19) /* Other Tile L2 Hit */
#define KNL_OT_L2_HITF BIT_ULL(20) /* Other Tile L2 Hit */
#define KNL_MCDRAM_LOCAL BIT_ULL(21)
@@ -2015,7 +2083,7 @@ static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int
/*
* We're going to use PMC3, make sure TFA is set before we touch it.
*/
- if (cntr == 3 && !cpuc->is_fake)
+ if (cntr == 3)
intel_set_tfa(cpuc, true);
}
@@ -2091,15 +2159,19 @@ static void intel_pmu_disable_event(struct perf_event *event)
cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
cpuc->intel_cp_status &= ~(1ull << hwc->idx);
- if (unlikely(event->attr.precise_ip))
- intel_pmu_pebs_disable(event);
-
if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
intel_pmu_disable_fixed(hwc);
return;
}
x86_pmu_disable_event(event);
+
+ /*
+ * Needs to be called after x86_pmu_disable_event(),
+ * so we don't trigger the event without the PEBS bit set.
+ */
+ if (unlikely(event->attr.precise_ip))
+ intel_pmu_pebs_disable(event);
}
static void intel_pmu_del_event(struct perf_event *event)
@@ -2145,6 +2217,11 @@ static void intel_pmu_enable_fixed(struct perf_event *event)
bits <<= (idx * 4);
mask = 0xfULL << (idx * 4);
+ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
+ bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
+ mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
+ }
+
rdmsrl(hwc->config_base, ctrl_val);
ctrl_val &= ~mask;
ctrl_val |= bits;
@@ -2688,7 +2765,7 @@ x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
if (x86_pmu.event_constraints) {
for_each_event_constraint(c, x86_pmu.event_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
+ if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
}
@@ -2838,7 +2915,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
struct intel_excl_states *xlo;
int tid = cpuc->excl_thread_id;
- int is_excl, i;
+ int is_excl, i, w;
/*
* validating a group does not require
@@ -2894,36 +2971,40 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
* SHARED : sibling counter measuring non-exclusive event
* UNUSED : sibling counter unused
*/
+ w = c->weight;
for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
/*
* exclusive event in sibling counter
* our corresponding counter cannot be used
* regardless of our event
*/
- if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
+ if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
__clear_bit(i, c->idxmsk);
+ w--;
+ continue;
+ }
/*
* if measuring an exclusive event, sibling
* measuring non-exclusive, then counter cannot
* be used
*/
- if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
+ if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
__clear_bit(i, c->idxmsk);
+ w--;
+ continue;
+ }
}
/*
- * recompute actual bit weight for scheduling algorithm
- */
- c->weight = hweight64(c->idxmsk64);
-
- /*
* if we return an empty mask, then switch
* back to static empty constraint to avoid
* the cost of freeing later on
*/
- if (c->weight == 0)
+ if (!w)
c = &emptyconstraint;
+ c->weight = w;
+
return c;
}
@@ -2931,11 +3012,9 @@ static struct event_constraint *
intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
- struct event_constraint *c1 = NULL;
- struct event_constraint *c2;
+ struct event_constraint *c1, *c2;
- if (idx >= 0) /* fake does < 0 */
- c1 = cpuc->event_constraint[idx];
+ c1 = cpuc->event_constraint[idx];
/*
* first time only
@@ -2943,7 +3022,8 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
* - dynamic constraint: handled by intel_get_excl_constraints()
*/
c2 = __intel_get_event_constraints(cpuc, idx, event);
- if (c1 && (c1->flags & PERF_X86_EVENT_DYNAMIC)) {
+ if (c1) {
+ WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC));
bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
c1->weight = c2->weight;
c2 = c1;
@@ -3366,6 +3446,12 @@ static struct event_constraint counter0_constraint =
static struct event_constraint counter2_constraint =
EVENT_CONSTRAINT(0, 0x4, 0);
+static struct event_constraint fixed0_constraint =
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0);
+
+static struct event_constraint fixed0_counter0_constraint =
+ INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
+
static struct event_constraint *
hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
@@ -3385,6 +3471,21 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
}
static struct event_constraint *
+icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ /*
+ * Fixed counter 0 has less skid.
+ * Force instruction:ppp in Fixed counter 0
+ */
+ if ((event->attr.precise_ip == 3) &&
+ constraint_match(&fixed0_constraint, event->hw.config))
+ return &fixed0_constraint;
+
+ return hsw_get_event_constraints(cpuc, idx, event);
+}
+
+static struct event_constraint *
glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
struct perf_event *event)
{
@@ -3399,6 +3500,29 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
return c;
}
+static struct event_constraint *
+tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
+ struct perf_event *event)
+{
+ struct event_constraint *c;
+
+ /*
+ * :ppp means to do reduced skid PEBS,
+ * which is available on PMC0 and fixed counter 0.
+ */
+ if (event->attr.precise_ip == 3) {
+ /* Force instruction:ppp on PMC0 and Fixed counter 0 */
+ if (constraint_match(&fixed0_constraint, event->hw.config))
+ return &fixed0_counter0_constraint;
+
+ return &counter0_constraint;
+ }
+
+ c = intel_get_event_constraints(cpuc, idx, event);
+
+ return c;
+}
+
static bool allow_tsx_force_abort = true;
static struct event_constraint *
@@ -3410,7 +3534,7 @@ tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
/*
* Without TFA we must not use PMC3.
*/
- if (!allow_tsx_force_abort && test_bit(3, c->idxmsk) && idx >= 0) {
+ if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
c = dyn_constraint(cpuc, c, idx);
c->idxmsk64 &= ~(1ULL << 3);
c->weight--;
@@ -3507,6 +3631,8 @@ static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
{
+ cpuc->pebs_record_size = x86_pmu.pebs_record_size;
+
if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
cpuc->shared_regs = allocate_shared_regs(cpu);
if (!cpuc->shared_regs)
@@ -4114,6 +4240,42 @@ static struct attribute *hsw_tsx_events_attrs[] = {
NULL
};
+EVENT_ATTR_STR(tx-capacity-read, tx_capacity_read, "event=0x54,umask=0x80");
+EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
+EVENT_ATTR_STR(el-capacity-read, el_capacity_read, "event=0x54,umask=0x80");
+EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");
+
+static struct attribute *icl_events_attrs[] = {
+ EVENT_PTR(mem_ld_hsw),
+ EVENT_PTR(mem_st_hsw),
+ NULL,
+};
+
+static struct attribute *icl_tsx_events_attrs[] = {
+ EVENT_PTR(tx_start),
+ EVENT_PTR(tx_abort),
+ EVENT_PTR(tx_commit),
+ EVENT_PTR(tx_capacity_read),
+ EVENT_PTR(tx_capacity_write),
+ EVENT_PTR(tx_conflict),
+ EVENT_PTR(el_start),
+ EVENT_PTR(el_abort),
+ EVENT_PTR(el_commit),
+ EVENT_PTR(el_capacity_read),
+ EVENT_PTR(el_capacity_write),
+ EVENT_PTR(el_conflict),
+ EVENT_PTR(cycles_t),
+ EVENT_PTR(cycles_ct),
+ NULL,
+};
+
+static __init struct attribute **get_icl_events_attrs(void)
+{
+ return boot_cpu_has(X86_FEATURE_RTM) ?
+ merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
+ icl_events_attrs;
+}
+
static ssize_t freeze_on_smi_show(struct device *cdev,
struct device_attribute *attr,
char *buf)
@@ -4153,6 +4315,50 @@ done:
return count;
}
+static void update_tfa_sched(void *ignored)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+
+ /*
+ * check if PMC3 is used
+ * and if so force schedule out for all event types all contexts
+ */
+ if (test_bit(3, cpuc->active_mask))
+ perf_pmu_resched(x86_get_pmu());
+}
+
+static ssize_t show_sysctl_tfa(struct device *cdev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return snprintf(buf, 40, "%d\n", allow_tsx_force_abort);
+}
+
+static ssize_t set_sysctl_tfa(struct device *cdev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ bool val;
+ ssize_t ret;
+
+ ret = kstrtobool(buf, &val);
+ if (ret)
+ return ret;
+
+ /* no change */
+ if (val == allow_tsx_force_abort)
+ return count;
+
+ allow_tsx_force_abort = val;
+
+ get_online_cpus();
+ on_each_cpu(update_tfa_sched, NULL, 1);
+ put_online_cpus();
+
+ return count;
+}
+
+
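
Editor's note (annotation, not part of the patch): the new show/set handlers replace the plain DEVICE_BOOL_ATTR so that flipping the knob takes effect immediately rather than at the next scheduling point. The sketch below is a hedged restatement of what a userspace write of "0" ends up doing; it simply mirrors the handler above and is not additional kernel code.

/* Illustrative only: effect of writing "0" to allow_tsx_force_abort. */
static void example_tfa_disable(void)
{
	allow_tsx_force_abort = false;

	get_online_cpus();
	/* CPUs with PMC3 currently active reschedule their events off it. */
	on_each_cpu(update_tfa_sched, NULL, 1);
	put_online_cpus();
}
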
static DEVICE_ATTR_RW(freeze_on_smi);
static ssize_t branches_show(struct device *cdev,
@@ -4185,7 +4391,9 @@ static struct attribute *intel_pmu_caps_attrs[] = {
NULL
};
-static DEVICE_BOOL_ATTR(allow_tsx_force_abort, 0644, allow_tsx_force_abort);
+static DEVICE_ATTR(allow_tsx_force_abort, 0644,
+ show_sysctl_tfa,
+ set_sysctl_tfa);
static struct attribute *intel_pmu_attrs[] = {
&dev_attr_freeze_on_smi.attr,
@@ -4446,6 +4654,32 @@ __init int intel_pmu_init(void)
name = "goldmont_plus";
break;
+ case INTEL_FAM6_ATOM_TREMONT_X:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
+ sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_slm_event_constraints;
+ x86_pmu.extra_regs = intel_tnt_extra_regs;
+ /*
+ * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
+ * for precise cycles.
+ */
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.lbr_pt_coexist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.get_event_constraints = tnt_get_event_constraints;
+ extra_attr = slm_format_attr;
+ pr_cont("Tremont events, ");
+ name = "Tremont";
+ break;
+
case INTEL_FAM6_WESTMERE:
case INTEL_FAM6_WESTMERE_EP:
case INTEL_FAM6_WESTMERE_EX:
@@ -4694,13 +4928,41 @@ __init int intel_pmu_init(void)
x86_pmu.get_event_constraints = tfa_get_event_constraints;
x86_pmu.enable_all = intel_tfa_pmu_enable_all;
x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
- intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr.attr;
+ intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
}
pr_cont("Skylake events, ");
name = "skylake";
break;
+ case INTEL_FAM6_ICELAKE_MOBILE:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+ hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
+ intel_pmu_lbr_init_skl();
+
+ x86_pmu.event_constraints = intel_icl_event_constraints;
+ x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_icl_extra_regs;
+ x86_pmu.pebs_aliases = NULL;
+ x86_pmu.pebs_prec_dist = true;
+ x86_pmu.flags |= PMU_FL_HAS_RSP_1;
+ x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = icl_get_event_constraints;
+ extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
+ hsw_format_attr : nhm_format_attr;
+ extra_attr = merge_attr(extra_attr, skl_format_attr);
+ x86_pmu.cpu_events = get_icl_events_attrs();
+ x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
+ x86_pmu.lbr_pt_coexist = true;
+ intel_pmu_pebs_data_source_skl(false);
+ pr_cont("Icelake events, ");
+ name = "icelake";
+ break;
+
default:
switch (x86_pmu.version) {
case 1:
diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c
index d41de9af7a39..6072f92cb8ea 100644
--- a/arch/x86/events/intel/cstate.c
+++ b/arch/x86/events/intel/cstate.c
@@ -578,6 +578,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = {
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_X, glm_cstates),
X86_CSTATES_MODEL(INTEL_FAM6_ATOM_GOLDMONT_PLUS, glm_cstates),
+
+ X86_CSTATES_MODEL(INTEL_FAM6_ICELAKE_MOBILE, snb_cstates),
{ },
};
MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 10c99ce1fead..7a9f5dac5abe 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -849,6 +849,26 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_icl_pebs_event_constraints[] = {
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
+
+ INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
+ INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf), /* MEM_INST_RETIRED.STORE */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
+
+ INTEL_FLAGS_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_INST_RETIRED.* */
+
+ /*
+ * Everything else is handled by PMU_FL_PEBS_ALL, because we
+ * need the full constraints from the main table.
+ */
+
+ EVENT_CONSTRAINT_END
+};
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event)
{
struct event_constraint *c;
@@ -858,7 +878,7 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
if (x86_pmu.pebs_constraints) {
for_each_event_constraint(c, x86_pmu.pebs_constraints) {
- if ((event->hw.config & c->cmask) == c->code) {
+ if (constraint_match(c, event->hw.config)) {
event->hw.flags |= c->flags;
return c;
}
@@ -906,17 +926,87 @@ static inline void pebs_update_threshold(struct cpu_hw_events *cpuc)
if (cpuc->n_pebs == cpuc->n_large_pebs) {
threshold = ds->pebs_absolute_maximum -
- reserved * x86_pmu.pebs_record_size;
+ reserved * cpuc->pebs_record_size;
} else {
- threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+ threshold = ds->pebs_buffer_base + cpuc->pebs_record_size;
}
ds->pebs_interrupt_threshold = threshold;
}
+static void adaptive_pebs_record_size_update(void)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 pebs_data_cfg = cpuc->pebs_data_cfg;
+ int sz = sizeof(struct pebs_basic);
+
+ if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
+ sz += sizeof(struct pebs_meminfo);
+ if (pebs_data_cfg & PEBS_DATACFG_GP)
+ sz += sizeof(struct pebs_gprs);
+ if (pebs_data_cfg & PEBS_DATACFG_XMMS)
+ sz += sizeof(struct pebs_xmm);
+ if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+ sz += x86_pmu.lbr_nr * sizeof(struct pebs_lbr_entry);
+
+ cpuc->pebs_record_size = sz;
+}
+
+#define PERF_PEBS_MEMINFO_TYPE (PERF_SAMPLE_ADDR | PERF_SAMPLE_DATA_SRC | \
+ PERF_SAMPLE_PHYS_ADDR | PERF_SAMPLE_WEIGHT | \
+ PERF_SAMPLE_TRANSACTION)
+
+static u64 pebs_update_adaptive_cfg(struct perf_event *event)
+{
+ struct perf_event_attr *attr = &event->attr;
+ u64 sample_type = attr->sample_type;
+ u64 pebs_data_cfg = 0;
+ bool gprs, tsx_weight;
+
+ if (!(sample_type & ~(PERF_SAMPLE_IP|PERF_SAMPLE_TIME)) &&
+ attr->precise_ip > 1)
+ return pebs_data_cfg;
+
+ if (sample_type & PERF_PEBS_MEMINFO_TYPE)
+ pebs_data_cfg |= PEBS_DATACFG_MEMINFO;
+
+ /*
+ * We need GPRs when:
+ * + the user requested them
+ * + precise_ip < 2, for the non-event IP
+ * + RTM TSX weight sampling, which needs the abort code from AX
+ */
+ gprs = (sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PEBS_GP_REGS);
+
+ tsx_weight = (sample_type & PERF_SAMPLE_WEIGHT) &&
+ ((attr->config & INTEL_ARCH_EVENT_MASK) ==
+ x86_pmu.rtm_abort_event);
+
+ if (gprs || (attr->precise_ip < 2) || tsx_weight)
+ pebs_data_cfg |= PEBS_DATACFG_GP;
+
+ if ((sample_type & PERF_SAMPLE_REGS_INTR) &&
+ (attr->sample_regs_intr & PEBS_XMM_REGS))
+ pebs_data_cfg |= PEBS_DATACFG_XMMS;
+
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+ /*
+ * For now always log all LBRs. Could configure this
+ * later.
+ */
+ pebs_data_cfg |= PEBS_DATACFG_LBRS |
+ ((x86_pmu.lbr_nr-1) << PEBS_DATACFG_LBR_SHIFT);
+ }
+
+ return pebs_data_cfg;
+}
+
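
Editor's worked example (hypothetical attributes, not part of the patch), showing how pebs_update_adaptive_cfg() above maps a sample request to a PEBS data configuration:

/*
 * A load-latency event with precise_ip == 3 and
 *   sample_type      = PERF_SAMPLE_ADDR | PERF_SAMPLE_REGS_INTR
 *   sample_regs_intr = <GP registers only>
 * takes none of the early exits: PERF_SAMPLE_ADDR selects
 * PEBS_DATACFG_MEMINFO, the GP-register request selects PEBS_DATACFG_GP,
 * and no XMM or LBR data is asked for, so the function returns
 * PEBS_DATACFG_MEMINFO | PEBS_DATACFG_GP.
 */
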
static void
-pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
+pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
+ struct perf_event *event, bool add)
{
+ struct pmu *pmu = event->ctx->pmu;
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but
@@ -933,6 +1023,29 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu)
update = true;
}
+ /*
+ * The PEBS record doesn't shrink on pmu::del(). Doing so would require
+ * iterating all remaining PEBS events to reconstruct the config.
+ */
+ if (x86_pmu.intel_cap.pebs_baseline && add) {
+ u64 pebs_data_cfg;
+
+ /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
+ if (cpuc->n_pebs == 1) {
+ cpuc->pebs_data_cfg = 0;
+ cpuc->pebs_record_size = sizeof(struct pebs_basic);
+ }
+
+ pebs_data_cfg = pebs_update_adaptive_cfg(event);
+
+ /* Update pebs_record_size if new event requires more data. */
+ if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
+ cpuc->pebs_data_cfg |= pebs_data_cfg;
+ adaptive_pebs_record_size_update();
+ update = true;
+ }
+ }
+
if (update)
pebs_update_threshold(cpuc);
}
@@ -947,7 +1060,7 @@ void intel_pmu_pebs_add(struct perf_event *event)
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs++;
- pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
+ pebs_update_state(needed_cb, cpuc, event, true);
}
void intel_pmu_pebs_enable(struct perf_event *event)
@@ -960,11 +1073,19 @@ void intel_pmu_pebs_enable(struct perf_event *event)
cpuc->pebs_enabled |= 1ULL << hwc->idx;
- if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) && (x86_pmu.version < 5))
cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled |= 1ULL << 63;
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ hwc->config |= ICL_EVENTSEL_ADAPTIVE;
+ if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
+ wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
+ cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
+ }
+ }
+
/*
* Use auto-reload if possible to save a MSR write in the PMI.
* This must be done in pmu::start(), because PERF_EVENT_IOC_PERIOD.
@@ -991,7 +1112,7 @@ void intel_pmu_pebs_del(struct perf_event *event)
if (hwc->flags & PERF_X86_EVENT_LARGE_PEBS)
cpuc->n_large_pebs--;
- pebs_update_state(needed_cb, cpuc, event->ctx->pmu);
+ pebs_update_state(needed_cb, cpuc, event, false);
}
void intel_pmu_pebs_disable(struct perf_event *event)
@@ -1004,7 +1125,8 @@ void intel_pmu_pebs_disable(struct perf_event *event)
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
- if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
+ if ((event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT) &&
+ (x86_pmu.version < 5))
cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32));
else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
cpuc->pebs_enabled &= ~(1ULL << 63);
@@ -1125,34 +1247,57 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
return 0;
}
-static inline u64 intel_hsw_weight(struct pebs_record_skl *pebs)
+static inline u64 intel_get_tsx_weight(u64 tsx_tuning)
{
- if (pebs->tsx_tuning) {
- union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning };
+ if (tsx_tuning) {
+ union hsw_tsx_tuning tsx = { .value = tsx_tuning };
return tsx.cycles_last_block;
}
return 0;
}
-static inline u64 intel_hsw_transaction(struct pebs_record_skl *pebs)
+static inline u64 intel_get_tsx_transaction(u64 tsx_tuning, u64 ax)
{
- u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
+ u64 txn = (tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32;
/* For RTM XABORTs also log the abort code from AX */
- if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1))
- txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
+ if ((txn & PERF_TXN_TRANSACTION) && (ax & 1))
+ txn |= ((ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT;
return txn;
}
-static void setup_pebs_sample_data(struct perf_event *event,
- struct pt_regs *iregs, void *__pebs,
- struct perf_sample_data *data,
- struct pt_regs *regs)
+static inline u64 get_pebs_status(void *n)
{
+ if (x86_pmu.intel_cap.pebs_format < 4)
+ return ((struct pebs_record_nhm *)n)->status;
+ return ((struct pebs_basic *)n)->applicable_counters;
+}
+
#define PERF_X86_EVENT_PEBS_HSW_PREC \
(PERF_X86_EVENT_PEBS_ST_HSW | \
PERF_X86_EVENT_PEBS_LD_HSW | \
PERF_X86_EVENT_PEBS_NA_HSW)
+
+static u64 get_data_src(struct perf_event *event, u64 aux)
+{
+ u64 val = PERF_MEM_NA;
+ int fl = event->hw.flags;
+ bool fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+
+ if (fl & PERF_X86_EVENT_PEBS_LDLAT)
+ val = load_latency_data(aux);
+ else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
+ val = precise_datala_hsw(event, aux);
+ else if (fst)
+ val = precise_store_data(aux);
+ return val;
+}
+
+static void setup_pebs_fixed_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
/*
* We cast to the biggest pebs_record but are careful not to
* unconditionally access the 'extra' entries.
@@ -1160,17 +1305,13 @@ static void setup_pebs_sample_data(struct perf_event *event,
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct pebs_record_skl *pebs = __pebs;
u64 sample_type;
- int fll, fst, dsrc;
- int fl = event->hw.flags;
+ int fll;
if (pebs == NULL)
return;
sample_type = event->attr.sample_type;
- dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
-
- fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
- fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
+ fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
perf_sample_data_init(data, 0, event->hw.last_period);
@@ -1185,16 +1326,8 @@ static void setup_pebs_sample_data(struct perf_event *event,
/*
* data.data_src encodes the data source
*/
- if (dsrc) {
- u64 val = PERF_MEM_NA;
- if (fll)
- val = load_latency_data(pebs->dse);
- else if (fst && (fl & PERF_X86_EVENT_PEBS_HSW_PREC))
- val = precise_datala_hsw(event, pebs->dse);
- else if (fst)
- val = precise_store_data(pebs->dse);
- data->data_src.val = val;
- }
+ if (sample_type & PERF_SAMPLE_DATA_SRC)
+ data->data_src.val = get_data_src(event, pebs->dse);
/*
* We must however always use iregs for the unwinder to stay sane; the
@@ -1281,10 +1414,11 @@ static void setup_pebs_sample_data(struct perf_event *event,
if (x86_pmu.intel_cap.pebs_format >= 2) {
/* Only set the TSX weight when no memory weight. */
if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
- data->weight = intel_hsw_weight(pebs);
+ data->weight = intel_get_tsx_weight(pebs->tsx_tuning);
if (sample_type & PERF_SAMPLE_TRANSACTION)
- data->txn = intel_hsw_transaction(pebs);
+ data->txn = intel_get_tsx_transaction(pebs->tsx_tuning,
+ pebs->ax);
}
/*
@@ -1301,6 +1435,140 @@ static void setup_pebs_sample_data(struct perf_event *event,
data->br_stack = &cpuc->lbr_stack;
}
+static void adaptive_pebs_save_regs(struct pt_regs *regs,
+ struct pebs_gprs *gprs)
+{
+ regs->ax = gprs->ax;
+ regs->bx = gprs->bx;
+ regs->cx = gprs->cx;
+ regs->dx = gprs->dx;
+ regs->si = gprs->si;
+ regs->di = gprs->di;
+ regs->bp = gprs->bp;
+ regs->sp = gprs->sp;
+#ifndef CONFIG_X86_32
+ regs->r8 = gprs->r8;
+ regs->r9 = gprs->r9;
+ regs->r10 = gprs->r10;
+ regs->r11 = gprs->r11;
+ regs->r12 = gprs->r12;
+ regs->r13 = gprs->r13;
+ regs->r14 = gprs->r14;
+ regs->r15 = gprs->r15;
+#endif
+}
+
+/*
+ * With adaptive PEBS the layout depends on what fields are configured.
+ */
+
+static void setup_pebs_adaptive_sample_data(struct perf_event *event,
+ struct pt_regs *iregs, void *__pebs,
+ struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct pebs_basic *basic = __pebs;
+ void *next_record = basic + 1;
+ u64 sample_type;
+ u64 format_size;
+ struct pebs_meminfo *meminfo = NULL;
+ struct pebs_gprs *gprs = NULL;
+ struct x86_perf_regs *perf_regs;
+
+ if (basic == NULL)
+ return;
+
+ perf_regs = container_of(regs, struct x86_perf_regs, regs);
+ perf_regs->xmm_regs = NULL;
+
+ sample_type = event->attr.sample_type;
+ format_size = basic->format_size;
+ perf_sample_data_init(data, 0, event->hw.last_period);
+ data->period = event->hw.last_period;
+
+ if (event->attr.use_clockid == 0)
+ data->time = native_sched_clock_from_tsc(basic->tsc);
+
+ /*
+ * We must however always use iregs for the unwinder to stay sane; the
+ * record BP,SP,IP can point into thin air when the record is from a
+ * previous PMI context or an (I)RET happened between the record and
+ * PMI.
+ */
+ if (sample_type & PERF_SAMPLE_CALLCHAIN)
+ data->callchain = perf_callchain(event, iregs);
+
+ *regs = *iregs;
+ /* The ip in basic is EventingIP */
+ set_linear_ip(regs, basic->ip);
+ regs->flags = PERF_EFLAGS_EXACT;
+
+ /*
+ * The MEMINFO record comes before the GP record, but
+ * PERF_SAMPLE_TRANSACTION needs gprs->ax.
+ * Save the pointer here and process it later.
+ */
+ if (format_size & PEBS_DATACFG_MEMINFO) {
+ meminfo = next_record;
+ next_record = meminfo + 1;
+ }
+
+ if (format_size & PEBS_DATACFG_GP) {
+ gprs = next_record;
+ next_record = gprs + 1;
+
+ if (event->attr.precise_ip < 2) {
+ set_linear_ip(regs, gprs->ip);
+ regs->flags &= ~PERF_EFLAGS_EXACT;
+ }
+
+ if (sample_type & PERF_SAMPLE_REGS_INTR)
+ adaptive_pebs_save_regs(regs, gprs);
+ }
+
+ if (format_size & PEBS_DATACFG_MEMINFO) {
+ if (sample_type & PERF_SAMPLE_WEIGHT)
+ data->weight = meminfo->latency ?:
+ intel_get_tsx_weight(meminfo->tsx_tuning);
+
+ if (sample_type & PERF_SAMPLE_DATA_SRC)
+ data->data_src.val = get_data_src(event, meminfo->aux);
+
+ if (sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
+ data->addr = meminfo->address;
+
+ if (sample_type & PERF_SAMPLE_TRANSACTION)
+ data->txn = intel_get_tsx_transaction(meminfo->tsx_tuning,
+ gprs ? gprs->ax : 0);
+ }
+
+ if (format_size & PEBS_DATACFG_XMMS) {
+ struct pebs_xmm *xmm = next_record;
+
+ next_record = xmm + 1;
+ perf_regs->xmm_regs = xmm->xmm;
+ }
+
+ if (format_size & PEBS_DATACFG_LBRS) {
+ struct pebs_lbr *lbr = next_record;
+ int num_lbr = ((format_size >> PEBS_DATACFG_LBR_SHIFT)
+ & 0xff) + 1;
+ next_record = next_record + num_lbr*sizeof(struct pebs_lbr_entry);
+
+ if (has_branch_stack(event)) {
+ intel_pmu_store_pebs_lbrs(lbr);
+ data->br_stack = &cpuc->lbr_stack;
+ }
+ }
+
+ WARN_ONCE(next_record != __pebs + (format_size >> 48),
+ "PEBS record size %llu, expected %llu, config %llx\n",
+ format_size >> 48,
+ (u64)(next_record - __pebs),
+ basic->format_size);
+}
+
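
Editor's sketch (annotation, not part of the patch): an adaptive PEBS record is a pebs_basic group followed by optional meminfo, GPR, XMM and LBR groups, in that order, with the group set encoded in the low bits of format_size. The helper below is illustrative only and just repeats the walk done by setup_pebs_adaptive_sample_data() to locate the LBR group.

/* Illustrative only: skip to the LBR group of one adaptive record. */
static void *pebs_lbr_group(struct pebs_basic *basic)
{
	u64 cfg = basic->format_size;	/* low bits mirror pebs_data_cfg */
	void *p = basic + 1;

	if (cfg & PEBS_DATACFG_MEMINFO)
		p += sizeof(struct pebs_meminfo);
	if (cfg & PEBS_DATACFG_GP)
		p += sizeof(struct pebs_gprs);
	if (cfg & PEBS_DATACFG_XMMS)
		p += sizeof(struct pebs_xmm);

	return (cfg & PEBS_DATACFG_LBRS) ? p : NULL;
}
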
static inline void *
get_next_pebs_record_by_bit(void *base, void *top, int bit)
{
@@ -1318,19 +1586,19 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
if (base == NULL)
return NULL;
- for (at = base; at < top; at += x86_pmu.pebs_record_size) {
- struct pebs_record_nhm *p = at;
+ for (at = base; at < top; at += cpuc->pebs_record_size) {
+ unsigned long status = get_pebs_status(at);
- if (test_bit(bit, (unsigned long *)&p->status)) {
+ if (test_bit(bit, (unsigned long *)&status)) {
/* PEBS v3 has accurate status bits */
if (x86_pmu.intel_cap.pebs_format >= 3)
return at;
- if (p->status == (1 << bit))
+ if (status == (1 << bit))
return at;
/* clear non-PEBS bit and re-check */
- pebs_status = p->status & cpuc->pebs_enabled;
+ pebs_status = status & cpuc->pebs_enabled;
pebs_status &= PEBS_COUNTER_MASK;
if (pebs_status == (1 << bit))
return at;
@@ -1410,11 +1678,18 @@ intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs,
void *base, void *top,
- int bit, int count)
+ int bit, int count,
+ void (*setup_sample)(struct perf_event *,
+ struct pt_regs *,
+ void *,
+ struct perf_sample_data *,
+ struct pt_regs *))
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
struct perf_sample_data data;
- struct pt_regs regs;
+ struct x86_perf_regs perf_regs;
+ struct pt_regs *regs = &perf_regs.regs;
void *at = get_next_pebs_record_by_bit(base, top, bit);
if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
@@ -1429,20 +1704,20 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
return;
while (count > 1) {
- setup_pebs_sample_data(event, iregs, at, &data, &regs);
- perf_event_output(event, &data, &regs);
- at += x86_pmu.pebs_record_size;
+ setup_sample(event, iregs, at, &data, regs);
+ perf_event_output(event, &data, regs);
+ at += cpuc->pebs_record_size;
at = get_next_pebs_record_by_bit(at, top, bit);
count--;
}
- setup_pebs_sample_data(event, iregs, at, &data, &regs);
+ setup_sample(event, iregs, at, &data, regs);
/*
* All but the last records are processed.
* The last one is left to be able to call the overflow handler.
*/
- if (perf_event_overflow(event, &data, &regs)) {
+ if (perf_event_overflow(event, &data, regs)) {
x86_pmu_stop(event, 0);
return;
}
@@ -1483,7 +1758,27 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
return;
}
- __intel_pmu_pebs_event(event, iregs, at, top, 0, n);
+ __intel_pmu_pebs_event(event, iregs, at, top, 0, n,
+ setup_pebs_fixed_sample_data);
+}
+
+static void intel_pmu_pebs_event_update_no_drain(struct cpu_hw_events *cpuc, int size)
+{
+ struct perf_event *event;
+ int bit;
+
+ /*
+ * drain_pebs() can be called twice in a short period for an
+ * auto-reload event in pmu::read(), with no overflow having
+ * happened in between. In that case intel_pmu_save_and_restart_reload()
+ * still needs to be called to update the event->count.
+ */
+ for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, size) {
+ event = cpuc->events[bit];
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ intel_pmu_save_and_restart_reload(event, 0);
+ }
}
static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
@@ -1513,19 +1808,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
}
if (unlikely(base >= top)) {
- /*
- * The drain_pebs() could be called twice in a short period
- * for auto-reload event in pmu::read(). There are no
- * overflows have happened in between.
- * It needs to call intel_pmu_save_and_restart_reload() to
- * update the event->count for this case.
- */
- for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
- size) {
- event = cpuc->events[bit];
- if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
- intel_pmu_save_and_restart_reload(event, 0);
- }
+ intel_pmu_pebs_event_update_no_drain(cpuc, size);
return;
}
@@ -1538,8 +1821,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
/* PEBS v3 has more accurate status bits */
if (x86_pmu.intel_cap.pebs_format >= 3) {
- for_each_set_bit(bit, (unsigned long *)&pebs_status,
- size)
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
counts[bit]++;
continue;
@@ -1578,8 +1860,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
* If collision happened, the record will be dropped.
*/
if (p->status != (1ULL << bit)) {
- for_each_set_bit(i, (unsigned long *)&pebs_status,
- x86_pmu.max_pebs_events)
+ for_each_set_bit(i, (unsigned long *)&pebs_status, size)
error[i]++;
continue;
}
@@ -1587,7 +1868,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
counts[bit]++;
}
- for (bit = 0; bit < size; bit++) {
+ for_each_set_bit(bit, (unsigned long *)&mask, size) {
if ((counts[bit] == 0) && (error[bit] == 0))
continue;
@@ -1608,11 +1889,66 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
if (counts[bit]) {
__intel_pmu_pebs_event(event, iregs, base,
- top, bit, counts[bit]);
+ top, bit, counts[bit],
+ setup_pebs_fixed_sample_data);
}
}
}
+static void intel_pmu_drain_pebs_icl(struct pt_regs *iregs)
+{
+ short counts[INTEL_PMC_IDX_FIXED + MAX_FIXED_PEBS_EVENTS] = {};
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ struct debug_store *ds = cpuc->ds;
+ struct perf_event *event;
+ void *base, *at, *top;
+ int bit, size;
+ u64 mask;
+
+ if (!x86_pmu.pebs_active)
+ return;
+
+ base = (struct pebs_basic *)(unsigned long)ds->pebs_buffer_base;
+ top = (struct pebs_basic *)(unsigned long)ds->pebs_index;
+
+ ds->pebs_index = ds->pebs_buffer_base;
+
+ mask = ((1ULL << x86_pmu.max_pebs_events) - 1) |
+ (((1ULL << x86_pmu.num_counters_fixed) - 1) << INTEL_PMC_IDX_FIXED);
+ size = INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed;
+
+ if (unlikely(base >= top)) {
+ intel_pmu_pebs_event_update_no_drain(cpuc, size);
+ return;
+ }
+
+ for (at = base; at < top; at += cpuc->pebs_record_size) {
+ u64 pebs_status;
+
+ pebs_status = get_pebs_status(at) & cpuc->pebs_enabled;
+ pebs_status &= mask;
+
+ for_each_set_bit(bit, (unsigned long *)&pebs_status, size)
+ counts[bit]++;
+ }
+
+ for_each_set_bit(bit, (unsigned long *)&mask, size) {
+ if (counts[bit] == 0)
+ continue;
+
+ event = cpuc->events[bit];
+ if (WARN_ON_ONCE(!event))
+ continue;
+
+ if (WARN_ON_ONCE(!event->attr.precise_ip))
+ continue;
+
+ __intel_pmu_pebs_event(event, iregs, base,
+ top, bit, counts[bit],
+ setup_pebs_adaptive_sample_data);
+ }
+}
+
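
Editor's worked example (hypothetical counter counts, not part of the patch): the mask built in intel_pmu_drain_pebs_icl() covers both GP and fixed counters. With 8 GP counters, 4 fixed counters and INTEL_PMC_IDX_FIXED == 32 it evaluates to

/*
 *   ((1ULL << 8) - 1) | (((1ULL << 4) - 1) << 32) == 0x0000000f000000ffULL
 *
 * i.e. bits 0-7 select the GP counters and bits 32-35 the fixed ones,
 * the same indexing used by pebs_enabled and by the record's
 * applicable_counters field.
 */
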
/*
* BTS, PEBS probe and setup
*/
@@ -1628,12 +1964,18 @@ void __init intel_ds_init(void)
x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS);
x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
x86_pmu.pebs_buffer_size = PEBS_BUFFER_SIZE;
- if (x86_pmu.version <= 4)
+ if (x86_pmu.version <= 4) {
x86_pmu.pebs_no_isolation = 1;
+ x86_pmu.pebs_no_xmm_regs = 1;
+ }
if (x86_pmu.pebs) {
char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-';
+ char *pebs_qual = "";
int format = x86_pmu.intel_cap.pebs_format;
+ if (format < 4)
+ x86_pmu.intel_cap.pebs_baseline = 0;
+
switch (format) {
case 0:
pr_cont("PEBS fmt0%c, ", pebs_type);
@@ -1669,6 +2011,29 @@ void __init intel_ds_init(void)
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
break;
+ case 4:
+ x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl;
+ x86_pmu.pebs_record_size = sizeof(struct pebs_basic);
+ if (x86_pmu.intel_cap.pebs_baseline) {
+ x86_pmu.large_pebs_flags |=
+ PERF_SAMPLE_BRANCH_STACK |
+ PERF_SAMPLE_TIME;
+ x86_pmu.flags |= PMU_FL_PEBS_ALL;
+ pebs_qual = "-baseline";
+ } else {
+ /* Only basic record supported */
+ x86_pmu.pebs_no_xmm_regs = 1;
+ x86_pmu.large_pebs_flags &=
+ ~(PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_TIME |
+ PERF_SAMPLE_DATA_SRC |
+ PERF_SAMPLE_TRANSACTION |
+ PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_REGS_INTR);
+ }
+ pr_cont("PEBS fmt4%c%s, ", pebs_type, pebs_qual);
+ break;
+
default:
pr_cont("no PEBS fmt%d%c, ", format, pebs_type);
x86_pmu.pebs = 0;
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 580c1b91c454..6f814a27416b 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -488,6 +488,8 @@ void intel_pmu_lbr_add(struct perf_event *event)
* be 'new'. Conversely, a new event can get installed through the
* context switch path for the first time.
*/
+ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
+ cpuc->lbr_pebs_users++;
perf_sched_cb_inc(event->ctx->pmu);
if (!cpuc->lbr_users++ && !event->total_time_running)
intel_pmu_lbr_reset();
@@ -507,8 +509,11 @@ void intel_pmu_lbr_del(struct perf_event *event)
task_ctx->lbr_callstack_users--;
}
+ if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip > 0)
+ cpuc->lbr_pebs_users--;
cpuc->lbr_users--;
WARN_ON_ONCE(cpuc->lbr_users < 0);
+ WARN_ON_ONCE(cpuc->lbr_pebs_users < 0);
perf_sched_cb_dec(event->ctx->pmu);
}
@@ -658,7 +663,13 @@ void intel_pmu_lbr_read(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- if (!cpuc->lbr_users)
+ /*
+ * Don't read when all LBR users are using adaptive PEBS.
+ *
+ * This could be smarter and actually check the event,
+ * but this simple approach seems to work for now.
+ */
+ if (!cpuc->lbr_users || cpuc->lbr_users == cpuc->lbr_pebs_users)
return;
if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
@@ -1080,6 +1091,28 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
}
}
+void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr)
+{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ int i;
+
+ cpuc->lbr_stack.nr = x86_pmu.lbr_nr;
+ for (i = 0; i < x86_pmu.lbr_nr; i++) {
+ u64 info = lbr->lbr[i].info;
+ struct perf_branch_entry *e = &cpuc->lbr_entries[i];
+
+ e->from = lbr->lbr[i].from;
+ e->to = lbr->lbr[i].to;
+ e->mispred = !!(info & LBR_INFO_MISPRED);
+ e->predicted = !(info & LBR_INFO_MISPRED);
+ e->in_tx = !!(info & LBR_INFO_IN_TX);
+ e->abort = !!(info & LBR_INFO_ABORT);
+ e->cycles = info & LBR_INFO_CYCLES;
+ e->reserved = 0;
+ }
+ intel_pmu_lbr_filter(cpuc);
+}
+
/*
* Map interface branch filters onto LBR filters
*/
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index fb3a2f13fc70..339d7628080c 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -1525,8 +1525,7 @@ static __init int pt_init(void)
}
if (!intel_pt_validate_hw_cap(PT_CAP_topa_multiple_entries))
- pt_pmu.pmu.capabilities =
- PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_AUX_SW_DOUBLEBUF;
+ pt_pmu.pmu.capabilities = PERF_PMU_CAP_AUX_NO_SG;
pt_pmu.pmu.capabilities |= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_ITRACE;
pt_pmu.pmu.attr_groups = pt_attr_groups;
diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c
index 94dc564146ca..37ebf6fc5415 100644
--- a/arch/x86/events/intel/rapl.c
+++ b/arch/x86/events/intel/rapl.c
@@ -775,6 +775,8 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = {
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_X, hsw_rapl_init),
X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT_PLUS, hsw_rapl_init),
+
+ X86_RAPL_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, skl_rapl_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c
index 9fe64c01a2e5..fc40a1473058 100644
--- a/arch/x86/events/intel/uncore.c
+++ b/arch/x86/events/intel/uncore.c
@@ -1367,6 +1367,11 @@ static const struct intel_uncore_init_fun skx_uncore_init __initconst = {
.pci_init = skx_uncore_pci_init,
};
+static const struct intel_uncore_init_fun icl_uncore_init __initconst = {
+ .cpu_init = icl_uncore_cpu_init,
+ .pci_init = skl_uncore_pci_init,
+};
+
static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM_EP, nhm_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_NEHALEM, nhm_uncore_init),
@@ -1393,6 +1398,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = {
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init),
X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init),
+ X86_UNCORE_MODEL_MATCH(INTEL_FAM6_ICELAKE_MOBILE, icl_uncore_init),
{},
};
diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h
index 853a49a8ccf6..79eb2e21e4f0 100644
--- a/arch/x86/events/intel/uncore.h
+++ b/arch/x86/events/intel/uncore.h
@@ -512,6 +512,7 @@ int skl_uncore_pci_init(void);
void snb_uncore_cpu_init(void);
void nhm_uncore_cpu_init(void);
void skl_uncore_cpu_init(void);
+void icl_uncore_cpu_init(void);
int snb_pci2phy_map_init(int devid);
/* uncore_snbep.c */
diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c
index 13493f43b247..f8431819b3e1 100644
--- a/arch/x86/events/intel/uncore_snb.c
+++ b/arch/x86/events/intel/uncore_snb.c
@@ -34,6 +34,8 @@
#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33
#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca
#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32
+#define PCI_DEVICE_ID_INTEL_ICL_U_IMC 0x8a02
+#define PCI_DEVICE_ID_INTEL_ICL_U2_IMC 0x8a12
/* SNB event control */
#define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff
@@ -93,6 +95,12 @@
#define SKL_UNC_PERF_GLOBAL_CTL 0xe01
#define SKL_UNC_GLOBAL_CTL_CORE_ALL ((1 << 5) - 1)
+/* ICL Cbo register */
+#define ICL_UNC_CBO_CONFIG 0x396
+#define ICL_UNC_NUM_CBO_MASK 0xf
+#define ICL_UNC_CBO_0_PER_CTR0 0x702
+#define ICL_UNC_CBO_MSR_OFFSET 0x8
+
DEFINE_UNCORE_FORMAT_ATTR(event, event, "config:0-7");
DEFINE_UNCORE_FORMAT_ATTR(umask, umask, "config:8-15");
DEFINE_UNCORE_FORMAT_ATTR(edge, edge, "config:18");
@@ -280,6 +288,70 @@ void skl_uncore_cpu_init(void)
snb_uncore_arb.ops = &skl_uncore_msr_ops;
}
+static struct intel_uncore_type icl_uncore_cbox = {
+ .name = "cbox",
+ .num_counters = 4,
+ .perf_ctr_bits = 44,
+ .perf_ctr = ICL_UNC_CBO_0_PER_CTR0,
+ .event_ctl = SNB_UNC_CBO_0_PERFEVTSEL0,
+ .event_mask = SNB_UNC_RAW_EVENT_MASK,
+ .msr_offset = ICL_UNC_CBO_MSR_OFFSET,
+ .ops = &skl_uncore_msr_ops,
+ .format_group = &snb_uncore_format_group,
+};
+
+static struct uncore_event_desc icl_uncore_events[] = {
+ INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff"),
+ { /* end: all zeroes */ },
+};
+
+static struct attribute *icl_uncore_clock_formats_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group icl_uncore_clock_format_group = {
+ .name = "format",
+ .attrs = icl_uncore_clock_formats_attr,
+};
+
+static struct intel_uncore_type icl_uncore_clockbox = {
+ .name = "clock",
+ .num_counters = 1,
+ .num_boxes = 1,
+ .fixed_ctr_bits = 48,
+ .fixed_ctr = SNB_UNC_FIXED_CTR,
+ .fixed_ctl = SNB_UNC_FIXED_CTR_CTRL,
+ .single_fixed = 1,
+ .event_mask = SNB_UNC_CTL_EV_SEL_MASK,
+ .format_group = &icl_uncore_clock_format_group,
+ .ops = &skl_uncore_msr_ops,
+ .event_descs = icl_uncore_events,
+};
+
+static struct intel_uncore_type *icl_msr_uncores[] = {
+ &icl_uncore_cbox,
+ &snb_uncore_arb,
+ &icl_uncore_clockbox,
+ NULL,
+};
+
+static int icl_get_cbox_num(void)
+{
+ u64 num_boxes;
+
+ rdmsrl(ICL_UNC_CBO_CONFIG, num_boxes);
+
+ return num_boxes & ICL_UNC_NUM_CBO_MASK;
+}
+
+void icl_uncore_cpu_init(void)
+{
+ uncore_msr_uncores = icl_msr_uncores;
+ icl_uncore_cbox.num_boxes = icl_get_cbox_num();
+ snb_uncore_arb.ops = &skl_uncore_msr_ops;
+}
+
enum {
SNB_PCI_UNCORE_IMC,
};
@@ -668,6 +740,18 @@ static const struct pci_device_id skl_uncore_pci_ids[] = {
{ /* end: all zeroes */ },
};
+static const struct pci_device_id icl_uncore_pci_ids[] = {
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* IMC */
+ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICL_U2_IMC),
+ .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+ },
+ { /* end: all zeroes */ },
+};
+
static struct pci_driver snb_uncore_pci_driver = {
.name = "snb_uncore",
.id_table = snb_uncore_pci_ids,
@@ -693,6 +777,11 @@ static struct pci_driver skl_uncore_pci_driver = {
.id_table = skl_uncore_pci_ids,
};
+static struct pci_driver icl_uncore_pci_driver = {
+ .name = "icl_uncore",
+ .id_table = icl_uncore_pci_ids,
+};
+
struct imc_uncore_pci_dev {
__u32 pci_id;
struct pci_driver *driver;
@@ -732,6 +821,8 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */
IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */
IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */
+ IMC_DEV(ICL_U_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
+ IMC_DEV(ICL_U2_IMC, &icl_uncore_pci_driver), /* 10th Gen Core Mobile */
{ /* end marker */ }
};
diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c
index a878e6286e4a..f3f4c2263501 100644
--- a/arch/x86/events/msr.c
+++ b/arch/x86/events/msr.c
@@ -89,6 +89,7 @@ static bool test_intel(int idx)
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_KABYLAKE_DESKTOP:
+ case INTEL_FAM6_ICELAKE_MOBILE:
if (idx == PERF_MSR_SMI || idx == PERF_MSR_PPERF)
return true;
break;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 1e98a42b560a..07fc84bb85c1 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -49,28 +49,33 @@ struct event_constraint {
unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
u64 idxmsk64;
};
- u64 code;
- u64 cmask;
- int weight;
- int overlap;
- int flags;
+ u64 code;
+ u64 cmask;
+ int weight;
+ int overlap;
+ int flags;
+ unsigned int size;
};
+
+static inline bool constraint_match(struct event_constraint *c, u64 ecode)
+{
+ return ((ecode & c->cmask) - c->code) <= (u64)c->size;
+}
+
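
Editor's sketch (annotation, not part of the patch): constraint_match() folds a range test into a single unsigned compare. Any masked event code below c->code wraps around to a huge value and fails the "<= size" check, so only codes in [code, code + size] match. A minimal illustration with an ad-hoc constraint:

/* Illustrative only: a range constraint covering event codes 0xd1-0xd4. */
static bool example_range_match(u64 ecode)
{
	struct event_constraint c = {
		.code  = 0xd1,
		.size  = 0xd4 - 0xd1,
		.cmask = ARCH_PERFMON_EVENTSEL_EVENT,
	};

	/* True only when the masked event code is 0xd1..0xd4. */
	return constraint_match(&c, ecode);
}
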
/*
* struct hw_perf_event.flags flags
*/
#define PERF_X86_EVENT_PEBS_LDLAT 0x0001 /* ld+ldlat data address sampling */
#define PERF_X86_EVENT_PEBS_ST 0x0002 /* st data address sampling */
#define PERF_X86_EVENT_PEBS_ST_HSW 0x0004 /* haswell style datala, store */
-#define PERF_X86_EVENT_COMMITTED 0x0008 /* event passed commit_txn */
-#define PERF_X86_EVENT_PEBS_LD_HSW 0x0010 /* haswell style datala, load */
-#define PERF_X86_EVENT_PEBS_NA_HSW 0x0020 /* haswell style datala, unknown */
-#define PERF_X86_EVENT_EXCL 0x0040 /* HT exclusivity on counter */
-#define PERF_X86_EVENT_DYNAMIC 0x0080 /* dynamic alloc'd constraint */
-#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0100 /* grant rdpmc permission */
-#define PERF_X86_EVENT_EXCL_ACCT 0x0200 /* accounted EXCL event */
-#define PERF_X86_EVENT_AUTO_RELOAD 0x0400 /* use PEBS auto-reload */
-#define PERF_X86_EVENT_LARGE_PEBS 0x0800 /* use large PEBS */
-
+#define PERF_X86_EVENT_PEBS_LD_HSW 0x0008 /* haswell style datala, load */
+#define PERF_X86_EVENT_PEBS_NA_HSW 0x0010 /* haswell style datala, unknown */
+#define PERF_X86_EVENT_EXCL 0x0020 /* HT exclusivity on counter */
+#define PERF_X86_EVENT_DYNAMIC 0x0040 /* dynamic alloc'd constraint */
+#define PERF_X86_EVENT_RDPMC_ALLOWED 0x0080 /* grant rdpmc permission */
+#define PERF_X86_EVENT_EXCL_ACCT 0x0100 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD 0x0200 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_LARGE_PEBS 0x0400 /* use large PEBS */
struct amd_nb {
int nb_id; /* NorthBridge id */
@@ -116,6 +121,24 @@ struct amd_nb {
(1ULL << PERF_REG_X86_R14) | \
(1ULL << PERF_REG_X86_R15))
+#define PEBS_XMM_REGS \
+ ((1ULL << PERF_REG_X86_XMM0) | \
+ (1ULL << PERF_REG_X86_XMM1) | \
+ (1ULL << PERF_REG_X86_XMM2) | \
+ (1ULL << PERF_REG_X86_XMM3) | \
+ (1ULL << PERF_REG_X86_XMM4) | \
+ (1ULL << PERF_REG_X86_XMM5) | \
+ (1ULL << PERF_REG_X86_XMM6) | \
+ (1ULL << PERF_REG_X86_XMM7) | \
+ (1ULL << PERF_REG_X86_XMM8) | \
+ (1ULL << PERF_REG_X86_XMM9) | \
+ (1ULL << PERF_REG_X86_XMM10) | \
+ (1ULL << PERF_REG_X86_XMM11) | \
+ (1ULL << PERF_REG_X86_XMM12) | \
+ (1ULL << PERF_REG_X86_XMM13) | \
+ (1ULL << PERF_REG_X86_XMM14) | \
+ (1ULL << PERF_REG_X86_XMM15))
+
/*
* Per register state.
*/
@@ -207,10 +230,16 @@ struct cpu_hw_events {
int n_pebs;
int n_large_pebs;
+ /* Current super set of events hardware configuration */
+ u64 pebs_data_cfg;
+ u64 active_pebs_data_cfg;
+ int pebs_record_size;
+
/*
* Intel LBR bits
*/
int lbr_users;
+ int lbr_pebs_users;
struct perf_branch_stack lbr_stack;
struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES];
struct er_account *lbr_sel;
@@ -257,18 +286,29 @@ struct cpu_hw_events {
void *kfree_on_online[X86_PERF_KFREE_MAX];
};
-#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\
+#define __EVENT_CONSTRAINT_RANGE(c, e, n, m, w, o, f) { \
{ .idxmsk64 = (n) }, \
.code = (c), \
+ .size = (e) - (c), \
.cmask = (m), \
.weight = (w), \
.overlap = (o), \
.flags = f, \
}
+#define __EVENT_CONSTRAINT(c, n, m, w, o, f) \
+ __EVENT_CONSTRAINT_RANGE(c, c, n, m, w, o, f)
+
#define EVENT_CONSTRAINT(c, n, m) \
__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0)
+/*
+ * The constraint_match() function only works for 'simple' event codes
+ * and not for extended (AMD64_EVENTSEL_EVENT) event codes.
+ */
+#define EVENT_CONSTRAINT_RANGE(c, e, n, m) \
+ __EVENT_CONSTRAINT_RANGE(c, e, n, m, HWEIGHT(n), 0, 0)
+
#define INTEL_EXCLEVT_CONSTRAINT(c, n) \
__EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT, HWEIGHT(n),\
0, PERF_X86_EVENT_EXCL)
@@ -304,6 +344,12 @@ struct cpu_hw_events {
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
/*
+ * Constraint on a range of Event codes
+ */
+#define INTEL_EVENT_CONSTRAINT_RANGE(c, e, n) \
+ EVENT_CONSTRAINT_RANGE(c, e, n, ARCH_PERFMON_EVENTSEL_EVENT)
+
+/*
* Constraint on the Event code + UMask + fixed-mask
*
* filter mask to validate fixed counter events.
@@ -350,6 +396,9 @@ struct cpu_hw_events {
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+#define INTEL_FLAGS_EVENT_CONSTRAINT_RANGE(c, e, n) \
+ EVENT_CONSTRAINT_RANGE(c, e, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS)
+
/* Check only flags, but allow all event/umask */
#define INTEL_ALL_EVENT_CONSTRAINT(code, n) \
EVENT_CONSTRAINT(code, n, X86_ALL_EVENT_FLAGS)
@@ -366,6 +415,11 @@ struct cpu_hw_events {
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(code, end, n) \
+ __EVENT_CONSTRAINT_RANGE(code, end, n, \
+ ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LD_HSW)
+
#define INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_XLD(code, n) \
__EVENT_CONSTRAINT(code, n, \
ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS, \
@@ -473,6 +527,7 @@ union perf_capabilities {
* values > 32bit.
*/
u64 full_width_write:1;
+ u64 pebs_baseline:1;
};
u64 capabilities;
};
@@ -613,14 +668,16 @@ struct x86_pmu {
pebs_broken :1,
pebs_prec_dist :1,
pebs_no_tlb :1,
- pebs_no_isolation :1;
+ pebs_no_isolation :1,
+ pebs_no_xmm_regs :1;
int pebs_record_size;
int pebs_buffer_size;
+ int max_pebs_events;
void (*drain_pebs)(struct pt_regs *regs);
struct event_constraint *pebs_constraints;
void (*pebs_aliases)(struct perf_event *event);
- int max_pebs_events;
unsigned long large_pebs_flags;
+ u64 rtm_abort_event;
/*
* Intel LBR
@@ -714,6 +771,7 @@ static struct perf_pmu_events_ht_attr event_attr_##v = { \
.event_str_ht = ht, \
}
+struct pmu *x86_get_pmu(void);
extern struct x86_pmu x86_pmu __read_mostly;
static inline bool x86_pmu_has_lbr_callstack(void)
@@ -941,6 +999,8 @@ extern struct event_constraint intel_bdw_pebs_event_constraints[];
extern struct event_constraint intel_skl_pebs_event_constraints[];
+extern struct event_constraint intel_icl_pebs_event_constraints[];
+
struct event_constraint *intel_pebs_constraints(struct perf_event *event);
void intel_pmu_pebs_add(struct perf_event *event);
@@ -959,6 +1019,8 @@ void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
void intel_pmu_auto_reload_read(struct perf_event *event);
+void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
+
void intel_ds_init(void);
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 8eb6fbee8e13..5c056b8aebef 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -86,6 +86,11 @@ static void hv_apic_write(u32 reg, u32 val)
static void hv_apic_eoi_write(u32 reg, u32 val)
{
+ struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()];
+
+ if (hvp && (xchg(&hvp->apic_assist, 0) & 0x1))
+ return;
+
wrmsr(HV_X64_MSR_EOI, val, 0);
}
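
Editor's note (annotation, not part of the patch): this is Hyper-V's lazy-EOI assist. When the hypervisor sets bit 0 of the VP assist page's apic_assist field, no EOI register write is required; the xchg() both tests and clears the flag so a stale hint is never reused, and only on a clear flag does the code fall back to the HV_X64_MSR_EOI write. A minimal restatement of that test, for illustration only:

/* Illustrative only: consume the "no EOI required" hint exactly once. */
static bool hv_lazy_eoi_pending(struct hv_vp_assist_page *hvp)
{
	return hvp && (xchg(&hvp->apic_assist, 0) & 0x1);
}
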
diff --git a/arch/x86/hyperv/hv_spinlock.c b/arch/x86/hyperv/hv_spinlock.c
index a861b0456b1a..07f21a06392f 100644
--- a/arch/x86/hyperv/hv_spinlock.c
+++ b/arch/x86/hyperv/hv_spinlock.c
@@ -56,7 +56,7 @@ static void hv_qlock_wait(u8 *byte, u8 val)
/*
* Hyper-V does not support this so far.
*/
-bool hv_vcpu_is_preempted(int vcpu)
+__visible bool hv_vcpu_is_preempted(int vcpu)
{
return false;
}
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 321fe5f5d0e9..629d1ee05599 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -61,9 +61,8 @@
} while (0)
#define RELOAD_SEG(seg) { \
- unsigned int pre = GET_SEG(seg); \
+ unsigned int pre = (seg) | 3; \
unsigned int cur = get_user_seg(seg); \
- pre |= 3; \
if (pre != cur) \
set_user_seg(seg, pre); \
}
@@ -72,6 +71,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
struct sigcontext_32 __user *sc)
{
unsigned int tmpflags, err = 0;
+ u16 gs, fs, es, ds;
void __user *buf;
u32 tmp;
@@ -79,16 +79,10 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
current->restart_block.fn = do_no_restart_syscall;
get_user_try {
- /*
- * Reload fs and gs if they have changed in the signal
- * handler. This does not handle long fs/gs base changes in
- * the handler, but does not clobber them at least in the
- * normal case.
- */
- RELOAD_SEG(gs);
- RELOAD_SEG(fs);
- RELOAD_SEG(ds);
- RELOAD_SEG(es);
+ gs = GET_SEG(gs);
+ fs = GET_SEG(fs);
+ ds = GET_SEG(ds);
+ es = GET_SEG(es);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
COPY(dx); COPY(cx); COPY(ip); COPY(ax);
@@ -106,6 +100,17 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
buf = compat_ptr(tmp);
} get_user_catch(err);
+ /*
+ * Reload fs and gs if they have changed in the signal
+ * handler. This does not handle long fs/gs base changes in
+ * the handler, but does not clobber them at least in the
+ * normal case.
+ */
+ RELOAD_SEG(gs);
+ RELOAD_SEG(fs);
+ RELOAD_SEG(ds);
+ RELOAD_SEG(es);
+
err |= fpu__restore_sig(buf, 1);
force_iret();
@@ -216,8 +221,7 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
size_t frame_size,
void __user **fpstate)
{
- struct fpu *fpu = &current->thread.fpu;
- unsigned long sp;
+ unsigned long sp, fx_aligned, math_size;
/* Default to using normal stack */
sp = regs->sp;
@@ -231,15 +235,11 @@ static void __user *get_sigframe(struct ksignal *ksig, struct pt_regs *regs,
ksig->ka.sa.sa_restorer)
sp = (unsigned long) ksig->ka.sa.sa_restorer;
- if (fpu->initialized) {
- unsigned long fx_aligned, math_size;
-
- sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
- *fpstate = (struct _fpstate_32 __user *) sp;
- if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
- math_size) < 0)
- return (void __user *) -1L;
- }
+ sp = fpu__alloc_mathframe(sp, 1, &fx_aligned, &math_size);
+ *fpstate = (struct _fpstate_32 __user *) sp;
+ if (copy_fpstate_to_sigframe(*fpstate, (void __user *)fx_aligned,
+ math_size) < 0)
+ return (void __user *) -1L;
sp -= frame_size;
/* Align the stack pointer according to the i386 ABI,
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index a0ab9ab61c75..eebd05942e6c 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -11,3 +11,4 @@ generic-y += early_ioremap.h
generic-y += export.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 31b627b43a8e..464034db299f 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -20,6 +20,17 @@
#endif
/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+.macro ANNOTATE_IGNORE_ALTERNATIVE
+ .Lannotate_\@:
+ .pushsection .discard.ignore_alts
+ .long .Lannotate_\@ - .
+ .popsection
+.endm
+
+/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4c74073a19cc..094fbc9c0b1c 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -45,6 +45,16 @@
#define LOCK_PREFIX ""
#endif
+/*
+ * objtool annotation to ignore the alternatives and only consider the original
+ * instruction(s).
+ */
+#define ANNOTATE_IGNORE_ALTERNATIVE \
+ "999:\n\t" \
+ ".pushsection .discard.ignore_alts\n\t" \
+ ".long 999b - .\n\t" \
+ ".popsection\n\t"
+
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 6467757bb39f..3ff577c0b102 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -148,30 +148,6 @@
_ASM_PTR (entry); \
.popsection
-.macro ALIGN_DESTINATION
- /* check for bad alignment of destination */
- movl %edi,%ecx
- andl $7,%ecx
- jz 102f /* already aligned */
- subl $8,%ecx
- negl %ecx
- subl %ecx,%edx
-100: movb (%rsi),%al
-101: movb %al,(%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz 100b
-102:
- .section .fixup,"ax"
-103: addl %ecx,%edx /* ecx is zerorest also */
- jmp copy_user_handle_tail
- .previous
-
- _ASM_EXTABLE_UA(100b, 103b)
- _ASM_EXTABLE_UA(101b, 103b)
- .endm
-
#else
# define _EXPAND_EXTABLE_HANDLE(x) #x
# define _ASM_EXTABLE_HANDLE(from, to, handler) \
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 29c706415443..cff3f3f3bfe0 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -7,6 +7,64 @@
#include <asm/processor.h>
#include <asm/intel_ds.h>
+#ifdef CONFIG_X86_64
+
+/* Macro to enforce the same ordering and stack sizes */
+#define ESTACKS_MEMBERS(guardsize, db2_holesize)\
+ char DF_stack_guard[guardsize]; \
+ char DF_stack[EXCEPTION_STKSZ]; \
+ char NMI_stack_guard[guardsize]; \
+ char NMI_stack[EXCEPTION_STKSZ]; \
+ char DB2_stack_guard[guardsize]; \
+ char DB2_stack[db2_holesize]; \
+ char DB1_stack_guard[guardsize]; \
+ char DB1_stack[EXCEPTION_STKSZ]; \
+ char DB_stack_guard[guardsize]; \
+ char DB_stack[EXCEPTION_STKSZ]; \
+ char MCE_stack_guard[guardsize]; \
+ char MCE_stack[EXCEPTION_STKSZ]; \
+ char IST_top_guard[guardsize]; \
+
+/* The exception stacks' physical storage. No guard pages required */
+struct exception_stacks {
+ ESTACKS_MEMBERS(0, 0)
+};
+
+/* The effective cpu entry area mapping with guard pages. */
+struct cea_exception_stacks {
+ ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
+};
+
+/*
+ * The exception stack ordering in [cea_]exception_stacks
+ */
+enum exception_stack_ordering {
+ ESTACK_DF,
+ ESTACK_NMI,
+ ESTACK_DB2,
+ ESTACK_DB1,
+ ESTACK_DB,
+ ESTACK_MCE,
+ N_EXCEPTION_STACKS
+};
+
+#define CEA_ESTACK_SIZE(st) \
+ sizeof(((struct cea_exception_stacks *)0)->st## _stack)
+
+#define CEA_ESTACK_BOT(ceastp, st) \
+ ((unsigned long)&(ceastp)->st## _stack)
+
+#define CEA_ESTACK_TOP(ceastp, st) \
+ (CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
+
+#define CEA_ESTACK_OFFS(st) \
+ offsetof(struct cea_exception_stacks, st## _stack)
+
+#define CEA_ESTACK_PAGES \
+ (sizeof(struct cea_exception_stacks) / PAGE_SIZE)
+
+#endif
+
/*
* cpu_entry_area is a percpu region that contains things needed by the CPU
* and early entry/exit code. Real types aren't used for all fields here
@@ -32,12 +90,9 @@ struct cpu_entry_area {
#ifdef CONFIG_X86_64
/*
- * Exception stacks used for IST entries.
- *
- * In the future, this should have a separate slot for each stack
- * with guard pages between them.
+ * Exception stacks used for IST entries with guard pages.
*/
- char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+ struct cea_exception_stacks estacks;
#endif
#ifdef CONFIG_CPU_SUP_INTEL
/*
@@ -57,6 +112,7 @@ struct cpu_entry_area {
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
@@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
}
+#define __this_cpu_ist_top_va(name) \
+ CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
+
#endif
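
A minimal user-space sketch of the guard-page layout that ESTACKS_MEMBERS() and the CEA_ESTACK_* helpers above establish. PAGE_SIZE and EXCEPTION_STKSZ are assumed to be 4096 here; the real values depend on the kernel configuration (KASAN raises EXCEPTION_STACK_ORDER), and the struct and macro names below are made up for the sketch.

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE	4096		/* assumption */
#define EXCEPTION_STKSZ	PAGE_SIZE	/* assumption: no KASAN padding */

/* Mirrors the DF/NMI/DB2/DB1/DB/MCE ordering, each stack behind a guard page */
struct cea_stacks_sketch {
	char DF_stack_guard[PAGE_SIZE],  DF_stack[EXCEPTION_STKSZ];
	char NMI_stack_guard[PAGE_SIZE], NMI_stack[EXCEPTION_STKSZ];
	char DB2_stack_guard[PAGE_SIZE], DB2_stack[EXCEPTION_STKSZ];
	char DB1_stack_guard[PAGE_SIZE], DB1_stack[EXCEPTION_STKSZ];
	char DB_stack_guard[PAGE_SIZE],  DB_stack[EXCEPTION_STKSZ];
	char MCE_stack_guard[PAGE_SIZE], MCE_stack[EXCEPTION_STKSZ];
	char IST_top_guard[PAGE_SIZE];
};

/* Same arithmetic as CEA_ESTACK_OFFS()/CEA_ESTACK_TOP() above */
#define SKETCH_OFFS(st)	offsetof(struct cea_stacks_sketch, st##_stack)
#define SKETCH_TOP(st)	(SKETCH_OFFS(st) + \
			 sizeof(((struct cea_stacks_sketch *)0)->st##_stack))

int main(void)
{
	printf("DF  stack: [%zu, %zu)\n", SKETCH_OFFS(DF),  SKETCH_TOP(DF));
	printf("NMI stack: [%zu, %zu)\n", SKETCH_OFFS(NMI), SKETCH_TOP(NMI));
	printf("DB  stack: [%zu, %zu)\n", SKETCH_OFFS(DB),  SKETCH_TOP(DB));
	printf("total: %zu pages\n", sizeof(struct cea_stacks_sketch) / PAGE_SIZE);
	return 0;
}
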
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0e56ff7e4848..1d337c51f7e6 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -156,11 +156,14 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
#else
/*
- * Static testing of CPU features. Used the same as boot_cpu_has().
- * These will statically patch the target code for additional
- * performance.
+ * Static testing of CPU features. Used the same as boot_cpu_has(). It
+ * statically patches the target code for additional performance. Use
+ * static_cpu_has() only in fast paths, where every cycle counts. For the
+ * majority of cases the boot_cpu_has() variant is already fast enough,
+ * as it is generally only two instructions: a RIP-relative MOV and a
+ * TEST; stick to using it.
*/
-static __always_inline __pure bool _static_cpu_has(u16 bit)
+static __always_inline bool _static_cpu_has(u16 bit)
{
asm_volatile_goto("1: jmp 6f\n"
"2:\n"
diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index 9e5ca30738e5..1a8609a15856 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
{
__this_cpu_dec(debug_stack_usage);
}
-int is_debug_stack(unsigned long addr);
void debug_stack_set_zero(void);
void debug_stack_reset(void);
#else /* !X86_64 */
-static inline int is_debug_stack(unsigned long addr) { return 0; }
static inline void debug_stack_set_zero(void) { }
static inline void debug_stack_reset(void) { }
static inline void debug_stack_usage_inc(void) { }
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index ce4d176b3d13..6b15a24930e0 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -13,14 +13,7 @@
#include <asm/swiotlb.h>
#include <linux/dma-contiguous.h>
-#ifdef CONFIG_ISA
-# define ISA_DMA_BIT_MASK DMA_BIT_MASK(24)
-#else
-# define ISA_DMA_BIT_MASK DMA_BIT_MASK(32)
-#endif
-
extern int iommu_merge;
-extern struct device x86_dma_fallback_dev;
extern int panic_on_overflow;
extern const struct dma_map_ops *dma_ops;
@@ -30,7 +23,4 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
return dma_ops;
}
-bool arch_dma_alloc_attrs(struct device **dev);
-#define arch_dma_alloc_attrs arch_dma_alloc_attrs
-
#endif
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 50ba74a34a37..9da8cccdf3fb 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -103,8 +103,6 @@ enum fixed_addresses {
#ifdef CONFIG_PARAVIRT
FIX_PARAVIRT_BOOTMAP,
#endif
- FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
- FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h
index b56d504af654..b774c52e5411 100644
--- a/arch/x86/include/asm/fpu/api.h
+++ b/arch/x86/include/asm/fpu/api.h
@@ -10,6 +10,7 @@
#ifndef _ASM_X86_FPU_API_H
#define _ASM_X86_FPU_API_H
+#include <linux/bottom_half.h>
/*
* Use kernel_fpu_begin/end() if you intend to use FPU in kernel context. It
@@ -21,6 +22,36 @@
extern void kernel_fpu_begin(void);
extern void kernel_fpu_end(void);
extern bool irq_fpu_usable(void);
+extern void fpregs_mark_activate(void);
+
+/*
+ * Use fpregs_lock() while editing the CPU's FPU registers or fpu->state.
+ * A context switch will (and a softirq might) save the CPU's FPU registers
+ * to fpu->state and set TIF_NEED_FPU_LOAD, leaving the CPU's FPU registers
+ * in a random state.
+ */
+static inline void fpregs_lock(void)
+{
+ preempt_disable();
+ local_bh_disable();
+}
+
+static inline void fpregs_unlock(void)
+{
+ local_bh_enable();
+ preempt_enable();
+}
+
+#ifdef CONFIG_X86_DEBUG_FPU
+extern void fpregs_assert_state_consistent(void);
+#else
+static inline void fpregs_assert_state_consistent(void) { }
+#endif
+
+/*
+ * Load the task FPU state before returning to userspace.
+ */
+extern void switch_fpu_return(void);
/*
* Query the presence of one or more xfeatures. Works on any legacy CPU as well.
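
A hedged usage sketch of the fpregs_lock()/fpregs_unlock() pair introduced above. The helper name and the update in the middle are placeholders; only the locking pattern itself is taken from the patch.

/* Hypothetical helper: only the fpregs_lock()/fpregs_unlock() bracketing is
 * from the patch; the body is an illustrative placeholder. */
static void my_update_current_fpstate(void)
{
	struct fpu *fpu = &current->thread.fpu;

	fpregs_lock();		/* no preemption, no softirqs */

	/* Neither fpu->state nor the live FPU registers can change here, so
	 * both copies can be updated consistently (placeholder update): */
	fpu->state.xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;

	fpregs_unlock();
}
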
diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index fb04a3ded7dd..9e27fa05a7ae 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -14,6 +14,7 @@
#include <linux/compat.h>
#include <linux/sched.h>
#include <linux/slab.h>
+#include <linux/mm.h>
#include <asm/user.h>
#include <asm/fpu/api.h>
@@ -24,14 +25,12 @@
/*
* High level FPU state handling functions:
*/
-extern void fpu__initialize(struct fpu *fpu);
extern void fpu__prepare_read(struct fpu *fpu);
extern void fpu__prepare_write(struct fpu *fpu);
extern void fpu__save(struct fpu *fpu);
-extern void fpu__restore(struct fpu *fpu);
extern int fpu__restore_sig(void __user *buf, int ia32_frame);
extern void fpu__drop(struct fpu *fpu);
-extern int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu);
+extern int fpu__copy(struct task_struct *dst, struct task_struct *src);
extern void fpu__clear(struct fpu *fpu);
extern int fpu__exception_code(struct fpu *fpu, int trap_nr);
extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate);
@@ -122,6 +121,21 @@ extern void fpstate_sanitize_xstate(struct fpu *fpu);
err; \
})
+#define kernel_insn_err(insn, output, input...) \
+({ \
+ int err; \
+ asm volatile("1:" #insn "\n\t" \
+ "2:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "3: movl $-1,%[err]\n" \
+ " jmp 2b\n" \
+ ".previous\n" \
+ _ASM_EXTABLE(1b, 3b) \
+ : [err] "=r" (err), output \
+ : "0"(0), input); \
+ err; \
+})
+
#define kernel_insn(insn, output, input...) \
asm volatile("1:" #insn "\n\t" \
"2:\n" \
@@ -150,6 +164,14 @@ static inline void copy_kernel_to_fxregs(struct fxregs_state *fx)
kernel_insn(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
}
+static inline int copy_kernel_to_fxregs_err(struct fxregs_state *fx)
+{
+ if (IS_ENABLED(CONFIG_X86_32))
+ return kernel_insn_err(fxrstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+ else
+ return kernel_insn_err(fxrstorq %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
static inline int copy_user_to_fxregs(struct fxregs_state __user *fx)
{
if (IS_ENABLED(CONFIG_X86_32))
@@ -163,6 +185,11 @@ static inline void copy_kernel_to_fregs(struct fregs_state *fx)
kernel_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
}
+static inline int copy_kernel_to_fregs_err(struct fregs_state *fx)
+{
+ return kernel_insn_err(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
+}
+
static inline int copy_user_to_fregs(struct fregs_state __user *fx)
{
return user_insn(frstor %[fx], "=m" (*fx), [fx] "m" (*fx));
@@ -253,7 +280,7 @@ static inline void copy_xregs_to_kernel_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has(X86_FEATURE_XSAVES))
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XSAVES, xstate, lmask, hmask, err);
else
XSTATE_OP(XSAVE, xstate, lmask, hmask, err);
@@ -275,7 +302,7 @@ static inline void copy_kernel_to_xregs_booting(struct xregs_state *xstate)
WARN_ON(system_state != SYSTEM_BOOTING);
- if (static_cpu_has(X86_FEATURE_XSAVES))
+ if (boot_cpu_has(X86_FEATURE_XSAVES))
XSTATE_OP(XRSTORS, xstate, lmask, hmask, err);
else
XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
@@ -363,6 +390,21 @@ static inline int copy_user_to_xregs(struct xregs_state __user *buf, u64 mask)
}
/*
+ * Restore xstate from kernel space xsave area, return an error code instead of
+ * an exception.
+ */
+static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask)
+{
+ u32 lmask = mask;
+ u32 hmask = mask >> 32;
+ int err;
+
+ XSTATE_OP(XRSTOR, xstate, lmask, hmask, err);
+
+ return err;
+}
+
+/*
* These must be called with preempt disabled. Returns
* 'true' if the FPU state is still intact and we can
* keep registers active.
@@ -487,6 +529,25 @@ static inline void fpregs_activate(struct fpu *fpu)
}
/*
+ * Internal helper, do not use directly. Use switch_fpu_return() instead.
+ */
+static inline void __fpregs_load_activate(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+ int cpu = smp_processor_id();
+
+ if (WARN_ON_ONCE(current->mm == NULL))
+ return;
+
+ if (!fpregs_state_valid(fpu, cpu)) {
+ copy_kernel_to_fpregs(&fpu->state);
+ fpregs_activate(fpu);
+ fpu->last_cpu = cpu;
+ }
+ clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+
+/*
* FPU state switching for scheduling.
*
* This is a two-stage process:
@@ -494,13 +555,23 @@ static inline void fpregs_activate(struct fpu *fpu)
* - switch_fpu_prepare() saves the old state.
* This is done within the context of the old process.
*
- * - switch_fpu_finish() restores the new state as
- * necessary.
+ * - switch_fpu_finish() sets TIF_NEED_FPU_LOAD; the floating point state
+ * will get loaded on return to userspace, or when the kernel needs it.
+ *
+ * If TIF_NEED_FPU_LOAD is cleared then the CPU's FPU registers
+ * are saved in the current thread's FPU register state.
+ *
+ * If TIF_NEED_FPU_LOAD is set then the CPU's FPU registers may not
+ * hold current()'s FPU state. The registers must be loaded before
+ * returning to userland, or before their content is used in any
+ * other way.
+ *
+ * The FPU context is only stored/restored for a user task and
+ * ->mm is used to distinguish between kernel and user threads.
*/
-static inline void
-switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
{
- if (static_cpu_has(X86_FEATURE_FPU) && old_fpu->initialized) {
+ if (static_cpu_has(X86_FEATURE_FPU) && current->mm) {
if (!copy_fpregs_to_fpstate(old_fpu))
old_fpu->last_cpu = -1;
else
@@ -508,8 +579,7 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
/* But leave fpu_fpregs_owner_ctx! */
trace_x86_fpu_regs_deactivated(old_fpu);
- } else
- old_fpu->last_cpu = -1;
+ }
}
/*
@@ -517,36 +587,32 @@ switch_fpu_prepare(struct fpu *old_fpu, int cpu)
*/
/*
- * Set up the userspace FPU context for the new task, if the task
- * has used the FPU.
+ * Load PKRU from the FPU context if available. Delay loading of the
+ * complete FPU state until the return to userland.
*/
-static inline void switch_fpu_finish(struct fpu *new_fpu, int cpu)
+static inline void switch_fpu_finish(struct fpu *new_fpu)
{
- bool preload = static_cpu_has(X86_FEATURE_FPU) &&
- new_fpu->initialized;
+ u32 pkru_val = init_pkru_value;
+ struct pkru_state *pk;
- if (preload) {
- if (!fpregs_state_valid(new_fpu, cpu))
- copy_kernel_to_fpregs(&new_fpu->state);
- fpregs_activate(new_fpu);
- }
-}
+ if (!static_cpu_has(X86_FEATURE_FPU))
+ return;
-/*
- * Needs to be preemption-safe.
- *
- * NOTE! user_fpu_begin() must be used only immediately before restoring
- * the save state. It does not do any saving/restoring on its own. In
- * lazy FPU mode, it is just an optimization to avoid a #NM exception,
- * the task can lose the FPU right after preempt_enable().
- */
-static inline void user_fpu_begin(void)
-{
- struct fpu *fpu = &current->thread.fpu;
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+
+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))
+ return;
- preempt_disable();
- fpregs_activate(fpu);
- preempt_enable();
+ /*
+ * PKRU state is switched eagerly because it needs to be valid before we
+ * return to userland e.g. for a copy_to_user() operation.
+ */
+ if (current->mm) {
+ pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
+ if (pk)
+ pkru_val = pk->pkru;
+ }
+ __write_pkru(pkru_val);
}
/*
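
A rough sketch of the two-stage switch described in the comment above. The function names and call sites here are illustrative simplifications; only switch_fpu_prepare(), switch_fpu_finish(), switch_fpu_return() and TIF_NEED_FPU_LOAD come from the patch.

/* Illustrative flow only; the real call sites live in the scheduler and the
 * exit-to-usermode path. */
static void my_context_switch(struct task_struct *prev,
			      struct task_struct *next, int cpu)
{
	switch_fpu_prepare(&prev->thread.fpu, cpu);	/* save outgoing user FPU state */
	/* ... switch stacks, mm, segment bases ... */
	switch_fpu_finish(&next->thread.fpu);		/* set TIF_NEED_FPU_LOAD, load PKRU */
}

static void my_prepare_exit_to_usermode(void)
{
	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_return();			/* load fpu->state into the registers */
}
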
diff --git a/arch/x86/include/asm/fpu/signal.h b/arch/x86/include/asm/fpu/signal.h
index 44bbc39a57b3..7fb516b6893a 100644
--- a/arch/x86/include/asm/fpu/signal.h
+++ b/arch/x86/include/asm/fpu/signal.h
@@ -22,7 +22,7 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
extern void convert_from_fxsr(struct user_i387_ia32_struct *env,
struct task_struct *tsk);
-extern void convert_to_fxsr(struct task_struct *tsk,
+extern void convert_to_fxsr(struct fxregs_state *fxsave,
const struct user_i387_ia32_struct *env);
unsigned long
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 2e32e178e064..f098f6cab94b 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -294,15 +294,6 @@ struct fpu {
unsigned int last_cpu;
/*
- * @initialized:
- *
- * This flag indicates whether this context is initialized: if the task
- * is not running then we can restore from this context, if the task
- * is running then we should save into this context.
- */
- unsigned char initialized;
-
- /*
* @avx512_timestamp:
*
* Records the timestamp of AVX512 use during last context switch.
diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h
index 48581988d78c..7e42b285c856 100644
--- a/arch/x86/include/asm/fpu/xstate.h
+++ b/arch/x86/include/asm/fpu/xstate.h
@@ -2,9 +2,11 @@
#ifndef __ASM_X86_XSAVE_H
#define __ASM_X86_XSAVE_H
+#include <linux/uaccess.h>
#include <linux/types.h>
+
#include <asm/processor.h>
-#include <linux/uaccess.h>
+#include <asm/user.h>
/* Bit 63 of XCR0 is reserved for future expansion */
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))
@@ -46,8 +48,8 @@ extern void __init update_regset_xstate_info(unsigned int size,
u64 xstate_mask);
void fpu__xstate_clear_all_cpu_caps(void);
-void *get_xsave_addr(struct xregs_state *xsave, int xstate);
-const void *get_xsave_field_ptr(int xstate_field);
+void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr);
+const void *get_xsave_field_ptr(int xfeature_nr);
int using_compacted_format(void);
int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size);
diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
index ae26df1c2789..8380c3ddd4b2 100644
--- a/arch/x86/include/asm/intel_ds.h
+++ b/arch/x86/include/asm/intel_ds.h
@@ -8,7 +8,7 @@
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 8
-#define MAX_FIXED_PEBS_EVENTS 3
+#define MAX_FIXED_PEBS_EVENTS 4
/*
* A debug store configuration.
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 686247db3106..a06a9f8294ea 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -90,8 +90,6 @@ build_mmio_write(__writel, "l", unsigned int, "r", )
#define __raw_writew __writew
#define __raw_writel __writel
-#define mmiowb() barrier()
-
#ifdef CONFIG_X86_64
build_mmio_read(readq, "q", u64, "=r", :"memory")
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index fbb16e6b6c18..8f95686ec27e 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq)
return ((irq == 2) ? 9 : irq);
}
-#ifdef CONFIG_X86_32
-extern void irq_ctx_init(int cpu);
-#else
-# define irq_ctx_init(cpu) do { } while (0)
-#endif
+extern int irq_init_percpu_irqstack(unsigned int cpu);
#define __ARCH_HAS_DO_SOFTIRQ
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 548d90bbf919..889f8b1b5b7f 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -18,8 +18,8 @@
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
* Vectors 32 ... 127 : device interrupts
* Vector 128 : legacy int80 syscall interface
- * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
- * Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
+ * Vectors 129 ... LOCAL_TIMER_VECTOR-1 : device interrupts
+ * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
*
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
*
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a9d03af34030..c79abe7ca093 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -295,6 +295,7 @@ union kvm_mmu_extended_role {
unsigned int valid:1;
unsigned int execonly:1;
unsigned int cr0_pg:1;
+ unsigned int cr4_pae:1;
unsigned int cr4_pse:1;
unsigned int cr4_pke:1;
unsigned int cr4_smap:1;
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 22d05e3835f0..dc2d4b206ab7 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -210,16 +210,6 @@ static inline void cmci_rediscover(void) {}
static inline void cmci_recheck(void) {}
#endif
-#ifdef CONFIG_X86_MCE_AMD
-void mce_amd_feature_init(struct cpuinfo_x86 *c);
-int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
-#else
-static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
-static inline int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
-#endif
-
-static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
-
int mce_available(struct cpuinfo_x86 *c);
bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
@@ -345,12 +335,19 @@ extern bool amd_mce_is_memory_error(struct mce *m);
extern int mce_threshold_create_device(unsigned int cpu);
extern int mce_threshold_remove_device(unsigned int cpu);
-#else
+void mce_amd_feature_init(struct cpuinfo_x86 *c);
+int umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr);
-static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
-static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
-static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
+#else
+static inline int mce_threshold_create_device(unsigned int cpu) { return 0; };
+static inline int mce_threshold_remove_device(unsigned int cpu) { return 0; };
+static inline bool amd_mce_is_memory_error(struct mce *m) { return false; };
+static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
+static inline int
+umc_normaddr_to_sysaddr(u64 norm_addr, u16 nid, u8 umc, u64 *sys_addr) { return -EINVAL; };
#endif
+static inline void mce_hygon_feature_init(struct cpuinfo_x86 *c) { return mce_amd_feature_init(c); }
+
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 19d18fae6ec6..93dff1963337 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -13,6 +13,7 @@
#include <asm/tlbflush.h>
#include <asm/paravirt.h>
#include <asm/mpx.h>
+#include <asm/debugreg.h>
extern atomic64_t last_mm_ctx_id;
@@ -356,4 +357,59 @@ static inline unsigned long __get_current_cr3_fast(void)
return cr3;
}
+typedef struct {
+ struct mm_struct *mm;
+} temp_mm_state_t;
+
+/*
+ * Using a temporary mm allows setting temporary mappings that are not
+ * accessible by other CPUs. Such mappings are needed to perform sensitive
+ * memory writes that override the kernel memory protections (e.g., W^X),
+ * without exposing the temporary page-table mappings that are required for
+ * these write operations to other CPUs. Using a temporary mm also avoids
+ * TLB shootdowns when the mapping is torn down.
+ *
+ * Context: The temporary mm needs to be used exclusively by a single core.
+ *          To harden security, IRQs must be disabled while the temporary mm is
+ * loaded, thereby preventing interrupt handler bugs from overriding
+ * the kernel memory protection.
+ */
+static inline temp_mm_state_t use_temporary_mm(struct mm_struct *mm)
+{
+ temp_mm_state_t temp_state;
+
+ lockdep_assert_irqs_disabled();
+ temp_state.mm = this_cpu_read(cpu_tlbstate.loaded_mm);
+ switch_mm_irqs_off(NULL, mm, current);
+
+ /*
+ * If breakpoints are enabled, disable them while the temporary mm is
+ * used. Userspace might set up watchpoints on addresses that are used
+ * in the temporary mm, which would lead to wrong signals being sent or
+ * crashes.
+ *
+ * Note that breakpoints are not disabled selectively, which also causes
+ * kernel breakpoints (e.g., perf's) to be disabled. This might be
+ * undesirable, but still seems reasonable as the code that runs in the
+ * temporary mm should be short.
+ */
+ if (hw_breakpoint_active())
+ hw_breakpoint_disable();
+
+ return temp_state;
+}
+
+static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
+{
+ lockdep_assert_irqs_disabled();
+ switch_mm_irqs_off(NULL, prev_state.mm, current);
+
+ /*
+ * Restore the breakpoints if they were disabled before the temporary mm
+ * was loaded.
+ */
+ if (hw_breakpoint_active())
+ hw_breakpoint_restore();
+}
+
#endif /* _ASM_X86_MMU_CONTEXT_H */
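
A hedged usage sketch of use_temporary_mm()/unuse_temporary_mm() as described in the comment above. The helper, its arguments and the memcpy() are placeholders, and the sketch assumes vaddr is already mapped in poking_mm, the patching mm declared further down in text-patching.h.

/* Illustrative only; real users set up the temporary mapping first. */
static void my_poke_through_temporary_mm(void *vaddr, const void *opcode,
					 size_t len)
{
	temp_mm_state_t prev;
	unsigned long flags;

	local_irq_save(flags);			/* required by use_temporary_mm() */
	prev = use_temporary_mm(poking_mm);

	memcpy(vaddr, opcode, len);		/* write via the temporary mapping */

	unuse_temporary_mm(prev);
	local_irq_restore(flags);
}
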
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index ca5bc0eacb95..1378518cf63f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -116,6 +116,7 @@
#define LBR_INFO_CYCLES 0xffff
#define MSR_IA32_PEBS_ENABLE 0x000003f1
+#define MSR_PEBS_DATA_CFG 0x000003f2
#define MSR_IA32_DS_AREA 0x00000600
#define MSR_IA32_PERF_CAPABILITIES 0x00000345
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index dad12b767ba0..daf25b60c9e3 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -11,6 +11,15 @@
#include <asm/msr-index.h>
/*
+ * This should be used immediately before a retpoline alternative. It tells
+ * objtool where the retpolines are so that it can make sense of the control
+ * flow by just reading the original instruction(s) and ignoring the
+ * alternatives.
+ */
+#define ANNOTATE_NOSPEC_ALTERNATIVE \
+ ANNOTATE_IGNORE_ALTERNATIVE
+
+/*
* Fill the CPU return stack buffer.
*
* Each entry in the RSB, if used for a speculative 'ret', contains an
@@ -57,19 +66,6 @@
#ifdef __ASSEMBLY__
/*
- * This should be used immediately before a retpoline alternative. It tells
- * objtool where the retpolines are so that it can make sense of the control
- * flow by just reading the original instruction(s) and ignoring the
- * alternatives.
- */
-.macro ANNOTATE_NOSPEC_ALTERNATIVE
- .Lannotate_\@:
- .pushsection .discard.nospec
- .long .Lannotate_\@ - .
- .popsection
-.endm
-
-/*
* This should be used immediately before an indirect jump/call. It tells
* objtool the subsequent indirect jump/call is vouched safe for retpoline
* builds.
@@ -152,12 +148,6 @@
#else /* __ASSEMBLY__ */
-#define ANNOTATE_NOSPEC_ALTERNATIVE \
- "999:\n\t" \
- ".pushsection .discard.nospec\n\t" \
- ".long 999b - .\n\t" \
- ".popsection\n\t"
-
#define ANNOTATE_RETPOLINE_SAFE \
"999:\n\t" \
".pushsection .discard.retpoline_safe\n\t" \
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index 0d5c739eebd7..565ad755c785 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -22,11 +22,9 @@
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 0
-#define DEBUG_STACK 0
-#define MCE_STACK 0
-#define N_EXCEPTION_STACKS 1
+#define IRQ_STACK_SIZE THREAD_SIZE
+
+#define N_EXCEPTION_STACKS 1
#ifdef CONFIG_X86_PAE
/*
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 8f657286d599..793c14c372cb 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -14,22 +14,20 @@
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
-#define CURRENT_MASK (~(THREAD_SIZE - 1))
#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
-#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
-#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
-
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
-#define DOUBLEFAULT_STACK 1
-#define NMI_STACK 2
-#define DEBUG_STACK 3
-#define MCE_STACK 4
-#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
+/*
+ * The index for the tss.ist[] array. The hardware limit is 7 entries.
+ */
+#define IST_INDEX_DF 0
+#define IST_INDEX_NMI 1
+#define IST_INDEX_DB 2
+#define IST_INDEX_MCE 3
/*
* Set __PAGE_OFFSET to the most negative possible address +
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8bdf74902293..1392d5e6e8d6 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -7,7 +7,7 @@
*/
#define INTEL_PMC_MAX_GENERIC 32
-#define INTEL_PMC_MAX_FIXED 3
+#define INTEL_PMC_MAX_FIXED 4
#define INTEL_PMC_IDX_FIXED 32
#define X86_PMC_IDX_MAX 64
@@ -32,6 +32,8 @@
#define HSW_IN_TX (1ULL << 32)
#define HSW_IN_TX_CHECKPOINTED (1ULL << 33)
+#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
+#define ICL_FIXED_0_ADAPTIVE (1ULL << 32)
#define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36)
#define AMD64_EVENTSEL_GUESTONLY (1ULL << 40)
@@ -87,6 +89,12 @@
#define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6
#define ARCH_PERFMON_EVENTS_COUNT 7
+#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
+#define PEBS_DATACFG_GP BIT_ULL(1)
+#define PEBS_DATACFG_XMMS BIT_ULL(2)
+#define PEBS_DATACFG_LBRS BIT_ULL(3)
+#define PEBS_DATACFG_LBR_SHIFT 24
+
/*
* Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details:
@@ -177,6 +185,41 @@ struct x86_pmu_capability {
#define GLOBAL_STATUS_TRACE_TOPAPMI BIT_ULL(55)
/*
+ * Adaptive PEBS v4
+ */
+
+struct pebs_basic {
+ u64 format_size;
+ u64 ip;
+ u64 applicable_counters;
+ u64 tsc;
+};
+
+struct pebs_meminfo {
+ u64 address;
+ u64 aux;
+ u64 latency;
+ u64 tsx_tuning;
+};
+
+struct pebs_gprs {
+ u64 flags, ip, ax, cx, dx, bx, sp, bp, si, di;
+ u64 r8, r9, r10, r11, r12, r13, r14, r15;
+};
+
+struct pebs_xmm {
+ u64 xmm[16*2]; /* two entries for each register */
+};
+
+struct pebs_lbr_entry {
+ u64 from, to, info;
+};
+
+struct pebs_lbr {
+ struct pebs_lbr_entry lbr[0]; /* Variable length */
+};
+
+/*
* IBS cpuid feature detection
*/
@@ -248,6 +291,11 @@ extern void perf_events_lapic_init(void);
#define PERF_EFLAGS_VM (1UL << 5)
struct pt_regs;
+struct x86_perf_regs {
+ struct pt_regs regs;
+ u64 *xmm_regs;
+};
+
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
@@ -260,14 +308,9 @@ extern unsigned long perf_misc_flags(struct pt_regs *regs);
*/
#define perf_arch_fetch_caller_regs(regs, __ip) { \
(regs)->ip = (__ip); \
- (regs)->bp = caller_frame_pointer(); \
+ (regs)->sp = (unsigned long)__builtin_frame_address(0); \
(regs)->cs = __KERNEL_CS; \
regs->flags = 0; \
- asm volatile( \
- _ASM_MOV "%%"_ASM_SP ", %0\n" \
- : "=m" ((regs)->sp) \
- :: "memory" \
- ); \
}
struct perf_guest_switch_msr {
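
A hedged sketch of how the size of an adaptive PEBS record follows from the group structs defined above. The helper is hypothetical, and the source of the group flags (the record's format_size field / MSR_PEBS_DATA_CFG) is simplified to plain booleans.

/* Sketch only: pebs_basic is always present, the other groups are optional. */
static unsigned long my_pebs_record_size(bool meminfo, bool gprs, bool xmms,
					 unsigned int nr_lbr)
{
	unsigned long sz = sizeof(struct pebs_basic);

	if (meminfo)
		sz += sizeof(struct pebs_meminfo);
	if (gprs)
		sz += sizeof(struct pebs_gprs);
	if (xmms)
		sz += sizeof(struct pebs_xmm);
	sz += nr_lbr * sizeof(struct pebs_lbr_entry);	/* variable-length tail */

	return sz;
}
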
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 2779ace16d23..5e0509b41986 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -23,6 +23,8 @@
#ifndef __ASSEMBLY__
#include <asm/x86_init.h>
+#include <asm/fpu/xstate.h>
+#include <asm/fpu/api.h>
extern pgd_t early_top_pgt[PTRS_PER_PGD];
int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
@@ -46,7 +48,7 @@ void ptdump_walk_user_pgd_level_checkwx(void);
*/
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__visible;
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
@@ -127,14 +129,29 @@ static inline int pte_dirty(pte_t pte)
static inline u32 read_pkru(void)
{
if (boot_cpu_has(X86_FEATURE_OSPKE))
- return __read_pkru();
+ return rdpkru();
return 0;
}
static inline void write_pkru(u32 pkru)
{
- if (boot_cpu_has(X86_FEATURE_OSPKE))
- __write_pkru(pkru);
+ struct pkru_state *pk;
+
+ if (!boot_cpu_has(X86_FEATURE_OSPKE))
+ return;
+
+ pk = get_xsave_addr(&current->thread.fpu.state.xsave, XFEATURE_PKRU);
+
+ /*
+ * The PKRU value in xstate needs to be in sync with the value that is
+ * written to the CPU. The FPU restore on return to userland would
+ * otherwise load the previous value again.
+ */
+ fpregs_lock();
+ if (pk)
+ pk->pkru = pkru;
+ __write_pkru(pkru);
+ fpregs_unlock();
}
static inline int pte_young(pte_t pte)
@@ -1021,6 +1038,9 @@ static inline void __meminit init_trampoline_default(void)
/* Default trampoline pgd value */
trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
}
+
+void __init poking_init(void);
+
# ifdef CONFIG_RANDOMIZE_MEMORY
void __meminit init_trampoline(void);
# else
@@ -1355,6 +1375,12 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd)
#define PKRU_WD_BIT 0x2
#define PKRU_BITS_PER_PKEY 2
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+extern u32 init_pkru_value;
+#else
+#define init_pkru_value 0
+#endif
+
static inline bool __pkru_allows_read(u32 pkru, u16 pkey)
{
int pkru_pkey_bits = pkey * PKRU_BITS_PER_PKEY;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 2bb3a648fc12..7e99ef67bff0 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
#define __KERNEL_TSS_LIMIT \
(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
+/* Per CPU interrupt stacks */
+struct irq_stack {
+ char stack[IRQ_STACK_SIZE];
+} __aligned(IRQ_STACK_SIZE);
+
+DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
+
#ifdef CONFIG_X86_32
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#else
@@ -374,38 +381,25 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
#endif
-/*
- * Save the original ist values for checking stack pointers during debugging
- */
-struct orig_ist {
- unsigned long ist[7];
-};
-
#ifdef CONFIG_X86_64
-DECLARE_PER_CPU(struct orig_ist, orig_ist);
-
-union irq_stack_union {
- char irq_stack[IRQ_STACK_SIZE];
+struct fixed_percpu_data {
/*
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
*/
- struct {
- char gs_base[40];
- unsigned long stack_canary;
- };
+ char gs_base[40];
+ unsigned long stack_canary;
};
-DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
-DECLARE_INIT_PER_CPU(irq_stack_union);
+DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
+DECLARE_INIT_PER_CPU(fixed_percpu_data);
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
{
- return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
+ return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
}
-DECLARE_PER_CPU(char *, irq_stack_ptr);
DECLARE_PER_CPU(unsigned int, irq_count);
extern asmlinkage void ignore_sysret(void);
@@ -427,15 +421,8 @@ struct stack_canary {
};
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
-/*
- * per-CPU IRQ handling stacks
- */
-struct irq_stack {
- u32 stack[THREAD_SIZE/sizeof(u32)];
-} __aligned(THREAD_SIZE);
-
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
-DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
+/* Per CPU softirq stack pointer */
+DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
#endif /* X86_64 */
extern unsigned int fpu_kernel_xstate_size;
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
deleted file mode 100644
index 4c25cf6caefa..000000000000
--- a/arch/x86/include/asm/rwsem.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
- *
- * Written by David Howells (dhowells@redhat.com).
- *
- * Derived from asm-x86/semaphore.h
- *
- *
- * The MSW of the count is the negated number of active writers and waiting
- * lockers, and the LSW is the total number of active locks
- *
- * The lock count is initialized to 0 (no active and no waiting lockers).
- *
- * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
- * uncontended lock. This can be determined because XADD returns the old value.
- * Readers increment by 1 and see a positive value when uncontended, negative
- * if there are writers (and maybe) readers waiting (in which case it goes to
- * sleep).
- *
- * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
- * be extended to 65534 by manually checking the whole MSW rather than relying
- * on the S flag.
- *
- * The value of ACTIVE_BIAS supports up to 65535 active processes.
- *
- * This should be totally fair - if anything is waiting, a process that wants a
- * lock will go to the back of the queue. When the currently active lock is
- * released, if there's a writer at the front of the queue, then that and only
- * that will be woken up; if there's a bunch of consecutive readers at the
- * front, then they'll all be woken up, but no other readers will be.
- */
-
-#ifndef _ASM_X86_RWSEM_H
-#define _ASM_X86_RWSEM_H
-
-#ifndef _LINUX_RWSEM_H
-#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
-#endif
-
-#ifdef __KERNEL__
-#include <asm/asm.h>
-
-/*
- * The bias values and the counter type limits the number of
- * potential readers/writers to 32767 for 32 bits and 2147483647
- * for 64 bits.
- */
-
-#ifdef CONFIG_X86_64
-# define RWSEM_ACTIVE_MASK 0xffffffffL
-#else
-# define RWSEM_ACTIVE_MASK 0x0000ffffL
-#endif
-
-#define RWSEM_UNLOCKED_VALUE 0x00000000L
-#define RWSEM_ACTIVE_BIAS 0x00000001L
-#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
-#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
-#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
-
-/*
- * lock for reading
- */
-#define ____down_read(sem, slow_path) \
-({ \
- struct rw_semaphore* ret; \
- asm volatile("# beginning down_read\n\t" \
- LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \
- /* adds 0x00000001 */ \
- " jns 1f\n" \
- " call " slow_path "\n" \
- "1:\n\t" \
- "# ending down_read\n\t" \
- : "+m" (sem->count), "=a" (ret), \
- ASM_CALL_CONSTRAINT \
- : [sem] "a" (sem) \
- : "memory", "cc"); \
- ret; \
-})
-
-static inline void __down_read(struct rw_semaphore *sem)
-{
- ____down_read(sem, "call_rwsem_down_read_failed");
-}
-
-static inline int __down_read_killable(struct rw_semaphore *sem)
-{
- if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable")))
- return -EINTR;
- return 0;
-}
-
-/*
- * trylock for reading -- returns 1 if successful, 0 if contention
- */
-static inline bool __down_read_trylock(struct rw_semaphore *sem)
-{
- long result, tmp;
- asm volatile("# beginning __down_read_trylock\n\t"
- " mov %[count],%[result]\n\t"
- "1:\n\t"
- " mov %[result],%[tmp]\n\t"
- " add %[inc],%[tmp]\n\t"
- " jle 2f\n\t"
- LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- "# ending __down_read_trylock\n\t"
- : [count] "+m" (sem->count), [result] "=&a" (result),
- [tmp] "=&r" (tmp)
- : [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
- return result >= 0;
-}
-
-/*
- * lock for writing
- */
-#define ____down_write(sem, slow_path) \
-({ \
- long tmp; \
- struct rw_semaphore* ret; \
- \
- asm volatile("# beginning down_write\n\t" \
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \
- /* adds 0xffff0001, returns the old value */ \
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
- /* was the active mask 0 before? */\
- " jz 1f\n" \
- " call " slow_path "\n" \
- "1:\n" \
- "# ending down_write" \
- : "+m" (sem->count), [tmp] "=d" (tmp), \
- "=a" (ret), ASM_CALL_CONSTRAINT \
- : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \
- : "memory", "cc"); \
- ret; \
-})
-
-static inline void __down_write(struct rw_semaphore *sem)
-{
- ____down_write(sem, "call_rwsem_down_write_failed");
-}
-
-static inline int __down_write_killable(struct rw_semaphore *sem)
-{
- if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
- return -EINTR;
-
- return 0;
-}
-
-/*
- * trylock for writing -- returns 1 if successful, 0 if contention
- */
-static inline bool __down_write_trylock(struct rw_semaphore *sem)
-{
- bool result;
- long tmp0, tmp1;
- asm volatile("# beginning __down_write_trylock\n\t"
- " mov %[count],%[tmp0]\n\t"
- "1:\n\t"
- " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
- /* was the active mask 0 before? */
- " jnz 2f\n\t"
- " mov %[tmp0],%[tmp1]\n\t"
- " add %[inc],%[tmp1]\n\t"
- LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t"
- " jnz 1b\n\t"
- "2:\n\t"
- CC_SET(e)
- "# ending __down_write_trylock\n\t"
- : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0),
- [tmp1] "=&r" (tmp1), CC_OUT(e) (result)
- : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS)
- : "memory");
- return result;
-}
-
-/*
- * unlock after reading
- */
-static inline void __up_read(struct rw_semaphore *sem)
-{
- long tmp;
- asm volatile("# beginning __up_read\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 1, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n"
- "# ending __up_read\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS)
- : "memory", "cc");
-}
-
-/*
- * unlock after writing
- */
-static inline void __up_write(struct rw_semaphore *sem)
-{
- long tmp;
- asm volatile("# beginning __up_write\n\t"
- LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
- /* subtracts 0xffff0001, returns the old value */
- " jns 1f\n\t"
- " call call_rwsem_wake\n" /* expects old value in %edx */
- "1:\n\t"
- "# ending __up_write\n"
- : "+m" (sem->count), [tmp] "=d" (tmp)
- : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS)
- : "memory", "cc");
-}
-
-/*
- * downgrade write lock to read lock
- */
-static inline void __downgrade_write(struct rw_semaphore *sem)
-{
- asm volatile("# beginning __downgrade_write\n\t"
- LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t"
- /*
- * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
- * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
- */
- " jns 1f\n\t"
- " call call_rwsem_downgrade_wake\n"
- "1:\n\t"
- "# ending __downgrade_write\n"
- : "+m" (sem->count)
- : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS)
- : "memory", "cc");
-}
-
-#endif /* __KERNEL__ */
-#endif /* _ASM_X86_RWSEM_H */
diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index 07a25753e85c..ae7b909dc242 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -85,6 +85,9 @@ int set_pages_nx(struct page *page, int numpages);
int set_pages_ro(struct page *page, int numpages);
int set_pages_rw(struct page *page, int numpages);
+int set_direct_map_invalid_noflush(struct page *page);
+int set_direct_map_default_noflush(struct page *page);
+
extern int kernel_set_to_readonly;
void set_kernel_text_rw(void);
void set_kernel_text_ro(void);
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index db333300bd4b..f94a7d0ddd49 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -13,13 +13,12 @@
#ifndef _ASM_X86_SMAP_H
#define _ASM_X86_SMAP_H
-#include <linux/stringify.h>
#include <asm/nops.h>
#include <asm/cpufeatures.h>
/* "Raw" instruction opcodes */
-#define __ASM_CLAC .byte 0x0f,0x01,0xca
-#define __ASM_STAC .byte 0x0f,0x01,0xcb
+#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
+#define __ASM_STAC ".byte 0x0f,0x01,0xcb"
#ifdef __ASSEMBLY__
@@ -28,10 +27,10 @@
#ifdef CONFIG_X86_SMAP
#define ASM_CLAC \
- ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+ ALTERNATIVE "", __ASM_CLAC, X86_FEATURE_SMAP
#define ASM_STAC \
- ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
+ ALTERNATIVE "", __ASM_STAC, X86_FEATURE_SMAP
#else /* CONFIG_X86_SMAP */
@@ -49,26 +48,46 @@
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+ alternative("", __ASM_CLAC, X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+ alternative("", __ASM_STAC, X86_FEATURE_SMAP);
+}
+
+static __always_inline unsigned long smap_save(void)
+{
+ unsigned long flags;
+
+ asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC,
+ X86_FEATURE_SMAP)
+ : "=rm" (flags) : : "memory", "cc");
+
+ return flags;
+}
+
+static __always_inline void smap_restore(unsigned long flags)
+{
+ asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+ : : "g" (flags) : "memory", "cc");
}
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __ASM_CLAC, X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __ASM_STAC, X86_FEATURE_SMAP)
#else /* CONFIG_X86_SMAP */
static inline void clac(void) { }
static inline void stac(void) { }
+static inline unsigned long smap_save(void) { return 0; }
+static inline void smap_restore(unsigned long flags) { }
+
#define ASM_CLAC
#define ASM_STAC
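
A hedged sketch of the new smap_save()/smap_restore() pair; the helper and its callback argument are made up, and per the uaccess.h change later in this diff these primitives back user_access_save()/user_access_restore().

/* Hypothetical helper; only smap_save()/smap_restore() come from the patch. */
static void my_run_with_ac_preserved(void (*fn)(void))
{
	unsigned long flags = smap_save();	/* save EFLAGS.AC, then CLAC */

	fn();					/* runs with SMAP protection active */

	smap_restore(flags);			/* restore EFLAGS, i.e. the old AC */
}
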
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 2e95b6c1bca3..da545df207b2 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void calculate_max_logical_packages(void);
void native_smp_cpus_done(unsigned int max_cpus);
-void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
+int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
int common_cpu_die(unsigned int cpu);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 43c029cdc3fe..0a3c4cab39db 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -92,7 +92,7 @@ static inline void native_write_cr8(unsigned long val)
#endif
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
-static inline u32 __read_pkru(void)
+static inline u32 rdpkru(void)
{
u32 ecx = 0;
u32 edx, pkru;
@@ -107,7 +107,7 @@ static inline u32 __read_pkru(void)
return pkru;
}
-static inline void __write_pkru(u32 pkru)
+static inline void wrpkru(u32 pkru)
{
u32 ecx = 0, edx = 0;
@@ -118,8 +118,21 @@ static inline void __write_pkru(u32 pkru)
asm volatile(".byte 0x0f,0x01,0xef\n\t"
: : "a" (pkru), "c"(ecx), "d"(edx));
}
+
+static inline void __write_pkru(u32 pkru)
+{
+ /*
+ * WRPKRU is relatively expensive compared to RDPKRU.
+ * Avoid WRPKRU when it would not change the value.
+ */
+ if (pkru == rdpkru())
+ return;
+
+ wrpkru(pkru);
+}
+
#else
-static inline u32 __read_pkru(void)
+static inline u32 rdpkru(void)
{
return 0;
}
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 8ec97a62c245..91e29b6a86a5 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -13,7 +13,7 @@
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
- * irq_stack_union which contains stack_canary at offset 40. Userland
+ * fixed_percpu_data which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
*
@@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void)
u64 tsc;
#ifdef CONFIG_X86_64
- BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
+ BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
#endif
/*
* We both use the random pool and the current TSC as a source
@@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void)
current->stack_canary = canary;
#ifdef CONFIG_X86_64
- this_cpu_write(irq_stack_union.stack_canary, canary);
+ this_cpu_write(fixed_percpu_data.stack_canary, canary);
#else
this_cpu_write(stack_canary.canary, canary);
#endif
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index f335aad404a4..a8d0cdf48616 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -9,6 +9,8 @@
#include <linux/uaccess.h>
#include <linux/ptrace.h>
+
+#include <asm/cpu_entry_area.h>
#include <asm/switch_to.h>
enum stack_type {
@@ -98,19 +100,6 @@ struct stack_frame_ia32 {
u32 return_address;
};
-static inline unsigned long caller_frame_pointer(void)
-{
- struct stack_frame *frame;
-
- frame = __builtin_frame_address(0);
-
-#ifdef CONFIG_FRAME_POINTER
- frame = frame->next_frame;
-#endif
-
- return (unsigned long)frame;
-}
-
void show_opcodes(struct pt_regs *regs, const char *loglvl);
void show_ip(struct pt_regs *regs, const char *loglvl);
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 7cf1a270d891..18a4b6890fa8 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -46,6 +46,7 @@ struct inactive_task_frame {
unsigned long r13;
unsigned long r12;
#else
+ unsigned long flags;
unsigned long si;
unsigned long di;
#endif
diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h
index 2fe745356fb1..6d8d6bc183b7 100644
--- a/arch/x86/include/asm/sync_bitops.h
+++ b/arch/x86/include/asm/sync_bitops.h
@@ -14,6 +14,8 @@
* bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1).
*/
+#include <asm/rmwcc.h>
+
#define ADDR (*(volatile long *)addr)
/**
@@ -29,7 +31,7 @@
*/
static inline void sync_set_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; bts %1,%0"
+ asm volatile("lock; " __ASM_SIZE(bts) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -47,7 +49,7 @@ static inline void sync_set_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; btr %1,%0"
+ asm volatile("lock; " __ASM_SIZE(btr) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -64,7 +66,7 @@ static inline void sync_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline void sync_change_bit(long nr, volatile unsigned long *addr)
{
- asm volatile("lock; btc %1,%0"
+ asm volatile("lock; " __ASM_SIZE(btc) " %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
@@ -78,14 +80,9 @@ static inline void sync_change_bit(long nr, volatile unsigned long *addr)
* This operation is atomic and cannot be reordered.
* It also implies a memory barrier.
*/
-static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
+static inline bool sync_test_and_set_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; bts %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(bts), *addr, c, "Ir", nr);
}
/**
@@ -98,12 +95,7 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; btr %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btr), *addr, c, "Ir", nr);
}
/**
@@ -116,12 +108,7 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr)
*/
static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr)
{
- unsigned char oldbit;
-
- asm volatile("lock; btc %2,%1\n\tsetc %0"
- : "=qm" (oldbit), "+m" (ADDR)
- : "Ir" (nr) : "memory");
- return oldbit;
+ return GEN_BINARY_RMWcc("lock; " __ASM_SIZE(btc), *addr, c, "Ir", nr);
}
#define sync_test_bit(nr, addr) test_bit(nr, addr)
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 4c305471ec33..b05ad16174e5 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -105,7 +105,7 @@ static inline void syscall_set_arguments(struct task_struct *task,
memcpy(&regs->bx + i, args, n * sizeof(args[0]));
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_I386;
}
@@ -160,10 +160,12 @@ static inline void syscall_set_arguments(struct task_struct *task,
}
}
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
/* x32 tasks should be considered AUDIT_ARCH_X86_64. */
- return in_ia32_syscall() ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
+ return (IS_ENABLED(CONFIG_IA32_EMULATION) &&
+ task->thread_info.status & TS_COMPAT)
+ ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
}
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index e85ff65c43c3..c90678fd391a 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -18,7 +18,7 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define __parainstructions_end NULL
#endif
-extern void *text_poke_early(void *addr, const void *opcode, size_t len);
+extern void text_poke_early(void *addr, const void *opcode, size_t len);
/*
* Clear and restore the kernel write-protection flag on the local CPU.
@@ -35,8 +35,11 @@ extern void *text_poke_early(void *addr, const void *opcode, size_t len);
* inconsistent instruction while you patch.
*/
extern void *text_poke(void *addr, const void *opcode, size_t len);
+extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
-extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
extern int after_bootmem;
+extern __ro_after_init struct mm_struct *poking_mm;
+extern __ro_after_init unsigned long poking_addr;
#endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0eccbcb8447..f9453536f9bb 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -88,6 +88,7 @@ struct thread_info {
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_PATCH_PENDING 13 /* pending live patching update */
+#define TIF_NEED_FPU_LOAD 14 /* load FPU on return to userspace */
#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
@@ -117,6 +118,7 @@ struct thread_info {
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING)
+#define _TIF_NEED_FPU_LOAD (1 << TIF_NEED_FPU_LOAD)
#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
diff --git a/arch/x86/include/asm/tlb.h b/arch/x86/include/asm/tlb.h
index 404b8b1d44f5..f23e7aaff4cd 100644
--- a/arch/x86/include/asm/tlb.h
+++ b/arch/x86/include/asm/tlb.h
@@ -6,6 +6,7 @@
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
+#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
#include <asm-generic/tlb.h>
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index f4204bf377fc..dee375831962 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -167,7 +167,7 @@ struct tlb_state {
*/
struct mm_struct *loaded_mm;
-#define LOADED_MM_SWITCHING ((struct mm_struct *)1)
+#define LOADED_MM_SWITCHING ((struct mm_struct *)1UL)
/* Last user mm for optimizing IBPB */
union {
@@ -274,6 +274,8 @@ static inline bool nmi_uaccess_okay(void)
return true;
}
+#define nmi_uaccess_okay nmi_uaccess_okay
+
/* Initialize cr4 shadow for this CPU. */
static inline void cr4_init_shadow(void)
{
diff --git a/arch/x86/include/asm/trace/exceptions.h b/arch/x86/include/asm/trace/exceptions.h
index e0e6d7f21399..6b1e87194809 100644
--- a/arch/x86/include/asm/trace/exceptions.h
+++ b/arch/x86/include/asm/trace/exceptions.h
@@ -30,7 +30,7 @@ DECLARE_EVENT_CLASS(x86_exceptions,
__entry->error_code = error_code;
),
- TP_printk("address=%pf ip=%pf error_code=0x%lx",
+ TP_printk("address=%ps ip=%ps error_code=0x%lx",
(void *)__entry->address, (void *)__entry->ip,
__entry->error_code) );
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index 069c04be1507..879b77792f94 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -13,22 +13,22 @@ DECLARE_EVENT_CLASS(x86_fpu,
TP_STRUCT__entry(
__field(struct fpu *, fpu)
- __field(bool, initialized)
+ __field(bool, load_fpu)
__field(u64, xfeatures)
__field(u64, xcomp_bv)
),
TP_fast_assign(
__entry->fpu = fpu;
- __entry->initialized = fpu->initialized;
+ __entry->load_fpu = test_thread_flag(TIF_NEED_FPU_LOAD);
if (boot_cpu_has(X86_FEATURE_OSXSAVE)) {
__entry->xfeatures = fpu->state.xsave.header.xfeatures;
__entry->xcomp_bv = fpu->state.xsave.header.xcomp_bv;
}
),
- TP_printk("x86/fpu: %p initialized: %d xfeatures: %llx xcomp_bv: %llx",
+ TP_printk("x86/fpu: %p load: %d xfeatures: %llx xcomp_bv: %llx",
__entry->fpu,
- __entry->initialized,
+ __entry->load_fpu,
__entry->xfeatures,
__entry->xcomp_bv
)
@@ -64,11 +64,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_regs_deactivated,
TP_ARGS(fpu)
);
-DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
- TP_PROTO(struct fpu *fpu),
- TP_ARGS(fpu)
-);
-
DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
TP_PROTO(struct fpu *fpu),
TP_ARGS(fpu)
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 1954dd5552a2..c82abd6e4ca3 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -427,10 +427,11 @@ do { \
({ \
__label__ __pu_label; \
int __pu_err = -EFAULT; \
- __typeof__(*(ptr)) __pu_val; \
- __pu_val = x; \
+ __typeof__(*(ptr)) __pu_val = (x); \
+ __typeof__(ptr) __pu_ptr = (ptr); \
+ __typeof__(size) __pu_size = (size); \
__uaccess_begin(); \
- __put_user_size(__pu_val, (ptr), (size), __pu_label); \
+ __put_user_size(__pu_val, __pu_ptr, __pu_size, __pu_label); \
__pu_err = 0; \
__pu_label: \
__uaccess_end(); \
@@ -585,7 +586,6 @@ extern void __cmpxchg_wrong_size(void)
#define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \
({ \
int __ret = 0; \
- __typeof__(ptr) __uval = (uval); \
__typeof__(*(ptr)) __old = (old); \
__typeof__(*(ptr)) __new = (new); \
__uaccess_begin_nospec(); \
@@ -661,7 +661,7 @@ extern void __cmpxchg_wrong_size(void)
__cmpxchg_wrong_size(); \
} \
__uaccess_end(); \
- *__uval = __old; \
+ *(uval) = __old; \
__ret; \
})
@@ -705,7 +705,7 @@ extern struct movsl_mask {
* checking before using them, but you have to surround them with the
* user_access_begin/end() pair.
*/
-static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
+static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len)
{
if (unlikely(!access_ok(ptr,len)))
return 0;
@@ -715,6 +715,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t
#define user_access_begin(a,b) user_access_begin(a,b)
#define user_access_end() __uaccess_end()
+#define user_access_save() smap_save()
+#define user_access_restore(x) smap_restore(x)
+
#define unsafe_put_user(x, ptr, label) \
__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
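
A hedged sketch of the user_access_begin()/unsafe_put_user()/user_access_end() pattern shown above. The function, its arguments and the error label name are made up; the begin/unsafe/end structure and the requirement to close the region on the error path follow the API as defined here.

/* Illustrative pattern only. */
static int my_put_two_words(u32 __user *uptr, u32 a, u32 b)
{
	if (!user_access_begin(uptr, 2 * sizeof(u32)))
		return -EFAULT;

	unsafe_put_user(a, &uptr[0], efault);
	unsafe_put_user(b, &uptr[1], efault);

	user_access_end();
	return 0;

efault:
	user_access_end();
	return -EFAULT;
}
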
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index a9d637bc301d..5cd1caa8bc65 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -208,9 +208,6 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
}
unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long
mcsafe_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 2863c2026655..d50c7b747d8b 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -217,6 +217,22 @@ xen_single_call(unsigned int call,
return (long)__res;
}
+static __always_inline void __xen_stac(void)
+{
+ /*
+ * Keep objtool from seeing the STAC/CLAC and getting confused about
+ * the apparent possibility of calling random code with AC=1.
+ */
+ asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
+ ASM_STAC ::: "memory", "flags");
+}
+
+static __always_inline void __xen_clac(void)
+{
+ asm volatile(ANNOTATE_IGNORE_ALTERNATIVE
+ ASM_CLAC ::: "memory", "flags");
+}
+
static inline long
privcmd_call(unsigned int call,
unsigned long a1, unsigned long a2,
@@ -225,9 +241,9 @@ privcmd_call(unsigned int call,
{
long res;
- stac();
+ __xen_stac();
res = xen_single_call(call, a1, a2, a3, a4, a5);
- clac();
+ __xen_clac();
return res;
}
@@ -424,9 +440,9 @@ HYPERVISOR_dm_op(
domid_t dom, unsigned int nr_bufs, struct xen_dm_op_buf *bufs)
{
int ret;
- stac();
+ __xen_stac();
ret = _hypercall3(int, dm_op, dom, nr_bufs, bufs);
- clac();
+ __xen_clac();
return ret;
}
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index dabfcf7c3941..7a0e64ccd6ff 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -381,6 +381,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index f3329cabce5c..ac67bbea10ca 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -27,8 +27,29 @@ enum perf_event_x86_regs {
PERF_REG_X86_R13,
PERF_REG_X86_R14,
PERF_REG_X86_R15,
-
+ /* These are the limits for the GPRs. */
PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+
+ /* These all need two bits set because they are 128-bit */
+ PERF_REG_X86_XMM0 = 32,
+ PERF_REG_X86_XMM1 = 34,
+ PERF_REG_X86_XMM2 = 36,
+ PERF_REG_X86_XMM3 = 38,
+ PERF_REG_X86_XMM4 = 40,
+ PERF_REG_X86_XMM5 = 42,
+ PERF_REG_X86_XMM6 = 44,
+ PERF_REG_X86_XMM7 = 46,
+ PERF_REG_X86_XMM8 = 48,
+ PERF_REG_X86_XMM9 = 50,
+ PERF_REG_X86_XMM10 = 52,
+ PERF_REG_X86_XMM11 = 54,
+ PERF_REG_X86_XMM12 = 56,
+ PERF_REG_X86_XMM13 = 58,
+ PERF_REG_X86_XMM14 = 60,
+ PERF_REG_X86_XMM15 = 62,
+
+ /* These include both GPRs and XMM registers */
+ PERF_REG_X86_XMM_MAX = PERF_REG_X86_XMM15 + 2,
};
#endif /* _ASM_X86_PERF_REGS_H */
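
Since each 128-bit XMM register occupies two consecutive 64-bit sample slots starting at index 32, the slot index of a given XMM register can be computed rather than looked up. A small helper of this kind (illustrative only, not part of the UAPI header) would be:

    /* Illustrative: first sample slot of XMMn; XMM1 -> 34, XMM15 -> 62. */
    #define PERF_REG_X86_XMM(n)	(PERF_REG_X86_XMM0 + 2 * (n))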
diff --git a/arch/x86/include/uapi/asm/sockios.h b/arch/x86/include/uapi/asm/sockios.h
deleted file mode 100644
index def6d4746ee7..000000000000
--- a/arch/x86/include/uapi/asm/sockios.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/sockios.h>
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 8dcbf6890714..9fc92e4539d8 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -197,7 +197,7 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
}
static int __init
-acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
+acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end)
{
struct acpi_madt_local_x2apic *processor = NULL;
#ifdef CONFIG_X86_X2APIC
@@ -210,7 +210,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
#ifdef CONFIG_X86_X2APIC
apic_id = processor->local_apic_id;
@@ -242,7 +242,7 @@ acpi_parse_x2apic(struct acpi_subtable_header *header, const unsigned long end)
}
static int __init
-acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lapic(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_local_apic *processor = NULL;
@@ -251,7 +251,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
/* Ignore invalid ID */
if (processor->id == 0xff)
@@ -272,7 +272,7 @@ acpi_parse_lapic(struct acpi_subtable_header * header, const unsigned long end)
}
static int __init
-acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
+acpi_parse_sapic(union acpi_subtable_headers *header, const unsigned long end)
{
struct acpi_madt_local_sapic *processor = NULL;
@@ -281,7 +281,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
if (BAD_MADT_ENTRY(processor, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
acpi_register_lapic((processor->id << 8) | processor->eid,/* APIC ID */
processor->processor_id, /* ACPI ID */
@@ -291,7 +291,7 @@ acpi_parse_sapic(struct acpi_subtable_header *header, const unsigned long end)
}
static int __init
-acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
+acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_local_apic_override *lapic_addr_ovr = NULL;
@@ -301,7 +301,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
if (BAD_MADT_ENTRY(lapic_addr_ovr, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
acpi_lapic_addr = lapic_addr_ovr->address;
@@ -309,7 +309,7 @@ acpi_parse_lapic_addr_ovr(struct acpi_subtable_header * header,
}
static int __init
-acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
+acpi_parse_x2apic_nmi(union acpi_subtable_headers *header,
const unsigned long end)
{
struct acpi_madt_local_x2apic_nmi *x2apic_nmi = NULL;
@@ -319,7 +319,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
if (BAD_MADT_ENTRY(x2apic_nmi, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
if (x2apic_nmi->lint != 1)
printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
@@ -328,7 +328,7 @@ acpi_parse_x2apic_nmi(struct acpi_subtable_header *header,
}
static int __init
-acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_local_apic_nmi *lapic_nmi = NULL;
@@ -337,7 +337,7 @@ acpi_parse_lapic_nmi(struct acpi_subtable_header * header, const unsigned long e
if (BAD_MADT_ENTRY(lapic_nmi, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
if (lapic_nmi->lint != 1)
printk(KERN_WARNING PREFIX "NMI not connected to LINT 1!\n");
@@ -449,7 +449,7 @@ static int __init mp_register_ioapic_irq(u8 bus_irq, u8 polarity,
}
static int __init
-acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_ioapic(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_io_apic *ioapic = NULL;
struct ioapic_domain_cfg cfg = {
@@ -462,7 +462,7 @@ acpi_parse_ioapic(struct acpi_subtable_header * header, const unsigned long end)
if (BAD_MADT_ENTRY(ioapic, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
/* Statically assign IRQ numbers for IOAPICs hosting legacy IRQs */
if (ioapic->global_irq_base < nr_legacy_irqs())
@@ -508,7 +508,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger,
}
static int __init
-acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
+acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
const unsigned long end)
{
struct acpi_madt_interrupt_override *intsrc = NULL;
@@ -518,7 +518,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
if (BAD_MADT_ENTRY(intsrc, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
acpi_sci_ioapic_setup(intsrc->source_irq,
@@ -550,7 +550,7 @@ acpi_parse_int_src_ovr(struct acpi_subtable_header * header,
}
static int __init
-acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end)
+acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end)
{
struct acpi_madt_nmi_source *nmi_src = NULL;
@@ -559,7 +559,7 @@ acpi_parse_nmi_src(struct acpi_subtable_header * header, const unsigned long end
if (BAD_MADT_ENTRY(nmi_src, end))
return -EINVAL;
- acpi_table_print_madt_entry(header);
+ acpi_table_print_madt_entry(&header->common);
/* TBD: Support nimsrc entries? */
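
For reference, all of the MADT handlers above now take a union acpi_subtable_headers pointer and hand its common member on to acpi_table_print_madt_entry(). The union itself is introduced elsewhere in this series (in include/linux/acpi.h); roughly:

    /* Sketch of the union assumed by the new handler signatures. */
    union acpi_subtable_headers {
    	struct acpi_subtable_header common;
    	struct acpi_hmat_structure hmat;
    };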
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c
index 158ad1483c43..cb6e076a6d39 100644
--- a/arch/x86/kernel/acpi/cstate.c
+++ b/arch/x86/kernel/acpi/cstate.c
@@ -51,6 +51,18 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
if (c->x86_vendor == X86_VENDOR_INTEL &&
(c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
flags->bm_control = 0;
+ /*
+ * For all recent Centaur CPUs, the ucode will make sure that each
+ * core can keep cache coherence with each other while entering C3
+ * type state. So, set bm_check to 1 to indicate that the kernel
+ * doesn't need to execute a cache flush operation (WBINVD) when
+ * entering C3 type state.
+ */
+ if (c->x86_vendor == X86_VENDOR_CENTAUR) {
+ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model == 0x0f &&
+ c->x86_stepping >= 0x0e))
+ flags->bm_check = 1;
+ }
}
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9a79c7808f9c..7b9b49dfc05a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/kdebug.h>
#include <linux/kprobes.h>
+#include <linux/mmu_context.h>
#include <asm/text-patching.h>
#include <asm/alternative.h>
#include <asm/sections.h>
@@ -264,7 +265,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
-void *text_poke_early(void *addr, const void *opcode, size_t len);
+void text_poke_early(void *addr, const void *opcode, size_t len);
/*
* Are we looking at a near JMP with a 1 or 4-byte displacement.
@@ -666,16 +667,136 @@ void __init alternative_instructions(void)
* instructions. And on the local CPU you need to be protected against NMI or MCE
* handlers seeing an inconsistent instruction while you patch.
*/
-void *__init_or_module text_poke_early(void *addr, const void *opcode,
- size_t len)
+void __init_or_module text_poke_early(void *addr, const void *opcode,
+ size_t len)
{
unsigned long flags;
+
+ if (boot_cpu_has(X86_FEATURE_NX) &&
+ is_module_text_address((unsigned long)addr)) {
+ /*
+ * Modules text is marked initially as non-executable, so the
+ * code cannot be running and speculative code-fetches are
+ * prevented. Just change the code.
+ */
+ memcpy(addr, opcode, len);
+ } else {
+ local_irq_save(flags);
+ memcpy(addr, opcode, len);
+ local_irq_restore(flags);
+ sync_core();
+
+ /*
+ * Could also do a CLFLUSH here to speed up CPU recovery; but
+ * that causes hangs on some VIA CPUs.
+ */
+ }
+}
+
+__ro_after_init struct mm_struct *poking_mm;
+__ro_after_init unsigned long poking_addr;
+
+static void *__text_poke(void *addr, const void *opcode, size_t len)
+{
+ bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
+ struct page *pages[2] = {NULL};
+ temp_mm_state_t prev;
+ unsigned long flags;
+ pte_t pte, *ptep;
+ spinlock_t *ptl;
+ pgprot_t pgprot;
+
+ /*
+ * While boot memory allocator is running we cannot use struct pages as
+ * they are not yet initialized. There is no way to recover.
+ */
+ BUG_ON(!after_bootmem);
+
+ if (!core_kernel_text((unsigned long)addr)) {
+ pages[0] = vmalloc_to_page(addr);
+ if (cross_page_boundary)
+ pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
+ } else {
+ pages[0] = virt_to_page(addr);
+ WARN_ON(!PageReserved(pages[0]));
+ if (cross_page_boundary)
+ pages[1] = virt_to_page(addr + PAGE_SIZE);
+ }
+ /*
+ * If something went wrong, crash and burn since recovery paths are not
+ * implemented.
+ */
+ BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
+
local_irq_save(flags);
- memcpy(addr, opcode, len);
+
+ /*
+ * Map the page without the global bit, as TLB flushing is done with
+ * flush_tlb_mm_range(), which is intended for non-global PTEs.
+ */
+ pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);
+
+ /*
+ * The lock is not really needed, but this allows us to avoid open-coding.
+ */
+ ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+
+ /*
+ * This must not fail; preallocated in poking_init().
+ */
+ VM_BUG_ON(!ptep);
+
+ pte = mk_pte(pages[0], pgprot);
+ set_pte_at(poking_mm, poking_addr, ptep, pte);
+
+ if (cross_page_boundary) {
+ pte = mk_pte(pages[1], pgprot);
+ set_pte_at(poking_mm, poking_addr + PAGE_SIZE, ptep + 1, pte);
+ }
+
+ /*
+ * Loading the temporary mm behaves as a compiler barrier, which
+ * guarantees that the PTE will be set at the time memcpy() is done.
+ */
+ prev = use_temporary_mm(poking_mm);
+
+ kasan_disable_current();
+ memcpy((u8 *)poking_addr + offset_in_page(addr), opcode, len);
+ kasan_enable_current();
+
+ /*
+ * Ensure that the PTE is only cleared after the instructions of memcpy
+ * were issued by using a compiler barrier.
+ */
+ barrier();
+
+ pte_clear(poking_mm, poking_addr, ptep);
+ if (cross_page_boundary)
+ pte_clear(poking_mm, poking_addr + PAGE_SIZE, ptep + 1);
+
+ /*
+ * Loading the previous page-table hierarchy requires a serializing
+ * instruction that already allows the core to see the updated version.
+ * Xen-PV is assumed to serialize execution in a similar manner.
+ */
+ unuse_temporary_mm(prev);
+
+ /*
+ * Flushing the TLB might involve IPIs, which would require enabled
+ * IRQs, but not if the mm is not used, as is the case at this point.
+ */
+ flush_tlb_mm_range(poking_mm, poking_addr, poking_addr +
+ (cross_page_boundary ? 2 : 1) * PAGE_SIZE,
+ PAGE_SHIFT, false);
+
+ /*
+ * If the text does not match what we just wrote then something is
+ * fundamentally screwy; there's nothing we can really do about that.
+ */
+ BUG_ON(memcmp(addr, opcode, len));
+
+ pte_unmap_unlock(ptep, ptl);
local_irq_restore(flags);
- sync_core();
- /* Could also do a CLFLUSH here to speed up CPU recovery; but
- that causes hangs on some VIA CPUs. */
return addr;
}
@@ -689,48 +810,36 @@ void *__init_or_module text_poke_early(void *addr, const void *opcode,
* It means the size must be writable atomically and the address must be aligned
* in a way that permits an atomic write. It also makes sure we fit on a single
* page.
+ *
+ * Note that the caller must ensure that if the modified code is part of a
+ * module, the module is not removed during poking. This can be achieved
+ * by registering a module notifier, and ordering module removal and patching
+ * through a mutex.
*/
void *text_poke(void *addr, const void *opcode, size_t len)
{
- unsigned long flags;
- char *vaddr;
- struct page *pages[2];
- int i;
-
- /*
- * While boot memory allocator is runnig we cannot use struct
- * pages as they are not yet initialized.
- */
- BUG_ON(!after_bootmem);
-
lockdep_assert_held(&text_mutex);
- if (!core_kernel_text((unsigned long)addr)) {
- pages[0] = vmalloc_to_page(addr);
- pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
- } else {
- pages[0] = virt_to_page(addr);
- WARN_ON(!PageReserved(pages[0]));
- pages[1] = virt_to_page(addr + PAGE_SIZE);
- }
- BUG_ON(!pages[0]);
- local_irq_save(flags);
- set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
- if (pages[1])
- set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
- vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
- memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
- clear_fixmap(FIX_TEXT_POKE0);
- if (pages[1])
- clear_fixmap(FIX_TEXT_POKE1);
- local_flush_tlb();
- sync_core();
- /* Could also do a CLFLUSH here to speed up CPU recovery; but
- that causes hangs on some VIA CPUs. */
- for (i = 0; i < len; i++)
- BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
- local_irq_restore(flags);
- return addr;
+ return __text_poke(addr, opcode, len);
+}
+
+/**
+ * text_poke_kgdb - Update instructions on a live kernel by kgdb
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy
+ *
+ * Only atomic text poke/set should be allowed when not doing early patching.
+ * It means the size must be writable atomically and the address must be aligned
+ * in a way that permits an atomic write. It also makes sure we fit on a single
+ * page.
+ *
+ * Context: should only be used by kgdb, which ensures no other core is running,
+ * despite the fact it does not hold the text_mutex.
+ */
+void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
+{
+ return __text_poke(addr, opcode, len);
}
static void do_sync_core(void *info)
@@ -788,7 +897,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* replacing opcode
* - sync cores
*/
-void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
{
unsigned char int3 = 0xcc;
@@ -830,7 +939,5 @@ void *text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
* the writing of the new instruction.
*/
bp_patching_in_progress = false;
-
- return addr;
}
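
As a usage sketch (assuming the caller has already resolved the target address), text_poke() is meant to be called with text_mutex held from normal kernel context, while text_poke_kgdb() is reserved for kgdb, which stops the other CPUs itself:

    /* Illustrative only: overwrite five bytes at 'addr' with a 5-byte NOP
     * while serializing against other patchers via text_mutex. */
    static void example_patch_nop5(void *addr)
    {
    	static const u8 nop5[] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };

    	mutex_lock(&text_mutex);
    	text_poke(addr, nop5, sizeof(nop5));
    	mutex_unlock(&text_mutex);
    }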
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 2c0aa34af69c..bf7f13ea3c64 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -233,9 +233,6 @@ static dma_addr_t gart_map_page(struct device *dev, struct page *page,
unsigned long bus;
phys_addr_t paddr = page_to_phys(page) + offset;
- if (!dev)
- dev = &x86_dma_fallback_dev;
-
if (!need_iommu(dev, paddr, size))
return paddr;
@@ -392,9 +389,6 @@ static int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents,
if (nents == 0)
return 0;
- if (!dev)
- dev = &x86_dma_fallback_dev;
-
out = 0;
start = 0;
start_sg = sg;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index b7bcdd781651..ab6af775f06c 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -802,6 +802,24 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
return 0;
}
+static int __init lapic_init_clockevent(void)
+{
+ if (!lapic_timer_frequency)
+ return -1;
+
+ /* Calculate the scaled math multiplication factor */
+ lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
+ TICK_NSEC, lapic_clockevent.shift);
+ lapic_clockevent.max_delta_ns =
+ clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
+ lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
+ lapic_clockevent.min_delta_ns =
+ clockevent_delta2ns(0xF, &lapic_clockevent);
+ lapic_clockevent.min_delta_ticks = 0xF;
+
+ return 0;
+}
+
static int __init calibrate_APIC_clock(void)
{
struct clock_event_device *levt = this_cpu_ptr(&lapic_events);
@@ -810,25 +828,21 @@ static int __init calibrate_APIC_clock(void)
long delta, deltatsc;
int pm_referenced = 0;
- /**
- * check if lapic timer has already been calibrated by platform
- * specific routine, such as tsc calibration code. if so, we just fill
+ if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
+ return 0;
+
+ /*
+ * Check if lapic timer has already been calibrated by platform
+ * specific routine, such as tsc calibration code. If so just fill
* in the clockevent structure and return.
*/
-
- if (boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER)) {
- return 0;
- } else if (lapic_timer_frequency) {
+ if (!lapic_init_clockevent()) {
apic_printk(APIC_VERBOSE, "lapic timer already calibrated %d\n",
- lapic_timer_frequency);
- lapic_clockevent.mult = div_sc(lapic_timer_frequency/APIC_DIVISOR,
- TICK_NSEC, lapic_clockevent.shift);
- lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFF, &lapic_clockevent);
- lapic_clockevent.max_delta_ticks = 0x7FFFFF;
- lapic_clockevent.min_delta_ns =
- clockevent_delta2ns(0xF, &lapic_clockevent);
- lapic_clockevent.min_delta_ticks = 0xF;
+ lapic_timer_frequency);
+ /*
+ * Direct calibration methods must have an always running
+ * local APIC timer, no need for broadcast timer.
+ */
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY;
return 0;
}
@@ -869,17 +883,8 @@ static int __init calibrate_APIC_clock(void)
pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
&delta, &deltatsc);
- /* Calculate the scaled math multiplication factor */
- lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
- lapic_clockevent.shift);
- lapic_clockevent.max_delta_ns =
- clockevent_delta2ns(0x7FFFFFFF, &lapic_clockevent);
- lapic_clockevent.max_delta_ticks = 0x7FFFFFFF;
- lapic_clockevent.min_delta_ns =
- clockevent_delta2ns(0xF, &lapic_clockevent);
- lapic_clockevent.min_delta_ticks = 0xF;
-
lapic_timer_frequency = (delta * APIC_DIVISOR) / LAPIC_CAL_LOOPS;
+ lapic_init_clockevent();
apic_printk(APIC_VERBOSE, "..... delta %ld\n", delta);
apic_printk(APIC_VERBOSE, "..... mult: %u\n", lapic_clockevent.mult);
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index 78778b54f904..a5464b8b6c46 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -175,7 +175,7 @@ static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
- if (static_cpu_has(X86_FEATURE_NODEID_MSR)) {
+ if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
rdmsrl(MSR_FAM10H_NODE_ID, val);
nodes = ((val >> 3) & 7) + 1;
}
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index ddced33184b5..d3d075226c0a 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -68,10 +68,12 @@ int main(void)
#undef ENTRY
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+ DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) -
+ offsetof(struct cea_exception_stacks, DB1_stack));
BLANK();
#ifdef CONFIG_STACKPROTECTOR
- DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
+ DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
BLANK();
#endif
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index cfd24f9f7614..1796d2bdcaaa 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
-obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o
+obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o intel_epb.o
obj-$(CONFIG_CPU_SUP_AMD) += amd.o
obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o
obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 01004bfb1a1b..fb6a64bd765f 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -82,11 +82,14 @@ static inline int wrmsrl_amd_safe(unsigned msr, unsigned long long val)
* performance at the same time..
*/
+#ifdef CONFIG_X86_32
extern __visible void vide(void);
-__asm__(".globl vide\n"
+__asm__(".text\n"
+ ".globl vide\n"
".type vide, @function\n"
".align 4\n"
"vide: ret\n");
+#endif
static void init_amd_k5(struct cpuinfo_x86 *c)
{
diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c
index 804c49493938..64d5aec24203 100644
--- a/arch/x86/kernel/cpu/aperfmperf.c
+++ b/arch/x86/kernel/cpu/aperfmperf.c
@@ -83,7 +83,7 @@ unsigned int aperfmperf_get_khz(int cpu)
if (!cpu_khz)
return 0;
- if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
@@ -99,7 +99,7 @@ void arch_freq_prepare_all(void)
if (!cpu_khz)
return;
- if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return;
for_each_online_cpu(cpu)
@@ -115,7 +115,7 @@ unsigned int arch_freq_get_on_cpu(int cpu)
if (!cpu_khz)
return 0;
- if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+ if (!boot_cpu_has(X86_FEATURE_APERFMPERF))
return 0;
if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b91b3bfa5cfb..29630393f300 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -440,7 +440,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
char arg[20];
int ret, i;
- if (cmdline_find_option_bool(boot_command_line, "nospectre_v2"))
+ if (cmdline_find_option_bool(boot_command_line, "nospectre_v2") ||
+ cpu_mitigations_off())
return SPECTRE_V2_CMD_NONE;
ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg));
@@ -672,7 +673,8 @@ static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
char arg[20];
int ret, i;
- if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable") ||
+ cpu_mitigations_off()) {
return SPEC_STORE_BYPASS_CMD_NONE;
} else {
ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
@@ -1008,6 +1010,11 @@ static void __init l1tf_select_mitigation(void)
if (!boot_cpu_has_bug(X86_BUG_L1TF))
return;
+ if (cpu_mitigations_off())
+ l1tf_mitigation = L1TF_MITIGATION_OFF;
+ else if (cpu_mitigations_auto_nosmt())
+ l1tf_mitigation = L1TF_MITIGATION_FLUSH_NOSMT;
+
override_cache_bits(&boot_cpu_data);
switch (l1tf_mitigation) {
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cb28e98a0659..8739bdfe9bdf 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -372,6 +372,8 @@ static bool pku_disabled;
static __always_inline void setup_pku(struct cpuinfo_x86 *c)
{
+ struct pkru_state *pk;
+
/* check the boot processor, plus compile options for PKU: */
if (!cpu_feature_enabled(X86_FEATURE_PKU))
return;
@@ -382,6 +384,9 @@ static __always_inline void setup_pku(struct cpuinfo_x86 *c)
return;
cr4_set_bits(X86_CR4_PKE);
+ pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+ if (pk)
+ pk->pkru = init_pkru_value;
/*
* Setting X86_CR4_PKE will cause the X86_FEATURE_OSPKE
* cpuid bit to be set. We need to ensure that we
@@ -507,19 +512,6 @@ void load_percpu_segment(int cpu)
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
#endif
-#ifdef CONFIG_X86_64
-/*
- * Special IST stacks which the CPU switches to when it calls
- * an IST-marked descriptor entry. Up to 7 stacks (hardware
- * limit), all of them are 4K, except the debug stack which
- * is 8K.
- */
-static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
- [DEBUG_STACK - 1] = DEBUG_STKSZ
-};
-#endif
-
/* Load the original GDT from the per-cpu structure */
void load_direct_gdt(int cpu)
{
@@ -1511,9 +1503,9 @@ static __init int setup_clearcpuid(char *arg)
__setup("clearcpuid=", setup_clearcpuid);
#ifdef CONFIG_X86_64
-DEFINE_PER_CPU_FIRST(union irq_stack_union,
- irq_stack_union) __aligned(PAGE_SIZE) __visible;
-EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
+DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
+ fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
+EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
/*
* The following percpu variables are hot. Align current_task to
@@ -1523,9 +1515,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
EXPORT_PER_CPU_SYMBOL(current_task);
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
-
+DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
@@ -1562,23 +1552,7 @@ void syscall_init(void)
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
}
-/*
- * Copies of the original ist values from the tss are only accessed during
- * debugging, no special alignment required.
- */
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
-
-static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
DEFINE_PER_CPU(int, debug_stack_usage);
-
-int is_debug_stack(unsigned long addr)
-{
- return __this_cpu_read(debug_stack_usage) ||
- (addr <= __this_cpu_read(debug_stack_addr) &&
- addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
-}
-NOKPROBE_SYMBOL(is_debug_stack);
-
DEFINE_PER_CPU(u32, debug_idt_ctr);
void debug_stack_set_zero(void)
@@ -1668,7 +1642,7 @@ static void setup_getcpu(int cpu)
unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
struct desc_struct d = { };
- if (static_cpu_has(X86_FEATURE_RDTSCP))
+ if (boot_cpu_has(X86_FEATURE_RDTSCP))
write_rdtscp_aux(cpudata);
/* Store CPU and node number in limit. */
@@ -1690,17 +1664,14 @@ static void setup_getcpu(int cpu)
* initialized (naturally) in the bootstrap process, such as the GDT
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
- * A lot of state is already set up in PDA init for 64 bit
*/
#ifdef CONFIG_X86_64
void cpu_init(void)
{
- struct orig_ist *oist;
+ int cpu = raw_smp_processor_id();
struct task_struct *me;
struct tss_struct *t;
- unsigned long v;
- int cpu = raw_smp_processor_id();
int i;
wait_for_master_cpu(cpu);
@@ -1715,7 +1686,6 @@ void cpu_init(void)
load_ucode_ap();
t = &per_cpu(cpu_tss_rw, cpu);
- oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
if (this_cpu_read(numa_node) == 0 &&
@@ -1753,16 +1723,11 @@ void cpu_init(void)
/*
* set up and load the per-CPU TSS
*/
- if (!oist->ist[0]) {
- char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
-
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
- estacks += exception_stack_sizes[v];
- oist->ist[v] = t->x86_tss.ist[v] =
- (unsigned long)estacks;
- if (v == DEBUG_STACK-1)
- per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
- }
+ if (!t->x86_tss.ist[0]) {
+ t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
+ t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
+ t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
+ t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
}
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
@@ -1864,23 +1829,6 @@ void cpu_init(void)
}
#endif
-static void bsp_resume(void)
-{
- if (this_cpu->c_bsp_resume)
- this_cpu->c_bsp_resume(&boot_cpu_data);
-}
-
-static struct syscore_ops cpu_syscore_ops = {
- .resume = bsp_resume,
-};
-
-static int __init init_cpu_syscore(void)
-{
- register_syscore_ops(&cpu_syscore_ops);
- return 0;
-}
-core_initcall(init_cpu_syscore);
-
/*
* The microcode loader calls this upon late microcode load to recheck features,
* only when microcode has been updated. Caller holds microcode_mutex and CPU
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 5eb946b9a9f3..c0e2407abdd6 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -14,7 +14,6 @@ struct cpu_dev {
void (*c_init)(struct cpuinfo_x86 *);
void (*c_identify)(struct cpuinfo_x86 *);
void (*c_detect_tlb)(struct cpuinfo_x86 *);
- void (*c_bsp_resume)(struct cpuinfo_x86 *);
int c_x86_vendor;
#ifdef CONFIG_X86_32
/* Optional vendor specific routine to obtain the cache size. */
diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c
index cf25405444ab..415621ddb8a2 100644
--- a/arch/x86/kernel/cpu/hygon.c
+++ b/arch/x86/kernel/cpu/hygon.c
@@ -19,6 +19,8 @@
#include "cpu.h"
+#define APICID_SOCKET_ID_BIT 6
+
/*
* nodes_per_socket: Stores the number of nodes per socket.
* Refer to CPUID Fn8000_001E_ECX Node Identifiers[10:8]
@@ -87,6 +89,9 @@ static void hygon_get_topology(struct cpuinfo_x86 *c)
if (!err)
c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+ /* Socket ID is ApicId[6] for these processors. */
+ c->phys_proc_id = c->apicid >> APICID_SOCKET_ID_BIT;
+
cacheinfo_hygon_init_llc_id(c, cpu, node_id);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3142fd7a9b32..f17c1a714779 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -596,36 +596,6 @@ detect_keyid_bits:
c->x86_phys_bits -= keyid_bits;
}
-static void init_intel_energy_perf(struct cpuinfo_x86 *c)
-{
- u64 epb;
-
- /*
- * Initialize MSR_IA32_ENERGY_PERF_BIAS if not already initialized.
- * (x86_energy_perf_policy(8) is available to change it at run-time.)
- */
- if (!cpu_has(c, X86_FEATURE_EPB))
- return;
-
- rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
- if ((epb & 0xF) != ENERGY_PERF_BIAS_PERFORMANCE)
- return;
-
- pr_info_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
- pr_info_once("ENERGY_PERF_BIAS: View and update with x86_energy_perf_policy(8)\n");
- epb = (epb & ~0xF) | ENERGY_PERF_BIAS_NORMAL;
- wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
-}
-
-static void intel_bsp_resume(struct cpuinfo_x86 *c)
-{
- /*
- * MSR_IA32_ENERGY_PERF_BIAS is lost across suspend/resume,
- * so reinitialize it properly like during bootup:
- */
- init_intel_energy_perf(c);
-}
-
static void init_cpuid_fault(struct cpuinfo_x86 *c)
{
u64 msr;
@@ -763,8 +733,6 @@ static void init_intel(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_TME))
detect_tme(c);
- init_intel_energy_perf(c);
-
init_intel_misc_features(c);
}
@@ -1023,9 +991,7 @@ static const struct cpu_dev intel_cpu_dev = {
.c_detect_tlb = intel_detect_tlb,
.c_early_init = early_init_intel,
.c_init = init_intel,
- .c_bsp_resume = intel_bsp_resume,
.c_x86_vendor = X86_VENDOR_INTEL,
};
cpu_dev_register(intel_cpu_dev);
-
diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c
new file mode 100644
index 000000000000..f4dd73396f28
--- /dev/null
+++ b/arch/x86/kernel/cpu/intel_epb.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Performance and Energy Bias Hint support.
+ *
+ * Copyright (C) 2019 Intel Corporation
+ *
+ * Author:
+ * Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/cpu.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/syscore_ops.h>
+#include <linux/pm.h>
+
+#include <asm/cpufeature.h>
+#include <asm/msr.h>
+
+/**
+ * DOC: overview
+ *
+ * The Performance and Energy Bias Hint (EPB) allows software to specify its
+ * preference with respect to the power-performance tradeoffs present in the
+ * processor. Generally, the EPB is expected to be set by user space (directly
+ * via sysfs or with the help of the x86_energy_perf_policy tool), but there are
+ * two reasons for the kernel to update it.
+ *
+ * First, there are systems where the platform firmware resets the EPB during
+ * system-wide transitions from sleep states back into the working state
+ * effectively causing the previous EPB updates by user space to be lost.
+ * Thus the kernel needs to save the current EPB values for all CPUs during
+ * system-wide transitions to sleep states and restore them on the way back to
+ * the working state. That can be achieved by saving EPB for secondary CPUs
+ * when they are taken offline during transitions into system sleep states and
+ * for the boot CPU in a syscore suspend operation, so that it can be restored
+ * for the boot CPU in a syscore resume operation and for the other CPUs when
+ * they are brought back online. However, CPUs that are already offline when
+ * a system-wide PM transition is started are not taken offline again, but their
+ * EPB values may still be reset by the platform firmware during the transition,
+ * so in fact it is necessary to save the EPB of any CPU taken offline and to
+ * restore it when the given CPU goes back online at all times.
+ *
+ * Second, on many systems the initial EPB value coming from the platform
+ * firmware is 0 ('performance') and at least on some of them that is because
+ * the platform firmware does not initialize EPB at all with the assumption that
+ * the OS will do that anyway. That is sometimes problematic, as it may cause
+ * the system battery to drain too fast, for example, so it is better to adjust
+ * it on CPU bring-up and if the initial EPB value for a given CPU is 0, the
+ * kernel changes it to 6 ('normal').
+ */
+
+static DEFINE_PER_CPU(u8, saved_epb);
+
+#define EPB_MASK 0x0fULL
+#define EPB_SAVED 0x10ULL
+#define MAX_EPB EPB_MASK
+
+static int intel_epb_save(void)
+{
+ u64 epb;
+
+ rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+ /*
+ * Ensure that saved_epb will always be nonzero after this write even if
+ * the EPB value read from the MSR is 0.
+ */
+ this_cpu_write(saved_epb, (epb & EPB_MASK) | EPB_SAVED);
+
+ return 0;
+}
+
+static void intel_epb_restore(void)
+{
+ u64 val = this_cpu_read(saved_epb);
+ u64 epb;
+
+ rdmsrl(MSR_IA32_ENERGY_PERF_BIAS, epb);
+ if (val) {
+ val &= EPB_MASK;
+ } else {
+ /*
+ * Because intel_epb_save() has not run for the current CPU yet,
+ * it is going online for the first time, so if its EPB value is
+ * 0 ('performance') at this point, assume that it has not been
+ * initialized by the platform firmware and set it to 6
+ * ('normal').
+ */
+ val = epb & EPB_MASK;
+ if (val == ENERGY_PERF_BIAS_PERFORMANCE) {
+ val = ENERGY_PERF_BIAS_NORMAL;
+ pr_warn_once("ENERGY_PERF_BIAS: Set to 'normal', was 'performance'\n");
+ }
+ }
+ wrmsrl(MSR_IA32_ENERGY_PERF_BIAS, (epb & ~EPB_MASK) | val);
+}
+
+static struct syscore_ops intel_epb_syscore_ops = {
+ .suspend = intel_epb_save,
+ .resume = intel_epb_restore,
+};
+
+static const char * const energy_perf_strings[] = {
+ "performance",
+ "balance-performance",
+ "normal",
+ "balance-power",
+ "power"
+};
+static const u8 energ_perf_values[] = {
+ ENERGY_PERF_BIAS_PERFORMANCE,
+ ENERGY_PERF_BIAS_BALANCE_PERFORMANCE,
+ ENERGY_PERF_BIAS_NORMAL,
+ ENERGY_PERF_BIAS_BALANCE_POWERSAVE,
+ ENERGY_PERF_BIAS_POWERSAVE
+};
+
+static ssize_t energy_perf_bias_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ unsigned int cpu = dev->id;
+ u64 epb;
+ int ret;
+
+ ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%llu\n", epb);
+}
+
+static ssize_t energy_perf_bias_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ unsigned int cpu = dev->id;
+ u64 epb, val;
+ int ret;
+
+ ret = __sysfs_match_string(energy_perf_strings,
+ ARRAY_SIZE(energy_perf_strings), buf);
+ if (ret >= 0)
+ val = energ_perf_values[ret];
+ else if (kstrtou64(buf, 0, &val) || val > MAX_EPB)
+ return -EINVAL;
+
+ ret = rdmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS, &epb);
+ if (ret < 0)
+ return ret;
+
+ ret = wrmsrl_on_cpu(cpu, MSR_IA32_ENERGY_PERF_BIAS,
+ (epb & ~EPB_MASK) | val);
+ if (ret < 0)
+ return ret;
+
+ return count;
+}
+
+static DEVICE_ATTR_RW(energy_perf_bias);
+
+static struct attribute *intel_epb_attrs[] = {
+ &dev_attr_energy_perf_bias.attr,
+ NULL
+};
+
+static const struct attribute_group intel_epb_attr_group = {
+ .name = power_group_name,
+ .attrs = intel_epb_attrs
+};
+
+static int intel_epb_online(unsigned int cpu)
+{
+ struct device *cpu_dev = get_cpu_device(cpu);
+
+ intel_epb_restore();
+ if (!cpuhp_tasks_frozen)
+ sysfs_merge_group(&cpu_dev->kobj, &intel_epb_attr_group);
+
+ return 0;
+}
+
+static int intel_epb_offline(unsigned int cpu)
+{
+ struct device *cpu_dev = get_cpu_device(cpu);
+
+ if (!cpuhp_tasks_frozen)
+ sysfs_unmerge_group(&cpu_dev->kobj, &intel_epb_attr_group);
+
+ intel_epb_save();
+ return 0;
+}
+
+static __init int intel_epb_init(void)
+{
+ int ret;
+
+ if (!boot_cpu_has(X86_FEATURE_EPB))
+ return -ENODEV;
+
+ ret = cpuhp_setup_state(CPUHP_AP_X86_INTEL_EPB_ONLINE,
+ "x86/intel/epb:online", intel_epb_online,
+ intel_epb_offline);
+ if (ret < 0)
+ goto err_out_online;
+
+ register_syscore_ops(&intel_epb_syscore_ops);
+ return 0;
+
+err_out_online:
+ cpuhp_remove_state(CPUHP_AP_X86_INTEL_EPB_ONLINE);
+ return ret;
+}
+subsys_initcall(intel_epb_init);
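
With the attribute group above merged into each CPU device's "power" group, the bias is expected to appear as /sys/devices/system/cpu/cpuN/power/energy_perf_bias (path inferred from the code, not quoted from documentation). The attribute accepts either one of the listed policy strings or a raw value between 0 and 15, where 0 biases toward maximum performance and 15 toward maximum energy saving.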
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index e64de5149e50..d904aafe6409 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -563,33 +563,59 @@ out:
return offset;
}
+bool amd_filter_mce(struct mce *m)
+{
+ enum smca_bank_types bank_type = smca_get_bank_type(m->bank);
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ u8 xec = (m->status >> 16) & 0x3F;
+
+ /* See Family 17h Models 10h-2Fh Erratum #1114. */
+ if (c->x86 == 0x17 &&
+ c->x86_model >= 0x10 && c->x86_model <= 0x2F &&
+ bank_type == SMCA_IF && xec == 10)
+ return true;
+
+ return false;
+}
+
/*
- * Turn off MC4_MISC thresholding banks on all family 0x15 models since
- * they're not supported there.
+ * Turn off thresholding banks for the following conditions:
+ * - MC4_MISC thresholding is not supported on Family 0x15.
+ * - Prevent possible spurious interrupts from the IF bank on Family 0x17
+ * Models 0x10-0x2F due to Erratum #1114.
*/
-void disable_err_thresholding(struct cpuinfo_x86 *c)
+void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
{
- int i;
+ int i, num_msrs;
u64 hwcr;
bool need_toggle;
- u32 msrs[] = {
- 0x00000413, /* MC4_MISC0 */
- 0xc0000408, /* MC4_MISC1 */
- };
+ u32 msrs[NR_BLOCKS];
+
+ if (c->x86 == 0x15 && bank == 4) {
+ msrs[0] = 0x00000413; /* MC4_MISC0 */
+ msrs[1] = 0xc0000408; /* MC4_MISC1 */
+ num_msrs = 2;
+ } else if (c->x86 == 0x17 &&
+ (c->x86_model >= 0x10 && c->x86_model <= 0x2F)) {
- if (c->x86 != 0x15)
+ if (smca_get_bank_type(bank) != SMCA_IF)
+ return;
+
+ msrs[0] = MSR_AMD64_SMCA_MCx_MISC(bank);
+ num_msrs = 1;
+ } else {
return;
+ }
rdmsrl(MSR_K7_HWCR, hwcr);
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18));
-
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */
- for (i = 0; i < ARRAY_SIZE(msrs); i++)
+ for (i = 0; i < num_msrs; i++)
msr_clear_bit(msrs[i], 62);
/* restore old settings */
@@ -604,12 +630,12 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block, cpu = smp_processor_id();
int offset = -1;
- disable_err_thresholding(c);
-
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);
+ disable_err_thresholding(c, bank);
+
for (block = 0; block < NR_BLOCKS; ++block) {
address = get_block_address(address, low, high, bank, block);
if (!address)
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index b7fb541a4873..5112a50e6486 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -460,23 +460,6 @@ static void mce_irq_work_cb(struct irq_work *entry)
mce_schedule_work();
}
-static void mce_report_event(struct pt_regs *regs)
-{
- if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
- mce_notify_irq();
- /*
- * Triggering the work queue here is just an insurance
- * policy in case the syscall exit notify handler
- * doesn't run soon enough or ends up running on the
- * wrong CPU (can happen when audit sleeps)
- */
- mce_schedule_work();
- return;
- }
-
- irq_work_queue(&mce_irq_work);
-}
-
/*
* Check if the address reported by the CPU is in a format we can parse.
* It would be possible to add code for most other cases, but all would
@@ -712,19 +695,49 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
barrier();
m.status = mce_rdmsrl(msr_ops.status(i));
+
+ /* If this entry is not valid, ignore it */
if (!(m.status & MCI_STATUS_VAL))
continue;
/*
- * Uncorrected or signalled events are handled by the exception
- * handler when it is enabled, so don't process those here.
- *
- * TBD do the same check for MCI_STATUS_EN here?
+ * If we are logging everything (at CPU online) or this
+ * is a corrected error, then we must log it.
*/
- if (!(flags & MCP_UC) &&
- (m.status & (mca_cfg.ser ? MCI_STATUS_S : MCI_STATUS_UC)))
- continue;
+ if ((flags & MCP_UC) || !(m.status & MCI_STATUS_UC))
+ goto log_it;
+
+ /*
+ * Newer Intel systems that support software error
+ * recovery need to make additional checks. Other
+ * CPUs should skip over uncorrected errors, but log
+ * everything else.
+ */
+ if (!mca_cfg.ser) {
+ if (m.status & MCI_STATUS_UC)
+ continue;
+ goto log_it;
+ }
+ /* Log "not enabled" (speculative) errors */
+ if (!(m.status & MCI_STATUS_EN))
+ goto log_it;
+
+ /*
+ * Log UCNA (SDM: 15.6.3 "UCR Error Classification")
+ * UC == 1 && PCC == 0 && S == 0
+ */
+ if (!(m.status & MCI_STATUS_PCC) && !(m.status & MCI_STATUS_S))
+ goto log_it;
+
+ /*
+ * Skip anything else. Presumption is that our read of this
+ * bank is racing with a machine check. Leave the log alone
+ * for do_machine_check() to deal with it.
+ */
+ continue;
+
+log_it:
error_seen = true;
mce_read_aux(&m, i);
@@ -1301,7 +1314,8 @@ void do_machine_check(struct pt_regs *regs, long error_code)
mce_panic("Fatal machine check on current CPU", &m, msg);
if (worst > 0)
- mce_report_event(regs);
+ irq_work_queue(&mce_irq_work);
+
mce_wrmsrl(MSR_IA32_MCG_STATUS, 0);
sync_core();
@@ -1451,13 +1465,12 @@ EXPORT_SYMBOL_GPL(mce_notify_irq);
static int __mcheck_cpu_mce_banks_init(void)
{
int i;
- u8 num_banks = mca_cfg.banks;
- mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL);
+ mce_banks = kcalloc(MAX_NR_BANKS, sizeof(struct mce_bank), GFP_KERNEL);
if (!mce_banks)
return -ENOMEM;
- for (i = 0; i < num_banks; i++) {
+ for (i = 0; i < MAX_NR_BANKS; i++) {
struct mce_bank *b = &mce_banks[i];
b->ctl = -1ULL;
@@ -1471,28 +1484,19 @@ static int __mcheck_cpu_mce_banks_init(void)
*/
static int __mcheck_cpu_cap_init(void)
{
- unsigned b;
u64 cap;
+ u8 b;
rdmsrl(MSR_IA32_MCG_CAP, cap);
b = cap & MCG_BANKCNT_MASK;
- if (!mca_cfg.banks)
- pr_info("CPU supports %d MCE banks\n", b);
-
- if (b > MAX_NR_BANKS) {
- pr_warn("Using only %u machine check banks out of %u\n",
- MAX_NR_BANKS, b);
+ if (WARN_ON_ONCE(b > MAX_NR_BANKS))
b = MAX_NR_BANKS;
- }
- /* Don't support asymmetric configurations today */
- WARN_ON(mca_cfg.banks != 0 && b != mca_cfg.banks);
- mca_cfg.banks = b;
+ mca_cfg.banks = max(mca_cfg.banks, b);
if (!mce_banks) {
int err = __mcheck_cpu_mce_banks_init();
-
if (err)
return err;
}
@@ -1771,6 +1775,14 @@ static void __mcheck_cpu_init_timer(void)
mce_start_timer(t);
}
+bool filter_mce(struct mce *m)
+{
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+ return amd_filter_mce(m);
+
+ return false;
+}
+
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct pt_regs *regs, long error_code)
{
@@ -2425,8 +2437,8 @@ static int fake_panic_set(void *data, u64 val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
- fake_panic_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fake_panic_fops, fake_panic_get, fake_panic_set,
+ "%llu\n");
static int __init mcheck_debugfs_init(void)
{
@@ -2435,8 +2447,8 @@ static int __init mcheck_debugfs_init(void)
dmce = mce_get_debugfs_dir();
if (!dmce)
return -ENOMEM;
- ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
- &fake_panic_fops);
+ ffake_panic = debugfs_create_file_unsafe("fake_panic", 0444, dmce,
+ NULL, &fake_panic_fops);
if (!ffake_panic)
return -ENOMEM;
@@ -2451,6 +2463,8 @@ EXPORT_SYMBOL_GPL(mcsafe_key);
static int __init mcheck_late_init(void)
{
+ pr_info("Using %d MCE banks\n", mca_cfg.banks);
+
if (mca_cfg.recovery)
static_branch_inc(&mcsafe_key);
diff --git a/arch/x86/kernel/cpu/mce/genpool.c b/arch/x86/kernel/cpu/mce/genpool.c
index 3395549c51d3..64d1d5a00f39 100644
--- a/arch/x86/kernel/cpu/mce/genpool.c
+++ b/arch/x86/kernel/cpu/mce/genpool.c
@@ -99,6 +99,9 @@ int mce_gen_pool_add(struct mce *mce)
{
struct mce_evt_llist *node;
+ if (filter_mce(mce))
+ return -EINVAL;
+
if (!mce_evt_pool)
return -EINVAL;
diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c
index 8492ef7d9015..a6026170af92 100644
--- a/arch/x86/kernel/cpu/mce/inject.c
+++ b/arch/x86/kernel/cpu/mce/inject.c
@@ -46,8 +46,6 @@
static struct mce i_mce;
static struct dentry *dfs_inj;
-static u8 n_banks;
-
#define MAX_FLAG_OPT_SIZE 4
#define NBCFG 0x44
@@ -528,7 +526,7 @@ static void do_inject(void)
* only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
* Fam10h and later BKDGs.
*/
- if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
+ if (boot_cpu_has(X86_FEATURE_AMD_DCM) &&
b == 4 &&
boot_cpu_data.x86 < 0x17) {
toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
@@ -570,9 +568,15 @@ err:
static int inj_bank_set(void *data, u64 val)
{
struct mce *m = (struct mce *)data;
+ u8 n_banks;
+ u64 cap;
+
+ /* Get bank count on target CPU so we can handle non-uniform values. */
+ rdmsrl_on_cpu(m->extcpu, MSR_IA32_MCG_CAP, &cap);
+ n_banks = cap & MCG_BANKCNT_MASK;
if (val >= n_banks) {
- pr_err("Non-existent MCE bank: %llu\n", val);
+ pr_err("MCA bank %llu non-existent on CPU%d\n", val, m->extcpu);
return -EINVAL;
}
@@ -665,10 +669,6 @@ static struct dfs_node {
static int __init debugfs_init(void)
{
unsigned int i;
- u64 cap;
-
- rdmsrl(MSR_IA32_MCG_CAP, cap);
- n_banks = cap & MCG_BANKCNT_MASK;
dfs_inj = debugfs_create_dir("mce-inject", NULL);
if (!dfs_inj)
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index af5eab1e65e2..a34b55baa7aa 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -173,4 +173,13 @@ struct mca_msr_regs {
extern struct mca_msr_regs msr_ops;
+/* Decide whether to add MCE record to MCE event pool or filter it out. */
+extern bool filter_mce(struct mce *m);
+
+#ifdef CONFIG_X86_MCE_AMD
+extern bool amd_filter_mce(struct mce *m);
+#else
+static inline bool amd_filter_mce(struct mce *m) { return false; };
+#endif
+
#endif /* __X86_MCE_INTERNAL_H__ */
diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c
index 5260185cbf7b..c321f4f513f9 100644
--- a/arch/x86/kernel/cpu/microcode/core.c
+++ b/arch/x86/kernel/cpu/microcode/core.c
@@ -418,8 +418,9 @@ static int do_microcode_update(const void __user *buf, size_t size)
if (ustate == UCODE_ERROR) {
error = -1;
break;
- } else if (ustate == UCODE_OK)
+ } else if (ustate == UCODE_NEW) {
apply_microcode_on_target(cpu);
+ }
}
return error;
@@ -427,7 +428,7 @@ static int do_microcode_update(const void __user *buf, size_t size)
static int microcode_open(struct inode *inode, struct file *file)
{
- return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM;
+ return capable(CAP_SYS_RAWIO) ? stream_open(inode, file) : -EPERM;
}
static ssize_t microcode_write(struct file *file, const char __user *buf,
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index 16936a24795c..a44bdbe7c55e 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -31,6 +31,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/uio.h>
#include <linux/mm.h>
#include <asm/microcode_intel.h>
@@ -861,32 +862,33 @@ out:
return ret;
}
-static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
- int (*get_ucode_data)(void *, const void *, size_t))
+static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- u8 *ucode_ptr = data, *new_mc = NULL, *mc = NULL;
- int new_rev = uci->cpu_sig.rev;
- unsigned int leftover = size;
unsigned int curr_mc_size = 0, new_mc_size = 0;
- unsigned int csig, cpf;
enum ucode_state ret = UCODE_OK;
+ int new_rev = uci->cpu_sig.rev;
+ u8 *new_mc = NULL, *mc = NULL;
+ unsigned int csig, cpf;
- while (leftover) {
+ while (iov_iter_count(iter)) {
struct microcode_header_intel mc_header;
- unsigned int mc_size;
+ unsigned int mc_size, data_size;
+ u8 *data;
- if (leftover < sizeof(mc_header)) {
- pr_err("error! Truncated header in microcode data file\n");
+ if (!copy_from_iter_full(&mc_header, sizeof(mc_header), iter)) {
+ pr_err("error! Truncated or inaccessible header in microcode data file\n");
break;
}
- if (get_ucode_data(&mc_header, ucode_ptr, sizeof(mc_header)))
- break;
-
mc_size = get_totalsize(&mc_header);
- if (!mc_size || mc_size > leftover) {
- pr_err("error! Bad data in microcode data file\n");
+ if (mc_size < sizeof(mc_header)) {
+ pr_err("error! Bad data in microcode data file (totalsize too small)\n");
+ break;
+ }
+ data_size = mc_size - sizeof(mc_header);
+ if (data_size > iov_iter_count(iter)) {
+ pr_err("error! Bad data in microcode data file (truncated file?)\n");
break;
}
@@ -899,7 +901,9 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
curr_mc_size = mc_size;
}
- if (get_ucode_data(mc, ucode_ptr, mc_size) ||
+ memcpy(mc, &mc_header, sizeof(mc_header));
+ data = mc + sizeof(mc_header);
+ if (!copy_from_iter_full(data, data_size, iter) ||
microcode_sanity_check(mc, 1) < 0) {
break;
}
@@ -914,14 +918,11 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
mc = NULL; /* trigger new vmalloc */
ret = UCODE_NEW;
}
-
- ucode_ptr += mc_size;
- leftover -= mc_size;
}
vfree(mc);
- if (leftover) {
+ if (iov_iter_count(iter)) {
vfree(new_mc);
return UCODE_ERROR;
}
@@ -945,12 +946,6 @@ static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size,
return ret;
}
-static int get_ucode_fw(void *to, const void *from, size_t n)
-{
- memcpy(to, from, n);
- return 0;
-}
-
static bool is_blacklisted(unsigned int cpu)
{
struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -977,10 +972,12 @@ static bool is_blacklisted(unsigned int cpu)
static enum ucode_state request_microcode_fw(int cpu, struct device *device,
bool refresh_fw)
{
- char name[30];
struct cpuinfo_x86 *c = &cpu_data(cpu);
const struct firmware *firmware;
+ struct iov_iter iter;
enum ucode_state ret;
+ struct kvec kvec;
+ char name[30];
if (is_blacklisted(cpu))
return UCODE_NFOUND;
@@ -993,26 +990,30 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
return UCODE_NFOUND;
}
- ret = generic_load_microcode(cpu, (void *)firmware->data,
- firmware->size, &get_ucode_fw);
+ kvec.iov_base = (void *)firmware->data;
+ kvec.iov_len = firmware->size;
+ iov_iter_kvec(&iter, WRITE, &kvec, 1, firmware->size);
+ ret = generic_load_microcode(cpu, &iter);
release_firmware(firmware);
return ret;
}
-static int get_ucode_user(void *to, const void *from, size_t n)
-{
- return copy_from_user(to, from, n);
-}
-
static enum ucode_state
request_microcode_user(int cpu, const void __user *buf, size_t size)
{
+ struct iov_iter iter;
+ struct iovec iov;
+
if (is_blacklisted(cpu))
return UCODE_NFOUND;
- return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
+ iov.iov_base = (void __user *)buf;
+ iov.iov_len = size;
+ iov_iter_init(&iter, WRITE, &iov, 1, size);
+
+ return generic_load_microcode(cpu, &iter);
}
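
The conversion above replaces the old callback-based copy helpers with a single iov_iter, so both the firmware path (kvec) and the user-space path (iovec) feed the same parser. A compact sketch of the kvec variant, with an illustrative function name and error handling, is:

    /* Illustrative sketch: wrap an in-kernel buffer in an iov_iter and
     * pull a fixed-size header out of it, as generic_load_microcode() does. */
    static int example_read_header(void *buf, size_t len,
    			       struct microcode_header_intel *hdr)
    {
    	struct kvec kvec = { .iov_base = buf, .iov_len = len };
    	struct iov_iter iter;

    	iov_iter_kvec(&iter, WRITE, &kvec, 1, len);
    	if (!copy_from_iter_full(hdr, sizeof(*hdr), &iter))
    		return -EINVAL;	/* truncated or inaccessible input */

    	return 0;
    }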
static struct microcode_ops microcode_intel_ops = {
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 2c8522a39ed5..cb2e49810d68 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -35,11 +35,11 @@ static void show_cpuinfo_misc(struct seq_file *m, struct cpuinfo_x86 *c)
"fpu_exception\t: %s\n"
"cpuid level\t: %d\n"
"wp\t\t: yes\n",
- static_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
- static_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
- static_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
- static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
- static_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+ boot_cpu_has_bug(X86_BUG_FDIV) ? "yes" : "no",
+ boot_cpu_has_bug(X86_BUG_F00F) ? "yes" : "no",
+ boot_cpu_has_bug(X86_BUG_COMA) ? "yes" : "no",
+ boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
+ boot_cpu_has(X86_FEATURE_FPU) ? "yes" : "no",
c->cpuid_level);
}
#else
diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
index 2dbd990a2eb7..89320c0396b1 100644
--- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
+++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c
@@ -342,10 +342,10 @@ int update_domains(struct rdt_resource *r, int closid)
if (cpumask_empty(cpu_mask) || mba_sc)
goto done;
cpu = get_cpu();
- /* Update CBM on this cpu if it's in cpu_mask. */
+ /* Update resource control msr on this CPU if it's in cpu_mask. */
if (cpumask_test_cpu(cpu, cpu_mask))
rdt_ctrl_update(&msr_param);
- /* Update CBM on other cpus. */
+ /* Update resource control msr on other CPUs. */
smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
put_cpu();
diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
index 85212a32b54d..333c177a2471 100644
--- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c
+++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c
@@ -2516,100 +2516,127 @@ static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
bitmap_clear(val, zero_bit, cbm_len - zero_bit);
}
-/**
- * rdtgroup_init_alloc - Initialize the new RDT group's allocations
- *
- * A new RDT group is being created on an allocation capable (CAT)
- * supporting system. Set this group up to start off with all usable
- * allocations. That is, all shareable and unused bits.
+/*
+ * Initialize cache resources per RDT domain
*
- * All-zero CBM is invalid. If there are no more shareable bits available
- * on any domain then the entire allocation will fail.
+ * Set the RDT domain up to start off with all usable allocations. That is,
+ * all shareable and unused bits. All-zero CBM is invalid.
*/
-static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
+static int __init_one_rdt_domain(struct rdt_domain *d, struct rdt_resource *r,
+ u32 closid)
{
struct rdt_resource *r_cdp = NULL;
struct rdt_domain *d_cdp = NULL;
u32 used_b = 0, unused_b = 0;
- u32 closid = rdtgrp->closid;
- struct rdt_resource *r;
unsigned long tmp_cbm;
enum rdtgrp_mode mode;
- struct rdt_domain *d;
u32 peer_ctl, *ctrl;
- int i, ret;
+ int i;
- for_each_alloc_enabled_rdt_resource(r) {
- /*
- * Only initialize default allocations for CBM cache
- * resources
- */
- if (r->rid == RDT_RESOURCE_MBA)
- continue;
- list_for_each_entry(d, &r->domains, list) {
- rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
- d->have_new_ctrl = false;
- d->new_ctrl = r->cache.shareable_bits;
- used_b = r->cache.shareable_bits;
- ctrl = d->ctrl_val;
- for (i = 0; i < closids_supported(); i++, ctrl++) {
- if (closid_allocated(i) && i != closid) {
- mode = rdtgroup_mode_by_closid(i);
- if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
- break;
- /*
- * If CDP is active include peer
- * domain's usage to ensure there
- * is no overlap with an exclusive
- * group.
- */
- if (d_cdp)
- peer_ctl = d_cdp->ctrl_val[i];
- else
- peer_ctl = 0;
- used_b |= *ctrl | peer_ctl;
- if (mode == RDT_MODE_SHAREABLE)
- d->new_ctrl |= *ctrl | peer_ctl;
- }
- }
- if (d->plr && d->plr->cbm > 0)
- used_b |= d->plr->cbm;
- unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
- unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
- d->new_ctrl |= unused_b;
- /*
- * Force the initial CBM to be valid, user can
- * modify the CBM based on system availability.
- */
- cbm_ensure_valid(&d->new_ctrl, r);
+ rdt_cdp_peer_get(r, d, &r_cdp, &d_cdp);
+ d->have_new_ctrl = false;
+ d->new_ctrl = r->cache.shareable_bits;
+ used_b = r->cache.shareable_bits;
+ ctrl = d->ctrl_val;
+ for (i = 0; i < closids_supported(); i++, ctrl++) {
+ if (closid_allocated(i) && i != closid) {
+ mode = rdtgroup_mode_by_closid(i);
+ if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
+ break;
/*
- * Assign the u32 CBM to an unsigned long to ensure
- * that bitmap_weight() does not access out-of-bound
- * memory.
+ * If CDP is active include peer domain's
+ * usage to ensure there is no overlap
+ * with an exclusive group.
*/
- tmp_cbm = d->new_ctrl;
- if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
- r->cache.min_cbm_bits) {
- rdt_last_cmd_printf("No space on %s:%d\n",
- r->name, d->id);
- return -ENOSPC;
- }
- d->have_new_ctrl = true;
+ if (d_cdp)
+ peer_ctl = d_cdp->ctrl_val[i];
+ else
+ peer_ctl = 0;
+ used_b |= *ctrl | peer_ctl;
+ if (mode == RDT_MODE_SHAREABLE)
+ d->new_ctrl |= *ctrl | peer_ctl;
}
}
+ if (d->plr && d->plr->cbm > 0)
+ used_b |= d->plr->cbm;
+ unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
+ unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
+ d->new_ctrl |= unused_b;
+ /*
+	 * Force the initial CBM to be valid; the user can
+ * modify the CBM based on system availability.
+ */
+ cbm_ensure_valid(&d->new_ctrl, r);
+ /*
+ * Assign the u32 CBM to an unsigned long to ensure that
+ * bitmap_weight() does not access out-of-bound memory.
+ */
+ tmp_cbm = d->new_ctrl;
+ if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
+ rdt_last_cmd_printf("No space on %s:%d\n", r->name, d->id);
+ return -ENOSPC;
+ }
+ d->have_new_ctrl = true;
+
+ return 0;
+}
+
+/*
+ * Initialize cache resources with default values.
+ *
+ * A new RDT group is being created on an allocation capable (CAT)
+ * supporting system. Set this group up to start off with all usable
+ * allocations.
+ *
+ * If there are no more shareable bits available on any domain then
+ * the entire allocation will fail.
+ */
+static int rdtgroup_init_cat(struct rdt_resource *r, u32 closid)
+{
+ struct rdt_domain *d;
+ int ret;
+
+ list_for_each_entry(d, &r->domains, list) {
+ ret = __init_one_rdt_domain(d, r, closid);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Initialize MBA resource with default values. */
+static void rdtgroup_init_mba(struct rdt_resource *r)
+{
+ struct rdt_domain *d;
+
+ list_for_each_entry(d, &r->domains, list) {
+ d->new_ctrl = is_mba_sc(r) ? MBA_MAX_MBPS : r->default_ctrl;
+ d->have_new_ctrl = true;
+ }
+}
+
+/* Initialize the RDT group's allocations. */
+static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
+{
+ struct rdt_resource *r;
+ int ret;
for_each_alloc_enabled_rdt_resource(r) {
- /*
- * Only initialize default allocations for CBM cache
- * resources
- */
- if (r->rid == RDT_RESOURCE_MBA)
- continue;
+ if (r->rid == RDT_RESOURCE_MBA) {
+ rdtgroup_init_mba(r);
+ } else {
+ ret = rdtgroup_init_cat(r, rdtgrp->closid);
+ if (ret < 0)
+ return ret;
+ }
+
ret = update_domains(r, rdtgrp->closid);
if (ret < 0) {
rdt_last_cmd_puts("Failed to initialize allocations\n");
return ret;
}
+
}
rdtgrp->mode = RDT_MODE_SHAREABLE;
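The per-domain CBM computed by __init_one_rdt_domain() boils down to "the shareable bits, plus whatever other shareable groups already use, plus every bit nobody uses at all". A small stand-alone worked example of that bit math (plain C with invented numbers, not resctrl code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int cbm_len = 8;
	uint64_t full     = (1ULL << cbm_len) - 1;	/* 0xff */
	uint64_t used_b   = 0x03 | 0x3c;	/* shareable bits + other group */
	uint64_t new_ctrl = 0x03 | 0x3c;	/* the other group is shareable */
	uint64_t unused_b = (used_b ^ full) & full;

	new_ctrl |= unused_b;
	/* prints: unused=0xc0 new_ctrl=0xff */
	printf("unused=%#llx new_ctrl=%#llx\n",
	       (unsigned long long)unused_b,
	       (unsigned long long)new_ctrl);
	return 0;
}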
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 17ffc869cab8..a96ca8584803 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -204,8 +204,7 @@ static struct crash_mem *fill_up_crash_elf_data(void)
* another range split. So add extra two slots here.
*/
nr_ranges += 2;
- cmem = vzalloc(sizeof(struct crash_mem) +
- sizeof(struct crash_mem_range) * nr_ranges);
+ cmem = vzalloc(struct_size(cmem, ranges, nr_ranges));
if (!cmem)
return NULL;
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index cd53f3030e40..64a59d726639 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -34,14 +34,14 @@ const char *stack_type_name(enum stack_type type)
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
+ unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack <= begin || stack > end)
+ if (stack < begin || stack > end)
return false;
info->type = STACK_TYPE_IRQ;
@@ -59,14 +59,14 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
+ unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack <= begin || stack > end)
+ if (stack < begin || stack > end)
return false;
info->type = STACK_TYPE_SOFTIRQ;
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 5cdb9e84da57..753b8cfe8b8a 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,23 +16,21 @@
#include <linux/bug.h>
#include <linux/nmi.h>
+#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
-static char *exception_stack_names[N_EXCEPTION_STACKS] = {
- [ DOUBLEFAULT_STACK-1 ] = "#DF",
- [ NMI_STACK-1 ] = "NMI",
- [ DEBUG_STACK-1 ] = "#DB",
- [ MCE_STACK-1 ] = "#MC",
-};
-
-static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
- [DEBUG_STACK - 1] = DEBUG_STKSZ
+static const char * const exception_stack_names[] = {
+ [ ESTACK_DF ] = "#DF",
+ [ ESTACK_NMI ] = "NMI",
+ [ ESTACK_DB2 ] = "#DB2",
+ [ ESTACK_DB1 ] = "#DB1",
+ [ ESTACK_DB ] = "#DB",
+ [ ESTACK_MCE ] = "#MC",
};
const char *stack_type_name(enum stack_type type)
{
- BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
if (type == STACK_TYPE_IRQ)
return "IRQ";
@@ -52,43 +50,84 @@ const char *stack_type_name(enum stack_type type)
return NULL;
}
+/**
+ * struct estack_pages - Page descriptor for exception stacks
+ * @offs: Offset from the start of the exception stack area
+ * @size: Size of the exception stack
+ * @type: Type to store in the stack_info struct
+ */
+struct estack_pages {
+ u32 offs;
+ u16 size;
+ u16 type;
+};
+
+#define EPAGERANGE(st) \
+ [PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \
+ PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \
+ .offs = CEA_ESTACK_OFFS(st), \
+ .size = CEA_ESTACK_SIZE(st), \
+ .type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, }
+
+/*
+ * Array of exception stack page descriptors. If the stack is larger than
+ * PAGE_SIZE, all pages covering a particular stack will have the same
+ * info. The guard pages including the not mapped DB2 stack are zeroed
+ * out.
+ */
+static const
+struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
+ EPAGERANGE(DF),
+ EPAGERANGE(NMI),
+ EPAGERANGE(DB1),
+ EPAGERANGE(DB),
+ EPAGERANGE(MCE),
+};
+
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *begin, *end;
+ unsigned long begin, end, stk = (unsigned long)stack;
+ const struct estack_pages *ep;
struct pt_regs *regs;
- unsigned k;
+ unsigned int k;
- BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
+ BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
- for (k = 0; k < N_EXCEPTION_STACKS; k++) {
- end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
- begin = end - (exception_stack_sizes[k] / sizeof(long));
- regs = (struct pt_regs *)end - 1;
-
- if (stack <= begin || stack >= end)
- continue;
+ begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
+ end = begin + sizeof(struct cea_exception_stacks);
+ /* Bail if @stack is outside the exception stack area. */
+ if (stk < begin || stk >= end)
+ return false;
- info->type = STACK_TYPE_EXCEPTION + k;
- info->begin = begin;
- info->end = end;
- info->next_sp = (unsigned long *)regs->sp;
+ /* Calc page offset from start of exception stacks */
+ k = (stk - begin) >> PAGE_SHIFT;
+ /* Lookup the page descriptor */
+ ep = &estack_pages[k];
+ /* Guard page? */
+ if (!ep->size)
+ return false;
- return true;
- }
+ begin += (unsigned long)ep->offs;
+ end = begin + (unsigned long)ep->size;
+ regs = (struct pt_regs *)end - 1;
- return false;
+ info->type = ep->type;
+ info->begin = (unsigned long *)begin;
+ info->end = (unsigned long *)end;
+ info->next_sp = (unsigned long *)regs->sp;
+ return true;
}
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
{
- unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
+ unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
/*
* This is a software stack, so 'end' can be a valid stack pointer.
* It just means the stack is empty.
*/
- if (stack <= begin || stack > end)
+ if (stack < begin || stack >= end)
return false;
info->type = STACK_TYPE_IRQ;
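The estack_pages[] table above relies on GCC's range designated initializers, so every page of a multi-page stack resolves to the same descriptor and the old loop over N_EXCEPTION_STACKS becomes a single O(1) array lookup. A toy model of the idea (struct name, sizes and types invented):

struct page_desc {
	unsigned int	offs;
	unsigned short	size;
	unsigned short	type;
};

static const struct page_desc demo[8] = {
	[0 ... 1] = { .offs = 0x0000, .size = 0x2000, .type = 1 },	/* #DF */
	/* index 2 intentionally left zero: behaves like a guard page */
	[3 ... 4] = { .offs = 0x3000, .size = 0x2000, .type = 2 },	/* NMI */
	[5 ... 7] = { .offs = 0x5000, .size = 0x3000, .type = 3 },	/* #MC */
};

/* Page index -> stack type; a zero size means "guard page, bail out". */
static int classify(unsigned int page_idx)
{
	return demo[page_idx].size ? demo[page_idx].type : -1;
}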
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index 50d5848bf22e..6c4f01540833 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -525,7 +525,8 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_I945G_IDS(&gen3_early_ops),
INTEL_I945GM_IDS(&gen3_early_ops),
INTEL_VLV_IDS(&gen6_early_ops),
- INTEL_PINEVIEW_IDS(&gen3_early_ops),
+ INTEL_PINEVIEW_G_IDS(&gen3_early_ops),
+ INTEL_PINEVIEW_M_IDS(&gen3_early_ops),
INTEL_I965G_IDS(&gen3_early_ops),
INTEL_G33_IDS(&gen3_early_ops),
INTEL_I965GM_IDS(&gen3_early_ops),
@@ -547,6 +548,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_GLK_IDS(&gen9_early_ops),
INTEL_CNL_IDS(&gen9_early_ops),
INTEL_ICL_11_IDS(&gen11_early_ops),
+ INTEL_EHL_IDS(&gen11_early_ops),
};
struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 2e5003fef51a..ce243f76bdb7 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -101,24 +101,21 @@ static void __kernel_fpu_begin(void)
kernel_fpu_disable();
- if (fpu->initialized) {
- /*
- * Ignore return value -- we don't care if reg state
- * is clobbered.
- */
- copy_fpregs_to_fpstate(fpu);
- } else {
- __cpu_invalidate_fpregs_state();
+ if (current->mm) {
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ /*
+ * Ignore return value -- we don't care if reg state
+ * is clobbered.
+ */
+ copy_fpregs_to_fpstate(fpu);
+ }
}
+ __cpu_invalidate_fpregs_state();
}
static void __kernel_fpu_end(void)
{
- struct fpu *fpu = &current->thread.fpu;
-
- if (fpu->initialized)
- copy_kernel_to_fpregs(&fpu->state);
-
kernel_fpu_enable();
}
@@ -145,15 +142,17 @@ void fpu__save(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu);
- preempt_disable();
+ fpregs_lock();
trace_x86_fpu_before_save(fpu);
- if (fpu->initialized) {
+
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
if (!copy_fpregs_to_fpstate(fpu)) {
copy_kernel_to_fpregs(&fpu->state);
}
}
+
trace_x86_fpu_after_save(fpu);
- preempt_enable();
+ fpregs_unlock();
}
EXPORT_SYMBOL_GPL(fpu__save);
@@ -186,11 +185,14 @@ void fpstate_init(union fpregs_state *state)
}
EXPORT_SYMBOL_GPL(fpstate_init);
-int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
+int fpu__copy(struct task_struct *dst, struct task_struct *src)
{
+ struct fpu *dst_fpu = &dst->thread.fpu;
+ struct fpu *src_fpu = &src->thread.fpu;
+
dst_fpu->last_cpu = -1;
- if (!src_fpu->initialized || !static_cpu_has(X86_FEATURE_FPU))
+ if (!static_cpu_has(X86_FEATURE_FPU))
return 0;
WARN_ON_FPU(src_fpu != &current->thread.fpu);
@@ -202,16 +204,23 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
/*
- * Save current FPU registers directly into the child
- * FPU context, without any memory-to-memory copying.
+ * If the FPU registers are not current just memcpy() the state.
+ * Otherwise save current FPU registers directly into the child's FPU
+ * context, without any memory-to-memory copying.
*
* ( The function 'fails' in the FNSAVE case, which destroys
- * register contents so we have to copy them back. )
+ * register contents so we have to load them back. )
*/
- if (!copy_fpregs_to_fpstate(dst_fpu)) {
- memcpy(&src_fpu->state, &dst_fpu->state, fpu_kernel_xstate_size);
- copy_kernel_to_fpregs(&src_fpu->state);
- }
+ fpregs_lock();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ memcpy(&dst_fpu->state, &src_fpu->state, fpu_kernel_xstate_size);
+
+ else if (!copy_fpregs_to_fpstate(dst_fpu))
+ copy_kernel_to_fpregs(&dst_fpu->state);
+
+ fpregs_unlock();
+
+ set_tsk_thread_flag(dst, TIF_NEED_FPU_LOAD);
trace_x86_fpu_copy_src(src_fpu);
trace_x86_fpu_copy_dst(dst_fpu);
@@ -223,20 +232,14 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
* Activate the current task's in-memory FPU context,
* if it has not been used before:
*/
-void fpu__initialize(struct fpu *fpu)
+static void fpu__initialize(struct fpu *fpu)
{
WARN_ON_FPU(fpu != &current->thread.fpu);
- if (!fpu->initialized) {
- fpstate_init(&fpu->state);
- trace_x86_fpu_init_state(fpu);
-
- trace_x86_fpu_activate_state(fpu);
- /* Safe to do for the current task: */
- fpu->initialized = 1;
- }
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ fpstate_init(&fpu->state);
+ trace_x86_fpu_init_state(fpu);
}
-EXPORT_SYMBOL_GPL(fpu__initialize);
/*
* This function must be called before we read a task's fpstate.
@@ -248,32 +251,20 @@ EXPORT_SYMBOL_GPL(fpu__initialize);
*
* - or it's called for stopped tasks (ptrace), in which case the
* registers were already saved by the context-switch code when
- * the task scheduled out - we only have to initialize the registers
- * if they've never been initialized.
+ * the task scheduled out.
*
* If the task has used the FPU before then save it.
*/
void fpu__prepare_read(struct fpu *fpu)
{
- if (fpu == &current->thread.fpu) {
+ if (fpu == &current->thread.fpu)
fpu__save(fpu);
- } else {
- if (!fpu->initialized) {
- fpstate_init(&fpu->state);
- trace_x86_fpu_init_state(fpu);
-
- trace_x86_fpu_activate_state(fpu);
- /* Safe to do for current and for stopped child tasks: */
- fpu->initialized = 1;
- }
- }
}
/*
* This function must be called before we write a task's fpstate.
*
- * If the task has used the FPU before then invalidate any cached FPU registers.
- * If the task has not used the FPU before then initialize its fpstate.
+ * Invalidate any cached FPU registers.
*
* After this function call, after registers in the fpstate are
* modified and the child task has woken up, the child task will
@@ -290,44 +281,11 @@ void fpu__prepare_write(struct fpu *fpu)
*/
WARN_ON_FPU(fpu == &current->thread.fpu);
- if (fpu->initialized) {
- /* Invalidate any cached state: */
- __fpu_invalidate_fpregs_state(fpu);
- } else {
- fpstate_init(&fpu->state);
- trace_x86_fpu_init_state(fpu);
-
- trace_x86_fpu_activate_state(fpu);
- /* Safe to do for stopped child tasks: */
- fpu->initialized = 1;
- }
+ /* Invalidate any cached state: */
+ __fpu_invalidate_fpregs_state(fpu);
}
/*
- * 'fpu__restore()' is called to copy FPU registers from
- * the FPU fpstate to the live hw registers and to activate
- * access to the hardware registers, so that FPU instructions
- * can be used afterwards.
- *
- * Must be called with kernel preemption disabled (for example
- * with local interrupts disabled, as it is in the case of
- * do_device_not_available()).
- */
-void fpu__restore(struct fpu *fpu)
-{
- fpu__initialize(fpu);
-
- /* Avoid __kernel_fpu_begin() right after fpregs_activate() */
- kernel_fpu_disable();
- trace_x86_fpu_before_restore(fpu);
- fpregs_activate(fpu);
- copy_kernel_to_fpregs(&fpu->state);
- trace_x86_fpu_after_restore(fpu);
- kernel_fpu_enable();
-}
-EXPORT_SYMBOL_GPL(fpu__restore);
-
-/*
* Drops current FPU state: deactivates the fpregs and
* the fpstate. NOTE: it still leaves previous contents
* in the fpregs in the eager-FPU case.
@@ -341,17 +299,13 @@ void fpu__drop(struct fpu *fpu)
preempt_disable();
if (fpu == &current->thread.fpu) {
- if (fpu->initialized) {
- /* Ignore delayed exceptions from user space */
- asm volatile("1: fwait\n"
- "2:\n"
- _ASM_EXTABLE(1b, 2b));
- fpregs_deactivate(fpu);
- }
+ /* Ignore delayed exceptions from user space */
+ asm volatile("1: fwait\n"
+ "2:\n"
+ _ASM_EXTABLE(1b, 2b));
+ fpregs_deactivate(fpu);
}
- fpu->initialized = 0;
-
trace_x86_fpu_dropped(fpu);
preempt_enable();
@@ -363,6 +317,8 @@ void fpu__drop(struct fpu *fpu)
*/
static inline void copy_init_fpstate_to_fpregs(void)
{
+ fpregs_lock();
+
if (use_xsave())
copy_kernel_to_xregs(&init_fpstate.xsave, -1);
else if (static_cpu_has(X86_FEATURE_FXSR))
@@ -372,6 +328,9 @@ static inline void copy_init_fpstate_to_fpregs(void)
if (boot_cpu_has(X86_FEATURE_OSPKE))
copy_init_pkru_to_fpregs();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
}
/*
@@ -389,16 +348,52 @@ void fpu__clear(struct fpu *fpu)
/*
* Make sure fpstate is cleared and initialized.
*/
- if (static_cpu_has(X86_FEATURE_FPU)) {
- preempt_disable();
- fpu__initialize(fpu);
- user_fpu_begin();
+ fpu__initialize(fpu);
+ if (static_cpu_has(X86_FEATURE_FPU))
copy_init_fpstate_to_fpregs();
- preempt_enable();
- }
}
/*
+ * Load FPU context before returning to userspace.
+ */
+void switch_fpu_return(void)
+{
+ if (!static_cpu_has(X86_FEATURE_FPU))
+ return;
+
+ __fpregs_load_activate();
+}
+EXPORT_SYMBOL_GPL(switch_fpu_return);
+
+#ifdef CONFIG_X86_DEBUG_FPU
+/*
+ * If current FPU state according to its tracking (loaded FPU context on this
+ * CPU) is not valid then we must have TIF_NEED_FPU_LOAD set so the context is
+ * loaded on return to userland.
+ */
+void fpregs_assert_state_consistent(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ return;
+
+ WARN_ON_FPU(!fpregs_state_valid(fpu, smp_processor_id()));
+}
+EXPORT_SYMBOL_GPL(fpregs_assert_state_consistent);
+#endif
+
+void fpregs_mark_activate(void)
+{
+ struct fpu *fpu = &current->thread.fpu;
+
+ fpregs_activate(fpu);
+ fpu->last_cpu = smp_processor_id();
+ clear_thread_flag(TIF_NEED_FPU_LOAD);
+}
+EXPORT_SYMBOL_GPL(fpregs_mark_activate);
+
+/*
* x87 math exception handling:
*/
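Taken together, the changes in this file replace the old eager restore (fpu__restore()) with a deferred one keyed off TIF_NEED_FPU_LOAD. The ordering they establish looks roughly like the sketch below; exit_to_user() is a stand-in for the real exit-to-usermode path, not an actual function:

/* Schematic only -- not the kernel's real code paths. */
void kernel_simd_user(void)
{
	kernel_fpu_begin();	/* saves user fpregs to fpu->state once and
				 * sets TIF_NEED_FPU_LOAD for user tasks   */
	/* ... use FPU/SIMD registers freely in kernel context ... */
	kernel_fpu_end();	/* registers stay clobbered on purpose      */
}

static void exit_to_user(void)	/* hypothetical name */
{
	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		switch_fpu_return();	/* reload fpu->state into registers
					 * and clear the flag               */
}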
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 6abd83572b01..20d8fa7124c7 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -239,8 +239,6 @@ static void __init fpu__init_system_ctx_switch(void)
WARN_ON_FPU(!on_boot_cpu);
on_boot_cpu = 0;
-
- WARN_ON_FPU(current->thread.fpu.initialized);
}
/*
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index bc02f5144b95..d652b939ccfb 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -15,16 +15,12 @@
*/
int regset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
- struct fpu *target_fpu = &target->thread.fpu;
-
- return target_fpu->initialized ? regset->n : 0;
+ return regset->n;
}
int regset_xregset_fpregs_active(struct task_struct *target, const struct user_regset *regset)
{
- struct fpu *target_fpu = &target->thread.fpu;
-
- if (boot_cpu_has(X86_FEATURE_FXSR) && target_fpu->initialized)
+ if (boot_cpu_has(X86_FEATURE_FXSR))
return regset->n;
else
return 0;
@@ -269,11 +265,10 @@ convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk)
memcpy(&to[i], &from[i], sizeof(to[0]));
}
-void convert_to_fxsr(struct task_struct *tsk,
+void convert_to_fxsr(struct fxregs_state *fxsave,
const struct user_i387_ia32_struct *env)
{
- struct fxregs_state *fxsave = &tsk->thread.fpu.state.fxsave;
struct _fpreg *from = (struct _fpreg *) &env->st_space[0];
struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0];
int i;
@@ -350,7 +345,7 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &env, 0, -1);
if (!ret)
- convert_to_fxsr(target, &env);
+ convert_to_fxsr(&target->thread.fpu.state.fxsave, &env);
/*
* update the header bit in the xsave header, indicating the
@@ -371,16 +366,9 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
int dump_fpu(struct pt_regs *regs, struct user_i387_struct *ufpu)
{
struct task_struct *tsk = current;
- struct fpu *fpu = &tsk->thread.fpu;
- int fpvalid;
-
- fpvalid = fpu->initialized;
- if (fpvalid)
- fpvalid = !fpregs_get(tsk, NULL,
- 0, sizeof(struct user_i387_ia32_struct),
- ufpu, NULL);
- return fpvalid;
+ return !fpregs_get(tsk, NULL, 0, sizeof(struct user_i387_ia32_struct),
+ ufpu, NULL);
}
EXPORT_SYMBOL(dump_fpu);
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index f6a1d299627c..5a8d118bc423 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -92,13 +92,13 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
return err;
err |= __put_user(FP_XSTATE_MAGIC2,
- (__u32 *)(buf + fpu_user_xstate_size));
+ (__u32 __user *)(buf + fpu_user_xstate_size));
/*
* Read the xfeatures which we copied (directly from the cpu or
* from the state in task struct) to the user buffers.
*/
- err |= __get_user(xfeatures, (__u32 *)&x->header.xfeatures);
+ err |= __get_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
/*
* For legacy compatible, we always set FP/SSE bits in the bit
@@ -113,7 +113,7 @@ static inline int save_xstate_epilog(void __user *buf, int ia32_frame)
*/
xfeatures |= XFEATURE_MASK_FPSSE;
- err |= __put_user(xfeatures, (__u32 *)&x->header.xfeatures);
+ err |= __put_user(xfeatures, (__u32 __user *)&x->header.xfeatures);
return err;
}
@@ -144,9 +144,10 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
* buf == buf_fx for 64-bit frames and 32-bit fsave frame.
* buf != buf_fx for 32-bit frames with fxstate.
*
- * If the fpu, extended register state is live, save the state directly
- * to the user frame pointed by the aligned pointer 'buf_fx'. Otherwise,
- * copy the thread's fpu state to the user frame starting at 'buf_fx'.
+ * Try to save it directly to the user frame with the page fault handler
+ * disabled. If this fails, do the slow path where the FPU state is first
+ * saved to the task's fpu->state and then copied to the user frame pointed
+ * to by the aligned pointer 'buf_fx'.
*
* If this is a 32-bit frame with fxstate, put a fsave header before
* the aligned state at 'buf_fx'.
@@ -156,10 +157,9 @@ static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf)
*/
int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
{
- struct fpu *fpu = &current->thread.fpu;
- struct xregs_state *xsave = &fpu->state.xsave;
struct task_struct *tsk = current;
int ia32_fxstate = (buf != buf_fx);
+ int ret;
ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -172,28 +172,34 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
sizeof(struct user_i387_ia32_struct), NULL,
(struct _fpstate_32 __user *) buf) ? -1 : 1;
- if (fpu->initialized || using_compacted_format()) {
- /* Save the live register state to the user directly. */
- if (copy_fpregs_to_sigframe(buf_fx))
- return -1;
- /* Update the thread's fxstate to save the fsave header. */
- if (ia32_fxstate)
- copy_fxregs_to_kernel(fpu);
- } else {
- /*
- * It is a *bug* if kernel uses compacted-format for xsave
- * area and we copy it out directly to a signal frame. It
- * should have been handled above by saving the registers
- * directly.
- */
- if (boot_cpu_has(X86_FEATURE_XSAVES)) {
- WARN_ONCE(1, "x86/fpu: saving compacted-format xsave area to a signal frame!\n");
- return -1;
- }
-
- fpstate_sanitize_xstate(fpu);
- if (__copy_to_user(buf_fx, xsave, fpu_user_xstate_size))
- return -1;
+retry:
+ /*
+ * Load the FPU registers if they are not valid for the current task.
+ * With a valid FPU state we can attempt to save the state directly to
+	 * userland's stack frame, which will likely succeed. If it does not,
+ * resolve the fault in the user memory and try again.
+ */
+ fpregs_lock();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ __fpregs_load_activate();
+
+ pagefault_disable();
+ ret = copy_fpregs_to_sigframe(buf_fx);
+ pagefault_enable();
+ fpregs_unlock();
+
+ if (ret) {
+ int aligned_size;
+ int nr_pages;
+
+ aligned_size = offset_in_page(buf_fx) + fpu_user_xstate_size;
+ nr_pages = DIV_ROUND_UP(aligned_size, PAGE_SIZE);
+
+ ret = get_user_pages_unlocked((unsigned long)buf_fx, nr_pages,
+ NULL, FOLL_WRITE);
+ if (ret == nr_pages)
+ goto retry;
+ return -EFAULT;
}
/* Save the fsave header for the 32-bit frames. */
@@ -207,11 +213,11 @@ int copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size)
}
static inline void
-sanitize_restored_xstate(struct task_struct *tsk,
+sanitize_restored_xstate(union fpregs_state *state,
struct user_i387_ia32_struct *ia32_env,
u64 xfeatures, int fx_only)
{
- struct xregs_state *xsave = &tsk->thread.fpu.state.xsave;
+ struct xregs_state *xsave = &state->xsave;
struct xstate_header *header = &xsave->header;
if (use_xsave()) {
@@ -238,17 +244,18 @@ sanitize_restored_xstate(struct task_struct *tsk,
*/
xsave->i387.mxcsr &= mxcsr_feature_mask;
- convert_to_fxsr(tsk, ia32_env);
+ if (ia32_env)
+ convert_to_fxsr(&state->fxsave, ia32_env);
}
}
/*
* Restore the extended state if present. Otherwise, restore the FP/SSE state.
*/
-static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
+static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only)
{
if (use_xsave()) {
- if ((unsigned long)buf % 64 || fx_only) {
+ if (fx_only) {
u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
return copy_user_to_fxregs(buf);
@@ -266,12 +273,15 @@ static inline int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_
static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
{
+ struct user_i387_ia32_struct *envp = NULL;
+ int state_size = fpu_kernel_xstate_size;
int ia32_fxstate = (buf != buf_fx);
struct task_struct *tsk = current;
struct fpu *fpu = &tsk->thread.fpu;
- int state_size = fpu_kernel_xstate_size;
+ struct user_i387_ia32_struct env;
u64 xfeatures = 0;
int fx_only = 0;
+ int ret = 0;
ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
IS_ENABLED(CONFIG_IA32_EMULATION));
@@ -284,8 +294,6 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
if (!access_ok(buf, size))
return -EACCES;
- fpu__initialize(fpu);
-
if (!static_cpu_has(X86_FEATURE_FPU))
return fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct),
@@ -308,61 +316,101 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
}
}
+ /*
+	 * The current state of the FPU registers does not matter. Setting
+	 * TIF_NEED_FPU_LOAD unconditionally ensures that our xstate is not
+	 * modified on context switch and that the xstate is considered to be
+	 * loaded again on return to userland (overriding last_cpu avoids the
+	 * optimisation).
+ */
+ set_thread_flag(TIF_NEED_FPU_LOAD);
+ __fpu_invalidate_fpregs_state(fpu);
+
+ if ((unsigned long)buf_fx % 64)
+ fx_only = 1;
+ /*
+ * For 32-bit frames with fxstate, copy the fxstate so it can be
+ * reconstructed later.
+ */
if (ia32_fxstate) {
+ ret = __copy_from_user(&env, buf, sizeof(env));
+ if (ret)
+ goto err_out;
+ envp = &env;
+ } else {
/*
- * For 32-bit frames with fxstate, copy the user state to the
- * thread's fpu state, reconstruct fxstate from the fsave
- * header. Validate and sanitize the copied state.
+ * Attempt to restore the FPU registers directly from user
+ * memory. For that to succeed, the user access cannot cause
+ * page faults. If it does, fall back to the slow path below,
+		 * going through the kernel buffer with the pagefault handler
+		 * enabled.
*/
- struct user_i387_ia32_struct env;
- int err = 0;
+ fpregs_lock();
+ pagefault_disable();
+ ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only);
+ pagefault_enable();
+ if (!ret) {
+ fpregs_mark_activate();
+ fpregs_unlock();
+ return 0;
+ }
+ fpregs_unlock();
+ }
- /*
- * Drop the current fpu which clears fpu->initialized. This ensures
- * that any context-switch during the copy of the new state,
- * avoids the intermediate state from getting restored/saved.
- * Thus avoiding the new restored state from getting corrupted.
- * We will be ready to restore/save the state only after
- * fpu->initialized is again set.
- */
- fpu__drop(fpu);
+
+ if (use_xsave() && !fx_only) {
+ u64 init_bv = xfeatures_mask & ~xfeatures;
if (using_compacted_format()) {
- err = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
+ ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx);
} else {
- err = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
+ ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size);
- if (!err && state_size > offsetof(struct xregs_state, header))
- err = validate_xstate_header(&fpu->state.xsave.header);
+ if (!ret && state_size > offsetof(struct xregs_state, header))
+ ret = validate_xstate_header(&fpu->state.xsave.header);
}
+ if (ret)
+ goto err_out;
- if (err || __copy_from_user(&env, buf, sizeof(env))) {
- fpstate_init(&fpu->state);
- trace_x86_fpu_init_state(fpu);
- err = -1;
- } else {
- sanitize_restored_xstate(tsk, &env, xfeatures, fx_only);
+ sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
+
+ fpregs_lock();
+ if (unlikely(init_bv))
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
+ ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures);
+
+ } else if (use_fxsr()) {
+ ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size);
+ if (ret) {
+ ret = -EFAULT;
+ goto err_out;
}
- local_bh_disable();
- fpu->initialized = 1;
- fpu__restore(fpu);
- local_bh_enable();
+ sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only);
- return err;
- } else {
- /*
- * For 64-bit frames and 32-bit fsave frames, restore the user
- * state to the registers directly (with exceptions handled).
- */
- user_fpu_begin();
- if (copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only)) {
- fpu__clear(fpu);
- return -1;
+ fpregs_lock();
+ if (use_xsave()) {
+ u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE;
+ copy_kernel_to_xregs(&init_fpstate.xsave, init_bv);
}
+
+ ret = copy_kernel_to_fxregs_err(&fpu->state.fxsave);
+ } else {
+ ret = __copy_from_user(&fpu->state.fsave, buf_fx, state_size);
+ if (ret)
+ goto err_out;
+
+ fpregs_lock();
+ ret = copy_kernel_to_fregs_err(&fpu->state.fsave);
}
+ if (!ret)
+ fpregs_mark_activate();
+ fpregs_unlock();
- return 0;
+err_out:
+ if (ret)
+ fpu__clear(fpu);
+ return ret;
}
static inline int xstate_sigframe_size(void)
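The retry logic in copy_fpstate_to_sigframe() is an instance of a more general pattern: attempt the copy with page faults disabled while holding fpregs_lock(), and if that fails, fault the destination pages in and try again. Reduced to its skeleton (function and parameter names are placeholders):

#include <linux/mm.h>
#include <linux/uaccess.h>

/* Sketch of the fault-in-and-retry pattern; not the signal code itself. */
static int copy_with_faultin(void __user *dst, const void *src, size_t len)
{
	int nr_pages = DIV_ROUND_UP(offset_in_page(dst) + len, PAGE_SIZE);
	long ret;

retry:
	pagefault_disable();
	ret = __copy_to_user_inatomic(dst, src, len);
	pagefault_enable();
	if (!ret)
		return 0;

	/* Resolve the faults with the pagefault handler enabled, then retry */
	ret = get_user_pages_unlocked((unsigned long)dst, nr_pages, NULL,
				      FOLL_WRITE);
	if (ret == nr_pages)
		goto retry;
	return -EFAULT;
}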
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index d7432c2b1051..9c459fd1d38e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -805,20 +805,18 @@ void fpu__resume_cpu(void)
}
/*
- * Given an xstate feature mask, calculate where in the xsave
+ * Given an xstate feature nr, calculate where in the xsave
* buffer the state is. Callers should ensure that the buffer
* is valid.
*/
-static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask)
+static void *__raw_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
- int feature_nr = fls64(xstate_feature_mask) - 1;
-
- if (!xfeature_enabled(feature_nr)) {
+ if (!xfeature_enabled(xfeature_nr)) {
WARN_ON_FPU(1);
return NULL;
}
- return (void *)xsave + xstate_comp_offsets[feature_nr];
+ return (void *)xsave + xstate_comp_offsets[xfeature_nr];
}
/*
* Given the xsave area and a state inside, this function returns the
@@ -832,13 +830,13 @@ static void *__raw_xsave_addr(struct xregs_state *xsave, int xstate_feature_mask
*
* Inputs:
* xstate: the thread's storage area for all FPU data
- * xstate_feature: state which is defined in xsave.h (e.g.
- * XFEATURE_MASK_FP, XFEATURE_MASK_SSE, etc...)
+ * xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
+ * XFEATURE_SSE, etc...)
* Output:
* address of the state in the xsave area, or NULL if the
* field is not present in the xsave buffer.
*/
-void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
+void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
{
/*
* Do we even *have* xsave state?
@@ -851,11 +849,11 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
* have not enabled. Remember that pcntxt_mask is
* what we write to the XCR0 register.
*/
- WARN_ONCE(!(xfeatures_mask & xstate_feature),
+ WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
"get of unsupported state");
/*
* This assumes the last 'xsave*' instruction to
- * have requested that 'xstate_feature' be saved.
+ * have requested that 'xfeature_nr' be saved.
* If it did not, we might be seeing and old value
* of the field in the buffer.
*
@@ -864,10 +862,10 @@ void *get_xsave_addr(struct xregs_state *xsave, int xstate_feature)
* or because the "init optimization" caused it
* to not be saved.
*/
- if (!(xsave->header.xfeatures & xstate_feature))
+ if (!(xsave->header.xfeatures & BIT_ULL(xfeature_nr)))
return NULL;
- return __raw_xsave_addr(xsave, xstate_feature);
+ return __raw_xsave_addr(xsave, xfeature_nr);
}
EXPORT_SYMBOL_GPL(get_xsave_addr);
@@ -882,25 +880,23 @@ EXPORT_SYMBOL_GPL(get_xsave_addr);
* Note that this only works on the current task.
*
* Inputs:
- * @xsave_state: state which is defined in xsave.h (e.g. XFEATURE_MASK_FP,
- * XFEATURE_MASK_SSE, etc...)
+ * @xfeature_nr: state which is defined in xsave.h (e.g. XFEATURE_FP,
+ * XFEATURE_SSE, etc...)
* Output:
* address of the state in the xsave area or NULL if the state
* is not present or is in its 'init state'.
*/
-const void *get_xsave_field_ptr(int xsave_state)
+const void *get_xsave_field_ptr(int xfeature_nr)
{
struct fpu *fpu = &current->thread.fpu;
- if (!fpu->initialized)
- return NULL;
/*
* fpu__save() takes the CPU's xstate registers
* and saves them off to the 'fpu memory buffer.
*/
fpu__save(fpu);
- return get_xsave_addr(&fpu->state.xsave, xsave_state);
+ return get_xsave_addr(&fpu->state.xsave, xfeature_nr);
}
#ifdef CONFIG_ARCH_HAS_PKEYS
@@ -1016,7 +1012,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of
* Copy only in-use xstates:
*/
if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, 1 << i);
+ void *src = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
@@ -1102,7 +1098,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i
* Copy only in-use xstates:
*/
if ((header.xfeatures >> i) & 1) {
- void *src = __raw_xsave_addr(xsave, 1 << i);
+ void *src = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
@@ -1159,7 +1155,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf)
u64 mask = ((u64)1 << i);
if (hdr.xfeatures & mask) {
- void *dst = __raw_xsave_addr(xsave, 1 << i);
+ void *dst = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
@@ -1213,7 +1209,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf)
u64 mask = ((u64)1 << i);
if (hdr.xfeatures & mask) {
- void *dst = __raw_xsave_addr(xsave, 1 << i);
+ void *dst = __raw_xsave_addr(xsave, i);
offset = xstate_offsets[i];
size = xstate_sizes[i];
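The xstate helpers above now take a feature number instead of a feature mask; callers that previously passed `1 << i` simply pass `i`, and BIT_ULL() rebuilds the mask where one is still needed. A trivial stand-alone illustration (names invented):

#include <stdint.h>

#define DEMO_BIT_ULL(nr)	(1ULL << (nr))

/* Was: demo_present(xfeatures, 1 << nr).  Now: demo_present(xfeatures, nr). */
static int demo_present(uint64_t xfeatures, int xfeature_nr)
{
	return (xfeatures & DEMO_BIT_ULL(xfeature_nr)) != 0;
}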
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ef49517f6bb2..0caf8122d680 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -678,12 +678,8 @@ static inline void *alloc_tramp(unsigned long size)
{
return module_alloc(size);
}
-static inline void tramp_free(void *tramp, int size)
+static inline void tramp_free(void *tramp)
{
- int npages = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
- set_memory_nx((unsigned long)tramp, npages);
- set_memory_rw((unsigned long)tramp, npages);
module_memfree(tramp);
}
#else
@@ -692,7 +688,7 @@ static inline void *alloc_tramp(unsigned long size)
{
return NULL;
}
-static inline void tramp_free(void *tramp, int size) { }
+static inline void tramp_free(void *tramp) { }
#endif
/* Defined as markers to the end of the ftrace default trampolines */
@@ -730,6 +726,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
unsigned long end_offset;
unsigned long op_offset;
unsigned long offset;
+ unsigned long npages;
unsigned long size;
unsigned long retq;
unsigned long *ptr;
@@ -762,6 +759,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
return 0;
*tramp_size = size + RET_SIZE + sizeof(void *);
+ npages = DIV_ROUND_UP(*tramp_size, PAGE_SIZE);
/* Copy ftrace_caller onto the trampoline memory */
ret = probe_kernel_read(trampoline, (void *)start_offset, size);
@@ -806,9 +804,17 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
/* ALLOC_TRAMP flags lets us know we created it */
ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
+ set_vm_flush_reset_perms(trampoline);
+
+ /*
+ * Module allocation needs to be completed by making the page
+ * executable. The page is still writable, which is a security hazard,
+ * but anyhow ftrace breaks W^X completely.
+	 * but ftrace breaks W^X completely anyway.
+ set_memory_x((unsigned long)trampoline, npages);
return (unsigned long)trampoline;
fail:
- tramp_free(trampoline, *tramp_size);
+ tramp_free(trampoline);
return 0;
}
@@ -939,7 +945,7 @@ void arch_ftrace_trampoline_free(struct ftrace_ops *ops)
if (!ops || !(ops->flags & FTRACE_OPS_FL_ALLOC_TRAMP))
return;
- tramp_free((void *)ops->trampoline, ops->trampoline_size);
+ tramp_free((void *)ops->trampoline);
ops->trampoline = 0;
}
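With module_alloc() no longer returning executable memory (see the module.c hunk later in this series), the trampoline path follows the usual write-then-make-executable ordering. A condensed sketch of that ordering, assuming the standard set_memory_*() helpers; the payload and sizing are invented:

/* Illustrative ordering only; error handling trimmed. */
static void *alloc_exec_stub(const void *code, size_t size)
{
	unsigned long npages = DIV_ROUND_UP(size, PAGE_SIZE);
	void *p = module_alloc(size);		/* now mapped RW, non-exec */

	if (!p)
		return NULL;

	memcpy(p, code, size);			/* patch while still writable */
	set_vm_flush_reset_perms(p);		/* perms reset when vfree()d  */
	set_memory_x((unsigned long)p, npages);	/* only now made executable   */
	return p;
}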
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d1dbe8e4eb82..bcd206c8ac90 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -265,7 +265,7 @@ ENDPROC(start_cpu0)
GLOBAL(initial_code)
.quad x86_64_start_kernel
GLOBAL(initial_gs)
- .quad INIT_PER_CPU_VAR(irq_stack_union)
+ .quad INIT_PER_CPU_VAR(fixed_percpu_data)
GLOBAL(initial_stack)
/*
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 01adea278a71..6d8917875f44 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -41,13 +41,12 @@ struct idt_data {
#define SYSG(_vector, _addr) \
G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
-/* Interrupt gate with interrupt stack */
+/*
+ * Interrupt gate with interrupt stack. The _ist index is the index in
+ * the tss.ist[] array, but for the descriptor it needs to start at 1.
+ */
#define ISTG(_vector, _addr, _ist) \
- G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)
-
-/* System interrupt gate with interrupt stack */
-#define SISTG(_vector, _addr, _ist) \
- G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS)
+ G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS)
/* Task gate */
#define TSKG(_vector, _gdt) \
@@ -184,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
* cpu_init() when the TSS has been initialized.
*/
static const __initconst struct idt_data ist_idts[] = {
- ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
- ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
- ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK),
+ ISTG(X86_TRAP_DB, debug, IST_INDEX_DB),
+ ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI),
+ ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF),
#ifdef CONFIG_X86_MCE
- ISTG(X86_TRAP_MC, &machine_check, MCE_STACK),
+ ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE),
#endif
};
diff --git a/arch/x86/kernel/ima_arch.c b/arch/x86/kernel/ima_arch.c
index e47cd9390ab4..85de790583f9 100644
--- a/arch/x86/kernel/ima_arch.c
+++ b/arch/x86/kernel/ima_arch.c
@@ -3,6 +3,7 @@
* Copyright (C) 2018 IBM Corporation
*/
#include <linux/efi.h>
+#include <linux/module.h>
#include <linux/ima.h>
extern struct boot_params boot_params;
@@ -64,12 +65,19 @@ static const char * const sb_arch_rules[] = {
"appraise func=KEXEC_KERNEL_CHECK appraise_type=imasig",
#endif /* CONFIG_KEXEC_VERIFY_SIG */
"measure func=KEXEC_KERNEL_CHECK",
+#if !IS_ENABLED(CONFIG_MODULE_SIG)
+ "appraise func=MODULE_CHECK appraise_type=imasig",
+#endif
+ "measure func=MODULE_CHECK",
NULL
};
const char * const *arch_get_ima_policy(void)
{
- if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot())
+ if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) {
+ if (IS_ENABLED(CONFIG_MODULE_SIG))
+ set_module_sig_enforced();
return sb_arch_rules;
+ }
return NULL;
}
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 95600a99ae93..fc34816c6f04 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -51,8 +51,8 @@ static inline int check_stack_overflow(void) { return 0; }
static inline void print_stack_overflow(void) { }
#endif
-DEFINE_PER_CPU(struct irq_stack *, hardirq_stack);
-DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
+DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
+DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
static void call_on_stack(void *func, void *stack)
{
@@ -76,7 +76,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
u32 *isp, *prev_esp, arg1;
curstk = (struct irq_stack *) current_stack();
- irqstk = __this_cpu_read(hardirq_stack);
+ irqstk = __this_cpu_read(hardirq_stack_ptr);
/*
* this is where we switch to the IRQ stack. However, if we are
@@ -107,27 +107,28 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
}
/*
- * allocate per-cpu stacks for hardirq and for softirq processing
+ * Allocate per-cpu stacks for hardirq and softirq processing
*/
-void irq_ctx_init(int cpu)
+int irq_init_percpu_irqstack(unsigned int cpu)
{
- struct irq_stack *irqstk;
-
- if (per_cpu(hardirq_stack, cpu))
- return;
+ int node = cpu_to_node(cpu);
+ struct page *ph, *ps;
- irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
- THREADINFO_GFP,
- THREAD_SIZE_ORDER));
- per_cpu(hardirq_stack, cpu) = irqstk;
+ if (per_cpu(hardirq_stack_ptr, cpu))
+ return 0;
- irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
- THREADINFO_GFP,
- THREAD_SIZE_ORDER));
- per_cpu(softirq_stack, cpu) = irqstk;
+ ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
+ if (!ph)
+ return -ENOMEM;
+ ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
+ if (!ps) {
+ __free_pages(ph, THREAD_SIZE_ORDER);
+ return -ENOMEM;
+ }
- printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
- cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
+ per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
+ per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
+ return 0;
}
void do_softirq_own_stack(void)
@@ -135,7 +136,7 @@ void do_softirq_own_stack(void)
struct irq_stack *irqstk;
u32 *isp, *prev_esp;
- irqstk = __this_cpu_read(softirq_stack);
+ irqstk = __this_cpu_read(softirq_stack_ptr);
/* build the stack frame on the softirq stack */
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 0469cd078db1..6bf6517a05bb 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,63 +18,64 @@
#include <linux/uaccess.h>
#include <linux/smp.h>
#include <linux/sched/task_stack.h>
+
+#include <asm/cpu_entry_area.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
-int sysctl_panic_on_stackoverflow;
+DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible;
+DECLARE_INIT_PER_CPU(irq_stack_backing_store);
-/*
- * Probabilistic stack overflow check:
- *
- * Only check the stack in process context, because everything else
- * runs on the big interrupt stacks. Checking reliably is too expensive,
- * so we just check from interrupts.
- */
-static inline void stack_overflow_check(struct pt_regs *regs)
+bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
{
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-#define STACK_TOP_MARGIN 128
- struct orig_ist *oist;
- u64 irq_stack_top, irq_stack_bottom;
- u64 estack_top, estack_bottom;
- u64 curbase = (u64)task_stack_page(current);
+ if (IS_ERR_OR_NULL(desc))
+ return false;
- if (user_mode(regs))
- return;
+ generic_handle_irq_desc(desc);
+ return true;
+}
- if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
- regs->sp <= curbase + THREAD_SIZE)
- return;
+#ifdef CONFIG_VMAP_STACK
+/*
+ * VMAP the backing store with guard pages
+ */
+static int map_irq_stack(unsigned int cpu)
+{
+ char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu);
+ struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE];
+ void *va;
+ int i;
- irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) +
- STACK_TOP_MARGIN;
- irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr);
- if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
- return;
+ for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) {
+ phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT));
- oist = this_cpu_ptr(&orig_ist);
- estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
- estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
- if (regs->sp >= estack_top && regs->sp <= estack_bottom)
- return;
+ pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
+ }
- WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
- current->comm, curbase, regs->sp,
- irq_stack_top, irq_stack_bottom,
- estack_top, estack_bottom, (void *)regs->ip);
+ va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
+ if (!va)
+ return -ENOMEM;
- if (sysctl_panic_on_stackoverflow)
- panic("low stack detected by irq handler - check messages\n");
-#endif
+ per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+ return 0;
}
-
-bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
+#else
+/*
+ * If VMAP stacks are disabled due to KASAN, just use the per-CPU
+ * backing store without guard pages.
+ */
+static int map_irq_stack(unsigned int cpu)
{
- stack_overflow_check(regs);
+ void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
- if (IS_ERR_OR_NULL(desc))
- return false;
+ per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+ return 0;
+}
+#endif
- generic_handle_irq_desc(desc);
- return true;
+int irq_init_percpu_irqstack(unsigned int cpu)
+{
+ if (per_cpu(hardirq_stack_ptr, cpu))
+ return 0;
+ return map_irq_stack(cpu);
}
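The point of going through vmap() here is that a vmalloc-space mapping is followed by an unmapped guard page, so an IRQ-stack overflow faults immediately instead of silently scribbling over the next per-CPU object. A stripped-down sketch of that mapping step (helper name invented; stacks grow down, hence returning the top of the area):

/* Sketch only: map 'nr' physically-backed pages with a trailing guard. */
static void *map_with_guard(struct page **pages, unsigned int nr)
{
	void *va = vmap(pages, nr, VM_MAP, PAGE_KERNEL);

	if (!va)
		return NULL;
	return va + nr * PAGE_SIZE;	/* hand out the top of the stack */
}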
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index a0693b71cfc1..16919a9671fa 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -91,6 +91,8 @@ void __init init_IRQ(void)
for (i = 0; i < nr_legacy_irqs(); i++)
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i);
+ BUG_ON(irq_init_percpu_irqstack(smp_processor_id()));
+
x86_init.irqs.intr_init();
}
@@ -104,6 +106,4 @@ void __init native_init_IRQ(void)
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
setup_irq(2, &irq2);
-
- irq_ctx_init(smp_processor_id());
}
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index f99bd26bd3f1..e631c358f7f4 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -37,7 +37,6 @@ static void bug_at(unsigned char *ip, int line)
static void __ref __jump_label_transform(struct jump_entry *entry,
enum jump_label_type type,
- void *(*poker)(void *, const void *, size_t),
int init)
{
union jump_code_union jmp;
@@ -50,9 +49,6 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
jmp.offset = jump_entry_target(entry) -
(jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
- if (early_boot_irqs_disabled)
- poker = text_poke_early;
-
if (type == JUMP_LABEL_JMP) {
if (init) {
expect = default_nop; line = __LINE__;
@@ -75,16 +71,19 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
bug_at((void *)jump_entry_code(entry), line);
/*
- * Make text_poke_bp() a default fallback poker.
+ * As long as only a single processor is running and the code is still
+	 * not marked as RO, text_poke_early() can be used; checking that
+	 * system_state is SYSTEM_BOOTING guarantees it. It will be set to
+	 * SYSTEM_SCHEDULING before other cores are awakened and before the
+ * code is write-protected.
*
* At the time the change is being done, just ignore whether we
* are doing nop -> jump or jump -> nop transition, and assume
* always nop being the 'currently valid' instruction
- *
*/
- if (poker) {
- (*poker)((void *)jump_entry_code(entry), code,
- JUMP_LABEL_NOP_SIZE);
+ if (init || system_state == SYSTEM_BOOTING) {
+ text_poke_early((void *)jump_entry_code(entry), code,
+ JUMP_LABEL_NOP_SIZE);
return;
}
@@ -96,7 +95,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
mutex_lock(&text_mutex);
- __jump_label_transform(entry, type, NULL, 0);
+ __jump_label_transform(entry, type, 0);
mutex_unlock(&text_mutex);
}
@@ -126,5 +125,5 @@ __init_or_module void arch_jump_label_transform_static(struct jump_entry *entry,
jlstate = JL_STATE_NO_UPDATE;
}
if (jlstate == JL_STATE_UPDATE)
- __jump_label_transform(entry, type, text_poke_early, 1);
+ __jump_label_transform(entry, type, 1);
}
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 4ff6b4cdb941..13b13311b792 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -747,7 +747,6 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
{
int err;
- char opc[BREAK_INSTR_SIZE];
bpt->type = BP_BREAKPOINT;
err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
@@ -759,18 +758,13 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
if (!err)
return err;
/*
- * It is safe to call text_poke() because normal kernel execution
+ * It is safe to call text_poke_kgdb() because normal kernel execution
* is stopped on all cores, so long as the text_mutex is not locked.
*/
if (mutex_is_locked(&text_mutex))
return -EBUSY;
- text_poke((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
- BREAK_INSTR_SIZE);
- err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
- if (err)
- return err;
- if (memcmp(opc, arch_kgdb_ops.gdb_bpt_instr, BREAK_INSTR_SIZE))
- return -EINVAL;
+ text_poke_kgdb((void *)bpt->bpt_addr, arch_kgdb_ops.gdb_bpt_instr,
+ BREAK_INSTR_SIZE);
bpt->type = BP_POKE_BREAKPOINT;
return err;
@@ -778,22 +772,17 @@ int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
{
- int err;
- char opc[BREAK_INSTR_SIZE];
-
if (bpt->type != BP_POKE_BREAKPOINT)
goto knl_write;
/*
- * It is safe to call text_poke() because normal kernel execution
+ * It is safe to call text_poke_kgdb() because normal kernel execution
* is stopped on all cores, so long as the text_mutex is not locked.
*/
if (mutex_is_locked(&text_mutex))
goto knl_write;
- text_poke((void *)bpt->bpt_addr, bpt->saved_instr, BREAK_INSTR_SIZE);
- err = probe_kernel_read(opc, (char *)bpt->bpt_addr, BREAK_INSTR_SIZE);
- if (err || memcmp(opc, bpt->saved_instr, BREAK_INSTR_SIZE))
- goto knl_write;
- return err;
+ text_poke_kgdb((void *)bpt->bpt_addr, bpt->saved_instr,
+ BREAK_INSTR_SIZE);
+ return 0;
knl_write:
return probe_kernel_write((char *)bpt->bpt_addr,
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index fed46ddb1eef..cf52ee0d8711 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -431,8 +431,21 @@ void *alloc_insn_page(void)
void *page;
page = module_alloc(PAGE_SIZE);
- if (page)
- set_memory_ro((unsigned long)page & PAGE_MASK, 1);
+ if (!page)
+ return NULL;
+
+ set_vm_flush_reset_perms(page);
+ /*
+ * First make the page read-only, and only then make it executable to
+ * prevent it from being W+X in between.
+ */
+ set_memory_ro((unsigned long)page, 1);
+
+ /*
+ * TODO: Once additional kernel code protection mechanisms are set, ensure
+ * that the page was not maliciously altered and it is still zeroed.
+ */
+ set_memory_x((unsigned long)page, 1);
return page;
}
@@ -440,8 +453,6 @@ void *alloc_insn_page(void)
/* Recover page to RW mode before releasing it */
void free_insn_page(void *page)
{
- set_memory_nx((unsigned long)page & PAGE_MASK, 1);
- set_memory_rw((unsigned long)page & PAGE_MASK, 1);
module_memfree(page);
}
@@ -716,6 +727,7 @@ NOKPROBE_SYMBOL(kprobe_int3_handler);
* calls trampoline_handler() runs, which calls the kretprobe's handler.
*/
asm(
+ ".text\n"
".global kretprobe_trampoline\n"
".type kretprobe_trampoline, @function\n"
"kretprobe_trampoline:\n"
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5c93a65ee1e5..3f0cc828cc36 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -67,7 +67,7 @@ static int __init parse_no_stealacc(char *arg)
early_param("no-steal-acc", parse_no_stealacc);
static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
-static DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64);
+DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;
/*
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 6135ae8ce036..b2463fcb20a8 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -113,7 +113,7 @@ static void do_sanity_check(struct mm_struct *mm,
* tables.
*/
WARN_ON(!had_kernel_mapping);
- if (static_cpu_has(X86_FEATURE_PTI))
+ if (boot_cpu_has(X86_FEATURE_PTI))
WARN_ON(!had_user_mapping);
} else {
/*
@@ -121,7 +121,7 @@ static void do_sanity_check(struct mm_struct *mm,
* Sync the pgd to the usermode tables.
*/
WARN_ON(had_kernel_mapping);
- if (static_cpu_has(X86_FEATURE_PTI))
+ if (boot_cpu_has(X86_FEATURE_PTI))
WARN_ON(had_user_mapping);
}
}
@@ -156,7 +156,7 @@ static void map_ldt_struct_to_user(struct mm_struct *mm)
k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
- if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+ if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
set_pmd(u_pmd, *k_pmd);
}
@@ -181,7 +181,7 @@ static void map_ldt_struct_to_user(struct mm_struct *mm)
{
pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
- if (static_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
+ if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
set_pgd(kernel_to_user_pgdp(pgd), *pgd);
}
@@ -208,7 +208,7 @@ map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
spinlock_t *ptl;
int i, nr_pages;
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return 0;
/*
@@ -271,7 +271,7 @@ static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
return;
/* LDT map/unmap is only required for PTI */
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
@@ -311,7 +311,7 @@ static void free_ldt_pgtables(struct mm_struct *mm)
unsigned long start = LDT_BASE_ADDR;
unsigned long end = LDT_END_ADDR;
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
tlb_gather_mmu(&tlb, mm, start, end);
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index b052e883dd8c..cfa3106faee4 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -87,7 +87,7 @@ void *module_alloc(unsigned long size)
p = __vmalloc_node_range(size, MODULE_ALIGN,
MODULES_VADDR + get_module_load_offset(),
MODULES_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (p && (kasan_module_alloc(p, size) < 0)) {
vfree(p);
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 18bc9b51ac9b..3755d0310026 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -21,13 +21,14 @@
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
+#include <linux/atomic.h>
#include <linux/sched/clock.h>
#if defined(CONFIG_EDAC)
#include <linux/edac.h>
#endif
-#include <linux/atomic.h>
+#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
@@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2);
* switch back to the original IDT.
*/
static DEFINE_PER_CPU(int, update_debug_stack);
+
+static bool notrace is_debug_stack(unsigned long addr)
+{
+ struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks);
+ unsigned long top = CEA_ESTACK_TOP(cs, DB);
+ unsigned long bot = CEA_ESTACK_BOT(cs, DB1);
+
+ if (__this_cpu_read(debug_stack_usage))
+ return true;
+ /*
+ * Note, this covers the guard page between DB and DB1 as well to
+	 * avoid two checks. Note that @addr can never legitimately point into
+ * the guard page.
+ */
+ return addr >= bot && addr < top;
+}
+NOKPROBE_SYMBOL(is_debug_stack);
#endif
dotraplinkage notrace void
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c0e0101133f3..7bbaa6baf37f 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -121,7 +121,7 @@ DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);
void __init native_pv_lock_init(void)
{
- if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
static_branch_disable(&virt_spin_lock_key);
}
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index d460998ae828..dcd272dbd0a9 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -51,14 +51,6 @@ int iommu_pass_through __read_mostly;
extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
-/* Dummy device used for NULL arguments (normally ISA). */
-struct device x86_dma_fallback_dev = {
- .init_name = "fallback device",
- .coherent_dma_mask = ISA_DMA_BIT_MASK,
- .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
-};
-EXPORT_SYMBOL(x86_dma_fallback_dev);
-
void __init pci_iommu_alloc(void)
{
struct iommu_table_entry *p;
@@ -77,18 +69,6 @@ void __init pci_iommu_alloc(void)
}
}
-bool arch_dma_alloc_attrs(struct device **dev)
-{
- if (!*dev)
- *dev = &x86_dma_fallback_dev;
-
- if (!is_device_dma_capable(*dev))
- return false;
- return true;
-
-}
-EXPORT_SYMBOL(arch_dma_alloc_attrs);
-
/*
* See <Documentation/x86/x86_64/boot-options.txt> for the iommu kernel
* parameter documentation.
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index c06c4c16c6b6..07c30ee17425 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -59,18 +59,34 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
+ struct x86_perf_regs *perf_regs;
+
+ if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
+ perf_regs = container_of(regs, struct x86_perf_regs, regs);
+ if (!perf_regs->xmm_regs)
+ return 0;
+ return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
+ }
+
if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
return 0;
return regs_get_register(regs, pt_regs_offset[idx]);
}
-#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
-
#ifdef CONFIG_X86_32
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
+ (1ULL << PERF_REG_X86_R9) | \
+ (1ULL << PERF_REG_X86_R10) | \
+ (1ULL << PERF_REG_X86_R11) | \
+ (1ULL << PERF_REG_X86_R12) | \
+ (1ULL << PERF_REG_X86_R13) | \
+ (1ULL << PERF_REG_X86_R14) | \
+ (1ULL << PERF_REG_X86_R15))
+
int perf_reg_validate(u64 mask)
{
- if (!mask || mask & REG_RESERVED)
+ if (!mask || (mask & REG_NOSUPPORT))
return -EINVAL;
return 0;
@@ -96,10 +112,7 @@ void perf_get_regs_user(struct perf_regs *regs_user,
int perf_reg_validate(u64 mask)
{
- if (!mask || mask & REG_RESERVED)
- return -EINVAL;
-
- if (mask & REG_NOSUPPORT)
+ if (!mask || (mask & REG_NOSUPPORT))
return -EINVAL;
return 0;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 957eae13b370..75fea0d48c0e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -101,7 +101,7 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
dst->thread.vm86 = NULL;
#endif
- return fpu__copy(&dst->thread.fpu, &src->thread.fpu);
+ return fpu__copy(dst, src);
}
/*
@@ -236,7 +236,7 @@ static int get_cpuid_mode(void)
static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
{
- if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+ if (!boot_cpu_has(X86_FEATURE_CPUID_FAULT))
return -ENODEV;
if (cpuid_enabled)
@@ -670,7 +670,7 @@ static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
if (c->x86_vendor != X86_VENDOR_INTEL)
return 0;
- if (!cpu_has(c, X86_FEATURE_MWAIT) || static_cpu_has_bug(X86_BUG_MONITOR))
+ if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_cpu_has_bug(X86_BUG_MONITOR))
return 0;
return 1;
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index e471d8e6f0b2..2399e910d109 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -127,6 +127,13 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
struct task_struct *tsk;
int err;
+ /*
+ * For a new task use the RESET flags value since there is no before.
+ * All the status flags are zero; DF and all the system flags must also
+ * be 0, specifically IF must be 0 because we context switch to the new
+ * task with interrupts disabled.
+ */
+ frame->flags = X86_EFLAGS_FIXED;
frame->bp = 0;
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
@@ -234,7 +241,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
- switch_fpu_prepare(prev_fpu, cpu);
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_prepare(prev_fpu, cpu);
/*
* Save away %gs. No need to save %fs, as it was saved on the
@@ -267,9 +275,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Leave lazy mode, flushing any hypercalls made here.
* This must be done before restoring TLS segments so
- * the GDT and LDT are properly updated, and must be
- * done before fpu__restore(), so the TS bit is up
- * to date.
+ * the GDT and LDT are properly updated.
*/
arch_end_context_switch(next_p);
@@ -290,10 +296,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (prev->gs | next->gs)
lazy_load_gs(next->gs);
- switch_fpu_finish(next_fpu, cpu);
-
this_cpu_write(current_task, next_p);
+ switch_fpu_finish(next_fpu);
+
/* Load the Intel cache allocation PQR MSR. */
resctrl_sched_in();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6a62f4af9fcf..f8e1af380cdf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -392,6 +392,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
childregs = task_pt_regs(p);
fork_frame = container_of(childregs, struct fork_frame, regs);
frame = &fork_frame->frame;
+
frame->bp = 0;
frame->ret_addr = (unsigned long) ret_from_fork;
p->thread.sp = (unsigned long) fork_frame;
@@ -520,7 +521,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
- switch_fpu_prepare(prev_fpu, cpu);
+ if (!test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_prepare(prev_fpu, cpu);
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
@@ -538,9 +540,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Leave lazy mode, flushing any hypercalls made here. This
* must be done after loading TLS entries in the GDT but before
- * loading segments that might reference them, and and it must
- * be done before fpu__restore(), so the TS bit is up to
- * date.
+ * loading segments that might reference them.
*/
arch_end_context_switch(next_p);
@@ -568,14 +568,14 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
x86_fsgsbase_load(prev, next);
- switch_fpu_finish(next_fpu, cpu);
-
/*
* Switch the PDA and FPU contexts.
*/
this_cpu_write(current_task, next_p);
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
+ switch_fpu_finish(next_fpu);
+
/* Reload sp0. */
update_task_stack(next_p);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 8fd3cedd9acc..09d6bded3c1e 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -121,7 +121,7 @@ void __noreturn machine_real_restart(unsigned int type)
write_cr3(real_mode_header->trampoline_pgd);
/* Exiting long mode will fail if CR4.PCIDE is set. */
- if (static_cpu_has(X86_FEATURE_PCID))
+ if (boot_cpu_has(X86_FEATURE_PCID))
cr4_clear_bits(X86_CR4_PCIDE);
#endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 3d872a527cd9..905dae880563 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -71,6 +71,7 @@
#include <linux/tboot.h>
#include <linux/jiffies.h>
#include <linux/mem_encrypt.h>
+#include <linux/sizes.h>
#include <linux/usb/xhci-dbgp.h>
#include <video/edid.h>
@@ -448,18 +449,17 @@ static void __init memblock_x86_reserve_range_setup_data(void)
#ifdef CONFIG_KEXEC_CORE
/* 16M alignment for crash kernel regions */
-#define CRASH_ALIGN (16 << 20)
+#define CRASH_ALIGN SZ_16M
/*
* Keep the crash kernel below this limit. On 32 bits earlier kernels
* would limit the kernel to the low 512 MiB due to mapping restrictions.
- * On 64bit, old kexec-tools need to under 896MiB.
*/
#ifdef CONFIG_X86_32
-# define CRASH_ADDR_LOW_MAX (512 << 20)
-# define CRASH_ADDR_HIGH_MAX (512 << 20)
+# define CRASH_ADDR_LOW_MAX SZ_512M
+# define CRASH_ADDR_HIGH_MAX SZ_512M
#else
-# define CRASH_ADDR_LOW_MAX (896UL << 20)
+# define CRASH_ADDR_LOW_MAX SZ_4G
# define CRASH_ADDR_HIGH_MAX MAXMEM
#endif
@@ -541,21 +541,27 @@ static void __init reserve_crashkernel(void)
}
/* 0 means: find the address automatically */
- if (crash_base <= 0) {
+ if (!crash_base) {
/*
* Set CRASH_ADDR_LOW_MAX upper bound for crash memory,
- * as old kexec-tools loads bzImage below that, unless
- * "crashkernel=size[KMG],high" is specified.
+ * crashkernel=x,high reserves memory over 4G, also allocates
+ * 256M extra low memory for DMA buffers and swiotlb.
+ * But the extra memory is not required for all machines.
+ * So try low memory first and fall back to high memory
+ * unless "crashkernel=size[KMG],high" is specified.
*/
- crash_base = memblock_find_in_range(CRASH_ALIGN,
- high ? CRASH_ADDR_HIGH_MAX
- : CRASH_ADDR_LOW_MAX,
- crash_size, CRASH_ALIGN);
+ if (!high)
+ crash_base = memblock_find_in_range(CRASH_ALIGN,
+ CRASH_ADDR_LOW_MAX,
+ crash_size, CRASH_ALIGN);
+ if (!crash_base)
+ crash_base = memblock_find_in_range(CRASH_ALIGN,
+ CRASH_ADDR_HIGH_MAX,
+ crash_size, CRASH_ALIGN);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
-
} else {
unsigned long long start;
@@ -1005,13 +1011,11 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled(EFI_BOOT))
efi_init();
- dmi_scan_machine();
- dmi_memdev_walk();
- dmi_set_dump_stack_arch_desc();
+ dmi_setup();
/*
* VMware detection requires dmi to be available, so this
- * needs to be done after dmi_scan_machine(), for the boot CPU.
+ * needs to be done after dmi_setup(), for the boot CPU.
*/
init_hypervisor_platform();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 4bf46575568a..86663874ef04 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void)
per_cpu(x86_cpu_to_logical_apicid, cpu) =
early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
#endif
-#ifdef CONFIG_X86_64
- per_cpu(irq_stack_ptr, cpu) =
- per_cpu(irq_stack_union.irq_stack, cpu) +
- IRQ_STACK_SIZE;
-#endif
#ifdef CONFIG_NUMA
per_cpu(x86_cpu_to_node_map, cpu) =
early_per_cpu_map(x86_cpu_to_node_map, cpu);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 08dfd4c1a4f9..364813cea647 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -132,16 +132,6 @@ static int restore_sigcontext(struct pt_regs *regs,
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
-#ifdef CONFIG_X86_64
- /*
- * Fix up SS if needed for the benefit of old DOSEMU and
- * CRIU.
- */
- if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
- user_64bit_mode(regs)))
- force_valid_ss(regs);
-#endif
-
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
@@ -150,6 +140,15 @@ static int restore_sigcontext(struct pt_regs *regs,
buf = (void __user *)buf_val;
} get_user_catch(err);
+#ifdef CONFIG_X86_64
+ /*
+ * Fix up SS if needed for the benefit of old DOSEMU and
+ * CRIU.
+ */
+ if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && user_64bit_mode(regs)))
+ force_valid_ss(regs);
+#endif
+
err |= fpu__restore_sig(buf, IS_ENABLED(CONFIG_X86_32));
force_iret();
@@ -206,7 +205,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */
- put_user_ex(fpstate, &sc->fpstate);
+ put_user_ex(fpstate, (unsigned long __user *)&sc->fpstate);
/* non-iBCS2 extensions.. */
put_user_ex(mask, &sc->oldmask);
@@ -246,7 +245,7 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
unsigned long sp = regs->sp;
unsigned long buf_fx = 0;
int onsigstack = on_sig_stack(sp);
- struct fpu *fpu = &current->thread.fpu;
+ int ret;
/* redzone */
if (IS_ENABLED(CONFIG_X86_64))
@@ -265,11 +264,9 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
sp = (unsigned long) ka->sa.sa_restorer;
}
- if (fpu->initialized) {
- sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
- &buf_fx, &math_size);
- *fpstate = (void __user *)sp;
- }
+ sp = fpu__alloc_mathframe(sp, IS_ENABLED(CONFIG_X86_32),
+ &buf_fx, &math_size);
+ *fpstate = (void __user *)sp;
sp = align_sigframe(sp - frame_size);
@@ -281,8 +278,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
return (void __user *)-1L;
/* save i387 and extended state */
- if (fpu->initialized &&
- copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size) < 0)
+ ret = copy_fpstate_to_sigframe(*fpstate, (void __user *)buf_fx, math_size);
+ if (ret < 0)
return (void __user *)-1L;
return (void __user *)sp;
@@ -461,6 +458,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
{
struct rt_sigframe __user *frame;
void __user *fp = NULL;
+ unsigned long uc_flags;
int err = 0;
frame = get_sigframe(&ksig->ka, regs, sizeof(struct rt_sigframe), &fp);
@@ -473,9 +471,11 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
return -EFAULT;
}
+ uc_flags = frame_uc_flags(regs);
+
put_user_try {
/* Create the ucontext. */
- put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
+ put_user_ex(uc_flags, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
save_altstack_ex(&frame->uc.uc_stack, regs->sp);
@@ -541,6 +541,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
{
#ifdef CONFIG_X86_X32_ABI
struct rt_sigframe_x32 __user *frame;
+ unsigned long uc_flags;
void __user *restorer;
int err = 0;
void __user *fpstate = NULL;
@@ -555,9 +556,11 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
return -EFAULT;
}
+ uc_flags = frame_uc_flags(regs);
+
put_user_try {
/* Create the ucontext. */
- put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
+ put_user_ex(uc_flags, &frame->uc.uc_flags);
put_user_ex(0, &frame->uc.uc_link);
compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
put_user_ex(0, &frame->uc.uc__pad0);
@@ -569,7 +572,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
restorer = NULL;
err |= -EFAULT;
}
- put_user_ex(restorer, &frame->pretcode);
+ put_user_ex(restorer, (unsigned long __user *)&frame->pretcode);
} put_user_catch(err);
err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
@@ -688,10 +691,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
sigset_t *set = sigmask_to_save();
compat_sigset_t *cset = (compat_sigset_t *) set;
- /*
- * Increment event counter and perform fixup for the pre-signal
- * frame.
- */
+ /* Perform fixup for the pre-signal frame. */
rseq_signal_deliver(ksig, regs);
/* Set up the stack frame */
@@ -763,8 +763,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/*
* Ensure the signal handler starts with the new fpu state.
*/
- if (fpu->initialized)
- fpu__clear(fpu);
+ fpu__clear(fpu);
}
signal_setup_done(failed, ksig, stepping);
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ce1a67b70168..73e69aaaa117 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -455,7 +455,7 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
* multicore group inside a NUMA node. If this happens, we will
* discard the MC level of the topology later.
*/
-static bool match_die(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
+static bool match_pkg(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
{
if (c->phys_proc_id == o->phys_proc_id)
return true;
@@ -546,7 +546,7 @@ void set_cpu_sibling_map(int cpu)
for_each_cpu(i, cpu_sibling_setup_mask) {
o = &cpu_data(i);
- if ((i == cpu) || (has_mp && match_die(c, o))) {
+ if ((i == cpu) || (has_mp && match_pkg(c, o))) {
link_mask(topology_core_cpumask, cpu, i);
/*
@@ -570,7 +570,7 @@ void set_cpu_sibling_map(int cpu)
} else if (i != cpu && !c->booted_cores)
c->booted_cores = cpu_data(i).booted_cores;
}
- if (match_die(c, o) && !topology_same_node(c, o))
+ if (match_pkg(c, o) && !topology_same_node(c, o))
x86_has_numa_in_package = true;
}
@@ -935,20 +935,27 @@ out:
return boot_error;
}
-void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+int common_cpu_up(unsigned int cpu, struct task_struct *idle)
{
+ int ret;
+
/* Just in case we booted with a single CPU. */
alternatives_enable_smp();
per_cpu(current_task, cpu) = idle;
+ /* Initialize the interrupt stack(s) */
+ ret = irq_init_percpu_irqstack(cpu);
+ if (ret)
+ return ret;
+
#ifdef CONFIG_X86_32
/* Stack for startup_32 can be just as for start_secondary onwards */
- irq_ctx_init(cpu);
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
#else
initial_gs = per_cpu_offset(cpu);
#endif
+ return 0;
}
/*
@@ -1106,7 +1113,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
/* the FPU context is blank, nobody can own it */
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
- common_cpu_up(cpu, tidle);
+ err = common_cpu_up(cpu, tidle);
+ if (err)
+ return err;
err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
if (err) {
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 5c2d71a1dc06..2abf27d7df6b 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -12,78 +12,31 @@
#include <asm/stacktrace.h>
#include <asm/unwind.h>
-static int save_stack_address(struct stack_trace *trace, unsigned long addr,
- bool nosched)
-{
- if (nosched && in_sched_functions(addr))
- return 0;
-
- if (trace->skip > 0) {
- trace->skip--;
- return 0;
- }
-
- if (trace->nr_entries >= trace->max_entries)
- return -1;
-
- trace->entries[trace->nr_entries++] = addr;
- return 0;
-}
-
-static void noinline __save_stack_trace(struct stack_trace *trace,
- struct task_struct *task, struct pt_regs *regs,
- bool nosched)
+void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
+ struct task_struct *task, struct pt_regs *regs)
{
struct unwind_state state;
unsigned long addr;
- if (regs)
- save_stack_address(trace, regs->ip, nosched);
+ if (regs && !consume_entry(cookie, regs->ip, false))
+ return;
for (unwind_start(&state, task, regs, NULL); !unwind_done(&state);
unwind_next_frame(&state)) {
addr = unwind_get_return_address(&state);
- if (!addr || save_stack_address(trace, addr, nosched))
+ if (!addr || !consume_entry(cookie, addr, false))
break;
}
-
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
}
/*
- * Save stack-backtrace addresses into a stack_trace buffer.
+ * This function returns an error if it detects any unreliable features of the
+ * stack. Otherwise it guarantees that the stack trace is reliable.
+ *
+ * If the task is not 'current', the caller *must* ensure the task is inactive.
*/
-void save_stack_trace(struct stack_trace *trace)
-{
- trace->skip++;
- __save_stack_trace(trace, current, NULL, false);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace);
-
-void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
-{
- __save_stack_trace(trace, current, regs, false);
-}
-
-void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
-{
- if (!try_get_task_stack(tsk))
- return;
-
- if (tsk == current)
- trace->skip++;
- __save_stack_trace(trace, tsk, NULL, true);
-
- put_task_stack(tsk);
-}
-EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
-
-#ifdef CONFIG_HAVE_RELIABLE_STACKTRACE
-
-static int __always_inline
-__save_stack_trace_reliable(struct stack_trace *trace,
- struct task_struct *task)
+int arch_stack_walk_reliable(stack_trace_consume_fn consume_entry,
+ void *cookie, struct task_struct *task)
{
struct unwind_state state;
struct pt_regs *regs;
@@ -97,7 +50,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
if (regs) {
/* Success path for user tasks */
if (user_mode(regs))
- goto success;
+ return 0;
/*
* Kernel mode registers on the stack indicate an
@@ -120,7 +73,7 @@ __save_stack_trace_reliable(struct stack_trace *trace,
if (!addr)
return -EINVAL;
- if (save_stack_address(trace, addr, false))
+ if (!consume_entry(cookie, addr, false))
return -EINVAL;
}
@@ -132,39 +85,9 @@ __save_stack_trace_reliable(struct stack_trace *trace,
if (!(task->flags & (PF_KTHREAD | PF_IDLE)))
return -EINVAL;
-success:
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
-
return 0;
}
-/*
- * This function returns an error if it detects any unreliable features of the
- * stack. Otherwise it guarantees that the stack trace is reliable.
- *
- * If the task is not 'current', the caller *must* ensure the task is inactive.
- */
-int save_stack_trace_tsk_reliable(struct task_struct *tsk,
- struct stack_trace *trace)
-{
- int ret;
-
- /*
- * If the task doesn't have a stack (e.g., a zombie), the stack is
- * "reliably" empty.
- */
- if (!try_get_task_stack(tsk))
- return 0;
-
- ret = __save_stack_trace_reliable(trace, tsk);
-
- put_task_stack(tsk);
-
- return ret;
-}
-#endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
-
/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
struct stack_frame_user {
@@ -189,15 +112,15 @@ copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
return ret;
}
-static inline void __save_stack_trace_user(struct stack_trace *trace)
+void arch_stack_walk_user(stack_trace_consume_fn consume_entry, void *cookie,
+ const struct pt_regs *regs)
{
- const struct pt_regs *regs = task_pt_regs(current);
const void __user *fp = (const void __user *)regs->bp;
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = regs->ip;
+ if (!consume_entry(cookie, regs->ip, false))
+ return;
- while (trace->nr_entries < trace->max_entries) {
+ while (1) {
struct stack_frame_user frame;
frame.next_fp = NULL;
@@ -207,8 +130,8 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
if ((unsigned long)fp < regs->sp)
break;
if (frame.ret_addr) {
- trace->entries[trace->nr_entries++] =
- frame.ret_addr;
+ if (!consume_entry(cookie, frame.ret_addr, false))
+ return;
}
if (fp == frame.next_fp)
break;
@@ -216,14 +139,3 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
}
}
-void save_stack_trace_user(struct stack_trace *trace)
-{
- /*
- * Trace user stack if we are not a kernel thread
- */
- if (current->mm) {
- __save_stack_trace_user(trace);
- }
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = ULONG_MAX;
-}
diff --git a/arch/x86/kernel/topology.c b/arch/x86/kernel/topology.c
index 738bf42b0218..be5bc2e47c71 100644
--- a/arch/x86/kernel/topology.c
+++ b/arch/x86/kernel/topology.c
@@ -71,7 +71,7 @@ int _debug_hotplug_cpu(int cpu, int action)
case 0:
ret = cpu_down(cpu);
if (!ret) {
- pr_info("CPU %u is now offline\n", cpu);
+ pr_info("DEBUG_HOTPLUG_CPU0: CPU %u is now offline\n", cpu);
dev->offline = true;
kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
} else
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d26f9e9c3d83..8b6d03e55d2f 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -456,7 +456,7 @@ dotraplinkage void do_bounds(struct pt_regs *regs, long error_code)
* which is all zeros which indicates MPX was not
* responsible for the exception.
*/
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
if (!bndcsr)
goto exit_trap;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 3fae23834069..15b5e98a86f9 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -185,8 +185,7 @@ static void __init cyc2ns_init_boot_cpu(void)
/*
* Secondary CPUs do not run through tsc_init(), so set up
* all the scale factors for all CPUs, assuming the same
- * speed as the bootup CPU. (cpufreq notifiers will fix this
- * up if their speed diverges)
+ * speed as the bootup CPU.
*/
static void __init cyc2ns_init_secondary_cpus(void)
{
@@ -283,6 +282,7 @@ int __init notsc_setup(char *str)
__setup("notsc", notsc_setup);
static int no_sched_irq_time;
+static int no_tsc_watchdog;
static int __init tsc_setup(char *str)
{
@@ -292,6 +292,8 @@ static int __init tsc_setup(char *str)
no_sched_irq_time = 1;
if (!strcmp(str, "unstable"))
mark_tsc_unstable("boot parameter");
+ if (!strcmp(str, "nowatchdog"))
+ no_tsc_watchdog = 1;
return 1;
}
@@ -937,12 +939,12 @@ void tsc_restore_sched_clock_state(void)
}
#ifdef CONFIG_CPU_FREQ
-/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+/*
+ * Frequency scaling support. Adjust the TSC based timer when the CPU frequency
* changes.
*
- * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
- * not that important because current Opteron setups do not support
- * scaling on SMP anyroads.
+ * NOTE: On SMP the situation is not fixable in general, so simply mark the TSC
+ * as unstable and give up in those cases.
*
* Should fix up last_tsc too. Currently gettimeofday in the
* first tick after the change will be slightly wrong.
@@ -956,22 +958,22 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
void *data)
{
struct cpufreq_freqs *freq = data;
- unsigned long *lpj;
- lpj = &boot_cpu_data.loops_per_jiffy;
-#ifdef CONFIG_SMP
- if (!(freq->flags & CPUFREQ_CONST_LOOPS))
- lpj = &cpu_data(freq->cpu).loops_per_jiffy;
-#endif
+ if (num_online_cpus() > 1) {
+ mark_tsc_unstable("cpufreq changes on SMP");
+ return 0;
+ }
if (!ref_freq) {
ref_freq = freq->old;
- loops_per_jiffy_ref = *lpj;
+ loops_per_jiffy_ref = boot_cpu_data.loops_per_jiffy;
tsc_khz_ref = tsc_khz;
}
+
if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
- (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
- *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
+ (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) {
+ boot_cpu_data.loops_per_jiffy =
+ cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
if (!(freq->flags & CPUFREQ_CONST_LOOPS))
@@ -1349,7 +1351,7 @@ static int __init init_tsc_clocksource(void)
if (tsc_unstable)
goto unreg;
- if (tsc_clocksource_reliable)
+ if (tsc_clocksource_reliable || no_tsc_watchdog)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index a092b6b40c6b..6a38717d179c 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -369,7 +369,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
preempt_disable();
tsk->thread.sp0 += 16;
- if (static_cpu_has(X86_FEATURE_SEP)) {
+ if (boot_cpu_has(X86_FEATURE_SEP)) {
tsk->thread.sysenter_cs = 0;
refresh_sysenter_cs(&tsk->thread);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index a5127b2c195f..0850b5149345 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -141,11 +141,11 @@ SECTIONS
*(.text.__x86.indirect_thunk)
__indirect_thunk_end = .;
#endif
-
- /* End of text section */
- _etext = .;
} :text = 0x9090
+ /* End of text section */
+ _etext = .;
+
NOTES :text :note
EXCEPTION_TABLE(16) :text = 0x9090
@@ -403,7 +403,8 @@ SECTIONS
*/
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
INIT_PER_CPU(gdt_page);
-INIT_PER_CPU(irq_stack_union);
+INIT_PER_CPU(fixed_percpu_data);
+INIT_PER_CPU(irq_stack_backing_store);
/*
* Build-time check on the image size:
@@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union);
"kernel image bigger than KERNEL_IMAGE_SIZE");
#ifdef CONFIG_SMP
-. = ASSERT((irq_stack_union == 0),
- "irq_stack_union is not at start of per-cpu area");
+. = ASSERT((fixed_percpu_data == 0),
+ "fixed_percpu_data is not at start of per-cpu area");
#endif
#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 72fa955f4a15..fc042419e670 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -27,7 +27,6 @@ config KVM
depends on X86_LOCAL_APIC
select PREEMPT_NOTIFIERS
select MMU_NOTIFIER
- select ANON_INODES
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select IRQ_BYPASS_MANAGER
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 421899f6ad7b..cc24b3a32c44 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1371,7 +1371,16 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa,
valid_bank_mask = BIT_ULL(0);
sparse_banks[0] = flush.processor_mask;
- all_cpus = flush.flags & HV_FLUSH_ALL_PROCESSORS;
+
+ /*
+ * Work around possible WS2012 bug: it sends hypercalls
+ * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
+ * while also expecting us to flush something and crashing if
+		 * we don't. Let's treat processor_mask == 0 the same as
+ * HV_FLUSH_ALL_PROCESSORS.
+ */
+ all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
+ flush.processor_mask == 0;
} else {
if (unlikely(kvm_read_guest(kvm, ingpa, &flush_ex,
sizeof(flush_ex))))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 9bf70cf84564..bd13fdddbdc4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -70,7 +70,6 @@
#define APIC_BROADCAST 0xFF
#define X2APIC_BROADCAST 0xFFFFFFFFul
-static bool lapic_timer_advance_adjust_done = false;
#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@@ -1482,14 +1481,32 @@ static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
return false;
}
+static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
+{
+ u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
+
+ /*
+ * If the guest TSC is running at a different ratio than the host, then
+ * convert the delay to nanoseconds to achieve an accurate delay. Note
+ * that __delay() uses delay_tsc whenever the hardware has TSC, thus
+ * always for VMX enabled hardware.
+ */
+ if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
+ __delay(min(guest_cycles,
+ nsec_to_cycles(vcpu, timer_advance_ns)));
+ } else {
+ u64 delay_ns = guest_cycles * 1000000ULL;
+ do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
+ ndelay(min_t(u32, delay_ns, timer_advance_ns));
+ }
+}
+
void wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
u64 guest_tsc, tsc_deadline, ns;
- if (!lapic_in_kernel(vcpu))
- return;
-
if (apic->lapic_timer.expired_tscdeadline == 0)
return;
@@ -1501,33 +1518,37 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
- /* __delay is delay_tsc whenever the hardware has TSC, thus always. */
if (guest_tsc < tsc_deadline)
- __delay(min(tsc_deadline - guest_tsc,
- nsec_to_cycles(vcpu, lapic_timer_advance_ns)));
+ __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
- if (!lapic_timer_advance_adjust_done) {
+ if (!apic->lapic_timer.timer_advance_adjust_done) {
/* too early */
if (guest_tsc < tsc_deadline) {
ns = (tsc_deadline - guest_tsc) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
- lapic_timer_advance_ns -= min((unsigned int)ns,
- lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ timer_advance_ns -= min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
} else {
/* too late */
ns = (guest_tsc - tsc_deadline) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
- lapic_timer_advance_ns += min((unsigned int)ns,
- lapic_timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
+ timer_advance_ns += min((u32)ns,
+ timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
}
if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
- lapic_timer_advance_adjust_done = true;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ if (unlikely(timer_advance_ns > 5000)) {
+ timer_advance_ns = 0;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ }
+ apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
}
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
- u64 guest_tsc, tscdeadline = apic->lapic_timer.tscdeadline;
+ struct kvm_timer *ktimer = &apic->lapic_timer;
+ u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
u64 ns = 0;
ktime_t expire;
struct kvm_vcpu *vcpu = apic->vcpu;
@@ -1542,13 +1563,15 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
now = ktime_get();
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
- if (likely(tscdeadline > guest_tsc)) {
- ns = (tscdeadline - guest_tsc) * 1000000ULL;
- do_div(ns, this_tsc_khz);
+
+ ns = (tscdeadline - guest_tsc) * 1000000ULL;
+ do_div(ns, this_tsc_khz);
+
+ if (likely(tscdeadline > guest_tsc) &&
+ likely(ns > apic->lapic_timer.timer_advance_ns)) {
expire = ktime_add_ns(now, ns);
- expire = ktime_sub_ns(expire, lapic_timer_advance_ns);
- hrtimer_start(&apic->lapic_timer.timer,
- expire, HRTIMER_MODE_ABS_PINNED);
+ expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
+ hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
} else
apic_timer_expired(apic);
@@ -2255,7 +2278,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
return HRTIMER_NORESTART;
}
-int kvm_create_lapic(struct kvm_vcpu *vcpu)
+int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
{
struct kvm_lapic *apic;
@@ -2279,6 +2302,14 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
HRTIMER_MODE_ABS_PINNED);
apic->lapic_timer.timer.function = apic_timer_fn;
+ if (timer_advance_ns == -1) {
+ apic->lapic_timer.timer_advance_ns = 1000;
+ apic->lapic_timer.timer_advance_adjust_done = false;
+ } else {
+ apic->lapic_timer.timer_advance_ns = timer_advance_ns;
+ apic->lapic_timer.timer_advance_adjust_done = true;
+ }
+
/*
* APIC is created enabled. This will prevent kvm_lapic_set_base from
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index ff6ef9c3d760..d6d049ba3045 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -31,8 +31,10 @@ struct kvm_timer {
u32 timer_mode_mask;
u64 tscdeadline;
u64 expired_tscdeadline;
+ u32 timer_advance_ns;
atomic_t pending; /* accumulated triggered timers */
bool hv_timer_in_use;
+ bool timer_advance_adjust_done;
};
struct kvm_lapic {
@@ -62,7 +64,7 @@ struct kvm_lapic {
struct dest_map;
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
+int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns);
void kvm_free_lapic(struct kvm_vcpu *vcpu);
int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index e10962dfc203..d9c7b45d231f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4781,6 +4781,7 @@ static union kvm_mmu_extended_role kvm_calc_mmu_role_ext(struct kvm_vcpu *vcpu)
union kvm_mmu_extended_role ext = {0};
ext.cr0_pg = !!is_paging(vcpu);
+ ext.cr4_pae = !!is_pae(vcpu);
ext.cr4_smep = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
ext.cr4_smap = !!kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
ext.cr4_pse = !!is_pse(vcpu);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 6401eb7ef19c..0c601d079cd2 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5423,7 +5423,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
return ret;
/* Empty 'VMXON' state is permitted */
- if (kvm_state->size < sizeof(kvm_state) + sizeof(*vmcs12))
+ if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12))
return 0;
if (kvm_state->vmx.vmcs_pa != -1ull) {
@@ -5467,7 +5467,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
vmcs12->vmcs_link_pointer != -1ull) {
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
- if (kvm_state->size < sizeof(kvm_state) + 2 * sizeof(*vmcs12))
+ if (kvm_state->size < sizeof(*kvm_state) + 2 * sizeof(*vmcs12))
return -EINVAL;
if (copy_from_user(shadow_vmcs12,
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 7b272738c576..d4cb1945b2e3 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -3,6 +3,7 @@
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
+#include <asm/nospec-branch.h>
#define WORD_SIZE (BITS_PER_LONG / 8)
@@ -77,6 +78,17 @@ ENDPROC(vmx_vmenter)
* referred to by VMCS.HOST_RIP.
*/
ENTRY(vmx_vmexit)
+#ifdef CONFIG_RETPOLINE
+ ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
+ /* Preserve guest's RAX, it's used to stuff the RSB. */
+ push %_ASM_AX
+
+ /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
+ FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+
+ pop %_ASM_AX
+.Lvmexit_skip_rsb:
+#endif
ret
ENDPROC(vmx_vmexit)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index b4e7d645275a..9663d41cc2bc 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6462,9 +6462,6 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs))
current_evmcs->hv_clean_fields |=
@@ -6503,7 +6500,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
*/
if (static_cpu_has(X86_FEATURE_PKU) &&
kvm_read_cr4_bits(vcpu, X86_CR4_PKE)) {
- vcpu->arch.pkru = __read_pkru();
+ vcpu->arch.pkru = rdpkru();
if (vcpu->arch.pkru != vmx->host_pkru)
__write_pkru(vmx->host_pkru);
}
@@ -7032,6 +7029,7 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
{
struct vcpu_vmx *vmx;
u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
+ struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
if (kvm_mwait_in_guest(vcpu->kvm))
return -EOPNOTSUPP;
@@ -7040,7 +7038,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc)
tscl = rdtsc();
guest_tscl = kvm_read_l1_tsc(vcpu, tscl);
delta_tsc = max(guest_deadline_tsc, guest_tscl) - guest_tscl;
- lapic_timer_advance_cycles = nsec_to_cycles(vcpu, lapic_timer_advance_ns);
+ lapic_timer_advance_cycles = nsec_to_cycles(vcpu,
+ ktimer->timer_advance_ns);
if (delta_tsc > lapic_timer_advance_cycles)
delta_tsc -= lapic_timer_advance_cycles;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a0d1fc80ac5a..d75bb97b983c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,10 +136,14 @@ EXPORT_SYMBOL_GPL(kvm_default_tsc_scaling_ratio);
static u32 __read_mostly tsc_tolerance_ppm = 250;
module_param(tsc_tolerance_ppm, uint, S_IRUGO | S_IWUSR);
-/* lapic timer advance (tscdeadline mode only) in nanoseconds */
-unsigned int __read_mostly lapic_timer_advance_ns = 1000;
+/*
+ * lapic timer advance (tscdeadline mode only) in nanoseconds. '-1' enables
+ * adaptive tuning starting from default advancement of 1000ns. '0' disables
+ * advancement entirely. Any other value is used as-is and disables adaptive
+ * tuning, i.e. allows privileged userspace to set an exact advancement time.
+ */
+static int __read_mostly lapic_timer_advance_ns = -1;
module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
-EXPORT_SYMBOL_GPL(lapic_timer_advance_ns);
static bool __read_mostly vector_hashing = true;
module_param(vector_hashing, bool, S_IRUGO);
@@ -3677,15 +3681,15 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
- u64 feature = valid & -valid;
- int index = fls64(feature) - 1;
- void *src = get_xsave_addr(xsave, feature);
+ u64 xfeature_mask = valid & -valid;
+ int xfeature_nr = fls64(xfeature_mask) - 1;
+ void *src = get_xsave_addr(xsave, xfeature_nr);
if (src) {
u32 size, offset, ecx, edx;
- cpuid_count(XSTATE_CPUID, index,
+ cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
- if (feature == XFEATURE_MASK_PKRU)
+ if (xfeature_nr == XFEATURE_PKRU)
memcpy(dest + offset, &vcpu->arch.pkru,
sizeof(vcpu->arch.pkru));
else
@@ -3693,7 +3697,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
}
- valid -= feature;
+ valid -= xfeature_mask;
}
}
@@ -3720,22 +3724,22 @@ static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
*/
valid = xstate_bv & ~XFEATURE_MASK_FPSSE;
while (valid) {
- u64 feature = valid & -valid;
- int index = fls64(feature) - 1;
- void *dest = get_xsave_addr(xsave, feature);
+ u64 xfeature_mask = valid & -valid;
+ int xfeature_nr = fls64(xfeature_mask) - 1;
+ void *dest = get_xsave_addr(xsave, xfeature_nr);
if (dest) {
u32 size, offset, ecx, edx;
- cpuid_count(XSTATE_CPUID, index,
+ cpuid_count(XSTATE_CPUID, xfeature_nr,
&size, &offset, &ecx, &edx);
- if (feature == XFEATURE_MASK_PKRU)
+ if (xfeature_nr == XFEATURE_PKRU)
memcpy(&vcpu->arch.pkru, src + offset,
sizeof(vcpu->arch.pkru));
else
memcpy(dest, src + offset, size);
}
- valid -= feature;
+ valid -= xfeature_mask;
}
}
@@ -6535,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
+static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.pio.count = 0;
+ return 1;
+}
+
static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
{
vcpu->arch.pio.count = 0;
@@ -6551,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
size, port, &val, 1);
+ if (ret)
+ return ret;
- if (!ret) {
+ /*
+ * Workaround userspace that relies on old KVM behavior of %rip being
+ * incremented prior to exiting to userspace to handle "OUT 0x7e".
+ */
+ if (port == 0x7e &&
+ kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
+ vcpu->arch.complete_userspace_io =
+ complete_fast_pio_out_port_0x7e;
+ kvm_skip_emulated_instruction(vcpu);
+ } else {
vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
vcpu->arch.complete_userspace_io = complete_fast_pio_out;
}
- return ret;
+ return 0;
}
static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
@@ -7873,10 +7894,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
- if (lapic_timer_advance_ns)
+ if (lapic_in_kernel(vcpu) &&
+ vcpu->arch.apic->lapic_timer.timer_advance_ns)
wait_lapic_expire(vcpu);
guest_enter_irqoff();
+ fpregs_assert_state_consistent();
+ if (test_thread_flag(TIF_NEED_FPU_LOAD))
+ switch_fpu_return();
+
if (unlikely(vcpu->arch.switch_db_regs)) {
set_debugreg(0, 7);
set_debugreg(vcpu->arch.eff_db[0], 0);
@@ -8135,22 +8161,30 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
/* Swap (qemu) user FPU context for the guest FPU context. */
static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
{
- preempt_disable();
+ fpregs_lock();
+
copy_fpregs_to_fpstate(&current->thread.fpu);
/* PKRU is separately restored in kvm_x86_ops->run. */
__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu->state,
~XFEATURE_MASK_PKRU);
- preempt_enable();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+
trace_kvm_fpu(1);
}
/* When vcpu_run ends, restore user space FPU context. */
static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
{
- preempt_disable();
+ fpregs_lock();
+
copy_fpregs_to_fpstate(vcpu->arch.guest_fpu);
copy_kernel_to_fpregs(&current->thread.fpu.state);
- preempt_enable();
+
+ fpregs_mark_activate();
+ fpregs_unlock();
+
++vcpu->stat.fpu_reload;
trace_kvm_fpu(0);
}
@@ -8848,11 +8882,11 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (init_event)
kvm_put_guest_fpu(vcpu);
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_MASK_BNDREGS);
+ XFEATURE_BNDREGS);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndreg_state));
mpx_state_buffer = get_xsave_addr(&vcpu->arch.guest_fpu->state.xsave,
- XFEATURE_MASK_BNDCSR);
+ XFEATURE_BNDCSR);
if (mpx_state_buffer)
memset(mpx_state_buffer, 0, sizeof(struct mpx_bndcsr));
if (init_event)
@@ -9061,7 +9095,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.apicv_active = kvm_x86_ops->get_enable_apicv(vcpu);
- r = kvm_create_lapic(vcpu);
+ r = kvm_create_lapic(vcpu, lapic_timer_advance_ns);
if (r < 0)
goto fail_mmu_destroy;
} else
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index aedc5d0d4989..534d3f28bb01 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -294,8 +294,6 @@ extern u64 kvm_supported_xcr0(void);
extern unsigned int min_timer_period_us;
-extern unsigned int lapic_timer_advance_ns;
-
extern bool enable_vmware_backdoor;
extern struct static_key kvm_no_apic_vcpu;
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 140e61843a07..5246db42de45 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -6,6 +6,18 @@
# Produces uninteresting flaky coverage.
KCOV_INSTRUMENT_delay.o := n
+# Early boot use of cmdline; don't instrument it
+ifdef CONFIG_AMD_MEM_ENCRYPT
+KCOV_INSTRUMENT_cmdline.o := n
+KASAN_SANITIZE_cmdline.o := n
+
+ifdef CONFIG_FUNCTION_TRACER
+CFLAGS_REMOVE_cmdline.o = -pg
+endif
+
+CFLAGS_cmdline.o := $(call cc-option, -fno-stack-protector)
+endif
+
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
@@ -23,7 +35,6 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
lib-y := delay.o misc.o cmdline.o cpu.o
lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
lib-y += memcpy_$(BITS).o
-lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index db4e5aa0858b..b2f1822084ae 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -16,6 +16,30 @@
#include <asm/smap.h>
#include <asm/export.h>
+.macro ALIGN_DESTINATION
+ /* check for bad alignment of destination */
+ movl %edi,%ecx
+ andl $7,%ecx
+ jz 102f /* already aligned */
+ subl $8,%ecx
+ negl %ecx
+ subl %ecx,%edx
+100: movb (%rsi),%al
+101: movb %al,(%rdi)
+ incq %rsi
+ incq %rdi
+ decl %ecx
+ jnz 100b
+102:
+ .section .fixup,"ax"
+103: addl %ecx,%edx /* ecx is zerorest also */
+ jmp copy_user_handle_tail
+ .previous
+
+ _ASM_EXTABLE_UA(100b, 103b)
+ _ASM_EXTABLE_UA(101b, 103b)
+ .endm
+
/*
* copy_user_generic_unrolled - memory copy with exception handling.
* This version is for CPUs like P4 that don't have efficient micro
@@ -194,6 +218,30 @@ ENDPROC(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
/*
+ * Try to copy last bytes and clear the rest if needed.
+ * Since protection fault in copy_from/to_user is not a normal situation,
+ * it is not necessary to optimize tail handling.
+ *
+ * Input:
+ * rdi destination
+ * rsi source
+ * rdx count
+ *
+ * Output:
+ * eax uncopied bytes or 0 if successful.
+ */
+ALIGN;
+copy_user_handle_tail:
+ movl %edx,%ecx
+1: rep movsb
+2: mov %ecx,%eax
+ ASM_CLAC
+ ret
+
+ _ASM_EXTABLE_UA(1b, 2b)
+ENDPROC(copy_user_handle_tail)
+
+/*
* copy_user_nocache - Uncached memory copy with exception handling
* This will force destination out of cache for more performance.
*
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index f5b7f1b3b6d7..b7375dc6898f 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -162,7 +162,7 @@ void __delay(unsigned long loops)
}
EXPORT_SYMBOL(__delay);
-void __const_udelay(unsigned long xloops)
+noinline void __const_udelay(unsigned long xloops)
{
unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy;
int d0;
diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 3cdf06128d13..be5b5fb1598b 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -6,6 +6,7 @@
asmlinkage void just_return_func(void);
asm(
+ ".text\n"
".type just_return_func, @function\n"
".globl just_return_func\n"
"just_return_func:\n"
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 3b24dc05251c..9d05572370ed 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -257,6 +257,7 @@ ENTRY(__memcpy_mcsafe)
/* Copy successful. Return zero */
.L_done_memcpy_trap:
xorl %eax, %eax
+.L_done:
ret
ENDPROC(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
@@ -273,7 +274,7 @@ EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
addl %edx, %ecx
.E_trailing_bytes:
mov %ecx, %eax
- ret
+ jmp .L_done
/*
* For write fault handling, given the destination is unaligned,
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
deleted file mode 100644
index dc2ab6ea6768..000000000000
--- a/arch/x86/lib/rwsem.S
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * x86 semaphore implementation.
- *
- * (C) Copyright 1999 Linus Torvalds
- *
- * Portions Copyright 1999 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
- */
-
-#include <linux/linkage.h>
-#include <asm/alternative-asm.h>
-#include <asm/frame.h>
-
-#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg)
-#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l)
-
-#ifdef CONFIG_X86_32
-
-/*
- * The semaphore operations have a special calling sequence that
- * allow us to do a simpler in-line version of them. These routines
- * need to convert that sequence back into the C sequence when
- * there is contention on the semaphore.
- *
- * %eax contains the semaphore pointer on entry. Save the C-clobbered
- * registers (%eax, %edx and %ecx) except %eax which is either a return
- * value or just gets clobbered. Same is true for %edx so make sure GCC
- * reloads it after the slow path, by making it hold a temporary, for
- * example see ____down_write().
- */
-
-#define save_common_regs \
- pushl %ecx
-
-#define restore_common_regs \
- popl %ecx
-
- /* Avoid uglifying the argument copying x86-64 needs to do. */
- .macro movq src, dst
- .endm
-
-#else
-
-/*
- * x86-64 rwsem wrappers
- *
- * This interfaces the inline asm code to the slow-path
- * C routines. We need to save the call-clobbered regs
- * that the asm does not mark as clobbered, and move the
- * argument from %rax to %rdi.
- *
- * NOTE! We don't need to save %rax, because the functions
- * will always return the semaphore pointer in %rax (which
- * is also the input argument to these helpers)
- *
- * The following can clobber %rdx because the asm clobbers it:
- * call_rwsem_down_write_failed
- * call_rwsem_wake
- * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
- */
-
-#define save_common_regs \
- pushq %rdi; \
- pushq %rsi; \
- pushq %rcx; \
- pushq %r8; \
- pushq %r9; \
- pushq %r10; \
- pushq %r11
-
-#define restore_common_regs \
- popq %r11; \
- popq %r10; \
- popq %r9; \
- popq %r8; \
- popq %rcx; \
- popq %rsi; \
- popq %rdi
-
-#endif
-
-/* Fix up special calling conventions */
-ENTRY(call_rwsem_down_read_failed)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_down_read_failed
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_read_failed)
-
-ENTRY(call_rwsem_down_read_failed_killable)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_down_read_failed_killable
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_read_failed_killable)
-
-ENTRY(call_rwsem_down_write_failed)
- FRAME_BEGIN
- save_common_regs
- movq %rax,%rdi
- call rwsem_down_write_failed
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_write_failed)
-
-ENTRY(call_rwsem_down_write_failed_killable)
- FRAME_BEGIN
- save_common_regs
- movq %rax,%rdi
- call rwsem_down_write_failed_killable
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_down_write_failed_killable)
-
-ENTRY(call_rwsem_wake)
- FRAME_BEGIN
- /* do nothing if still outstanding active readers */
- __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx)
- jnz 1f
- save_common_regs
- movq %rax,%rdi
- call rwsem_wake
- restore_common_regs
-1: FRAME_END
- ret
-ENDPROC(call_rwsem_wake)
-
-ENTRY(call_rwsem_downgrade_wake)
- FRAME_BEGIN
- save_common_regs
- __ASM_SIZE(push,) %__ASM_REG(dx)
- movq %rax,%rdi
- call rwsem_downgrade_wake
- __ASM_SIZE(pop,) %__ASM_REG(dx)
- restore_common_regs
- FRAME_END
- ret
-ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index ee42bb0cbeb3..9952a01cad24 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -55,26 +55,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
EXPORT_SYMBOL(clear_user);
/*
- * Try to copy last bytes and clear the rest if needed.
- * Since protection fault in copy_from/to_user is not a normal situation,
- * it is not necessary to optimize tail handling.
- */
-__visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++) {
- char c;
-
- if (__get_user_nocheck(c, from++, sizeof(char)))
- break;
- if (__put_user_nocheck(c, to, sizeof(char)))
- break;
- }
- clac();
- return len;
-}
-
-/*
* Similar to copy_user_handle_tail, probe for the write fault point,
* but reuse __memcpy_mcsafe in case a new read error is encountered.
* clac() is handled in _copy_to_iter_mcsafe().
diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c
index 9e2ba7e667f6..a873da6b46d6 100644
--- a/arch/x86/math-emu/fpu_entry.c
+++ b/arch/x86/math-emu/fpu_entry.c
@@ -113,9 +113,6 @@ void math_emulate(struct math_emu_info *info)
unsigned long code_base = 0;
unsigned long code_limit = 0; /* Initialized to stop compiler warnings */
struct desc_struct code_descriptor;
- struct fpu *fpu = &current->thread.fpu;
-
- fpu__initialize(fpu);
#ifdef RE_ENTRANT_CHECKING
if (emulating) {
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 19c6abf9ea31..752ad11d6868 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -13,8 +13,8 @@
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
#ifdef CONFIG_X86_64
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
+DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
#endif
struct cpu_entry_area *get_cpu_entry_area(int cpu)
@@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
}
-static void __init percpu_setup_debug_store(int cpu)
+static void __init percpu_setup_debug_store(unsigned int cpu)
{
#ifdef CONFIG_CPU_SUP_INTEL
- int npages;
+ unsigned int npages;
void *cea;
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
@@ -78,9 +78,43 @@ static void __init percpu_setup_debug_store(int cpu)
#endif
}
+#ifdef CONFIG_X86_64
+
+#define cea_map_stack(name) do { \
+ npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \
+ cea_map_percpu_pages(cea->estacks.name## _stack, \
+ estacks->name## _stack, npages, PAGE_KERNEL); \
+ } while (0)
+
+static void __init percpu_setup_exception_stacks(unsigned int cpu)
+{
+ struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
+ struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
+ unsigned int npages;
+
+ BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+
+ per_cpu(cea_exception_stacks, cpu) = &cea->estacks;
+
+ /*
+	 * The exception stack mappings in the per cpu area are protected
+ * by guard pages so each stack must be mapped separately. DB2 is
+ * not mapped; it just exists to catch triple nesting of #DB.
+ */
+ cea_map_stack(DF);
+ cea_map_stack(NMI);
+ cea_map_stack(DB1);
+ cea_map_stack(DB);
+ cea_map_stack(MCE);
+}
+#else
+static inline void percpu_setup_exception_stacks(unsigned int cpu) {}
+#endif
+
/* Setup the fixmap mappings only once per-processor */
-static void __init setup_cpu_entry_area(int cpu)
+static void __init setup_cpu_entry_area(unsigned int cpu)
{
+ struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
#ifdef CONFIG_X86_64
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
pgprot_t gdt_prot = PAGE_KERNEL_RO;
@@ -101,10 +135,9 @@ static void __init setup_cpu_entry_area(int cpu)
pgprot_t tss_prot = PAGE_KERNEL;
#endif
- cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
- gdt_prot);
+ cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
+ cea_map_percpu_pages(&cea->entry_stack_page,
per_cpu_ptr(&entry_stack_storage, cpu), 1,
PAGE_KERNEL);
@@ -128,22 +161,15 @@ static void __init setup_cpu_entry_area(int cpu)
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
- &per_cpu(cpu_tss_rw, cpu),
+ cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
#ifdef CONFIG_X86_32
- per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+ per_cpu(cpu_entry_area, cpu) = cea;
#endif
-#ifdef CONFIG_X86_64
- BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
- BUILD_BUG_ON(sizeof(exception_stacks) !=
- sizeof(((struct cpu_entry_area *)0)->exception_stacks));
- cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
- &per_cpu(exception_stacks, cpu),
- sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
-#endif
+ percpu_setup_exception_stacks(cpu);
+
percpu_setup_debug_store(cpu);
}
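
The cea_map_stack() macro above relies on name##_stack token pasting so that each exception stack gets its own mapping call and therefore keeps its surrounding guard pages. A minimal user-space sketch of that derivation, with an invented struct layout and PAGE_SIZE used purely for illustration:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Invented layout, only to illustrate the sizeof/name##_stack trick. */
struct exception_stacks {
	char DF_stack[2 * PAGE_SIZE];
	char NMI_stack[2 * PAGE_SIZE];
	char DB_stack[2 * PAGE_SIZE];
};

/* Mirrors the shape of cea_map_stack(): one "mapping" per named stack. */
#define map_stack(name) do {						\
	unsigned long npages =						\
		sizeof(((struct exception_stacks *)0)->name##_stack) /	\
		PAGE_SIZE;						\
	printf("map %-3s stack: %lu pages\n", #name, npages);		\
} while (0)

int main(void)
{
	map_stack(DF);
	map_stack(NMI);
	map_stack(DB);
	return 0;
}
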
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index c0309ea9abee..6a7302d1161f 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -578,7 +578,7 @@ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
{
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- if (user && static_cpu_has(X86_FEATURE_PTI))
+ if (user && boot_cpu_has(X86_FEATURE_PTI))
pgd = kernel_to_user_pgdp(pgd);
#endif
ptdump_walk_pgd_level_core(m, pgd, false, false);
@@ -591,7 +591,7 @@ void ptdump_walk_user_pgd_level_checkwx(void)
pgd_t *pgd = INIT_PGD;
if (!(__supported_pte_mask & _PAGE_NX) ||
- !static_cpu_has(X86_FEATURE_PTI))
+ !boot_cpu_has(X86_FEATURE_PTI))
return;
pr_info("x86/mm: Checking user space page tables\n");
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 3c4568f8fb28..b0a2de8d2f9e 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -145,7 +145,7 @@ __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup
unsigned long error_code,
unsigned long fault_addr)
{
- if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pF)\n",
+ if (pr_warn_once("unchecked MSR access error: RDMSR from 0x%x at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, regs->ip, (void *)regs->ip))
show_stack_regs(regs);
@@ -162,7 +162,7 @@ __visible bool ex_handler_wrmsr_unsafe(const struct exception_table_entry *fixup
unsigned long error_code,
unsigned long fault_addr)
{
- if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pF)\n",
+ if (pr_warn_once("unchecked MSR access error: WRMSR to 0x%x (tried to write 0x%08x%08x) at rIP: 0x%lx (%pS)\n",
(unsigned int)regs->cx, (unsigned int)regs->dx,
(unsigned int)regs->ax, regs->ip, (void *)regs->ip))
show_stack_regs(regs);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 667f1da36208..46df4c6aae46 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -28,6 +28,7 @@
#include <asm/mmu_context.h> /* vma_pkey() */
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
#include <asm/desc.h> /* store_idt(), ... */
+#include <asm/cpu_entry_area.h> /* exception stack */
#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>
@@ -359,8 +360,6 @@ static noinline int vmalloc_fault(unsigned long address)
if (!(address >= VMALLOC_START && address < VMALLOC_END))
return -1;
- WARN_ON_ONCE(in_nmi());
-
/*
* Copy kernel mappings over when needed. This can also
* happen within a race in page table update. In the later
@@ -603,24 +602,9 @@ static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
name, index, addr, (desc.limit0 | (desc.limit1 << 16)));
}
-/*
- * This helper function transforms the #PF error_code bits into
- * "[PROT] [USER]" type of descriptive, almost human-readable error strings:
- */
-static void err_str_append(unsigned long error_code, char *buf, unsigned long mask, const char *txt)
-{
- if (error_code & mask) {
- if (buf[0])
- strcat(buf, " ");
- strcat(buf, txt);
- }
-}
-
static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
- char err_txt[64];
-
if (!oops_may_print())
return;
@@ -644,31 +628,29 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long ad
from_kuid(&init_user_ns, current_uid()));
}
- pr_alert("BUG: unable to handle kernel %s at %px\n",
- address < PAGE_SIZE ? "NULL pointer dereference" : "paging request",
- (void *)address);
-
- err_txt[0] = 0;
-
- /*
- * Note: length of these appended strings including the separation space and the
- * zero delimiter must fit into err_txt[].
- */
- err_str_append(error_code, err_txt, X86_PF_PROT, "[PROT]" );
- err_str_append(error_code, err_txt, X86_PF_WRITE, "[WRITE]");
- err_str_append(error_code, err_txt, X86_PF_USER, "[USER]" );
- err_str_append(error_code, err_txt, X86_PF_RSVD, "[RSVD]" );
- err_str_append(error_code, err_txt, X86_PF_INSTR, "[INSTR]");
- err_str_append(error_code, err_txt, X86_PF_PK, "[PK]" );
-
- pr_alert("#PF error: %s\n", error_code ? err_txt : "[normal kernel read fault]");
+ if (address < PAGE_SIZE && !user_mode(regs))
+ pr_alert("BUG: kernel NULL pointer dereference, address: %px\n",
+ (void *)address);
+ else
+ pr_alert("BUG: unable to handle page fault for address: %px\n",
+ (void *)address);
+
+ pr_alert("#PF: %s %s in %s mode\n",
+ (error_code & X86_PF_USER) ? "user" : "supervisor",
+ (error_code & X86_PF_INSTR) ? "instruction fetch" :
+ (error_code & X86_PF_WRITE) ? "write access" :
+ "read access",
+ user_mode(regs) ? "user" : "kernel");
+ pr_alert("#PF: error_code(0x%04lx) - %s\n", error_code,
+ !(error_code & X86_PF_PROT) ? "not-present page" :
+ (error_code & X86_PF_RSVD) ? "reserved bit violation" :
+ (error_code & X86_PF_PK) ? "protection keys violation" :
+ "permissions violation");
if (!(error_code & X86_PF_USER) && user_mode(regs)) {
struct desc_ptr idt, gdt;
u16 ldtr, tr;
- pr_alert("This was a system access from user code\n");
-
/*
* This can happen for quite a few reasons. The more obvious
* ones are faults accessing the GDT, or LDT. Perhaps
@@ -793,7 +775,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
if (is_vmalloc_addr((void *)address) &&
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
- unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
+ unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
/*
* We're likely to be running with very little stack space
* left. It's plausible that we'd hit this condition but
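
To make the new #PF report format above concrete, here is a self-contained user-space sketch of the same decoding; the X86_PF_* bit values are copied in for illustration only, and user_mode() is replaced by a plain boolean argument:

#include <stdio.h>
#include <stdbool.h>

#define X86_PF_PROT	0x01	/* 0: not-present page, 1: protection fault */
#define X86_PF_WRITE	0x02	/* write access */
#define X86_PF_USER	0x04	/* fault originated in user mode */
#define X86_PF_RSVD	0x08	/* reserved bit set in a paging entry */
#define X86_PF_INSTR	0x10	/* instruction fetch */
#define X86_PF_PK	0x20	/* protection keys violation */

static void decode_pf(unsigned long error_code, bool user_mode)
{
	printf("#PF: %s %s in %s mode\n",
	       (error_code & X86_PF_USER) ? "user" : "supervisor",
	       (error_code & X86_PF_INSTR) ? "instruction fetch" :
	       (error_code & X86_PF_WRITE) ? "write access" :
					     "read access",
	       user_mode ? "user" : "kernel");
	printf("#PF: error_code(0x%04lx) - %s\n", error_code,
	       !(error_code & X86_PF_PROT) ? "not-present page" :
	       (error_code & X86_PF_RSVD)  ? "reserved bit violation" :
	       (error_code & X86_PF_PK)    ? "protection keys violation" :
					     "permissions violation");
}

int main(void)
{
	decode_pf(0x02, false);	/* kernel write to a not-present page */
	decode_pf(0x07, true);	/* user-mode write protection fault */
	return 0;
}
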
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 8dacdb96899e..fd10d91a6115 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -6,6 +6,7 @@
#include <linux/swapfile.h>
#include <linux/swapops.h>
#include <linux/kmemleak.h>
+#include <linux/sched/task.h>
#include <asm/set_memory.h>
#include <asm/e820/api.h>
@@ -23,6 +24,7 @@
#include <asm/hypervisor.h>
#include <asm/cpufeature.h>
#include <asm/pti.h>
+#include <asm/text-patching.h>
/*
* We need to define the tracepoints somewhere, and tlb.c
@@ -702,6 +704,41 @@ void __init init_mem_mapping(void)
}
/*
+ * Initialize an mm_struct to be used during poking and a pointer to be used
+ * during patching.
+ */
+void __init poking_init(void)
+{
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ poking_mm = copy_init_mm();
+ BUG_ON(!poking_mm);
+
+ /*
+ * Randomize the poking address, but make sure that the following page
+ * will be mapped at the same PMD. We need 2 pages, so find space for 3,
+ * and adjust the address if the PMD ends after the first one.
+ */
+ poking_addr = TASK_UNMAPPED_BASE;
+ if (IS_ENABLED(CONFIG_RANDOMIZE_BASE))
+ poking_addr += (kaslr_get_random_long("Poking") & PAGE_MASK) %
+ (TASK_SIZE - TASK_UNMAPPED_BASE - 3 * PAGE_SIZE);
+
+ if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
+ poking_addr += PAGE_SIZE;
+
+ /*
+ * We need to trigger the allocation of the page-tables that will be
+ * needed for poking now. Later, poking may be performed in an atomic
+ * section, which might cause allocation to fail.
+ */
+ ptep = get_locked_pte(poking_mm, poking_addr, &ptl);
+ BUG_ON(!ptep);
+ pte_unmap_unlock(ptep, ptl);
+}
+
+/*
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
* is valid. The argument is a physical page number.
*
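
The PMD-boundary adjustment in poking_init() above is easiest to see with concrete numbers. A toy sketch with made-up constants (4 KiB pages, 2 MiB PMDs) and an arbitrary candidate address; if the second of the two poking pages would start a new PMD, the address is bumped by one page so both pages share a PMD:

#include <stdio.h>

#define PAGE_SIZE	0x1000UL
#define PMD_SIZE	0x200000UL
#define PMD_MASK	(~(PMD_SIZE - 1))

int main(void)
{
	/* Last page of a PMD, so page two would cross into the next PMD. */
	unsigned long poking_addr = 0x7f00001ff000UL;

	if (((poking_addr + PAGE_SIZE) & ~PMD_MASK) == 0)
		poking_addr += PAGE_SIZE;

	printf("poking_addr = %#lx\n", poking_addr);
	return 0;
}
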
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index d669c5e797e0..dc3f058bdf9b 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -125,10 +125,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
- if (pgtable_l5_enabled())
- entropy = (rand % (entropy + 1)) & P4D_MASK;
- else
- entropy = (rand % (entropy + 1)) & PUD_MASK;
+ entropy = (rand % (entropy + 1)) & PUD_MASK;
vaddr += entropy;
*kaslr_regions[i].base = vaddr;
@@ -137,84 +134,71 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
- if (pgtable_l5_enabled())
- vaddr = round_up(vaddr + 1, P4D_SIZE);
- else
- vaddr = round_up(vaddr + 1, PUD_SIZE);
+ vaddr = round_up(vaddr + 1, PUD_SIZE);
remain_entropy -= entropy;
}
}
static void __meminit init_trampoline_pud(void)
{
- unsigned long paddr, paddr_next;
+ pud_t *pud_page_tramp, *pud, *pud_tramp;
+ p4d_t *p4d_page_tramp, *p4d, *p4d_tramp;
+ unsigned long paddr, vaddr;
pgd_t *pgd;
- pud_t *pud_page, *pud_page_tramp;
- int i;
pud_page_tramp = alloc_low_page();
+ /*
+ * There are two mappings for the low 1MB area, the direct mapping
+ * and the 1:1 mapping for the real mode trampoline:
+ *
+ * Direct mapping: virt_addr = phys_addr + PAGE_OFFSET
+ * 1:1 mapping: virt_addr = phys_addr
+ */
paddr = 0;
- pgd = pgd_offset_k((unsigned long)__va(paddr));
- pud_page = (pud_t *) pgd_page_vaddr(*pgd);
-
- for (i = pud_index(paddr); i < PTRS_PER_PUD; i++, paddr = paddr_next) {
- pud_t *pud, *pud_tramp;
- unsigned long vaddr = (unsigned long)__va(paddr);
+ vaddr = (unsigned long)__va(paddr);
+ pgd = pgd_offset_k(vaddr);
- pud_tramp = pud_page_tramp + pud_index(paddr);
- pud = pud_page + pud_index(vaddr);
- paddr_next = (paddr & PUD_MASK) + PUD_SIZE;
-
- *pud_tramp = *pud;
- }
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
-}
-
-static void __meminit init_trampoline_p4d(void)
-{
- unsigned long paddr, paddr_next;
- pgd_t *pgd;
- p4d_t *p4d_page, *p4d_page_tramp;
- int i;
+ pud_tramp = pud_page_tramp + pud_index(paddr);
+ *pud_tramp = *pud;
- p4d_page_tramp = alloc_low_page();
-
- paddr = 0;
- pgd = pgd_offset_k((unsigned long)__va(paddr));
- p4d_page = (p4d_t *) pgd_page_vaddr(*pgd);
-
- for (i = p4d_index(paddr); i < PTRS_PER_P4D; i++, paddr = paddr_next) {
- p4d_t *p4d, *p4d_tramp;
- unsigned long vaddr = (unsigned long)__va(paddr);
+ if (pgtable_l5_enabled()) {
+ p4d_page_tramp = alloc_low_page();
p4d_tramp = p4d_page_tramp + p4d_index(paddr);
- p4d = p4d_page + p4d_index(vaddr);
- paddr_next = (paddr & P4D_MASK) + P4D_SIZE;
- *p4d_tramp = *p4d;
- }
+ set_p4d(p4d_tramp,
+ __p4d(_KERNPG_TABLE | __pa(pud_page_tramp)));
- set_pgd(&trampoline_pgd_entry,
- __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ set_pgd(&trampoline_pgd_entry,
+ __pgd(_KERNPG_TABLE | __pa(p4d_page_tramp)));
+ } else {
+ set_pgd(&trampoline_pgd_entry,
+ __pgd(_KERNPG_TABLE | __pa(pud_page_tramp)));
+ }
}
/*
- * Create PGD aligned trampoline table to allow real mode initialization
- * of additional CPUs. Consume only 1 low memory page.
+ * The real mode trampoline, which is required for bootstrapping CPUs,
+ * occupies only a small area under the low 1MB. See reserve_real_mode()
+ * for details.
+ *
+ * If KASLR is disabled the first PGD entry of the direct mapping is copied
+ * to map the real mode trampoline.
+ *
+ * If KASLR is enabled, copy only the PUD which covers the low 1MB
+ * area. This limits the randomization granularity to 1GB for both 4-level
+ * and 5-level paging.
*/
void __meminit init_trampoline(void)
{
-
if (!kaslr_memory_enabled()) {
init_trampoline_default();
return;
}
- if (pgtable_l5_enabled())
- init_trampoline_p4d();
- else
- init_trampoline_pud();
+ init_trampoline_pud();
}
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index c805db6236b4..59726aaf4671 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -142,7 +142,7 @@ int mpx_fault_info(struct mpx_fault_info *info, struct pt_regs *regs)
goto err_out;
}
/* get bndregs field from current task's xsave area */
- bndregs = get_xsave_field_ptr(XFEATURE_MASK_BNDREGS);
+ bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS);
if (!bndregs) {
err = -EINVAL;
goto err_out;
@@ -190,7 +190,7 @@ static __user void *mpx_get_bounds_dir(void)
* The bounds directory pointer is stored in a register
* only accessible if we first do an xsave.
*/
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
if (!bndcsr)
return MPX_INVALID_BOUNDS_DIR;
@@ -376,7 +376,7 @@ static int do_mpx_bt_fault(void)
const struct mpx_bndcsr *bndcsr;
struct mm_struct *mm = current->mm;
- bndcsr = get_xsave_field_ptr(XFEATURE_MASK_BNDCSR);
+ bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
if (!bndcsr)
return -EINVAL;
/*
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 4c570612e24e..daf4d645e537 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -2209,8 +2209,6 @@ int set_pages_rw(struct page *page, int numpages)
return set_memory_rw(addr, numpages);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-
static int __set_pages_p(struct page *page, int numpages)
{
unsigned long tempaddr = (unsigned long) page_address(page);
@@ -2249,6 +2247,16 @@ static int __set_pages_np(struct page *page, int numpages)
return __change_page_attr_set_clr(&cpa, 0);
}
+int set_direct_map_invalid_noflush(struct page *page)
+{
+ return __set_pages_np(page, 1);
+}
+
+int set_direct_map_default_noflush(struct page *page)
+{
+ return __set_pages_p(page, 1);
+}
+
void __kernel_map_pages(struct page *page, int numpages, int enable)
{
if (PageHighMem(page))
@@ -2282,7 +2290,6 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
}
#ifdef CONFIG_HIBERNATION
-
bool kernel_page_present(struct page *page)
{
unsigned int level;
@@ -2294,11 +2301,8 @@ bool kernel_page_present(struct page *page)
pte = lookup_address((unsigned long)page_address(page), &level);
return (pte_val(*pte) & _PAGE_PRESENT);
}
-
#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC */
-
int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
unsigned numpages, unsigned long page_flags)
{
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 7bd01709a091..1f67b1e15bf6 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -190,7 +190,7 @@ static void pgd_dtor(pgd_t *pgd)
* when PTI is enabled. We need them to map the per-process LDT into the
* user-space page-table.
*/
-#define PREALLOCATED_USER_PMDS (static_cpu_has(X86_FEATURE_PTI) ? \
+#define PREALLOCATED_USER_PMDS (boot_cpu_has(X86_FEATURE_PTI) ? \
KERNEL_PGD_PTRS : 0)
#define MAX_PREALLOCATED_USER_PMDS KERNEL_PGD_PTRS
@@ -292,7 +292,7 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
pgdp = kernel_to_user_pgdp(pgdp);
@@ -373,14 +373,14 @@ static void pgd_prepopulate_user_pmd(struct mm_struct *mm,
static struct kmem_cache *pgd_cache;
-static int __init pgd_cache_init(void)
+void __init pgd_cache_init(void)
{
/*
* When PAE kernel is running as a Xen domain, it does not use
* shared kernel pmd. And this requires a whole page for pgd.
*/
if (!SHARED_KERNEL_PMD)
- return 0;
+ return;
/*
* when PAE kernel is not running as a Xen domain, it uses
@@ -390,9 +390,7 @@ static int __init pgd_cache_init(void)
*/
pgd_cache = kmem_cache_create("pgd_cache", PGD_SIZE, PGD_ALIGN,
SLAB_PANIC, NULL);
- return 0;
}
-core_initcall(pgd_cache_init);
static inline pgd_t *_pgd_alloc(void)
{
@@ -420,6 +418,10 @@ static inline void _pgd_free(pgd_t *pgd)
}
#else
+void __init pgd_cache_init(void)
+{
+}
+
static inline pgd_t *_pgd_alloc(void)
{
return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 047a77f6a10c..1dcfc91c8f0c 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -18,6 +18,7 @@
#include <asm/cpufeature.h> /* boot_cpu_has, ... */
#include <asm/mmu_context.h> /* vma_pkey() */
+#include <asm/fpu/internal.h> /* init_fpstate */
int __execute_only_pkey(struct mm_struct *mm)
{
@@ -39,17 +40,12 @@ int __execute_only_pkey(struct mm_struct *mm)
* dance to set PKRU if we do not need to. Check it
* first and assume that if the execute-only pkey is
* write-disabled that we do not have to set it
- * ourselves. We need preempt off so that nobody
- * can make fpregs inactive.
+ * ourselves.
*/
- preempt_disable();
if (!need_to_set_mm_pkey &&
- current->thread.fpu.initialized &&
!__pkru_allows_read(read_pkru(), execute_only_pkey)) {
- preempt_enable();
return execute_only_pkey;
}
- preempt_enable();
/*
* Set up PKRU so that it denies access for everything
@@ -131,7 +127,6 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
* in the process's lifetime will not accidentally get access
* to data which is pkey-protected later on.
*/
-static
u32 init_pkru_value = PKRU_AD_KEY( 1) | PKRU_AD_KEY( 2) | PKRU_AD_KEY( 3) |
PKRU_AD_KEY( 4) | PKRU_AD_KEY( 5) | PKRU_AD_KEY( 6) |
PKRU_AD_KEY( 7) | PKRU_AD_KEY( 8) | PKRU_AD_KEY( 9) |
@@ -148,13 +143,6 @@ void copy_init_pkru_to_fpregs(void)
{
u32 init_pkru_value_snapshot = READ_ONCE(init_pkru_value);
/*
- * Any write to PKRU takes it out of the XSAVE 'init
- * state' which increases context switch cost. Avoid
- * writing 0 when PKRU was already 0.
- */
- if (!init_pkru_value_snapshot && !read_pkru())
- return;
- /*
* Override the PKRU state that came from 'init_fpstate'
* with the baseline from the process.
*/
@@ -174,6 +162,7 @@ static ssize_t init_pkru_read_file(struct file *file, char __user *user_buf,
static ssize_t init_pkru_write_file(struct file *file,
const char __user *user_buf, size_t count, loff_t *ppos)
{
+ struct pkru_state *pk;
char buf[32];
ssize_t len;
u32 new_init_pkru;
@@ -196,6 +185,10 @@ static ssize_t init_pkru_write_file(struct file *file,
return -EINVAL;
WRITE_ONCE(init_pkru_value, new_init_pkru);
+ pk = get_xsave_addr(&init_fpstate.xsave, XFEATURE_PKRU);
+ if (!pk)
+ return -EINVAL;
+ pk->pkru = new_init_pkru;
return count;
}
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 139b28a01ce4..9c2463bc158f 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -35,6 +35,7 @@
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
+#include <linux/cpu.h>
#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
@@ -115,7 +116,8 @@ void __init pti_check_boottime_disable(void)
}
}
- if (cmdline_find_option_bool(boot_command_line, "nopti")) {
+ if (cmdline_find_option_bool(boot_command_line, "nopti") ||
+ cpu_mitigations_off()) {
pti_mode = PTI_FORCE_OFF;
pti_print_if_insecure("disabled on command line.");
return;
@@ -626,7 +628,7 @@ static void pti_set_kernel_image_nonglobal(void)
*/
void __init pti_init(void)
{
- if (!static_cpu_has(X86_FEATURE_PTI))
+ if (!boot_cpu_has(X86_FEATURE_PTI))
return;
pr_info("enabled\n");
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 487b8474c01c..7f61431c75fb 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -634,7 +634,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
}
-static void flush_tlb_func_local(void *info, enum tlb_flush_reason reason)
+static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
{
const struct flush_tlb_info *f = info;
@@ -722,43 +722,81 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
*/
unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct flush_tlb_info, flush_tlb_info);
+
+#ifdef CONFIG_DEBUG_VM
+static DEFINE_PER_CPU(unsigned int, flush_tlb_info_idx);
+#endif
+
+static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
+ unsigned long start, unsigned long end,
+ unsigned int stride_shift, bool freed_tables,
+ u64 new_tlb_gen)
+{
+ struct flush_tlb_info *info = this_cpu_ptr(&flush_tlb_info);
+
+#ifdef CONFIG_DEBUG_VM
+ /*
+ * Ensure that the following code is non-reentrant and flush_tlb_info
+ * is not overwritten. This means no TLB flushing is initiated by
+ * interrupt handlers and machine-check exception handlers.
+ */
+ BUG_ON(this_cpu_inc_return(flush_tlb_info_idx) != 1);
+#endif
+
+ info->start = start;
+ info->end = end;
+ info->mm = mm;
+ info->stride_shift = stride_shift;
+ info->freed_tables = freed_tables;
+ info->new_tlb_gen = new_tlb_gen;
+
+ return info;
+}
+
+static inline void put_flush_tlb_info(void)
+{
+#ifdef CONFIG_DEBUG_VM
+	/* Complete reentrancy prevention checks */
+ barrier();
+ this_cpu_dec(flush_tlb_info_idx);
+#endif
+}
+
void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
unsigned long end, unsigned int stride_shift,
bool freed_tables)
{
+ struct flush_tlb_info *info;
+ u64 new_tlb_gen;
int cpu;
- struct flush_tlb_info info = {
- .mm = mm,
- .stride_shift = stride_shift,
- .freed_tables = freed_tables,
- };
-
cpu = get_cpu();
- /* This is also a barrier that synchronizes with switch_mm(). */
- info.new_tlb_gen = inc_mm_tlb_gen(mm);
-
/* Should we flush just the requested range? */
- if ((end != TLB_FLUSH_ALL) &&
- ((end - start) >> stride_shift) <= tlb_single_page_flush_ceiling) {
- info.start = start;
- info.end = end;
- } else {
- info.start = 0UL;
- info.end = TLB_FLUSH_ALL;
+ if ((end == TLB_FLUSH_ALL) ||
+ ((end - start) >> stride_shift) > tlb_single_page_flush_ceiling) {
+ start = 0;
+ end = TLB_FLUSH_ALL;
}
+ /* This is also a barrier that synchronizes with switch_mm(). */
+ new_tlb_gen = inc_mm_tlb_gen(mm);
+
+ info = get_flush_tlb_info(mm, start, end, stride_shift, freed_tables,
+ new_tlb_gen);
+
if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
- VM_WARN_ON(irqs_disabled());
+ lockdep_assert_irqs_enabled();
local_irq_disable();
- flush_tlb_func_local(&info, TLB_LOCAL_MM_SHOOTDOWN);
+ flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
local_irq_enable();
}
if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids)
- flush_tlb_others(mm_cpumask(mm), &info);
+ flush_tlb_others(mm_cpumask(mm), info);
+ put_flush_tlb_info();
put_cpu();
}
@@ -787,38 +825,48 @@ static void do_kernel_range_flush(void *info)
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
-
/* Balance as user space task's flush, a bit conservative */
if (end == TLB_FLUSH_ALL ||
(end - start) > tlb_single_page_flush_ceiling << PAGE_SHIFT) {
on_each_cpu(do_flush_tlb_all, NULL, 1);
} else {
- struct flush_tlb_info info;
- info.start = start;
- info.end = end;
- on_each_cpu(do_kernel_range_flush, &info, 1);
+ struct flush_tlb_info *info;
+
+ preempt_disable();
+ info = get_flush_tlb_info(NULL, start, end, 0, false, 0);
+
+ on_each_cpu(do_kernel_range_flush, info, 1);
+
+ put_flush_tlb_info();
+ preempt_enable();
}
}
+/*
+ * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm.
+ * This means that the 'struct flush_tlb_info' that describes which mappings to
+ * flush is actually fixed. We therefore set a single fixed struct and use it in
+ * arch_tlbbatch_flush().
+ */
+static const struct flush_tlb_info full_flush_tlb_info = {
+ .mm = NULL,
+ .start = 0,
+ .end = TLB_FLUSH_ALL,
+};
+
void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
{
- struct flush_tlb_info info = {
- .mm = NULL,
- .start = 0UL,
- .end = TLB_FLUSH_ALL,
- };
-
int cpu = get_cpu();
if (cpumask_test_cpu(cpu, &batch->cpumask)) {
- VM_WARN_ON(irqs_disabled());
+ lockdep_assert_irqs_enabled();
local_irq_disable();
- flush_tlb_func_local(&info, TLB_LOCAL_SHOOTDOWN);
+ flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
local_irq_enable();
}
if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
- flush_tlb_others(&batch->cpumask, &info);
+ flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);
cpumask_clear(&batch->cpumask);
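
The get_flush_tlb_info()/put_flush_tlb_info() pair above hands out a single cached per-CPU object and, under CONFIG_DEBUG_VM, asserts that its use never nests. A rough single-threaded analogue of that pattern (all names are invented for this sketch):

#include <assert.h>
#include <stdio.h>

struct flush_info {
	unsigned long start, end;
};

static struct flush_info cached_info;
static unsigned int info_in_use;

static struct flush_info *get_flush_info(unsigned long start, unsigned long end)
{
	info_in_use++;
	assert(info_in_use == 1);	/* like the BUG_ON(... != 1) above */
	cached_info.start = start;
	cached_info.end = end;
	return &cached_info;
}

static void put_flush_info(void)
{
	info_in_use--;
}

int main(void)
{
	struct flush_info *info = get_flush_info(0x1000, 0x2000);

	printf("flushing %#lx-%#lx\n", info->start, info->end);
	put_flush_info();
	return 0;
}
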
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 0d9cdffce6ac..b29e82f190c7 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -117,6 +117,8 @@ static bool is_simm32(s64 value)
#define IA32_JLE 0x7E
#define IA32_JG 0x7F
+#define COND_JMP_OPCODE_INVALID (0xFF)
+
/*
* Map eBPF registers to IA32 32bit registers or stack scratch space.
*
@@ -698,19 +700,12 @@ static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
STACK_VAR(dst_hi));
}
- /* xor ecx,ecx */
- EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
- /* sub dreg_lo,ecx */
- EMIT2(0x2B, add_2reg(0xC0, dreg_lo, IA32_ECX));
- /* mov dreg_lo,ecx */
- EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
-
- /* xor ecx,ecx */
- EMIT2(0x31, add_2reg(0xC0, IA32_ECX, IA32_ECX));
- /* sbb dreg_hi,ecx */
- EMIT2(0x19, add_2reg(0xC0, dreg_hi, IA32_ECX));
- /* mov dreg_hi,ecx */
- EMIT2(0x89, add_2reg(0xC0, dreg_hi, IA32_ECX));
+ /* neg dreg_lo */
+ EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
+ /* adc dreg_hi,0x0 */
+ EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
+ /* neg dreg_hi */
+ EMIT2(0xF7, add_1reg(0xD8, dreg_hi));
if (dstk) {
/* mov dword ptr [ebp+off],dreg_lo */
@@ -1613,6 +1608,75 @@ static inline void emit_push_r64(const u8 src[], u8 **pprog)
*pprog = prog;
}
+static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
+{
+ u8 jmp_cond;
+
+ /* Convert BPF opcode to x86 */
+ switch (op) {
+ case BPF_JEQ:
+ jmp_cond = IA32_JE;
+ break;
+ case BPF_JSET:
+ case BPF_JNE:
+ jmp_cond = IA32_JNE;
+ break;
+ case BPF_JGT:
+ /* GT is unsigned '>', JA in x86 */
+ jmp_cond = IA32_JA;
+ break;
+ case BPF_JLT:
+ /* LT is unsigned '<', JB in x86 */
+ jmp_cond = IA32_JB;
+ break;
+ case BPF_JGE:
+ /* GE is unsigned '>=', JAE in x86 */
+ jmp_cond = IA32_JAE;
+ break;
+ case BPF_JLE:
+ /* LE is unsigned '<=', JBE in x86 */
+ jmp_cond = IA32_JBE;
+ break;
+ case BPF_JSGT:
+ if (!is_cmp_lo)
+ /* Signed '>', GT in x86 */
+ jmp_cond = IA32_JG;
+ else
+ /* GT is unsigned '>', JA in x86 */
+ jmp_cond = IA32_JA;
+ break;
+ case BPF_JSLT:
+ if (!is_cmp_lo)
+ /* Signed '<', LT in x86 */
+ jmp_cond = IA32_JL;
+ else
+ /* LT is unsigned '<', JB in x86 */
+ jmp_cond = IA32_JB;
+ break;
+ case BPF_JSGE:
+ if (!is_cmp_lo)
+ /* Signed '>=', GE in x86 */
+ jmp_cond = IA32_JGE;
+ else
+ /* GE is unsigned '>=', JAE in x86 */
+ jmp_cond = IA32_JAE;
+ break;
+ case BPF_JSLE:
+ if (!is_cmp_lo)
+ /* Signed '<=', LE in x86 */
+ jmp_cond = IA32_JLE;
+ else
+ /* LE is unsigned '<=', JBE in x86 */
+ jmp_cond = IA32_JBE;
+ break;
+ default: /* to silence GCC warning */
+ jmp_cond = COND_JMP_OPCODE_INVALID;
+ break;
+ }
+
+ return jmp_cond;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -2069,10 +2133,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_JMP | BPF_JLT | BPF_X:
case BPF_JMP | BPF_JGE | BPF_X:
case BPF_JMP | BPF_JLE | BPF_X:
- case BPF_JMP | BPF_JSGT | BPF_X:
- case BPF_JMP | BPF_JSLE | BPF_X:
- case BPF_JMP | BPF_JSLT | BPF_X:
- case BPF_JMP | BPF_JSGE | BPF_X:
case BPF_JMP32 | BPF_JEQ | BPF_X:
case BPF_JMP32 | BPF_JNE | BPF_X:
case BPF_JMP32 | BPF_JGT | BPF_X:
@@ -2118,6 +2178,40 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
goto emit_cond_jmp;
}
+ case BPF_JMP | BPF_JSGT | BPF_X:
+ case BPF_JMP | BPF_JSLE | BPF_X:
+ case BPF_JMP | BPF_JSLT | BPF_X:
+ case BPF_JMP | BPF_JSGE | BPF_X: {
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = sstk ? IA32_ECX : src_lo;
+ u8 sreg_hi = sstk ? IA32_EBX : src_hi;
+
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ if (sstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
+ STACK_VAR(src_lo));
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EBX),
+ STACK_VAR(src_hi));
+ }
+
+ /* cmp dreg_hi,sreg_hi */
+ EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+ EMIT2(IA32_JNE, 10);
+ /* cmp dreg_lo,sreg_lo */
+ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+ goto emit_cond_jmp_signed;
+ }
case BPF_JMP | BPF_JSET | BPF_X:
case BPF_JMP32 | BPF_JSET | BPF_X: {
bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
@@ -2194,10 +2288,6 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
case BPF_JMP | BPF_JLT | BPF_K:
case BPF_JMP | BPF_JGE | BPF_K:
case BPF_JMP | BPF_JLE | BPF_K:
- case BPF_JMP | BPF_JSGT | BPF_K:
- case BPF_JMP | BPF_JSLE | BPF_K:
- case BPF_JMP | BPF_JSLT | BPF_K:
- case BPF_JMP | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JEQ | BPF_K:
case BPF_JMP32 | BPF_JNE | BPF_K:
case BPF_JMP32 | BPF_JGT | BPF_K:
@@ -2238,50 +2328,9 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
/* cmp dreg_lo,sreg_lo */
EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
-emit_cond_jmp: /* Convert BPF opcode to x86 */
- switch (BPF_OP(code)) {
- case BPF_JEQ:
- jmp_cond = IA32_JE;
- break;
- case BPF_JSET:
- case BPF_JNE:
- jmp_cond = IA32_JNE;
- break;
- case BPF_JGT:
- /* GT is unsigned '>', JA in x86 */
- jmp_cond = IA32_JA;
- break;
- case BPF_JLT:
- /* LT is unsigned '<', JB in x86 */
- jmp_cond = IA32_JB;
- break;
- case BPF_JGE:
- /* GE is unsigned '>=', JAE in x86 */
- jmp_cond = IA32_JAE;
- break;
- case BPF_JLE:
- /* LE is unsigned '<=', JBE in x86 */
- jmp_cond = IA32_JBE;
- break;
- case BPF_JSGT:
- /* Signed '>', GT in x86 */
- jmp_cond = IA32_JG;
- break;
- case BPF_JSLT:
- /* Signed '<', LT in x86 */
- jmp_cond = IA32_JL;
- break;
- case BPF_JSGE:
- /* Signed '>=', GE in x86 */
- jmp_cond = IA32_JGE;
- break;
- case BPF_JSLE:
- /* Signed '<=', LE in x86 */
- jmp_cond = IA32_JLE;
- break;
- default: /* to silence GCC warning */
+emit_cond_jmp: jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
+ if (jmp_cond == COND_JMP_OPCODE_INVALID)
return -EFAULT;
- }
jmp_offset = addrs[i + insn->off] - addrs[i];
if (is_imm8(jmp_offset)) {
EMIT2(jmp_cond, jmp_offset);
@@ -2291,7 +2340,66 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
pr_err("cond_jmp gen bug %llx\n", jmp_offset);
return -EFAULT;
}
+ break;
+ }
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JSLE | BPF_K:
+ case BPF_JMP | BPF_JSLT | BPF_K:
+ case BPF_JMP | BPF_JSGE | BPF_K: {
+ u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
+ u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
+ u8 sreg_lo = IA32_ECX;
+ u8 sreg_hi = IA32_EBX;
+ u32 hi;
+ if (dstk) {
+ EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
+ STACK_VAR(dst_lo));
+ EMIT3(0x8B,
+ add_2reg(0x40, IA32_EBP,
+ IA32_EDX),
+ STACK_VAR(dst_hi));
+ }
+
+ /* mov ecx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
+ hi = imm32 & (1 << 31) ? (u32)~0 : 0;
+ /* mov ebx,imm32 */
+ EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
+ /* cmp dreg_hi,sreg_hi */
+ EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
+ EMIT2(IA32_JNE, 10);
+ /* cmp dreg_lo,sreg_lo */
+ EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
+
+ /*
+ * For simplicity of branch offset computation,
+ * let's use fixed jump coding here.
+ */
+emit_cond_jmp_signed: /* Check the condition for low 32-bit comparison */
+ jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true);
+ if (jmp_cond == COND_JMP_OPCODE_INVALID)
+ return -EFAULT;
+ jmp_offset = addrs[i + insn->off] - addrs[i] + 8;
+ if (is_simm32(jmp_offset)) {
+ EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+ } else {
+ pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+ return -EFAULT;
+ }
+ EMIT2(0xEB, 6);
+
+ /* Check the condition for high 32-bit comparison */
+ jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
+ if (jmp_cond == COND_JMP_OPCODE_INVALID)
+ return -EFAULT;
+ jmp_offset = addrs[i + insn->off] - addrs[i];
+ if (is_simm32(jmp_offset)) {
+ EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
+ } else {
+ pr_err("cond_jmp gen bug %llx\n", jmp_offset);
+ return -EFAULT;
+ }
break;
}
case BPF_JMP | BPF_JA:
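
The BPF_JSGT/JSLT/JSGE/JSLE handling above splits a signed 64-bit comparison into a signed compare of the high 32-bit words and, only when those are equal, an unsigned compare of the low words (the is_cmp_lo parameter of get_cond_jmp_opcode()). A stand-alone C sketch of that strategy, with an invented helper name:

#include <stdint.h>
#include <stdio.h>

/* Signed "a > b" on 64-bit values, decided from 32-bit halves only. */
static int sgt64_by_halves(int64_t a, int64_t b)
{
	int32_t  ahi = (int32_t)(a >> 32), bhi = (int32_t)(b >> 32);
	uint32_t alo = (uint32_t)a,        blo = (uint32_t)b;

	if (ahi != bhi)
		return ahi > bhi;	/* signed compare of high words */
	return alo > blo;		/* unsigned compare of low words */
}

int main(void)
{
	printf("%d\n", sgt64_by_halves(-1, 1));				/* 0 */
	printf("%d\n", sgt64_by_halves(1, -1));				/* 1 */
	printf("%d\n", sgt64_by_halves(0x100000000LL, 0xffffffffLL));	/* 1 */
	return 0;
}
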
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
index 2c53b0f19329..1297e185b8c8 100644
--- a/arch/x86/platform/uv/tlb_uv.c
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -2133,14 +2133,19 @@ static int __init summarize_uvhub_sockets(int nuvhubs,
*/
static int __init init_per_cpu(int nuvhubs, int base_part_pnode)
{
- unsigned char *uvhub_mask;
struct uvhub_desc *uvhub_descs;
+ unsigned char *uvhub_mask = NULL;
if (is_uv3_hub() || is_uv2_hub() || is_uv1_hub())
timeout_us = calculate_destination_timeout();
uvhub_descs = kcalloc(nuvhubs, sizeof(struct uvhub_desc), GFP_KERNEL);
+ if (!uvhub_descs)
+ goto fail;
+
uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL);
+ if (!uvhub_mask)
+ goto fail;
if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask))
goto fail;
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index bcddf09b5aa3..4845b8c7be7f 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -90,7 +90,6 @@ static int get_e820_md5(struct e820_table *table, void *buf)
}
desc->tfm = tfm;
- desc->flags = 0;
size = offsetof(struct e820_table, entries) +
sizeof(struct e820_entry) * table->nr_entries;
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c
index b629f6992d9f..ce7188cbdae5 100644
--- a/arch/x86/tools/relocs.c
+++ b/arch/x86/tools/relocs.c
@@ -11,7 +11,9 @@
#define Elf_Shdr ElfW(Shdr)
#define Elf_Sym ElfW(Sym)
-static Elf_Ehdr ehdr;
+static Elf_Ehdr ehdr;
+static unsigned long shnum;
+static unsigned int shstrndx;
struct relocs {
uint32_t *offset;
@@ -241,9 +243,9 @@ static const char *sec_name(unsigned shndx)
{
const char *sec_strtab;
const char *name;
- sec_strtab = secs[ehdr.e_shstrndx].strtab;
+ sec_strtab = secs[shstrndx].strtab;
name = "<noname>";
- if (shndx < ehdr.e_shnum) {
+ if (shndx < shnum) {
name = sec_strtab + secs[shndx].shdr.sh_name;
}
else if (shndx == SHN_ABS) {
@@ -271,7 +273,7 @@ static const char *sym_name(const char *sym_strtab, Elf_Sym *sym)
static Elf_Sym *sym_lookup(const char *symname)
{
int i;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
long nsyms;
char *strtab;
@@ -366,27 +368,41 @@ static void read_ehdr(FILE *fp)
ehdr.e_shnum = elf_half_to_cpu(ehdr.e_shnum);
ehdr.e_shstrndx = elf_half_to_cpu(ehdr.e_shstrndx);
- if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN)) {
+ shnum = ehdr.e_shnum;
+ shstrndx = ehdr.e_shstrndx;
+
+ if ((ehdr.e_type != ET_EXEC) && (ehdr.e_type != ET_DYN))
die("Unsupported ELF header type\n");
- }
- if (ehdr.e_machine != ELF_MACHINE) {
+ if (ehdr.e_machine != ELF_MACHINE)
die("Not for %s\n", ELF_MACHINE_NAME);
- }
- if (ehdr.e_version != EV_CURRENT) {
+ if (ehdr.e_version != EV_CURRENT)
die("Unknown ELF version\n");
- }
- if (ehdr.e_ehsize != sizeof(Elf_Ehdr)) {
+ if (ehdr.e_ehsize != sizeof(Elf_Ehdr))
die("Bad Elf header size\n");
- }
- if (ehdr.e_phentsize != sizeof(Elf_Phdr)) {
+ if (ehdr.e_phentsize != sizeof(Elf_Phdr))
die("Bad program header entry\n");
- }
- if (ehdr.e_shentsize != sizeof(Elf_Shdr)) {
+ if (ehdr.e_shentsize != sizeof(Elf_Shdr))
die("Bad section header entry\n");
+
+
+ if (shnum == SHN_UNDEF || shstrndx == SHN_XINDEX) {
+ Elf_Shdr shdr;
+
+ if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0)
+ die("Seek to %d failed: %s\n", ehdr.e_shoff, strerror(errno));
+
+ if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
+ die("Cannot read initial ELF section header: %s\n", strerror(errno));
+
+ if (shnum == SHN_UNDEF)
+ shnum = elf_xword_to_cpu(shdr.sh_size);
+
+ if (shstrndx == SHN_XINDEX)
+ shstrndx = elf_word_to_cpu(shdr.sh_link);
}
- if (ehdr.e_shstrndx >= ehdr.e_shnum) {
+
+ if (shstrndx >= shnum)
die("String table index out of bounds\n");
- }
}
static void read_shdrs(FILE *fp)
@@ -394,20 +410,20 @@ static void read_shdrs(FILE *fp)
int i;
Elf_Shdr shdr;
- secs = calloc(ehdr.e_shnum, sizeof(struct section));
+ secs = calloc(shnum, sizeof(struct section));
if (!secs) {
die("Unable to allocate %d section headers\n",
- ehdr.e_shnum);
+ shnum);
}
if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0) {
die("Seek to %d failed: %s\n",
ehdr.e_shoff, strerror(errno));
}
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (fread(&shdr, sizeof(shdr), 1, fp) != 1)
die("Cannot read ELF section headers %d/%d: %s\n",
- i, ehdr.e_shnum, strerror(errno));
+ i, shnum, strerror(errno));
sec->shdr.sh_name = elf_word_to_cpu(shdr.sh_name);
sec->shdr.sh_type = elf_word_to_cpu(shdr.sh_type);
sec->shdr.sh_flags = elf_xword_to_cpu(shdr.sh_flags);
@@ -418,7 +434,7 @@ static void read_shdrs(FILE *fp)
sec->shdr.sh_info = elf_word_to_cpu(shdr.sh_info);
sec->shdr.sh_addralign = elf_xword_to_cpu(shdr.sh_addralign);
sec->shdr.sh_entsize = elf_xword_to_cpu(shdr.sh_entsize);
- if (sec->shdr.sh_link < ehdr.e_shnum)
+ if (sec->shdr.sh_link < shnum)
sec->link = &secs[sec->shdr.sh_link];
}
@@ -427,7 +443,7 @@ static void read_shdrs(FILE *fp)
static void read_strtabs(FILE *fp)
{
int i;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (sec->shdr.sh_type != SHT_STRTAB) {
continue;
@@ -452,7 +468,7 @@ static void read_strtabs(FILE *fp)
static void read_symtabs(FILE *fp)
{
int i,j;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (sec->shdr.sh_type != SHT_SYMTAB) {
continue;
@@ -485,7 +501,7 @@ static void read_symtabs(FILE *fp)
static void read_relocs(FILE *fp)
{
int i,j;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
if (sec->shdr.sh_type != SHT_REL_TYPE) {
continue;
@@ -528,7 +544,7 @@ static void print_absolute_symbols(void)
printf("Absolute symbols\n");
printf(" Num: Value Size Type Bind Visibility Name\n");
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
char *sym_strtab;
int j;
@@ -566,7 +582,7 @@ static void print_absolute_relocs(void)
else
format = "%08"PRIx32" %08"PRIx32" %10s %08"PRIx32" %s\n";
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
struct section *sec = &secs[i];
struct section *sec_applies, *sec_symtab;
char *sym_strtab;
@@ -650,7 +666,7 @@ static void walk_relocs(int (*process)(struct section *sec, Elf_Rel *rel,
{
int i;
/* Walk through the relocations */
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
char *sym_strtab;
Elf_Sym *sh_symtab;
struct section *sec_applies, *sec_symtab;
@@ -706,7 +722,7 @@ static Elf_Addr per_cpu_load_addr;
static void percpu_init(void)
{
int i;
- for (i = 0; i < ehdr.e_shnum; i++) {
+ for (i = 0; i < shnum; i++) {
ElfW(Sym) *sym;
if (strcmp(sec_name(i), ".data..percpu"))
continue;
@@ -738,7 +754,7 @@ static void percpu_init(void)
* __per_cpu_load
*
* The "gold" linker incorrectly associates:
- * init_per_cpu__irq_stack_union
+ * init_per_cpu__fixed_percpu_data
* init_per_cpu__gdt_page
*/
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
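
The relocs.c change above implements the standard ELF "extended numbering" convention: when e_shnum is 0 (SHN_UNDEF) the real section count lives in sh_size of section header 0, and when e_shstrndx is SHN_XINDEX the real string-table index lives in its sh_link. A minimal user-space reader showing the same lookup (64-bit ELF only, native endianness, minimal error handling):

#include <elf.h>
#include <stdio.h>

int main(int argc, char **argv)
{
	Elf64_Ehdr ehdr;
	Elf64_Shdr shdr0;
	unsigned long shnum;
	unsigned int shstrndx;
	FILE *fp;

	if (argc != 2 || !(fp = fopen(argv[1], "rb")))
		return 1;
	if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1)
		return 1;

	shnum = ehdr.e_shnum;
	shstrndx = ehdr.e_shstrndx;

	if (shnum == SHN_UNDEF || shstrndx == SHN_XINDEX) {
		if (fseek(fp, ehdr.e_shoff, SEEK_SET) < 0 ||
		    fread(&shdr0, sizeof(shdr0), 1, fp) != 1)
			return 1;
		if (shnum == SHN_UNDEF)
			shnum = shdr0.sh_size;
		if (shstrndx == SHN_XINDEX)
			shstrndx = shdr0.sh_link;
	}

	printf("sections: %lu, shstrtab index: %u\n", shnum, shstrndx);
	fclose(fp);
	return 0;
}
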
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index a9e80e44178c..a8985e1f7432 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -32,12 +32,6 @@ config ARCH_DEFCONFIG
default "arch/um/configs/i386_defconfig" if X86_32
default "arch/um/configs/x86_64_defconfig" if X86_64
-config RWSEM_XCHGADD_ALGORITHM
- def_bool 64BIT
-
-config RWSEM_GENERIC_SPINLOCK
- def_bool !RWSEM_XCHGADD_ALGORITHM
-
config 3_LEVEL_PGTABLES
bool "Three-level pagetables" if !64BIT
default 64BIT
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 2d686ae54681..33c51c064c77 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -21,14 +21,12 @@ obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
-subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
else
obj-y += syscalls_64.o vdso/
-subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \
- ../lib/rwsem.o
+subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o
endif
diff --git a/arch/x86/um/asm/syscall.h b/arch/x86/um/asm/syscall.h
index ef898af102d1..56a2f0913e3c 100644
--- a/arch/x86/um/asm/syscall.h
+++ b/arch/x86/um/asm/syscall.h
@@ -9,7 +9,7 @@ typedef asmlinkage long (*sys_call_ptr_t)(unsigned long, unsigned long,
unsigned long, unsigned long,
unsigned long, unsigned long);
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
#ifdef CONFIG_X86_32
return AUDIT_ARCH_I386;
diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile
index bf94060fc06f..0caddd6acb22 100644
--- a/arch/x86/um/vdso/Makefile
+++ b/arch/x86/um/vdso/Makefile
@@ -62,7 +62,7 @@ quiet_cmd_vdso = VDSO $@
-Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \
sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@'
-VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv)
+VDSO_LDFLAGS = -fPIC -shared -Wl,--hash-style=sysv
GCOV_PROFILE := n
#
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index a21e1734fc1f..beb44e22afdf 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -2318,8 +2318,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#elif defined(CONFIG_X86_VSYSCALL_EMULATION)
case VSYSCALL_PAGE:
#endif
- case FIX_TEXT_POKE0:
- case FIX_TEXT_POKE1:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c
index 0766a08bdf45..07054572297f 100644
--- a/arch/x86/xen/multicalls.c
+++ b/arch/x86/xen/multicalls.c
@@ -105,7 +105,7 @@ void xen_mc_flush(void)
for (i = 0; i < b->mcidx; i++) {
if (b->entries[i].result < 0) {
#if MC_DEBUG
- pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n",
+ pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pS\n",
i + 1,
b->debug[i].op,
b->debug[i].args[0],
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 145506f9fdbe..590fcf863006 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -361,7 +361,9 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int rc;
- common_cpu_up(cpu, idle);
+ rc = common_cpu_up(cpu, idle);
+ if (rc)
+ return rc;
xen_setup_runstate_info(cpu);
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index 5077ead5e59c..c1d8b90aa4e2 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -40,13 +40,13 @@ ENTRY(startup_xen)
#ifdef CONFIG_X86_64
/* Set up %gs.
*
- * The base of %gs always points to the bottom of the irqstack
- * union. If the stack protector canary is enabled, it is
- * located at %gs:40. Note that, on SMP, the boot cpu uses
- * init data section till per cpu areas are set up.
+ * The base of %gs always points to fixed_percpu_data. If the
+ * stack protector canary is enabled, it is located at %gs:40.
+ * Note that, on SMP, the boot cpu uses init data section until
+ * the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
- movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
+ movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
cdq
wrmsr
#endif
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 4b9aafe766c5..6ec1b75eabc5 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -46,9 +46,6 @@ config XTENSA
with reasonable minimum requirements. The Xtensa Linux project has
a home page at <http://www.linux-xtensa.org/>.
-config RWSEM_XCHGADD_ALGORITHM
- def_bool y
-
config GENERIC_HWEIGHT
def_bool y
@@ -256,12 +253,26 @@ config MEMMAP_CACHEATTR
region: bits 0..3 -- for addresses 0x00000000..0x1fffffff,
bits 4..7 -- for addresses 0x20000000..0x3fffffff, and so on.
- Cache attribute values are specific for the MMU type, so e.g.
- for region protection MMUs: 2 is cache bypass, 4 is WB cached,
- 1 is WT cached, f is illegal. For ful MMU: bit 0 makes it executable,
- bit 1 makes it writable, bits 2..3 meaning is 0: cache bypass,
- 1: WB cache, 2: WT cache, 3: special (c and e are illegal, f is
- reserved).
+ Cache attribute values are specific for the MMU type.
+ For region protection MMUs:
+ 1: WT cached,
+ 2: cache bypass,
+ 4: WB cached,
+ f: illegal.
+	  For full MMU:
+ bit 0: executable,
+ bit 1: writable,
+ bits 2..3:
+ 0: cache bypass,
+ 1: WB cache,
+ 2: WT cache,
+ 3: special (c and e are illegal, f is reserved).
+ For MPU:
+ 0: illegal,
+ 1: WB cache,
+ 2: WB, no-write-allocate cache,
+ 3: WT cache,
+ 4: cache bypass.
config KSEG_PADDR
hex "Physical address of the KSEG mapping"
diff --git a/arch/xtensa/boot/boot-redboot/bootstrap.S b/arch/xtensa/boot/boot-redboot/bootstrap.S
index bbf3b4b080cd..48ba5a232d94 100644
--- a/arch/xtensa/boot/boot-redboot/bootstrap.S
+++ b/arch/xtensa/boot/boot-redboot/bootstrap.S
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#include <variant/core.h>
+#include <asm/core.h>
#include <asm/regs.h>
#include <asm/asmmacro.h>
#include <asm/cacheasm.h>
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 3843198e03d4..35f83c4bf239 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -20,12 +20,12 @@ generic-y += local.h
generic-y += local64.h
generic-y += mcs_spinlock.h
generic-y += mm-arch-hooks.h
+generic-y += mmiowb.h
generic-y += param.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += qrwlock.h
generic-y += qspinlock.h
-generic-y += rwsem.h
generic-y += sections.h
generic-y += socket.h
generic-y += topology.h
diff --git a/arch/xtensa/include/asm/asmmacro.h b/arch/xtensa/include/asm/asmmacro.h
index 7f2ae5872151..8308a9c3abb2 100644
--- a/arch/xtensa/include/asm/asmmacro.h
+++ b/arch/xtensa/include/asm/asmmacro.h
@@ -11,7 +11,7 @@
#ifndef _XTENSA_ASMMACRO_H
#define _XTENSA_ASMMACRO_H
-#include <variant/core.h>
+#include <asm/core.h>
/*
* Some little helpers for loops. Use zero-overhead-loops
diff --git a/arch/xtensa/include/asm/atomic.h b/arch/xtensa/include/asm/atomic.h
index 7de0149e1cf7..7b00d26f472e 100644
--- a/arch/xtensa/include/asm/atomic.h
+++ b/arch/xtensa/include/asm/atomic.h
@@ -15,8 +15,6 @@
#include <linux/stringify.h>
#include <linux/types.h>
-
-#ifdef __KERNEL__
#include <asm/processor.h>
#include <asm/cmpxchg.h>
#include <asm/barrier.h>
@@ -58,7 +56,67 @@
*/
#define atomic_set(v,i) WRITE_ONCE((v)->counter, (i))
-#if XCHAL_HAVE_S32C1I
+#if XCHAL_HAVE_EXCLUSIVE
+#define ATOMIC_OP(op) \
+static inline void atomic_##op(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ __asm__ __volatile__( \
+ "1: l32ex %1, %3\n" \
+ " " #op " %0, %1, %2\n" \
+ " s32ex %0, %3\n" \
+ " getex %0\n" \
+ " beqz %0, 1b\n" \
+ : "=&a" (result), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "memory" \
+ ); \
+} \
+
+#define ATOMIC_OP_RETURN(op) \
+static inline int atomic_##op##_return(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ __asm__ __volatile__( \
+ "1: l32ex %1, %3\n" \
+ " " #op " %0, %1, %2\n" \
+ " s32ex %0, %3\n" \
+ " getex %0\n" \
+ " beqz %0, 1b\n" \
+ " " #op " %0, %1, %2\n" \
+ : "=&a" (result), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "memory" \
+ ); \
+ \
+ return result; \
+}
+
+#define ATOMIC_FETCH_OP(op) \
+static inline int atomic_fetch_##op(int i, atomic_t *v) \
+{ \
+ unsigned long tmp; \
+ int result; \
+ \
+ __asm__ __volatile__( \
+ "1: l32ex %1, %3\n" \
+ " " #op " %0, %1, %2\n" \
+ " s32ex %0, %3\n" \
+ " getex %0\n" \
+ " beqz %0, 1b\n" \
+ : "=&a" (result), "=&a" (tmp) \
+ : "a" (i), "a" (v) \
+ : "memory" \
+ ); \
+ \
+ return tmp; \
+}
+
+#elif XCHAL_HAVE_S32C1I
#define ATOMIC_OP(op) \
static inline void atomic_##op(int i, atomic_t * v) \
{ \
@@ -200,6 +258,4 @@ ATOMIC_OPS(xor)
#define atomic_cmpxchg(v, o, n) ((int)cmpxchg(&((v)->counter), (o), (n)))
#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
-#endif /* __KERNEL__ */
-
#endif /* _XTENSA_ATOMIC_H */
diff --git a/arch/xtensa/include/asm/barrier.h b/arch/xtensa/include/asm/barrier.h
index 956596e4d437..d6f8d4ddc2bc 100644
--- a/arch/xtensa/include/asm/barrier.h
+++ b/arch/xtensa/include/asm/barrier.h
@@ -9,12 +9,16 @@
#ifndef _XTENSA_SYSTEM_H
#define _XTENSA_SYSTEM_H
+#include <asm/core.h>
+
#define mb() ({ __asm__ __volatile__("memw" : : : "memory"); })
#define rmb() barrier()
#define wmb() mb()
+#if XCHAL_HAVE_S32C1I
#define __smp_mb__before_atomic() barrier()
#define __smp_mb__after_atomic() barrier()
+#endif
#include <asm-generic/barrier.h>
diff --git a/arch/xtensa/include/asm/bitops.h b/arch/xtensa/include/asm/bitops.h
index d3490189792b..aeb15f4c755b 100644
--- a/arch/xtensa/include/asm/bitops.h
+++ b/arch/xtensa/include/asm/bitops.h
@@ -13,8 +13,6 @@
#ifndef _XTENSA_BITOPS_H
#define _XTENSA_BITOPS_H
-#ifdef __KERNEL__
-
#ifndef _LINUX_BITOPS_H
#error only <linux/bitops.h> can be included directly
#endif
@@ -98,7 +96,126 @@ static inline unsigned long __fls(unsigned long word)
#include <asm-generic/bitops/fls64.h>
-#if XCHAL_HAVE_S32C1I
+#if XCHAL_HAVE_EXCLUSIVE
+
+static inline void set_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long tmp;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __asm__ __volatile__(
+ "1: l32ex %0, %2\n"
+ " or %0, %0, %1\n"
+ " s32ex %0, %2\n"
+ " getex %0\n"
+ " beqz %0, 1b\n"
+ : "=&a" (tmp)
+ : "a" (mask), "a" (p)
+ : "memory");
+}
+
+static inline void clear_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long tmp;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __asm__ __volatile__(
+ "1: l32ex %0, %2\n"
+ " and %0, %0, %1\n"
+ " s32ex %0, %2\n"
+ " getex %0\n"
+ " beqz %0, 1b\n"
+ : "=&a" (tmp)
+ : "a" (~mask), "a" (p)
+ : "memory");
+}
+
+static inline void change_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long tmp;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __asm__ __volatile__(
+ "1: l32ex %0, %2\n"
+ " xor %0, %0, %1\n"
+ " s32ex %0, %2\n"
+ " getex %0\n"
+ " beqz %0, 1b\n"
+ : "=&a" (tmp)
+ : "a" (~mask), "a" (p)
+ : "memory");
+}
+
+static inline int
+test_and_set_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long tmp, value;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __asm__ __volatile__(
+ "1: l32ex %1, %3\n"
+ " or %0, %1, %2\n"
+ " s32ex %0, %3\n"
+ " getex %0\n"
+ " beqz %0, 1b\n"
+ : "=&a" (tmp), "=&a" (value)
+ : "a" (mask), "a" (p)
+ : "memory");
+
+ return value & mask;
+}
+
+static inline int
+test_and_clear_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long tmp, value;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __asm__ __volatile__(
+ "1: l32ex %1, %3\n"
+ " and %0, %1, %2\n"
+ " s32ex %0, %3\n"
+ " getex %0\n"
+ " beqz %0, 1b\n"
+ : "=&a" (tmp), "=&a" (value)
+ : "a" (~mask), "a" (p)
+ : "memory");
+
+ return value & mask;
+}
+
+static inline int
+test_and_change_bit(unsigned int bit, volatile unsigned long *p)
+{
+ unsigned long tmp, value;
+ unsigned long mask = 1UL << (bit & 31);
+
+ p += bit >> 5;
+
+ __asm__ __volatile__(
+ "1: l32ex %1, %3\n"
+ " xor %0, %1, %2\n"
+ " s32ex %0, %3\n"
+ " getex %0\n"
+ " beqz %0, 1b\n"
+ : "=&a" (tmp), "=&a" (value)
+ : "a" (mask), "a" (p)
+ : "memory");
+
+ return value & mask;
+}
+
+#elif XCHAL_HAVE_S32C1I
static inline void set_bit(unsigned int bit, volatile unsigned long *p)
{
@@ -232,6 +349,4 @@ test_and_change_bit(unsigned int bit, volatile unsigned long *p)
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/sched.h>
-#endif /* __KERNEL__ */
-
#endif /* _XTENSA_BITOPS_H */
diff --git a/arch/xtensa/include/asm/cache.h b/arch/xtensa/include/asm/cache.h
index d2fd932fdb4d..b21fd133ff62 100644
--- a/arch/xtensa/include/asm/cache.h
+++ b/arch/xtensa/include/asm/cache.h
@@ -11,7 +11,7 @@
#ifndef _XTENSA_CACHE_H
#define _XTENSA_CACHE_H
-#include <variant/core.h>
+#include <asm/core.h>
#define L1_CACHE_SHIFT XCHAL_DCACHE_LINEWIDTH
#define L1_CACHE_BYTES XCHAL_DCACHE_LINESIZE
diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h
index f302ef57973a..8b687176ad72 100644
--- a/arch/xtensa/include/asm/checksum.h
+++ b/arch/xtensa/include/asm/checksum.h
@@ -13,7 +13,7 @@
#include <linux/in6.h>
#include <linux/uaccess.h>
-#include <variant/core.h>
+#include <asm/core.h>
/*
* computes the checksum of a memory block at buff, length len,
diff --git a/arch/xtensa/include/asm/cmpxchg.h b/arch/xtensa/include/asm/cmpxchg.h
index 22a10c715c1f..7ccc5cbf441b 100644
--- a/arch/xtensa/include/asm/cmpxchg.h
+++ b/arch/xtensa/include/asm/cmpxchg.h
@@ -23,7 +23,24 @@
static inline unsigned long
__cmpxchg_u32(volatile int *p, int old, int new)
{
-#if XCHAL_HAVE_S32C1I
+#if XCHAL_HAVE_EXCLUSIVE
+ unsigned long tmp, result;
+
+ __asm__ __volatile__(
+ "1: l32ex %0, %3\n"
+ " bne %0, %4, 2f\n"
+ " mov %1, %2\n"
+ " s32ex %1, %3\n"
+ " getex %1\n"
+ " beqz %1, 1b\n"
+ "2:\n"
+ : "=&a" (result), "=&a" (tmp)
+ : "a" (new), "a" (p), "a" (old)
+ : "memory"
+ );
+
+ return result;
+#elif XCHAL_HAVE_S32C1I
__asm__ __volatile__(
" wsr %2, scompare1\n"
" s32c1i %0, %1, 0\n"
@@ -108,7 +125,22 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
static inline unsigned long xchg_u32(volatile int * m, unsigned long val)
{
-#if XCHAL_HAVE_S32C1I
+#if XCHAL_HAVE_EXCLUSIVE
+ unsigned long tmp, result;
+
+ __asm__ __volatile__(
+ "1: l32ex %0, %3\n"
+ " mov %1, %2\n"
+ " s32ex %1, %3\n"
+ " getex %1\n"
+ " beqz %1, 1b\n"
+ : "=&a" (result), "=&a" (tmp)
+ : "a" (val), "a" (m)
+ : "memory"
+ );
+
+ return result;
+#elif XCHAL_HAVE_S32C1I
unsigned long tmp, result;
__asm__ __volatile__(
"1: l32i %1, %2, 0\n"
diff --git a/arch/xtensa/include/asm/coprocessor.h b/arch/xtensa/include/asm/coprocessor.h
index 6712929a27c9..0fbe2a740b8d 100644
--- a/arch/xtensa/include/asm/coprocessor.h
+++ b/arch/xtensa/include/asm/coprocessor.h
@@ -12,8 +12,8 @@
#ifndef _XTENSA_COPROCESSOR_H
#define _XTENSA_COPROCESSOR_H
-#include <variant/core.h>
#include <variant/tie.h>
+#include <asm/core.h>
#include <asm/types.h>
#ifdef __ASSEMBLY__
diff --git a/arch/xtensa/include/asm/core.h b/arch/xtensa/include/asm/core.h
new file mode 100644
index 000000000000..5b4acb7d1c07
--- /dev/null
+++ b/arch/xtensa/include/asm/core.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2019 Cadence Design Systems Inc. */
+
+#ifndef _ASM_XTENSA_CORE_H
+#define _ASM_XTENSA_CORE_H
+
+#include <variant/core.h>
+
+#ifndef XCHAL_HAVE_EXCLUSIVE
+#define XCHAL_HAVE_EXCLUSIVE 0
+#endif
+
+#ifndef XCHAL_HAVE_MPU
+#define XCHAL_HAVE_MPU 0
+#endif
+
+#ifndef XCHAL_SPANNING_WAY
+#define XCHAL_SPANNING_WAY 0
+#endif
+
+#endif
diff --git a/arch/xtensa/include/asm/futex.h b/arch/xtensa/include/asm/futex.h
index 505d09eff184..9538b0f7953c 100644
--- a/arch/xtensa/include/asm/futex.h
+++ b/arch/xtensa/include/asm/futex.h
@@ -15,65 +15,88 @@
#ifndef _ASM_XTENSA_FUTEX_H
#define _ASM_XTENSA_FUTEX_H
-#ifdef __KERNEL__
-
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
-#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+#if XCHAL_HAVE_EXCLUSIVE
+#define __futex_atomic_op(insn, ret, old, uaddr, arg) \
+ __asm__ __volatile( \
+ "1: l32ex %[oldval], %[addr]\n" \
+ insn "\n" \
+ "2: s32ex %[newval], %[addr]\n" \
+ " getex %[newval]\n" \
+ " beqz %[newval], 1b\n" \
+ " movi %[newval], 0\n" \
+ "3:\n" \
+ " .section .fixup,\"ax\"\n" \
+ " .align 4\n" \
+ " .literal_position\n" \
+ "5: movi %[oldval], 3b\n" \
+ " movi %[newval], %[fault]\n" \
+ " jx %[oldval]\n" \
+ " .previous\n" \
+ " .section __ex_table,\"a\"\n" \
+ " .long 1b, 5b, 2b, 5b\n" \
+ " .previous\n" \
+ : [oldval] "=&r" (old), [newval] "=&r" (ret) \
+ : [addr] "r" (uaddr), [oparg] "r" (arg), \
+ [fault] "I" (-EFAULT) \
+ : "memory")
+#elif XCHAL_HAVE_S32C1I
+#define __futex_atomic_op(insn, ret, old, uaddr, arg) \
__asm__ __volatile( \
- "1: l32i %0, %2, 0\n" \
+ "1: l32i %[oldval], %[addr], 0\n" \
insn "\n" \
- " wsr %0, scompare1\n" \
- "2: s32c1i %1, %2, 0\n" \
- " bne %1, %0, 1b\n" \
- " movi %1, 0\n" \
+ " wsr %[oldval], scompare1\n" \
+ "2: s32c1i %[newval], %[addr], 0\n" \
+ " bne %[newval], %[oldval], 1b\n" \
+ " movi %[newval], 0\n" \
"3:\n" \
" .section .fixup,\"ax\"\n" \
" .align 4\n" \
" .literal_position\n" \
- "5: movi %0, 3b\n" \
- " movi %1, %3\n" \
- " jx %0\n" \
+ "5: movi %[oldval], 3b\n" \
+ " movi %[newval], %[fault]\n" \
+ " jx %[oldval]\n" \
" .previous\n" \
" .section __ex_table,\"a\"\n" \
- " .long 1b,5b,2b,5b\n" \
+ " .long 1b, 5b, 2b, 5b\n" \
" .previous\n" \
- : "=&r" (oldval), "=&r" (ret) \
- : "r" (uaddr), "I" (-EFAULT), "r" (oparg) \
+ : [oldval] "=&r" (old), [newval] "=&r" (ret) \
+ : [addr] "r" (uaddr), [oparg] "r" (arg), \
+ [fault] "I" (-EFAULT) \
: "memory")
+#endif
static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
u32 __user *uaddr)
{
+#if XCHAL_HAVE_S32C1I || XCHAL_HAVE_EXCLUSIVE
int oldval = 0, ret;
-#if !XCHAL_HAVE_S32C1I
- return -ENOSYS;
-#endif
-
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
- __futex_atomic_op("mov %1, %4", ret, oldval, uaddr, oparg);
+ __futex_atomic_op("mov %[newval], %[oparg]",
+ ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ADD:
- __futex_atomic_op("add %1, %0, %4", ret, oldval, uaddr,
- oparg);
+ __futex_atomic_op("add %[newval], %[oldval], %[oparg]",
+ ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_OR:
- __futex_atomic_op("or %1, %0, %4", ret, oldval, uaddr,
- oparg);
+ __futex_atomic_op("or %[newval], %[oldval], %[oparg]",
+ ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ANDN:
- __futex_atomic_op("and %1, %0, %4", ret, oldval, uaddr,
- ~oparg);
+ __futex_atomic_op("and %[newval], %[oldval], %[oparg]",
+ ret, oldval, uaddr, ~oparg);
break;
case FUTEX_OP_XOR:
- __futex_atomic_op("xor %1, %0, %4", ret, oldval, uaddr,
- oparg);
+ __futex_atomic_op("xor %[newval], %[oldval], %[oparg]",
+ ret, oldval, uaddr, oparg);
break;
default:
ret = -ENOSYS;
@@ -85,43 +108,60 @@ static inline int arch_futex_atomic_op_inuser(int op, int oparg, int *oval,
*oval = oldval;
return ret;
+#else
+ return -ENOSYS;
+#endif
}
static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
+#if XCHAL_HAVE_S32C1I || XCHAL_HAVE_EXCLUSIVE
+ unsigned long tmp;
int ret = 0;
if (!access_ok(uaddr, sizeof(u32)))
return -EFAULT;
-#if !XCHAL_HAVE_S32C1I
- return -ENOSYS;
-#endif
-
__asm__ __volatile__ (
" # futex_atomic_cmpxchg_inatomic\n"
- " wsr %5, scompare1\n"
- "1: s32c1i %1, %4, 0\n"
- " s32i %1, %6, 0\n"
+#if XCHAL_HAVE_EXCLUSIVE
+ "1: l32ex %[tmp], %[addr]\n"
+ " s32i %[tmp], %[uval], 0\n"
+ " bne %[tmp], %[oldval], 2f\n"
+ " mov %[tmp], %[newval]\n"
+ "3: s32ex %[tmp], %[addr]\n"
+ " getex %[tmp]\n"
+ " beqz %[tmp], 1b\n"
+#elif XCHAL_HAVE_S32C1I
+ " wsr %[oldval], scompare1\n"
+ "1: s32c1i %[newval], %[addr], 0\n"
+ " s32i %[newval], %[uval], 0\n"
+#endif
"2:\n"
" .section .fixup,\"ax\"\n"
" .align 4\n"
" .literal_position\n"
- "4: movi %1, 2b\n"
- " movi %0, %7\n"
- " jx %1\n"
+ "4: movi %[tmp], 2b\n"
+ " movi %[ret], %[fault]\n"
+ " jx %[tmp]\n"
" .previous\n"
" .section __ex_table,\"a\"\n"
- " .long 1b,4b\n"
+ " .long 1b, 4b\n"
+#if XCHAL_HAVE_EXCLUSIVE
+ " .long 3b, 4b\n"
+#endif
" .previous\n"
- : "+r" (ret), "+r" (newval), "+m" (*uaddr), "+m" (*uval)
- : "r" (uaddr), "r" (oldval), "r" (uval), "I" (-EFAULT)
+ : [ret] "+r" (ret), [newval] "+r" (newval), [tmp] "=&r" (tmp)
+ : [addr] "r" (uaddr), [oldval] "r" (oldval), [uval] "r" (uval),
+ [fault] "I" (-EFAULT)
: "memory");
return ret;
+#else
+ return -ENOSYS;
+#endif
}
-#endif /* __KERNEL__ */
#endif /* _ASM_XTENSA_FUTEX_H */
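As a readability aid, the FUTEX_OP_ADD case above amounts to the plain-C model below (the function name is illustrative, not the kernel's). The real code performs the read-modify-write atomically on user memory via l32ex/s32ex or l32i/s32c1i and reports -EFAULT through the __ex_table fixup when the access faults; this sketch ignores both concerns.

static int futex_op_add_model(unsigned int *uaddr, int oparg, int *oval)
{
        int oldval = *uaddr;            /* l32ex  /  l32i                    */

        *uaddr = oldval + oparg;        /* add  +  s32ex / s32c1i            */
        *oval = oldval;                 /* previous value handed to caller   */
        return 0;                       /* 0 on success, -EFAULT on a fault  */
}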
diff --git a/arch/xtensa/include/asm/initialize_mmu.h b/arch/xtensa/include/asm/initialize_mmu.h
index 10e9852b2fb4..323d05789159 100644
--- a/arch/xtensa/include/asm/initialize_mmu.h
+++ b/arch/xtensa/include/asm/initialize_mmu.h
@@ -33,10 +33,6 @@
#define CA_WRITEBACK (0x4)
#endif
-#ifndef XCHAL_SPANNING_WAY
-#define XCHAL_SPANNING_WAY 0
-#endif
-
#ifdef __ASSEMBLY__
#define XTENSA_HWVERSION_RC_2009_0 230000
@@ -181,11 +177,42 @@
.macro initialize_cacheattr
-#if !defined(CONFIG_MMU) && XCHAL_HAVE_TLBS
+#if !defined(CONFIG_MMU) && (XCHAL_HAVE_TLBS || XCHAL_HAVE_MPU)
#if CONFIG_MEMMAP_CACHEATTR == 0x22222222 && XCHAL_HAVE_PTP_MMU
#error Default MEMMAP_CACHEATTR of 0x22222222 does not work with full MMU.
#endif
+#if XCHAL_HAVE_MPU
+ .data
+ .align 4
+.Lattribute_table:
+ .long 0x000000, 0x1fff00, 0x1ddf00, 0x1eef00
+ .long 0x006600, 0x000000, 0x000000, 0x000000
+ .long 0x000000, 0x000000, 0x000000, 0x000000
+ .long 0x000000, 0x000000, 0x000000, 0x000000
+ .previous
+
+ movi a3, .Lattribute_table
+ movi a4, CONFIG_MEMMAP_CACHEATTR
+ movi a5, 1
+ movi a6, XCHAL_MPU_ENTRIES
+ movi a10, 0x20000000
+ movi a11, -1
+1:
+ sub a5, a5, a10
+ extui a8, a4, 28, 4
+ beq a8, a11, 2f
+ addi a6, a6, -1
+ mov a11, a8
+2:
+ addx4 a9, a8, a3
+ l32i a9, a9, 0
+ or a9, a9, a6
+ wptlb a9, a5
+ slli a4, a4, 4
+ bgeu a5, a10, 1b
+
+#else
movi a5, XCHAL_SPANNING_WAY
movi a6, ~_PAGE_ATTRIB_MASK
movi a4, CONFIG_MEMMAP_CACHEATTR
@@ -208,6 +235,7 @@
isync
#endif
+#endif
.endm
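The MPU branch of initialize_cacheattr is dense, so a C rendering of the loop may help (a sketch only: write_mpu_entry() wraps the wptlb instruction and both function names are illustrative). It consumes CONFIG_MEMMAP_CACHEATTR one nibble at a time, most significant first, covering 512MB regions from the top of the address space downwards, and spends a new MPU entry only when a region's attribute differs from its predecessor's.

static inline void write_mpu_entry(unsigned long data, unsigned long addr)
{
        __asm__ __volatile__("wptlb %0, %1" : : "a" (data), "a" (addr));
}

static void init_mpu_cacheattr(unsigned int cacheattr) /* CONFIG_MEMMAP_CACHEATTR */
{
        /* per-nibble attribute encodings, as in .Lattribute_table above */
        static const unsigned long attr[16] = {
                0x000000, 0x1fff00, 0x1ddf00, 0x1eef00,
                0x006600, 0x000000, 0x000000, 0x000000,
                0x000000, 0x000000, 0x000000, 0x000000,
                0x000000, 0x000000, 0x000000, 0x000000,
        };
        unsigned int addr = 1;          /* low bit set, as in "movi a5, 1" */
        int entry = XCHAL_MPU_ENTRIES;
        int prev = -1;

        do {
                int nibble;

                addr -= 0x20000000;     /* next 512MB region, top down (32-bit wrap) */
                nibble = (cacheattr >> 28) & 0xf;
                if (nibble != prev) {   /* attribute changed: move to a fresh entry */
                        --entry;
                        prev = nibble;
                }
                write_mpu_entry(attr[nibble] | entry, addr);
                cacheattr <<= 4;        /* advance to the next nibble */
        } while (addr >= 0x20000000);
}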
diff --git a/arch/xtensa/include/asm/io.h b/arch/xtensa/include/asm/io.h
index acc5bb2cf1c7..da3e783f896b 100644
--- a/arch/xtensa/include/asm/io.h
+++ b/arch/xtensa/include/asm/io.h
@@ -11,7 +11,6 @@
#ifndef _XTENSA_IO_H
#define _XTENSA_IO_H
-#ifdef __KERNEL__
#include <asm/byteorder.h>
#include <asm/page.h>
#include <asm/vectors.h>
@@ -78,8 +77,6 @@ static inline void iounmap(volatile void __iomem *addr)
#endif /* CONFIG_MMU */
-#endif /* __KERNEL__ */
-
#include <asm-generic/io.h>
#endif /* _XTENSA_IO_H */
diff --git a/arch/xtensa/include/asm/irq.h b/arch/xtensa/include/asm/irq.h
index 6c6ed23e0c79..0f71a51dab25 100644
--- a/arch/xtensa/include/asm/irq.h
+++ b/arch/xtensa/include/asm/irq.h
@@ -12,7 +12,7 @@
#define _XTENSA_IRQ_H
#include <linux/init.h>
-#include <variant/core.h>
+#include <asm/core.h>
#ifdef CONFIG_PLATFORM_NR_IRQS
# define PLATFORM_NR_IRQS CONFIG_PLATFORM_NR_IRQS
diff --git a/arch/xtensa/include/asm/pci-bridge.h b/arch/xtensa/include/asm/pci-bridge.h
index 0b68c76ec1e6..405526912d9a 100644
--- a/arch/xtensa/include/asm/pci-bridge.h
+++ b/arch/xtensa/include/asm/pci-bridge.h
@@ -11,8 +11,6 @@
#ifndef _XTENSA_PCI_BRIDGE_H
#define _XTENSA_PCI_BRIDGE_H
-#ifdef __KERNEL__
-
struct device_node;
struct pci_controller;
@@ -84,5 +82,4 @@ int early_write_config_byte(struct pci_controller*, int, int, int, u8);
int early_write_config_word(struct pci_controller*, int, int, int, u16);
int early_write_config_dword(struct pci_controller*, int, int, int, u32);
-#endif /* __KERNEL__ */
#endif /* _XTENSA_PCI_BRIDGE_H */
diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h
index 883024054b05..8e2b48a268db 100644
--- a/arch/xtensa/include/asm/pci.h
+++ b/arch/xtensa/include/asm/pci.h
@@ -11,8 +11,6 @@
#ifndef _XTENSA_PCI_H
#define _XTENSA_PCI_H
-#ifdef __KERNEL__
-
/* Can be used to override the logic in pci_scan_bus for skipping
* already-configured bus numbers - to be used for buggy BIOSes
* or architectures with incomplete PCI setup by the loader
@@ -45,8 +43,6 @@
#define ARCH_GENERIC_PCI_MMAP_RESOURCE 1
#define arch_can_pci_mmap_io() 1
-#endif /* __KERNEL__ */
-
/* Generic PCI */
#include <asm-generic/pci.h>
diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h
index b3b388ff2f01..368284c972e7 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -11,8 +11,6 @@
#ifndef _XTENSA_PGALLOC_H
#define _XTENSA_PGALLOC_H
-#ifdef __KERNEL__
-
#include <linux/highmem.h>
#include <linux/slab.h>
@@ -79,5 +77,4 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
}
#define pmd_pgtable(pmd) pmd_page(pmd)
-#endif /* __KERNEL__ */
#endif /* _XTENSA_PGALLOC_H */
diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h
index 0c14018d1c26..19f6b54e358b 100644
--- a/arch/xtensa/include/asm/processor.h
+++ b/arch/xtensa/include/asm/processor.h
@@ -10,7 +10,7 @@
#ifndef _XTENSA_PROCESSOR_H
#define _XTENSA_PROCESSOR_H
-#include <variant/core.h>
+#include <asm/core.h>
#include <linux/compiler.h>
#include <linux/stringify.h>
diff --git a/arch/xtensa/include/asm/ptrace.h b/arch/xtensa/include/asm/ptrace.h
index 62a58d2567e9..b109416dc07e 100644
--- a/arch/xtensa/include/asm/ptrace.h
+++ b/arch/xtensa/include/asm/ptrace.h
@@ -80,7 +80,7 @@ struct pt_regs {
unsigned long areg[16];
};
-#include <variant/core.h>
+#include <asm/core.h>
# define arch_has_single_step() (1)
# define task_pt_regs(tsk) ((struct pt_regs*) \
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 91dc06d58060..359ab40e935a 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -14,7 +14,7 @@
#include <asm/ptrace.h>
#include <uapi/linux/audit.h>
-static inline int syscall_get_arch(void)
+static inline int syscall_get_arch(struct task_struct *task)
{
return AUDIT_ARCH_XTENSA;
}
diff --git a/arch/xtensa/include/asm/tlb.h b/arch/xtensa/include/asm/tlb.h
index 0d766f9c1083..50889935138a 100644
--- a/arch/xtensa/include/asm/tlb.h
+++ b/arch/xtensa/include/asm/tlb.h
@@ -14,32 +14,6 @@
#include <asm/cache.h>
#include <asm/page.h>
-#if (DCACHE_WAY_SIZE <= PAGE_SIZE)
-
-/* Note, read http://lkml.org/lkml/2004/1/15/6 */
-
-# define tlb_start_vma(tlb,vma) do { } while (0)
-# define tlb_end_vma(tlb,vma) do { } while (0)
-
-#else
-
-# define tlb_start_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while(0)
-
-# define tlb_end_vma(tlb, vma) \
- do { \
- if (!tlb->fullmm) \
- flush_tlb_range(vma, vma->vm_start, vma->vm_end); \
- } while(0)
-
-#endif
-
-#define __tlb_remove_tlb_entry(tlb,pte,addr) do { } while (0)
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)
diff --git a/arch/xtensa/include/asm/vectors.h b/arch/xtensa/include/asm/vectors.h
index 7111280c8842..79fe3007919e 100644
--- a/arch/xtensa/include/asm/vectors.h
+++ b/arch/xtensa/include/asm/vectors.h
@@ -18,7 +18,7 @@
#ifndef _XTENSA_VECTORS_H
#define _XTENSA_VECTORS_H
-#include <variant/core.h>
+#include <asm/core.h>
#include <asm/kmem_layout.h>
#if XCHAL_HAVE_PTP_MMU
diff --git a/arch/xtensa/include/uapi/asm/sockios.h b/arch/xtensa/include/uapi/asm/sockios.h
index fb8ac3607189..1a1f58f4b75a 100644
--- a/arch/xtensa/include/uapi/asm/sockios.h
+++ b/arch/xtensa/include/uapi/asm/sockios.h
@@ -26,7 +26,7 @@
#define SIOCSPGRP _IOW('s', 8, pid_t)
#define SIOCGPGRP _IOR('s', 9, pid_t)
-#define SIOCGSTAMP 0x8906 /* Get stamp (timeval) */
-#define SIOCGSTAMPNS 0x8907 /* Get stamp (timespec) */
+#define SIOCGSTAMP_OLD 0x8906 /* Get stamp (timeval) */
+#define SIOCGSTAMPNS_OLD 0x8907 /* Get stamp (timespec) */
#endif /* _XTENSA_SOCKIOS_H */
diff --git a/arch/xtensa/kernel/hw_breakpoint.c b/arch/xtensa/kernel/hw_breakpoint.c
index 4f20416061fb..285fb2942b06 100644
--- a/arch/xtensa/kernel/hw_breakpoint.c
+++ b/arch/xtensa/kernel/hw_breakpoint.c
@@ -12,7 +12,7 @@
#include <linux/log2.h>
#include <linux/percpu.h>
#include <linux/perf_event.h>
-#include <variant/core.h>
+#include <asm/core.h>
/* Breakpoint currently in use for each IBREAKA. */
static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[XCHAL_NUM_IBREAK]);
diff --git a/arch/xtensa/kernel/setup.c b/arch/xtensa/kernel/setup.c
index 4ec6fbb696bf..c0ec24349421 100644
--- a/arch/xtensa/kernel/setup.c
+++ b/arch/xtensa/kernel/setup.c
@@ -651,6 +651,9 @@ c_show(struct seq_file *f, void *slot)
#if XCHAL_HAVE_S32C1I
"s32c1i "
#endif
+#if XCHAL_HAVE_EXCLUSIVE
+ "exclusive "
+#endif
"\n");
/* Registers. */
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index b80a430453b1..943f10639a93 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -18,8 +18,8 @@
#include <asm/page.h>
#include <asm/thread_info.h>
+#include <asm/core.h>
#include <asm/vectors.h>
-#include <variant/core.h>
OUTPUT_ARCH(xtensa)
ENTRY(_start)
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S
index 528fe0dd9339..d82c20c1fb7a 100644
--- a/arch/xtensa/lib/checksum.S
+++ b/arch/xtensa/lib/checksum.S
@@ -16,8 +16,8 @@
#include <linux/errno.h>
#include <linux/linkage.h>
-#include <variant/core.h>
#include <asm/asmmacro.h>
+#include <asm/core.h>
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
diff --git a/arch/xtensa/lib/memcopy.S b/arch/xtensa/lib/memcopy.S
index c0f6981719d6..efecfd7ed8cc 100644
--- a/arch/xtensa/lib/memcopy.S
+++ b/arch/xtensa/lib/memcopy.S
@@ -10,8 +10,8 @@
*/
#include <linux/linkage.h>
-#include <variant/core.h>
#include <asm/asmmacro.h>
+#include <asm/core.h>
/*
* void *memcpy(void *dst, const void *src, size_t len);
diff --git a/arch/xtensa/lib/memset.S b/arch/xtensa/lib/memset.S
index 276747dec300..8632eacbdc80 100644
--- a/arch/xtensa/lib/memset.S
+++ b/arch/xtensa/lib/memset.S
@@ -12,8 +12,8 @@
*/
#include <linux/linkage.h>
-#include <variant/core.h>
#include <asm/asmmacro.h>
+#include <asm/core.h>
/*
* void *memset(void *dst, int c, size_t length)
diff --git a/arch/xtensa/lib/strncpy_user.S b/arch/xtensa/lib/strncpy_user.S
index 5fce16b67dca..c4c6c8578d59 100644
--- a/arch/xtensa/lib/strncpy_user.S
+++ b/arch/xtensa/lib/strncpy_user.S
@@ -13,8 +13,8 @@
#include <linux/errno.h>
#include <linux/linkage.h>
-#include <variant/core.h>
#include <asm/asmmacro.h>
+#include <asm/core.h>
/*
* char *__strncpy_user(char *dst, const char *src, size_t len)
diff --git a/arch/xtensa/lib/strnlen_user.S b/arch/xtensa/lib/strnlen_user.S
index 0b956ce7f386..1f2ca2bb2ab3 100644
--- a/arch/xtensa/lib/strnlen_user.S
+++ b/arch/xtensa/lib/strnlen_user.S
@@ -12,8 +12,8 @@
*/
#include <linux/linkage.h>
-#include <variant/core.h>
#include <asm/asmmacro.h>
+#include <asm/core.h>
/*
* size_t __strnlen_user(const char *s, size_t len)
diff --git a/arch/xtensa/lib/usercopy.S b/arch/xtensa/lib/usercopy.S
index 64ab1971324f..228607e30bc2 100644
--- a/arch/xtensa/lib/usercopy.S
+++ b/arch/xtensa/lib/usercopy.S
@@ -54,8 +54,8 @@
*/
#include <linux/linkage.h>
-#include <variant/core.h>
#include <asm/asmmacro.h>
+#include <asm/core.h>
.text
ENTRY(__xtensa_copy_user)
diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c
index 026211e7ab09..f9cd45860bee 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -297,8 +297,7 @@ out_alloc_disk:
blk_cleanup_queue(dev->queue);
dev->queue = NULL;
out_alloc_queue:
- simc_close(dev->fd);
- return -EIO;
+ return -ENOMEM;
}
static int __init simdisk_init(void)
diff --git a/arch/xtensa/platforms/xt2000/include/platform/hardware.h b/arch/xtensa/platforms/xt2000/include/platform/hardware.h
index 8e5e0d6a81ec..9f213f573330 100644
--- a/arch/xtensa/platforms/xt2000/include/platform/hardware.h
+++ b/arch/xtensa/platforms/xt2000/include/platform/hardware.h
@@ -15,7 +15,7 @@
#ifndef _XTENSA_XT2000_HARDWARE_H
#define _XTENSA_XT2000_HARDWARE_H
-#include <variant/core.h>
+#include <asm/core.h>
/*
* On-board components.
diff --git a/arch/xtensa/platforms/xt2000/include/platform/serial.h b/arch/xtensa/platforms/xt2000/include/platform/serial.h
index 7226cf732b47..cde804827626 100644
--- a/arch/xtensa/platforms/xt2000/include/platform/serial.h
+++ b/arch/xtensa/platforms/xt2000/include/platform/serial.h
@@ -11,7 +11,7 @@
#ifndef _XTENSA_XT2000_SERIAL_H
#define _XTENSA_XT2000_SERIAL_H
-#include <variant/core.h>
+#include <asm/core.h>
#include <asm/io.h>
/* National-Semi PC16552D DUART: */