-rw-r--r--  .mailmap | 1
-rw-r--r--  CREDITS | 9
-rw-r--r--  Documentation/ABI/testing/ima_policy | 2
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-memory | 58
-rw-r--r--  Documentation/ABI/testing/sysfs-devices-xenbus | 41
-rw-r--r--  Documentation/admin-guide/auxdisplay/cfag12864b.rst | 2
-rw-r--r--  Documentation/admin-guide/auxdisplay/ks0108.rst | 2
-rw-r--r--  Documentation/admin-guide/cgroup-v2.rst | 2
-rw-r--r--  Documentation/admin-guide/cifs/authors.rst | 6
-rw-r--r--  Documentation/admin-guide/cifs/changes.rst | 5
-rw-r--r--  Documentation/admin-guide/cifs/introduction.rst | 30
-rw-r--r--  Documentation/admin-guide/cifs/todo.rst | 34
-rw-r--r--  Documentation/admin-guide/cifs/usage.rst | 2
-rw-r--r--  Documentation/admin-guide/kernel-parameters.txt | 6
-rw-r--r--  Documentation/admin-guide/mm/memory-hotplug.rst | 20
-rw-r--r--  Documentation/admin-guide/xfs.rst | 16
-rw-r--r--  Documentation/block/bfq-iosched.rst | 4
-rw-r--r--  Documentation/conf.py | 3
-rw-r--r--  Documentation/dev-tools/index.rst | 1
-rw-r--r--  Documentation/dev-tools/kasan.rst | 8
-rw-r--r--  Documentation/dev-tools/kfence.rst | 298
-rw-r--r--  Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml | 2
-rw-r--r--  Documentation/devicetree/bindings/gpio/sifive,gpio.yaml | 29
-rw-r--r--  Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml | 13
-rw-r--r--  Documentation/devicetree/bindings/leds/leds-lgm.yaml | 113
-rw-r--r--  Documentation/devicetree/bindings/media/i2c/imx258.yaml | 14
-rw-r--r--  Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml | 5
-rw-r--r--  Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml | 16
-rw-r--r--  Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml | 16
-rw-r--r--  Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml | 11
-rw-r--r--  Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml | 109
-rw-r--r--  Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml | 171
-rw-r--r--  Documentation/devicetree/bindings/pwm/pwm-sifive.yaml | 9
-rw-r--r--  Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml | 40
-rw-r--r--  Documentation/devicetree/bindings/riscv/canaan.yaml | 47
-rw-r--r--  Documentation/devicetree/bindings/riscv/cpus.yaml | 8
-rw-r--r--  Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml | 34
-rw-r--r--  Documentation/devicetree/bindings/riscv/sifive.yaml | 17
-rw-r--r--  Documentation/devicetree/bindings/serial/sifive-serial.yaml | 1
-rw-r--r--  Documentation/devicetree/bindings/timer/sifive,clint.yaml | 12
-rw-r--r--  Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml | 3
-rw-r--r--  Documentation/features/core/cBPF-JIT/arch-support.txt | 1
-rw-r--r--  Documentation/features/core/eBPF-JIT/arch-support.txt | 1
-rw-r--r--  Documentation/features/core/generic-idle-thread/arch-support.txt | 1
-rw-r--r--  Documentation/features/core/jump-labels/arch-support.txt | 1
-rw-r--r--  Documentation/features/core/tracehook/arch-support.txt | 1
-rw-r--r--  Documentation/features/debug/KASAN/arch-support.txt | 1
-rw-r--r--  Documentation/features/debug/debug-vm-pgtable/arch-support.txt | 1
-rw-r--r--  Documentation/features/debug/gcov-profile-all/arch-support.txt | 3
-rw-r--r--  Documentation/features/debug/kcov/arch-support.txt | 1
-rw-r--r--  Documentation/features/debug/kgdb/arch-support.txt | 1
-rw-r--r--  Documentation/features/debug/kmemleak/arch-support.txt | 3
-rw-r--r--  Documentation/features/debug/kprobes-on-ftrace/arch-support.txt | 3
-rw-r--r--  Documentation/features/debug/kprobes/arch-support.txt | 3
-rw-r--r--  Documentation/features/debug/kretprobes/arch-support.txt | 3
-rw-r--r--  Documentation/features/debug/optprobes/arch-support.txt | 1
-rw-r--r--  Documentation/features/debug/stackprotector/arch-support.txt | 1
-rw-r--r--  Documentation/features/debug/uprobes/arch-support.txt | 3
-rw-r--r--  Documentation/features/debug/user-ret-profiler/arch-support.txt | 1
-rw-r--r--  Documentation/features/io/dma-contiguous/arch-support.txt | 1
-rw-r--r--  Documentation/features/locking/cmpxchg-local/arch-support.txt | 1
-rw-r--r--  Documentation/features/locking/lockdep/arch-support.txt | 1
-rw-r--r--  Documentation/features/locking/queued-rwlocks/arch-support.txt | 1
-rw-r--r--  Documentation/features/locking/queued-spinlocks/arch-support.txt | 1
-rw-r--r--  Documentation/features/perf/kprobes-event/arch-support.txt | 3
-rw-r--r--  Documentation/features/perf/perf-regs/arch-support.txt | 3
-rw-r--r--  Documentation/features/perf/perf-stackdump/arch-support.txt | 3
-rw-r--r--  Documentation/features/sched/membarrier-sync-core/arch-support.txt | 1
-rw-r--r--  Documentation/features/sched/numa-balancing/arch-support.txt | 3
-rw-r--r--  Documentation/features/seccomp/seccomp-filter/arch-support.txt | 1
-rw-r--r--  Documentation/features/time/arch-tick-broadcast/arch-support.txt | 1
-rw-r--r--  Documentation/features/time/clockevents/arch-support.txt | 1
-rw-r--r--  Documentation/features/time/context-tracking/arch-support.txt | 1
-rw-r--r--  Documentation/features/time/irq-time-acct/arch-support.txt | 1
-rw-r--r--  Documentation/features/time/virt-cpuacct/arch-support.txt | 1
-rw-r--r--  Documentation/features/vm/ELF-ASLR/arch-support.txt | 1
-rw-r--r--  Documentation/features/vm/PG_uncached/arch-support.txt | 1
-rw-r--r--  Documentation/features/vm/THP/arch-support.txt | 1
-rw-r--r--  Documentation/features/vm/TLB/arch-support.txt | 1
-rw-r--r--  Documentation/features/vm/huge-vmap/arch-support.txt | 1
-rw-r--r--  Documentation/features/vm/ioremap_prot/arch-support.txt | 1
-rw-r--r--  Documentation/features/vm/pte_special/arch-support.txt | 1
-rw-r--r--  Documentation/filesystems/porting.rst | 7
-rw-r--r--  Documentation/filesystems/proc.rst | 4
-rw-r--r--  Documentation/filesystems/seq_file.rst | 6
-rw-r--r--  Documentation/powerpc/syscall64-abi.rst | 51
-rw-r--r--  Documentation/process/4.Coding.rst | 2
-rw-r--r--  Documentation/process/submit-checklist.rst | 2
-rw-r--r--  Documentation/translations/it_IT/process/4.Coding.rst | 2
-rw-r--r--  Documentation/translations/it_IT/process/submit-checklist.rst | 2
-rw-r--r--  Documentation/translations/zh_CN/process/4.Coding.rst | 2
-rw-r--r--  Documentation/virt/kvm/api.rst | 117
-rw-r--r--  MAINTAINERS | 49
-rw-r--r--  Makefile | 17
-rw-r--r--  arch/alpha/configs/defconfig | 1
-rw-r--r--  arch/alpha/kernel/process.c | 2
-rw-r--r--  arch/arc/kernel/process.c | 2
-rw-r--r--  arch/arm/kernel/process.c | 2
-rw-r--r--  arch/arm/xen/p2m.c | 35
-rw-r--r--  arch/arm64/Kconfig | 2
-rw-r--r--  arch/arm64/include/asm/cache.h | 1
-rw-r--r--  arch/arm64/include/asm/kasan.h | 1
-rw-r--r--  arch/arm64/include/asm/kfence.h | 22
-rw-r--r--  arch/arm64/include/asm/module.lds.h | 6
-rw-r--r--  arch/arm64/include/asm/mte-def.h | 2
-rw-r--r--  arch/arm64/include/asm/mte-kasan.h | 65
-rw-r--r--  arch/arm64/include/asm/mte.h | 2
-rw-r--r--  arch/arm64/include/asm/numa.h | 48
-rw-r--r--  arch/arm64/kernel/acpi_numa.c | 12
-rw-r--r--  arch/arm64/kernel/head.S | 1
-rw-r--r--  arch/arm64/kernel/hyp-stub.S | 40
-rw-r--r--  arch/arm64/kernel/machine_kexec_file.c | 4
-rw-r--r--  arch/arm64/kernel/mte.c | 46
-rw-r--r--  arch/arm64/kernel/probes/uprobes.c | 2
-rw-r--r--  arch/arm64/kernel/process.c | 2
-rw-r--r--  arch/arm64/kernel/ptrace.c | 2
-rw-r--r--  arch/arm64/kernel/stacktrace.c | 2
-rw-r--r--  arch/arm64/kernel/suspend.c | 2
-rw-r--r--  arch/arm64/kvm/hyp/hyp-entry.S | 2
-rw-r--r--  arch/arm64/lib/mte.S | 16
-rw-r--r--  arch/arm64/mm/Makefile | 1
-rw-r--r--  arch/arm64/mm/fault.c | 4
-rw-r--r--  arch/arm64/mm/init.c | 4
-rw-r--r--  arch/arm64/mm/mmu.c | 27
-rw-r--r--  arch/csky/Kconfig | 24
-rw-r--r--  arch/csky/abiv1/inc/abi/cacheflush.h | 1
-rw-r--r--  arch/csky/abiv1/inc/abi/ckmmu.h | 10
-rw-r--r--  arch/csky/abiv1/inc/abi/entry.h | 1
-rw-r--r--  arch/csky/abiv1/inc/abi/page.h | 1
-rw-r--r--  arch/csky/abiv1/inc/abi/pgtable-bits.h | 40
-rw-r--r--  arch/csky/abiv1/inc/abi/reg_ops.h | 1
-rw-r--r--  arch/csky/abiv1/inc/abi/regdef.h | 6
-rw-r--r--  arch/csky/abiv1/inc/abi/string.h | 1
-rw-r--r--  arch/csky/abiv1/inc/abi/switch_context.h | 1
-rw-r--r--  arch/csky/abiv1/inc/abi/vdso.h | 18
-rw-r--r--  arch/csky/abiv2/cacheflush.c | 3
-rw-r--r--  arch/csky/abiv2/inc/abi/ckmmu.h | 44
-rw-r--r--  arch/csky/abiv2/inc/abi/entry.h | 20
-rw-r--r--  arch/csky/abiv2/inc/abi/fpu.h | 1
-rw-r--r--  arch/csky/abiv2/inc/abi/page.h | 1
-rw-r--r--  arch/csky/abiv2/inc/abi/pgtable-bits.h | 37
-rw-r--r--  arch/csky/abiv2/inc/abi/reg_ops.h | 1
-rw-r--r--  arch/csky/abiv2/inc/abi/regdef.h | 6
-rw-r--r--  arch/csky/abiv2/inc/abi/switch_context.h | 1
-rw-r--r--  arch/csky/abiv2/inc/abi/vdso.h | 20
-rw-r--r--  arch/csky/abiv2/sysdep.h | 1
-rw-r--r--  arch/csky/include/asm/addrspace.h | 1
-rw-r--r--  arch/csky/include/asm/atomic.h | 212
-rw-r--r--  arch/csky/include/asm/barrier.h | 83
-rw-r--r--  arch/csky/include/asm/bitops.h | 1
-rw-r--r--  arch/csky/include/asm/bug.h | 3
-rw-r--r--  arch/csky/include/asm/cacheflush.h | 1
-rw-r--r--  arch/csky/include/asm/checksum.h | 1
-rw-r--r--  arch/csky/include/asm/clocksource.h | 8
-rw-r--r--  arch/csky/include/asm/cmpxchg.h | 27
-rw-r--r--  arch/csky/include/asm/elf.h | 1
-rw-r--r--  arch/csky/include/asm/fixmap.h | 1
-rw-r--r--  arch/csky/include/asm/ftrace.h | 1
-rw-r--r--  arch/csky/include/asm/futex.h | 121
-rw-r--r--  arch/csky/include/asm/highmem.h | 1
-rw-r--r--  arch/csky/include/asm/io.h | 1
-rw-r--r--  arch/csky/include/asm/memory.h | 2
-rw-r--r--  arch/csky/include/asm/mmu.h | 1
-rw-r--r--  arch/csky/include/asm/mmu_context.h | 10
-rw-r--r--  arch/csky/include/asm/page.h | 2
-rw-r--r--  arch/csky/include/asm/perf_event.h | 1
-rw-r--r--  arch/csky/include/asm/pgalloc.h | 3
-rw-r--r--  arch/csky/include/asm/pgtable.h | 80
-rw-r--r--  arch/csky/include/asm/processor.h | 3
-rw-r--r--  arch/csky/include/asm/ptrace.h | 1
-rw-r--r--  arch/csky/include/asm/segment.h | 3
-rw-r--r--  arch/csky/include/asm/shmparam.h | 1
-rw-r--r--  arch/csky/include/asm/spinlock.h | 167
-rw-r--r--  arch/csky/include/asm/spinlock_types.h | 10
-rw-r--r--  arch/csky/include/asm/string.h | 1
-rw-r--r--  arch/csky/include/asm/switch_to.h | 1
-rw-r--r--  arch/csky/include/asm/syscalls.h | 1
-rw-r--r--  arch/csky/include/asm/thread_info.h | 2
-rw-r--r--  arch/csky/include/asm/tlb.h | 1
-rw-r--r--  arch/csky/include/asm/tlbflush.h | 1
-rw-r--r--  arch/csky/include/asm/traps.h | 1
-rw-r--r--  arch/csky/include/asm/uaccess.h | 1
-rw-r--r--  arch/csky/include/asm/unistd.h | 1
-rw-r--r--  arch/csky/include/asm/vdso.h | 21
-rw-r--r--  arch/csky/include/asm/vdso/clocksource.h | 9
-rw-r--r--  arch/csky/include/asm/vdso/gettimeofday.h | 114
-rw-r--r--  arch/csky/include/asm/vdso/processor.h | 12
-rw-r--r--  arch/csky/include/asm/vdso/vsyscall.h | 22
-rw-r--r--  arch/csky/include/uapi/asm/byteorder.h | 1
-rw-r--r--  arch/csky/include/uapi/asm/perf_regs.h | 1
-rw-r--r--  arch/csky/include/uapi/asm/ptrace.h | 1
-rw-r--r--  arch/csky/include/uapi/asm/sigcontext.h | 1
-rw-r--r--  arch/csky/include/uapi/asm/unistd.h | 1
-rw-r--r--  arch/csky/kernel/Makefile | 2
-rw-r--r--  arch/csky/kernel/atomic.S | 24
-rw-r--r--  arch/csky/kernel/entry.S | 106
-rw-r--r--  arch/csky/kernel/head.S | 10
-rw-r--r--  arch/csky/kernel/perf_event.c | 4
-rw-r--r--  arch/csky/kernel/probes/simulate-insn.c | 22
-rw-r--r--  arch/csky/kernel/process.c | 2
-rw-r--r--  arch/csky/kernel/ptrace.c | 128
-rw-r--r--  arch/csky/kernel/setup.c | 18
-rw-r--r--  arch/csky/kernel/signal.c | 4
-rw-r--r--  arch/csky/kernel/smp.c | 7
-rw-r--r--  arch/csky/kernel/traps.c | 10
-rw-r--r--  arch/csky/kernel/vdso.c | 127
-rw-r--r--  arch/csky/kernel/vdso/.gitignore | 4
-rw-r--r--  arch/csky/kernel/vdso/Makefile | 72
-rw-r--r--  arch/csky/kernel/vdso/note.S | 12
-rw-r--r--  arch/csky/kernel/vdso/rt_sigreturn.S | 14
-rwxr-xr-x  arch/csky/kernel/vdso/so2s.sh | 5
-rw-r--r--  arch/csky/kernel/vdso/vdso.S | 16
-rw-r--r--  arch/csky/kernel/vdso/vdso.lds.S | 58
-rw-r--r--  arch/csky/kernel/vdso/vgettimeofday.c | 28
-rw-r--r--  arch/csky/kernel/vmlinux.lds.S | 2
-rw-r--r--  arch/csky/mm/fault.c | 388
-rw-r--r--  arch/csky/mm/init.c | 56
-rw-r--r--  arch/csky/mm/tlb.c | 42
-rw-r--r--  arch/h8300/kernel/process.c | 2
-rw-r--r--  arch/hexagon/kernel/process.c | 2
-rw-r--r--  arch/ia64/Makefile | 1
-rw-r--r--  arch/ia64/kernel/process.c | 2
-rw-r--r--  arch/ia64/kernel/signal.c | 3
-rw-r--r--  arch/m68k/coldfire/clk.c | 4
-rw-r--r--  arch/m68k/include/asm/page_mm.h | 2
-rw-r--r--  arch/m68k/include/asm/page_no.h | 4
-rw-r--r--  arch/m68k/kernel/process.c | 2
-rw-r--r--  arch/microblaze/kernel/process.c | 2
-rw-r--r--  arch/mips/boot/compressed/decompress.c | 8
-rw-r--r--  arch/mips/crypto/Makefile | 4
-rw-r--r--  arch/mips/include/asm/traps.h | 3
-rw-r--r--  arch/mips/kernel/cpu-probe.c | 6
-rw-r--r--  arch/mips/kernel/cpu-r3k-probe.c | 3
-rw-r--r--  arch/mips/kernel/process.c | 2
-rw-r--r--  arch/mips/kernel/traps.c | 10
-rw-r--r--  arch/mips/kernel/vmlinux.lds.S | 6
-rw-r--r--  arch/mips/mm/cache.c | 30
-rw-r--r--  arch/nds32/kernel/process.c | 2
-rw-r--r--  arch/nios2/kernel/process.c | 2
-rw-r--r--  arch/openrisc/Kbuild | 3
-rw-r--r--  arch/openrisc/Makefile | 21
-rw-r--r--  arch/openrisc/boot/.gitignore | 2
-rw-r--r--  arch/openrisc/boot/Makefile | 10
-rw-r--r--  arch/openrisc/kernel/process.c | 15
-rw-r--r--  arch/openrisc/kernel/smp.c | 23
-rw-r--r--  arch/parisc/Kconfig | 8
-rw-r--r--  arch/parisc/kernel/process.c | 2
-rw-r--r--  arch/parisc/kernel/ptrace.c | 2
-rw-r--r--  arch/powerpc/include/asm/dcr-native.h | 8
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 4
-rw-r--r--  arch/powerpc/include/asm/vio.h | 2
-rw-r--r--  arch/powerpc/kernel/head_book3s_32.S | 9
-rw-r--r--  arch/powerpc/kernel/interrupt.c | 2
-rw-r--r--  arch/powerpc/kernel/process.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 2
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_radix.c | 2
-rw-r--r--  arch/powerpc/lib/sstep.c | 4
-rw-r--r--  arch/powerpc/perf/core-book3s.c | 4
-rw-r--r--  arch/powerpc/platforms/pseries/msi.c | 25
-rw-r--r--  arch/powerpc/platforms/pseries/vio.c | 7
-rw-r--r--  arch/riscv/Kconfig | 49
-rw-r--r--  arch/riscv/Kconfig.socs | 33
-rw-r--r--  arch/riscv/Makefile | 14
-rw-r--r--  arch/riscv/boot/dts/Makefile | 2
-rw-r--r--  arch/riscv/boot/dts/canaan/Makefile | 5
-rw-r--r--  arch/riscv/boot/dts/canaan/canaan_kd233.dts | 152
-rw-r--r--  arch/riscv/boot/dts/canaan/k210.dtsi | 459
-rw-r--r--  arch/riscv/boot/dts/canaan/k210_generic.dts | 46
-rw-r--r--  arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts | 209
-rw-r--r--  arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts | 211
-rw-r--r--  arch/riscv/boot/dts/canaan/sipeed_maix_go.dts | 219
-rw-r--r--  arch/riscv/boot/dts/canaan/sipeed_maixduino.dts | 184
-rw-r--r--  arch/riscv/boot/dts/kendryte/Makefile | 4
-rw-r--r--  arch/riscv/boot/dts/kendryte/k210.dts | 23
-rw-r--r--  arch/riscv/boot/dts/kendryte/k210.dtsi | 125
-rw-r--r--  arch/riscv/boot/dts/sifive/Makefile | 3
-rw-r--r--  arch/riscv/boot/dts/sifive/fu740-c000.dtsi | 293
-rw-r--r--  arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts | 253
-rw-r--r--  arch/riscv/configs/defconfig | 1
-rw-r--r--  arch/riscv/configs/nommu_k210_defconfig | 46
-rw-r--r--  arch/riscv/configs/nommu_k210_sdcard_defconfig | 92
-rw-r--r--  arch/riscv/configs/rv32_defconfig | 1
-rw-r--r--  arch/riscv/include/asm/bug.h | 1
-rw-r--r--  arch/riscv/include/asm/csr.h | 6
-rw-r--r--  arch/riscv/include/asm/kasan.h | 22
-rw-r--r--  arch/riscv/include/asm/kprobes.h | 40
-rw-r--r--  arch/riscv/include/asm/mmu.h | 2
-rw-r--r--  arch/riscv/include/asm/mmu_context.h | 10
-rw-r--r--  arch/riscv/include/asm/mmzone.h | 13
-rw-r--r--  arch/riscv/include/asm/numa.h | 8
-rw-r--r--  arch/riscv/include/asm/page.h | 3
-rw-r--r--  arch/riscv/include/asm/pci.h | 14
-rw-r--r--  arch/riscv/include/asm/pgtable.h | 21
-rw-r--r--  arch/riscv/include/asm/probes.h | 24
-rw-r--r--  arch/riscv/include/asm/processor.h | 1
-rw-r--r--  arch/riscv/include/asm/ptrace.h | 35
-rw-r--r--  arch/riscv/include/asm/sbi.h | 18
-rw-r--r--  arch/riscv/include/asm/set_memory.h | 2
-rw-r--r--  arch/riscv/include/asm/soc.h | 38
-rw-r--r--  arch/riscv/include/asm/stackprotector.h | 3
-rw-r--r--  arch/riscv/include/asm/stacktrace.h | 2
-rw-r--r--  arch/riscv/include/asm/thread_info.h | 4
-rw-r--r--  arch/riscv/include/asm/uprobes.h | 40
-rw-r--r--  arch/riscv/kernel/Makefile | 6
-rw-r--r--  arch/riscv/kernel/asm-offsets.c | 3
-rw-r--r--  arch/riscv/kernel/ftrace.c | 95
-rw-r--r--  arch/riscv/kernel/head.S | 4
-rw-r--r--  arch/riscv/kernel/image-vars.h | 2
-rw-r--r--  arch/riscv/kernel/mcount-dyn.S | 342
-rw-r--r--  arch/riscv/kernel/patch.c | 8
-rw-r--r--  arch/riscv/kernel/probes/Makefile | 6
-rw-r--r--  arch/riscv/kernel/probes/decode-insn.c | 48
-rw-r--r--  arch/riscv/kernel/probes/decode-insn.h | 18
-rw-r--r--  arch/riscv/kernel/probes/ftrace.c | 53
-rw-r--r--  arch/riscv/kernel/probes/kprobes.c | 398
-rw-r--r--  arch/riscv/kernel/probes/kprobes_trampoline.S | 93
-rw-r--r--  arch/riscv/kernel/probes/simulate-insn.c | 85
-rw-r--r--  arch/riscv/kernel/probes/simulate-insn.h | 47
-rw-r--r--  arch/riscv/kernel/probes/uprobes.c | 186
-rw-r--r--  arch/riscv/kernel/process.c | 20
-rw-r--r--  arch/riscv/kernel/ptrace.c | 99
-rw-r--r--  arch/riscv/kernel/sbi.c | 36
-rw-r--r--  arch/riscv/kernel/setup.c | 23
-rw-r--r--  arch/riscv/kernel/signal.c | 3
-rw-r--r--  arch/riscv/kernel/smpboot.c | 12
-rw-r--r--  arch/riscv/kernel/soc.c | 27
-rw-r--r--  arch/riscv/kernel/stacktrace.c | 22
-rw-r--r--  arch/riscv/kernel/traps.c | 22
-rw-r--r--  arch/riscv/kernel/vdso/Makefile | 3
-rw-r--r--  arch/riscv/lib/Makefile | 2
-rw-r--r--  arch/riscv/lib/error-inject.c | 10
-rw-r--r--  arch/riscv/mm/Makefile | 3
-rw-r--r--  arch/riscv/mm/context.c | 265
-rw-r--r--  arch/riscv/mm/fault.c | 38
-rw-r--r--  arch/riscv/mm/init.c | 129
-rw-r--r--  arch/riscv/mm/kasan_init.c | 176
-rw-r--r--  arch/s390/include/asm/irq_work.h | 12
-rw-r--r--  arch/s390/include/asm/pgalloc.h | 2
-rw-r--r--  arch/s390/include/asm/pgtable.h | 16
-rw-r--r--  arch/s390/include/uapi/asm/perf_cpum_cf_diag.h | 51
-rw-r--r--  arch/s390/kernel/perf_cpum_cf_diag.c | 548
-rw-r--r--  arch/s390/kernel/process.c | 2
-rw-r--r--  arch/s390/kernel/smp.c | 28
-rw-r--r--  arch/s390/kernel/topology.c | 25
-rw-r--r--  arch/s390/mm/init.c | 1
-rw-r--r--  arch/s390/mm/pgalloc.c | 22
-rw-r--r--  arch/s390/mm/vmem.c | 44
-rw-r--r--  arch/s390/tools/opcodes.txt | 2
-rw-r--r--  arch/sh/boards/mach-landisk/gio.c | 6
-rw-r--r--  arch/sh/kernel/process_32.c | 2
-rw-r--r--  arch/sparc/configs/sparc64_defconfig | 4
-rw-r--r--  arch/sparc/include/asm/elf_64.h | 1
-rw-r--r--  arch/sparc/include/asm/extable.h (renamed from arch/sparc/include/asm/extable_64.h) | 4
-rw-r--r--  arch/sparc/include/asm/processor_32.h | 6
-rw-r--r--  arch/sparc/include/asm/thread_info_64.h | 1
-rw-r--r--  arch/sparc/include/asm/uaccess.h | 3
-rw-r--r--  arch/sparc/include/asm/uaccess_32.h | 38
-rw-r--r--  arch/sparc/include/asm/uaccess_64.h | 1
-rw-r--r--  arch/sparc/kernel/head_32.S | 2
-rw-r--r--  arch/sparc/kernel/head_64.S | 2
-rw-r--r--  arch/sparc/kernel/process_32.c | 14
-rw-r--r--  arch/sparc/kernel/process_64.c | 2
-rw-r--r--  arch/sparc/kernel/setup_32.c | 3
-rw-r--r--  arch/sparc/kernel/setup_64.c | 4
-rw-r--r--  arch/sparc/kernel/traps_64.c | 13
-rw-r--r--  arch/sparc/kernel/unaligned_32.c | 106
-rw-r--r--  arch/sparc/lib/checksum_32.S | 64
-rw-r--r--  arch/sparc/lib/copy_user.S | 315
-rw-r--r--  arch/sparc/lib/memset.S | 87
-rw-r--r--  arch/sparc/mm/Makefile | 2
-rw-r--r--  arch/sparc/mm/extable.c | 107
-rw-r--r--  arch/sparc/mm/fault_32.c | 80
-rw-r--r--  arch/sparc/mm/mm_32.h | 2
-rw-r--r--  arch/um/kernel/process.c | 2
-rw-r--r--  arch/x86/Kconfig | 1
-rw-r--r--  arch/x86/include/asm/kfence.h | 64
-rw-r--r--  arch/x86/include/asm/kvm-x86-ops.h | 6
-rw-r--r--  arch/x86/include/asm/kvm_host.h | 45
-rw-r--r--  arch/x86/include/asm/xen/page.h | 12
-rw-r--r--  arch/x86/kernel/process.c | 2
-rw-r--r--  arch/x86/kvm/Kconfig | 9
-rw-r--r--  arch/x86/kvm/Makefile | 3
-rw-r--r--  arch/x86/kvm/cpuid.c | 2
-rw-r--r--  arch/x86/kvm/hyperv.c | 2
-rw-r--r--  arch/x86/kvm/mmu/mmu.c | 224
-rw-r--r--  arch/x86/kvm/mmu/mmu_internal.h | 15
-rw-r--r--  arch/x86/kvm/mmu/paging_tmpl.h | 14
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.c | 66
-rw-r--r--  arch/x86/kvm/mmu/tdp_mmu.h | 3
-rw-r--r--  arch/x86/kvm/svm/nested.c | 48
-rw-r--r--  arch/x86/kvm/svm/svm.c | 23
-rw-r--r--  arch/x86/kvm/vmx/nested.c | 37
-rw-r--r--  arch/x86/kvm/vmx/pmu_intel.c | 4
-rw-r--r--  arch/x86/kvm/vmx/vmx.c | 112
-rw-r--r--  arch/x86/kvm/vmx/vmx.h | 2
-rw-r--r--  arch/x86/kvm/x86.c | 165
-rw-r--r--  arch/x86/kvm/xen.c | 290
-rw-r--r--  arch/x86/kvm/xen.h | 64
-rw-r--r--  arch/x86/mm/fault.c | 6
-rw-r--r--  arch/x86/mm/pat/memtype.c | 4
-rw-r--r--  arch/x86/xen/p2m.c | 54
-rw-r--r--  arch/x86/xen/setup.c | 25
-rw-r--r--  arch/xtensa/kernel/process.c | 2
-rw-r--r--  block/bfq-iosched.c | 6
-rw-r--r--  block/blk-core.c | 1
-rw-r--r--  block/blk-crypto-fallback.c | 12
-rw-r--r--  block/blk-map.c | 4
-rw-r--r--  block/blk-mq-debugfs.c | 1
-rw-r--r--  block/blk-mq-sched.c | 12
-rw-r--r--  block/blk-mq-sched.h | 1
-rw-r--r--  block/blk-pm.h | 38
-rw-r--r--  block/blk-settings.c | 12
-rw-r--r--  block/blk-sysfs.c | 7
-rw-r--r--  block/bounce.c | 24
-rw-r--r--  block/genhd.c | 9
-rw-r--r--  block/ioctl.c | 21
-rw-r--r--  block/kyber-iosched.c | 33
-rw-r--r--  block/mq-deadline.c | 4
-rw-r--r--  block/partitions/core.c | 6
-rw-r--r--  crypto/Kconfig | 2
-rw-r--r--  drivers/auxdisplay/cfag12864b.c | 4
-rw-r--r--  drivers/auxdisplay/cfag12864bfb.c | 4
-rw-r--r--  drivers/auxdisplay/ks0108.c | 4
-rw-r--r--  drivers/base/Kconfig | 6
-rw-r--r--  drivers/base/Makefile | 1
-rw-r--r--  drivers/base/arch_numa.c (renamed from arch/arm64/mm/numa.c) | 40
-rw-r--r--  drivers/base/memory.c | 35
-rw-r--r--  drivers/base/power/runtime.c | 62
-rw-r--r--  drivers/block/loop.c | 5
-rw-r--r--  drivers/block/nbd.c | 32
-rw-r--r--  drivers/block/rsxx/core.c | 8
-rw-r--r--  drivers/block/rsxx/rsxx_priv.h | 1
-rw-r--r--  drivers/block/xen-blkback/blkback.c | 4
-rw-r--r--  drivers/block/zram/zram_drv.c | 2
-rw-r--r--  drivers/char/hw_random/pseries-rng.c | 3
-rw-r--r--  drivers/char/tpm/tpm-chip.c | 2
-rw-r--r--  drivers/char/tpm/tpm_ibmvtpm.c | 4
-rw-r--r--  drivers/char/tpm/tpm_tis_core.c | 30
-rw-r--r--  drivers/clk/Kconfig | 7
-rw-r--r--  drivers/clk/Makefile | 1
-rw-r--r--  drivers/clk/clk-k210.c | 1007
-rw-r--r--  drivers/crypto/nx/nx-842-pseries.c | 4
-rw-r--r--  drivers/crypto/nx/nx.c | 4
-rw-r--r--  drivers/dax/super.c | 2
-rw-r--r--  drivers/gpio/gpio-pca953x.c | 78
-rw-r--r--  drivers/gpio/gpiolib-acpi.c | 21
-rw-r--r--  drivers/gpio/gpiolib.c | 19
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nv.c | 6
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 2
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 2
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c | 15
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 6
-rw-r--r--  drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 2
-rw-r--r--  drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c | 3
-rw-r--r--  drivers/gpu/drm/tilcdc/Makefile | 2
-rw-r--r--  drivers/hv/hv_balloon.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-brcmstb.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-designware-core.h | 2
-rw-r--r--  drivers/i2c/busses/i2c-designware-master.c | 2
-rw-r--r--  drivers/i2c/busses/i2c-exynos5.c | 8
-rw-r--r--  drivers/i2c/busses/i2c-qcom-geni.c | 34
-rw-r--r--  drivers/ide/falconide.c | 3
-rw-r--r--  drivers/infiniband/core/cm.c | 5
-rw-r--r--  drivers/infiniband/core/uverbs_ioctl.c | 2
-rw-r--r--  drivers/infiniband/hw/mlx5/devx.c | 4
-rw-r--r--  drivers/infiniband/hw/mlx5/odp.c | 2
-rw-r--r--  drivers/infiniband/sw/rxe/Kconfig | 1
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_comp.c | 55
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_net.c | 10
-rw-r--r--  drivers/infiniband/sw/rxe/rxe_recv.c | 59
-rw-r--r--  drivers/iommu/amd/io_pgtable.c | 10
-rw-r--r--  drivers/iommu/dma-iommu.c | 15
-rw-r--r--  drivers/iommu/intel/pasid.h | 4
-rw-r--r--  drivers/iommu/tegra-smmu.c | 72
-rw-r--r--  drivers/leds/Kconfig | 3
-rw-r--r--  drivers/leds/Makefile | 3
-rw-r--r--  drivers/leds/blink/Kconfig | 20
-rw-r--r--  drivers/leds/blink/Makefile | 2
-rw-r--r--  drivers/leds/blink/leds-lgm-sso.c | 888
-rw-r--r--  drivers/leds/led-class.c | 3
-rw-r--r--  drivers/leds/led-core.c | 20
-rw-r--r--  drivers/leds/leds-apu.c | 11
-rw-r--r--  drivers/leds/leds-blinkm.c | 24
-rw-r--r--  drivers/leds/leds-gpio.c | 3
-rw-r--r--  drivers/leds/leds-lm3530.c | 10
-rw-r--r--  drivers/leds/leds-lm3533.c | 2
-rw-r--r--  drivers/leds/leds-lm355x.c | 8
-rw-r--r--  drivers/leds/leds-lm3642.c | 16
-rw-r--r--  drivers/leds/leds-lp50xx.c | 83
-rw-r--r--  drivers/leds/leds-max8997.c | 12
-rw-r--r--  drivers/leds/leds-netxbig.c | 12
-rw-r--r--  drivers/leds/leds-ss4200.c | 18
-rw-r--r--  drivers/leds/leds-wm831x-status.c | 12
-rw-r--r--  drivers/leds/leds.h | 6
-rw-r--r--  drivers/md/dm-bufio.c | 4
-rw-r--r--  drivers/md/dm-io.c | 4
-rw-r--r--  drivers/md/dm-log-writes.c | 10
-rw-r--r--  drivers/md/dm-verity-fec.c | 23
-rw-r--r--  drivers/misc/ibmvmc.c | 4
-rw-r--r--  drivers/mmc/host/sdhci.c | 9
-rw-r--r--  drivers/net/Kconfig | 2
-rw-r--r--  drivers/net/ethernet/ibm/ibmveth.c | 4
-rw-r--r--  drivers/net/ethernet/ibm/ibmvnic.c | 5
-rw-r--r--  drivers/net/xen-netback/netback.c | 12
-rw-r--r--  drivers/nvme/host/fabrics.c | 5
-rw-r--r--  drivers/nvme/host/hwmon.c | 1
-rw-r--r--  drivers/nvme/host/pci.c | 9
-rw-r--r--  drivers/nvme/target/admin-cmd.c | 36
-rw-r--r--  drivers/nvme/target/configfs.c | 50
-rw-r--r--  drivers/nvme/target/core.c | 2
-rw-r--r--  drivers/nvme/target/io-cmd-bdev.c | 8
-rw-r--r--  drivers/nvme/target/nvmet.h | 7
-rw-r--r--  drivers/nvme/target/passthru.c | 4
-rw-r--r--  drivers/pcmcia/cistpl.c | 4
-rw-r--r--  drivers/pinctrl/Kconfig | 13
-rw-r--r--  drivers/pinctrl/Makefile | 1
-rw-r--r--  drivers/pinctrl/pinctrl-k210.c | 985
-rw-r--r--  drivers/powercap/Kconfig | 2
-rw-r--r--  drivers/powercap/dtpm.c | 3
-rw-r--r--  drivers/reset/Kconfig | 10
-rw-r--r--  drivers/reset/Makefile | 1
-rw-r--r--  drivers/reset/reset-k210.c | 131
-rw-r--r--  drivers/rtc/rtc-m41t80.c | 4
-rw-r--r--  drivers/s390/char/vmur.c | 2
-rw-r--r--  drivers/s390/virtio/virtio_ccw.c | 4
-rw-r--r--  drivers/scsi/aic7xxx/aic79xx.h | 2
-rw-r--r--  drivers/scsi/aic7xxx/aic7xxx.h | 2
-rw-r--r--  drivers/scsi/bnx2fc/Kconfig | 1
-rw-r--r--  drivers/scsi/bnx2i/bnx2i_iscsi.c | 2
-rw-r--r--  drivers/scsi/hpsa.c | 51
-rw-r--r--  drivers/scsi/hpsa_cmd.h | 2
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvfc.c | 3
-rw-r--r--  drivers/scsi/ibmvscsi/ibmvscsi.c | 4
-rw-r--r--  drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c | 4
-rw-r--r--  drivers/scsi/isci/request.c | 8
-rw-r--r--  drivers/scsi/iscsi_tcp.c | 9
-rw-r--r--  drivers/scsi/libiscsi.c | 496
-rw-r--r--  drivers/scsi/libiscsi_tcp.c | 86
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_base.c | 58
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_base.h | 52
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_ctl.c | 67
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_ctl.h | 22
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_scsih.c | 44
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c | 38
-rw-r--r--  drivers/scsi/pmcraid.h | 6
-rw-r--r--  drivers/scsi/qla2xxx/qla_target.c | 3
-rw-r--r--  drivers/scsi/qla4xxx/ql4_os.c | 2
-rw-r--r--  drivers/scsi/scsi_transport_iscsi.c | 43
-rw-r--r--  drivers/scsi/sd.c | 14
-rw-r--r--  drivers/scsi/sd_zbc.c | 6
-rw-r--r--  drivers/scsi/ufs/ufshcd.c | 6
-rw-r--r--  drivers/scsi/ufs/ufshcd.h | 2
-rw-r--r--  drivers/soc/Kconfig | 2
-rw-r--r--  drivers/soc/Makefile | 2
-rw-r--r--  drivers/soc/canaan/Kconfig | 12
-rw-r--r--  drivers/soc/canaan/Makefile | 3
-rw-r--r--  drivers/soc/canaan/k210-sysctl.c | 78
-rw-r--r--  drivers/soc/kendryte/Kconfig | 14
-rw-r--r--  drivers/soc/kendryte/Makefile | 3
-rw-r--r--  drivers/soc/kendryte/k210-sysctl.c | 260
-rw-r--r--  drivers/soc/litex/Kconfig | 14
-rw-r--r--  drivers/soc/litex/litex_soc_ctrl.c | 116
-rw-r--r--  drivers/soc/sifive/sifive_l2_cache.c | 27
-rw-r--r--  drivers/soundwire/intel.h | 2
-rw-r--r--  drivers/soundwire/intel_init.c | 158
-rw-r--r--  drivers/staging/vme/devices/vme_user.c | 12
-rw-r--r--  drivers/target/sbp/sbp_target.c | 2
-rw-r--r--  drivers/target/target_core_iblock.c | 9
-rw-r--r--  drivers/target/target_core_pr.c | 15
-rw-r--r--  drivers/target/target_core_pscsi.c | 2
-rw-r--r--  drivers/target/target_core_transport.c | 15
-rw-r--r--  drivers/target/target_core_user.c | 189
-rw-r--r--  drivers/tty/hvc/hvcs.c | 4
-rw-r--r--  drivers/virtio/virtio_mem.c | 43
-rw-r--r--  drivers/xen/balloon.c | 2
-rw-r--r--  drivers/xen/events/events_base.c | 27
-rw-r--r--  drivers/xen/evtchn.c | 29
-rw-r--r--  drivers/xen/xen-acpi-processor.c | 3
-rw-r--r--  drivers/xen/xen-front-pgdir-shbuf.c | 11
-rw-r--r--  drivers/xen/xenbus/xenbus_probe.c | 66
-rw-r--r--  fs/9p/vfs_inode.c | 21
-rw-r--r--  fs/block_dev.c | 10
-rw-r--r--  fs/btrfs/block-group.c | 33
-rw-r--r--  fs/btrfs/block-group.h | 9
-rw-r--r--  fs/btrfs/compression.c | 68
-rw-r--r--  fs/btrfs/ctree.h | 5
-rw-r--r--  fs/btrfs/delayed-inode.c | 5
-rw-r--r--  fs/btrfs/extent_io.c | 21
-rw-r--r--  fs/btrfs/file.c | 5
-rw-r--r--  fs/btrfs/free-space-cache.c | 21
-rw-r--r--  fs/btrfs/inode.c | 46
-rw-r--r--  fs/btrfs/ioctl.c | 19
-rw-r--r--  fs/btrfs/lzo.c | 4
-rw-r--r--  fs/btrfs/qgroup.c | 8
-rw-r--r--  fs/btrfs/qgroup.h | 2
-rw-r--r--  fs/btrfs/raid56.c | 31
-rw-r--r--  fs/btrfs/ref-verify.c | 4
-rw-r--r--  fs/btrfs/reflink.c | 24
-rw-r--r--  fs/btrfs/scrub.c | 9
-rw-r--r--  fs/btrfs/send.c | 7
-rw-r--r--  fs/btrfs/super.c | 4
-rw-r--r--  fs/btrfs/tree-checker.c | 16
-rw-r--r--  fs/btrfs/tree-log.c | 3
-rw-r--r--  fs/btrfs/xattr.c | 31
-rw-r--r--  fs/btrfs/zlib.c | 5
-rw-r--r--  fs/btrfs/zoned.c | 4
-rw-r--r--  fs/btrfs/zstd.c | 6
-rw-r--r--  fs/cifs/cifs_debug.c | 121
-rw-r--r--  fs/cifs/cifs_swn.c | 2
-rw-r--r--  fs/cifs/cifsacl.c | 379
-rw-r--r--  fs/cifs/cifsacl.h | 4
-rw-r--r--  fs/cifs/cifsencrypt.c | 6
-rw-r--r--  fs/cifs/cifsfs.c | 15
-rw-r--r--  fs/cifs/cifsfs.h | 2
-rw-r--r--  fs/cifs/cifsglob.h | 11
-rw-r--r--  fs/cifs/cifsproto.h | 2
-rw-r--r--  fs/cifs/cifssmb.c | 6
-rw-r--r--  fs/cifs/connect.c | 301
-rw-r--r--  fs/cifs/dfs_cache.c | 33
-rw-r--r--  fs/cifs/file.c | 2
-rw-r--r--  fs/cifs/fs_context.c | 75
-rw-r--r--  fs/cifs/fs_context.h | 6
-rw-r--r--  fs/cifs/inode.c | 23
-rw-r--r--  fs/cifs/sess.c | 2
-rw-r--r--  fs/cifs/smb2ops.c | 109
-rw-r--r--  fs/cifs/smb2pdu.c | 22
-rw-r--r--  fs/cifs/trace.h | 36
-rw-r--r--  fs/cifs/transport.c | 63
-rw-r--r--  fs/coredump.c | 4
-rw-r--r--  fs/direct-io.c | 2
-rw-r--r--  fs/erofs/data.c | 4
-rw-r--r--  fs/ext4/readpage.c | 3
-rw-r--r--  fs/f2fs/data.c | 3
-rw-r--r--  fs/f2fs/node.c | 2
-rw-r--r--  fs/gfs2/inode.c | 4
-rw-r--r--  fs/inode.c | 1
-rw-r--r--  fs/io-wq.c | 626
-rw-r--r--  fs/io-wq.h | 34
-rw-r--r--  fs/io_uring.c | 2003
-rw-r--r--  fs/iomap/buffered-io.c | 11
-rw-r--r--  fs/iomap/seek.c | 125
-rw-r--r--  fs/jfs/super.c | 1
-rw-r--r--  fs/mpage.c | 4
-rw-r--r--  fs/namespace.c | 53
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c | 6
-rw-r--r--  fs/nfs/file.c | 27
-rw-r--r--  fs/nfs/fs_context.c | 35
-rw-r--r--  fs/nfs/fscache.c | 4
-rw-r--r--  fs/nfs/inode.c | 111
-rw-r--r--  fs/nfs/nfs3acl.c | 1
-rw-r--r--  fs/nfs/nfs4client.c | 1
-rw-r--r--  fs/nfs/nfs4proc.c | 21
-rw-r--r--  fs/nfs/nfs4state.c | 1
-rw-r--r--  fs/nfs/pnfs.c | 2
-rw-r--r--  fs/nfs/read.c | 204
-rw-r--r--  fs/nfs/super.c | 7
-rw-r--r--  fs/nfs/write.c | 37
-rw-r--r--  fs/proc/base.c | 19
-rw-r--r--  fs/proc/proc_sysctl.c | 4
-rw-r--r--  fs/proc/self.c | 7
-rw-r--r--  fs/proc/thread_self.c | 7
-rw-r--r--  fs/pstore/inode.c | 2
-rw-r--r--  fs/pstore/ram_core.c | 2
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c | 12
-rw-r--r--  fs/xfs/xfs_aops.c | 17
-rw-r--r--  fs/xfs/xfs_bio_io.c | 2
-rw-r--r--  fs/xfs/xfs_buf.c | 4
-rw-r--r--  fs/xfs/xfs_extent_busy.c | 14
-rw-r--r--  fs/xfs/xfs_sysctl.c | 35
-rw-r--r--  fs/xfs/xfs_trans.c | 33
-rw-r--r--  fs/xfs/xfs_trans.h | 30
-rw-r--r--  include/asm-generic/numa.h | 52
-rw-r--r--  include/asm-generic/vmlinux.lds.h | 9
-rw-r--r--  include/dt-bindings/clock/k210-clk.h | 1
-rw-r--r--  include/dt-bindings/pinctrl/k210-fpioa.h | 276
-rw-r--r--  include/dt-bindings/reset/k210-rst.h | 42
-rw-r--r--  include/linux/acpi.h | 16
-rw-r--r--  include/linux/bio.h | 7
-rw-r--r--  include/linux/bitops.h | 2
-rw-r--r--  include/linux/blkdev.h | 3
-rw-r--r--  include/linux/blktrace_api.h | 4
-rw-r--r--  include/linux/cfag12864b.h | 2
-rw-r--r--  include/linux/cpuhotplug.h | 2
-rw-r--r--  include/linux/cred.h | 2
-rw-r--r--  include/linux/device.h | 1
-rw-r--r--  include/linux/dma-mapping.h | 16
-rw-r--r--  include/linux/fortify-string.h | 302
-rw-r--r--  include/linux/gfp.h | 2
-rw-r--r--  include/linux/gpio/consumer.h | 2
-rw-r--r--  include/linux/highmem.h | 56
-rw-r--r--  include/linux/init.h | 4
-rw-r--r--  include/linux/initrd.h | 11
-rw-r--r--  include/linux/io_uring.h | 24
-rw-r--r--  include/linux/kasan.h | 25
-rw-r--r--  include/linux/kexec.h | 2
-rw-r--r--  include/linux/kfence.h | 222
-rw-r--r--  include/linux/kgdb.h | 2
-rw-r--r--  include/linux/khugepaged.h | 2
-rw-r--r--  include/linux/ks0108.h | 2
-rw-r--r--  include/linux/kvm_host.h | 25
-rw-r--r--  include/linux/led-class-flash.h | 42
-rw-r--r--  include/linux/led-class-multicolor.h | 42
-rw-r--r--  include/linux/leds.h | 12
-rw-r--r--  include/linux/litex.h | 150
-rw-r--r--  include/linux/mdev.h | 2
-rw-r--r--  include/linux/memory.h | 3
-rw-r--r--  include/linux/memory_hotplug.h | 33
-rw-r--r--  include/linux/memremap.h | 6
-rw-r--r--  include/linux/mmzone.h | 49
-rw-r--r--  include/linux/mount.h | 1
-rw-r--r--  include/linux/net.h | 3
-rw-r--r--  include/linux/nfs_fs.h | 3
-rw-r--r--  include/linux/nfs_fs_sb.h | 4
-rw-r--r--  include/linux/page-flags.h | 4
-rw-r--r--  include/linux/pagemap.h | 6
-rw-r--r--  include/linux/pagevec.h | 4
-rw-r--r--  include/linux/pgtable.h | 8
-rw-r--r--  include/linux/ptrace.h | 2
-rw-r--r--  include/linux/rmap.h | 3
-rw-r--r--  include/linux/sched.h | 3
-rw-r--r--  include/linux/sched/task.h | 2
-rw-r--r--  include/linux/slab_def.h | 3
-rw-r--r--  include/linux/slub_def.h | 3
-rw-r--r--  include/linux/soundwire/sdw_intel.h | 2
-rw-r--r--  include/linux/stackdepot.h | 9
-rw-r--r--  include/linux/string.h | 282
-rw-r--r--  include/linux/swap.h | 1
-rw-r--r--  include/linux/swiotlb.h | 1
-rw-r--r--  include/linux/trace_events.h | 11
-rw-r--r--  include/linux/vmstat.h | 6
-rw-r--r--  include/linux/zpool.h | 3
-rw-r--r--  include/linux/zsmalloc.h | 2
-rw-r--r--  include/scsi/libiscsi.h | 6
-rw-r--r--  include/soc/canaan/k210-sysctl.h | 43
-rw-r--r--  include/sound/intel-nhlt.h | 5
-rw-r--r--  include/sound/soc-acpi.h | 2
-rw-r--r--  include/target/target_core_backend.h | 1
-rw-r--r--  include/trace/events/bcache.h | 10
-rw-r--r--  include/trace/events/block.h | 20
-rw-r--r--  include/trace/events/error_report.h | 74
-rw-r--r--  include/trace/events/kmem.h | 6
-rw-r--r--  include/trace/events/rpcrdma.h | 50
-rw-r--r--  include/uapi/linux/firewire-cdev.h | 2
-rw-r--r--  include/uapi/linux/input.h | 2
-rw-r--r--  include/uapi/linux/io_uring.h | 1
-rw-r--r--  include/uapi/linux/kvm.h | 13
-rw-r--r--  include/xen/xenbus.h | 7
-rw-r--r--  init/Kconfig | 3
-rw-r--r--  init/initramfs.c | 64
-rw-r--r--  init/main.c | 6
-rw-r--r--  init/version.c | 8
-rw-r--r--  kernel/audit_fsnotify.c | 2
-rw-r--r--  kernel/debug/debug_core.c | 11
-rw-r--r--  kernel/dma/swiotlb.c | 310
-rw-r--r--  kernel/events/core.c | 8
-rw-r--r--  kernel/events/uprobes.c | 2
-rw-r--r--  kernel/fork.c | 30
-rw-r--r--  kernel/groups.c | 7
-rw-r--r--  kernel/kexec_internal.h | 2
-rw-r--r--  kernel/locking/rtmutex.c | 4
-rw-r--r--  kernel/locking/rwsem.c | 2
-rw-r--r--  kernel/locking/semaphore.c | 2
-rw-r--r--  kernel/ptrace.c | 2
-rw-r--r--  kernel/sched/fair.c | 2
-rw-r--r--  kernel/sched/membarrier.c | 2
-rw-r--r--  kernel/signal.c | 4
-rw-r--r--  kernel/sysctl.c | 8
-rw-r--r--  kernel/trace/Kconfig | 2
-rw-r--r--  kernel/trace/Makefile | 1
-rw-r--r--  kernel/trace/blktrace.c | 20
-rw-r--r--  kernel/trace/error_report-traces.c | 11
-rw-r--r--  kernel/trace/ring_buffer.c | 21
-rw-r--r--  kernel/trace/trace.c | 6
-rw-r--r--  kernel/trace/trace.h | 1
-rw-r--r--  kernel/trace/trace_events_synth.c | 4
-rw-r--r--  lib/Kconfig | 9
-rw-r--r--  lib/Kconfig.debug | 1
-rw-r--r--  lib/Kconfig.kfence | 82
-rw-r--r--  lib/Kconfig.ubsan | 17
-rw-r--r--  lib/cmdline.c | 7
-rw-r--r--  lib/extable.c | 5
-rw-r--r--  lib/genalloc.c | 3
-rw-r--r--  lib/iov_iter.c | 14
-rw-r--r--  lib/stackdepot.c | 37
-rw-r--r--  lib/test_kasan.c | 111
-rw-r--r--  lib/test_ubsan.c | 49
-rw-r--r--  lib/ubsan.c | 68
-rw-r--r--  mm/Makefile | 1
-rw-r--r--  mm/backing-dev.c | 3
-rw-r--r--  mm/cma.c | 64
-rw-r--r--  mm/dmapool.c | 3
-rw-r--r--  mm/early_ioremap.c | 12
-rw-r--r--  mm/filemap.c | 343
-rw-r--r--  mm/huge_memory.c | 6
-rw-r--r--  mm/internal.h | 4
-rw-r--r--  mm/kasan/common.c | 193
-rw-r--r--  mm/kasan/generic.c | 3
-rw-r--r--  mm/kasan/hw_tags.c | 2
-rw-r--r--  mm/kasan/kasan.h | 77
-rw-r--r--  mm/kasan/report.c | 8
-rw-r--r--  mm/kasan/shadow.c | 62
-rw-r--r--  mm/kfence/Makefile | 6
-rw-r--r--  mm/kfence/core.c | 841
-rw-r--r--  mm/kfence/kfence.h | 106
-rw-r--r--  mm/kfence/kfence_test.c | 858
-rw-r--r--  mm/kfence/report.c | 262
-rw-r--r--  mm/khugepaged.c | 22
-rw-r--r--  mm/memory-failure.c | 6
-rw-r--r--  mm/memory.c | 4
-rw-r--r--  mm/memory_hotplug.c | 160
-rw-r--r--  mm/memremap.c | 23
-rw-r--r--  mm/mlock.c | 2
-rw-r--r--  mm/page_alloc.c | 1
-rw-r--r--  mm/page_io.c | 5
-rw-r--r--  mm/rmap.c | 22
-rw-r--r--  mm/shmem.c | 154
-rw-r--r--  mm/slab.c | 38
-rw-r--r--  mm/slab_common.c | 23
-rw-r--r--  mm/slub.c | 63
-rw-r--r--  mm/swap.c | 38
-rw-r--r--  mm/swap_state.c | 7
-rw-r--r--  mm/swapfile.c | 13
-rw-r--r--  mm/truncate.c | 131
-rw-r--r--  mm/vmstat.c | 35
-rw-r--r--  mm/z3fold.c | 1
-rw-r--r--  mm/zbud.c | 1
-rw-r--r--  mm/zpool.c | 13
-rw-r--r--  mm/zsmalloc.c | 22
-rw-r--r--  mm/zswap.c | 57
-rw-r--r--  net/ipv4/af_inet.c | 1
-rw-r--r--  net/ipv6/af_inet6.c | 1
-rw-r--r--  net/socket.c | 10
-rw-r--r--  net/sunrpc/rpc_pipe.c | 1
-rw-r--r--  net/sunrpc/xprtrdma/backchannel.c | 4
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c | 12
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 67
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 4
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h | 15
-rw-r--r--  net/sunrpc/xprtsock.c | 17
-rw-r--r--  samples/auxdisplay/cfag12864b-example.c | 2
-rw-r--r--  scripts/Makefile.ubsan | 2
-rwxr-xr-x  scripts/adjust_autoksyms.sh | 3
-rwxr-xr-x  scripts/checkpatch.pl | 152
l---------  scripts/dtc/include-prefixes/c6x | 1
-rw-r--r--  scripts/gcc-plugins/latent_entropy_plugin.c | 2
-rw-r--r--  scripts/gcc-plugins/structleak_plugin.c | 3
-rw-r--r--  scripts/gdb/linux/lists.py | 5
-rwxr-xr-x  scripts/gen_autoksyms.sh | 35
-rwxr-xr-x  scripts/kernel-doc | 2
-rw-r--r--  scripts/lto-used-symbollist.txt | 5
-rw-r--r--  scripts/recordmcount.c | 2
-rw-r--r--  sound/hda/Kconfig | 4
-rw-r--r--  sound/hda/Makefile | 3
-rw-r--r--  sound/hda/ext/hdac_ext_controller.c | 2
-rw-r--r--  sound/hda/ext/hdac_ext_stream.c | 2
-rw-r--r--  sound/hda/hdac_regmap.c | 2
-rw-r--r--  sound/hda/intel-dsp-config.c | 2
-rw-r--r--  sound/hda/intel-nhlt.c | 54
-rw-r--r--  sound/hda/intel-sdw-acpi.c | 179
-rw-r--r--  sound/mips/snd-n64.c | 8
-rw-r--r--  sound/pci/ctxfi/cthw20k2.c | 2
-rw-r--r--  sound/pci/hda/hda_codec.c | 6
-rw-r--r--  sound/pci/hda/hda_generic.c | 4
-rw-r--r--  sound/pci/hda/hda_jack.c | 4
-rw-r--r--  sound/pci/hda/patch_ca0132.c | 9
-rw-r--r--  sound/pci/hda/patch_hdmi.c | 18
-rw-r--r--  sound/pci/hda/patch_realtek.c | 26
-rw-r--r--  sound/soc/sof/Kconfig | 15
-rw-r--r--  sound/soc/sof/Makefile | 4
-rw-r--r--  sound/soc/sof/intel/Kconfig | 254
-rw-r--r--  sound/soc/sof/intel/Makefile | 20
-rw-r--r--  sound/soc/sof/intel/bdw.c | 67
-rw-r--r--  sound/soc/sof/intel/byt.c | 106
-rw-r--r--  sound/soc/sof/intel/hda.c | 18
-rw-r--r--  sound/soc/sof/intel/hda.h | 3
-rw-r--r--  sound/soc/sof/intel/pci-apl.c | 81
-rw-r--r--  sound/soc/sof/intel/pci-cnl.c | 104
-rw-r--r--  sound/soc/sof/intel/pci-icl.c | 84
-rw-r--r--  sound/soc/sof/intel/pci-tgl.c | 121
-rw-r--r--  sound/soc/sof/intel/pci-tng.c | 70
-rw-r--r--  sound/soc/sof/intel/shim.h | 6
-rw-r--r--  sound/soc/sof/sof-acpi-dev.c | 130
-rw-r--r--  sound/soc/sof/sof-acpi-dev.h | 16
-rw-r--r--  sound/soc/sof/sof-pci-dev.c | 340
-rw-r--r--  sound/soc/sof/sof-pci-dev.h | 17
-rw-r--r--  sound/usb/clock.c | 8
-rw-r--r--  sound/usb/mixer.c | 11
-rw-r--r--  sound/usb/mixer_maps.c | 10
-rw-r--r--  sound/usb/pcm.c | 12
-rw-r--r--  sound/usb/quirks.c | 2
-rw-r--r--  tools/arch/s390/include/uapi/asm/ptrace.h | 5
-rw-r--r--  tools/arch/x86/include/asm/cpufeatures.h | 19
-rw-r--r--  tools/arch/x86/include/uapi/asm/kvm.h | 1
-rw-r--r--  tools/arch/x86/include/uapi/asm/vmx.h | 4
-rw-r--r--  tools/build/Makefile | 8
-rw-r--r--  tools/include/linux/coresight-pmu.h | 20
-rw-r--r--  tools/include/uapi/drm/drm.h | 97
-rw-r--r--  tools/include/uapi/drm/i915_drm.h | 3
-rw-r--r--  tools/include/uapi/linux/kvm.h | 73
-rw-r--r--  tools/include/uapi/linux/mount.h | 16
-rw-r--r--  tools/include/uapi/linux/openat2.h | 4
-rw-r--r--  tools/lib/perf/evlist.c | 13
-rw-r--r--  tools/lib/perf/include/internal/evlist.h | 2
-rw-r--r--  tools/perf/Documentation/perf-evlist.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-ftrace.txt | 4
-rw-r--r--  tools/perf/Documentation/perf-kallsyms.txt | 2
-rw-r--r--  tools/perf/Documentation/perf-trace.txt | 4
-rw-r--r--  tools/perf/Makefile.perf | 12
-rw-r--r--  tools/perf/arch/arm/util/cs-etm.c | 12
-rw-r--r--  tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 21
-rw-r--r--  tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1
-rw-r--r--  tools/perf/arch/x86/Makefile | 11
-rw-r--r--  tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1
-rw-r--r--  tools/perf/arch/x86/include/arch-tests.h | 1
-rw-r--r--  tools/perf/arch/x86/tests/Build | 1
-rw-r--r--  tools/perf/arch/x86/tests/arch-tests.c | 4
-rw-r--r--  tools/perf/arch/x86/tests/insn-x86.c | 2
-rw-r--r--  tools/perf/arch/x86/tests/sample-parsing.c | 121
-rw-r--r--  tools/perf/arch/x86/util/archinsn.c | 2
-rw-r--r--  tools/perf/bench/numa.c | 42
-rw-r--r--  tools/perf/bench/sched-messaging.c | 4
-rw-r--r--  tools/perf/bench/sched-pipe.c | 4
-rw-r--r--  tools/perf/bench/syscall.c | 4
-rw-r--r--  tools/perf/builtin-daemon.c | 6
-rw-r--r--  tools/perf/builtin-diff.c | 3
-rw-r--r--  tools/perf/builtin-trace.c | 5
-rw-r--r--  tools/perf/perf-archive.sh | 3
-rw-r--r--  tools/perf/tests/attr.c | 8
-rw-r--r--  tools/perf/tests/code-reading.c | 10
-rw-r--r--  tools/perf/tests/cpumap.c | 2
-rw-r--r--  tools/perf/tests/keep-tracking.c | 5
-rw-r--r--  tools/perf/tests/mmap-basic.c | 2
-rw-r--r--  tools/perf/tests/perf-time-to-tsc.c | 2
-rw-r--r--  tools/perf/tests/sample-parsing.c | 4
-rwxr-xr-x  tools/perf/tests/shell/daemon.sh | 32
-rw-r--r--  tools/perf/tests/sw-clock.c | 12
-rw-r--r--  tools/perf/tests/switch-tracking.c | 5
-rw-r--r--  tools/perf/tests/task-exit.c | 10
-rw-r--r--  tools/perf/tests/thread-map.c | 8
-rw-r--r--  tools/perf/util/evlist.c | 1
-rw-r--r--  tools/perf/util/evsel.c | 18
-rw-r--r--  tools/perf/util/evsel.h | 4
-rw-r--r--  tools/perf/util/header.c | 4
-rw-r--r--  tools/perf/util/map.c | 7
-rw-r--r--  tools/perf/util/parse-events.y | 6
-rw-r--r--  tools/perf/util/python-ext-sources | 1
-rw-r--r--  tools/perf/util/sort.c | 4
-rw-r--r--  tools/perf/util/stat-display.c | 2
-rw-r--r--  tools/perf/util/stat.c | 47
-rw-r--r--  tools/perf/util/trace-event-read.c | 1
-rw-r--r--  tools/testing/selftests/gpio/.gitignore | 2
-rw-r--r--  tools/testing/selftests/kvm/.gitignore | 1
-rw-r--r--  tools/testing/selftests/kvm/Makefile | 1
-rw-r--r--  tools/testing/selftests/kvm/hardware_disable_test.c | 165
-rw-r--r--  tools/testing/selftests/kvm/lib/kvm_util.c | 6
-rw-r--r--  tools/testing/selftests/kvm/lib/x86_64/processor.c | 3
-rw-r--r--  tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c | 159
-rw-r--r--  tools/tracing/latency/latency-collector.c | 6
-rw-r--r--  virt/kvm/kvm_main.c | 29
962 files changed, 24058 insertions, 10118 deletions
diff --git a/.mailmap b/.mailmap
index 87a8bbdbf749..85b93cdefc87 100644
--- a/.mailmap
+++ b/.mailmap
@@ -237,6 +237,7 @@ Maxime Ripard <mripard@kernel.org> <maxime.ripard@free-electrons.com>
Mayuresh Janorkar <mayur@ti.com>
Michael Buesch <m@bues.ch>
Michel Dänzer <michel@tungstengraphics.com>
+Miguel Ojeda <ojeda@kernel.org> <miguel.ojeda.sandonis@gmail.com>
Mike Rapoport <rppt@kernel.org> <mike@compulab.co.il>
Mike Rapoport <rppt@kernel.org> <mike.rapoport@gmail.com>
Mike Rapoport <rppt@kernel.org> <rppt@linux.ibm.com>
diff --git a/CREDITS b/CREDITS
index be097156bd71..cef83b958cbe 100644
--- a/CREDITS
+++ b/CREDITS
@@ -2841,14 +2841,11 @@ S: Subiaco, 6008
S: Perth, Western Australia
S: Australia
-N: Miguel Ojeda Sandonis
-E: miguel.ojeda.sandonis@gmail.com
-W: http://miguelojeda.es
-W: http://jair.lab.fi.uva.es/~migojed/
+N: Miguel Ojeda
+E: ojeda@kernel.org
+W: https://ojeda.dev
D: Author of the ks0108, cfag12864b and cfag12864bfb auxiliary display drivers.
D: Maintainer of the auxiliary display drivers tree (drivers/auxdisplay/*)
-S: C/ Mieses 20, 9-B
-S: Valladolid 47009
S: Spain
N: Peter Oruba
diff --git a/Documentation/ABI/testing/ima_policy b/Documentation/ABI/testing/ima_policy
index bc8e1cbe5e61..070779e8d836 100644
--- a/Documentation/ABI/testing/ima_policy
+++ b/Documentation/ABI/testing/ima_policy
@@ -29,7 +29,7 @@ Description:
option: [[appraise_type=]] [template=] [permit_directio]
[appraise_flag=] [keyrings=]
base:
- func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK]MODULE_CHECK]
+ func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
[FIRMWARE_CHECK]
[KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
[KEXEC_CMDLINE] [KEY_CHECK] [CRITICAL_DATA]
diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory
index 246a45b96d22..d8b0f80b9e33 100644
--- a/Documentation/ABI/testing/sysfs-devices-memory
+++ b/Documentation/ABI/testing/sysfs-devices-memory
@@ -13,21 +13,22 @@ What: /sys/devices/system/memory/memoryX/removable
Date: June 2008
Contact: Badari Pulavarty <pbadari@us.ibm.com>
Description:
- The file /sys/devices/system/memory/memoryX/removable
- indicates whether this memory block is removable or not.
- This is useful for a user-level agent to determine
- identify removable sections of the memory before attempting
- potentially expensive hot-remove memory operation
+ The file /sys/devices/system/memory/memoryX/removable is a
+ legacy interface used to indicated whether a memory block is
+ likely to be offlineable or not. Newer kernel versions return
+ "1" if and only if the kernel supports memory offlining.
Users: hotplug memory remove tools
http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
+ lsmem/chmem part of util-linux
What: /sys/devices/system/memory/memoryX/phys_device
Date: September 2008
Contact: Badari Pulavarty <pbadari@us.ibm.com>
Description:
The file /sys/devices/system/memory/memoryX/phys_device
- is read-only and is designed to show the name of physical
- memory device. Implementation is currently incomplete.
+ is read-only; it is a legacy interface only ever used on s390x
+ to expose the covered storage increment.
+Users: Legacy s390-tools lsmem/chmem
What: /sys/devices/system/memory/memoryX/phys_index
Date: September 2008
@@ -43,23 +44,25 @@ Date: September 2008
Contact: Badari Pulavarty <pbadari@us.ibm.com>
Description:
The file /sys/devices/system/memory/memoryX/state
- is read-write. When read, its contents show the
- online/offline state of the memory section. When written,
- root can toggle the the online/offline state of a removable
- memory section (see removable file description above)
- using the following commands::
+ is read-write. When read, it returns the online/offline
+ state of the memory block. When written, root can toggle
+ the online/offline state of a memory block using the following
+ commands::
# echo online > /sys/devices/system/memory/memoryX/state
# echo offline > /sys/devices/system/memory/memoryX/state
- For example, if /sys/devices/system/memory/memory22/removable
- contains a value of 1 and
- /sys/devices/system/memory/memory22/state contains the
- string "online" the following command can be executed by
- by root to offline that section::
-
- # echo offline > /sys/devices/system/memory/memory22/state
-
+ On newer kernel versions, advanced states can be specified
+ when onlining to select a target zone: "online_movable"
+ selects the movable zone. "online_kernel" selects the
+ applicable kernel zone (DMA, DMA32, or Normal). However,
+ after successfully setting one of the advanced states,
+ reading the file will return "online"; the zone information
+ can be obtained via "valid_zones" instead.
+
+ While onlining is unlikely to fail, there are no guarantees
+ that offlining will succeed. Offlining is more likely to
+ succeed if "valid_zones" indicates "Movable".
Users: hotplug memory remove tools
http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
@@ -69,8 +72,19 @@ Date: July 2014
Contact: Zhang Zhen <zhenzhang.zhang@huawei.com>
Description:
The file /sys/devices/system/memory/memoryX/valid_zones is
- read-only and is designed to show which zone this memory
- block can be onlined to.
+ read-only.
+
+ For online memory blocks, it returns in which zone memory
+ provided by a memory block is managed. If multiple zones
+ apply (not applicable for hotplugged memory), "None" is returned
+ and the memory block cannot be offlined.
+
+ For offline memory blocks, it returns by which zone memory
+ provided by a memory block can be managed when onlining.
+ The first returned zone ("default") will be used when setting
+ the state of an offline memory block to "online". Only one of
+ the kernel zones (DMA, DMA32, Normal) is applicable for a single
+ memory block.
What: /sys/devices/system/memoryX/nodeY
Date: October 2009
diff --git a/Documentation/ABI/testing/sysfs-devices-xenbus b/Documentation/ABI/testing/sysfs-devices-xenbus
new file mode 100644
index 000000000000..fd796cb4f315
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-xenbus
@@ -0,0 +1,41 @@
+What: /sys/devices/*/xenbus/event_channels
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Number of Xen event channels associated with a kernel-based
+ paravirtualized device frontend or backend.
+
+What: /sys/devices/*/xenbus/events
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Total number of Xen events received for a Xen pv device
+ frontend or backend.
+
+What: /sys/devices/*/xenbus/jiffies_eoi_delayed
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Summed up time in jiffies the EOI of an interrupt for a Xen
+ pv device has been delayed in order to avoid stalls due to
+ event storms. A rising value is a first sign of a rogue
+ other end of the pv device.
+
+What: /sys/devices/*/xenbus/spurious_events
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Number of events received for a Xen pv device which did not
+ require any action. Too many spurious events in a row will
+ trigger delayed EOI processing.
+
+What: /sys/devices/*/xenbus/spurious_threshold
+Date: February 2021
+Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+ Controls the tolerated number of subsequent spurious events
+ before delayed EOI processing is triggered for a Xen pv
+ device. Default is 1. This can be modified in case the other
+ end of the pv device is issuing spurious events on a regular
+ basis and is known not to be deliberately malicious. Raising
+ the value for such cases can improve pv device performance.
diff --git a/Documentation/admin-guide/auxdisplay/cfag12864b.rst b/Documentation/admin-guide/auxdisplay/cfag12864b.rst
index 18c2865bd322..da385d851acc 100644
--- a/Documentation/admin-guide/auxdisplay/cfag12864b.rst
+++ b/Documentation/admin-guide/auxdisplay/cfag12864b.rst
@@ -3,7 +3,7 @@ cfag12864b LCD Driver Documentation
===================================
:License: GPLv2
-:Author & Maintainer: Miguel Ojeda Sandonis
+:Author & Maintainer: Miguel Ojeda <ojeda@kernel.org>
:Date: 2006-10-27
diff --git a/Documentation/admin-guide/auxdisplay/ks0108.rst b/Documentation/admin-guide/auxdisplay/ks0108.rst
index c0b7faf73136..a7d3fe509373 100644
--- a/Documentation/admin-guide/auxdisplay/ks0108.rst
+++ b/Documentation/admin-guide/auxdisplay/ks0108.rst
@@ -3,7 +3,7 @@ ks0108 LCD Controller Driver Documentation
==========================================
:License: GPLv2
-:Author & Maintainer: Miguel Ojeda Sandonis
+:Author & Maintainer: Miguel Ojeda <ojeda@kernel.org>
:Date: 2006-10-27
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 9acc57f68f31..64c62b979f2f 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -2098,7 +2098,7 @@ If the program returns 0, the attempt fails with -EPERM, otherwise
it succeeds.
An example of BPF_CGROUP_DEVICE program may be found in the kernel
-source tree in the tools/testing/selftests/bpf/dev_cgroup.c file.
+source tree in the tools/testing/selftests/bpf/progs/dev_cgroup.c file.
RDMA
diff --git a/Documentation/admin-guide/cifs/authors.rst b/Documentation/admin-guide/cifs/authors.rst
index b02d6dd6c070..5c1d2f0fa7d1 100644
--- a/Documentation/admin-guide/cifs/authors.rst
+++ b/Documentation/admin-guide/cifs/authors.rst
@@ -5,10 +5,10 @@ Authors
Original Author
---------------
-Steve French (sfrench@samba.org)
+Steve French (smfrench@gmail.com, sfrench@samba.org)
The author wishes to express his appreciation and thanks to:
-Andrew Tridgell (Samba team) for his early suggestions about smb/cifs VFS
+Andrew Tridgell (Samba team) for his early suggestions about SMB/CIFS VFS
improvements. Thanks to IBM for allowing me time and test resources to pursue
this project, to Jim McDonough from IBM (and the Samba Team) for his help, to
the IBM Linux JFS team for explaining many esoteric Linux filesystem features.
@@ -51,7 +51,7 @@ Patch Contributors
- Ronnie Sahlberg (for SMB3 xattr work, bug fixes, and lots of great work on compounding)
- Shirish Pargaonkar (for many ACL patches over the years)
- Sachin Prabhu (many bug fixes, including for reconnect, copy offload and security)
-- Paulo Alcantara
+- Paulo Alcantara (for some excellent work in DFS, and in booting from SMB3)
- Long Li (some great work on RDMA, SMB Direct)
diff --git a/Documentation/admin-guide/cifs/changes.rst b/Documentation/admin-guide/cifs/changes.rst
index 71f2ecb62299..3147bbae9c43 100644
--- a/Documentation/admin-guide/cifs/changes.rst
+++ b/Documentation/admin-guide/cifs/changes.rst
@@ -3,6 +3,7 @@ Changes
=======
See https://wiki.samba.org/index.php/LinuxCIFSKernel for summary
-information (that may be easier to read than parsing the output of
-"git log fs/cifs") about fixes/improvements to CIFS/SMB2/SMB3 support (changes
+information about fixes/improvements to CIFS/SMB2/SMB3 support (changes
to cifs.ko module) by kernel version (and cifs internal module version).
+This may be easier to read than parsing the output of "git log fs/cifs"
+by release.
diff --git a/Documentation/admin-guide/cifs/introduction.rst b/Documentation/admin-guide/cifs/introduction.rst
index cc2851d93d17..53ea62906aa5 100644
--- a/Documentation/admin-guide/cifs/introduction.rst
+++ b/Documentation/admin-guide/cifs/introduction.rst
@@ -7,19 +7,19 @@ Introduction
protocol which was the successor to the Server Message Block
(SMB) protocol, the native file sharing mechanism for most early
PC operating systems. New and improved versions of CIFS are now
- called SMB2 and SMB3. Use of SMB3 (and later, including SMB3.1.1)
- is strongly preferred over using older dialects like CIFS due to
- security reasons. All modern dialects, including the most recent,
- SMB3.1.1 are supported by the CIFS VFS module. The SMB3 protocol
- is implemented and supported by all major file servers
- such as all modern versions of Windows (including Windows 2016
- Server), as well as by Samba (which provides excellent
- CIFS/SMB2/SMB3 server support and tools for Linux and many other
- operating systems). Apple systems also support SMB3 well, as
- do most Network Attached Storage vendors, so this network
- filesystem client can mount to a wide variety of systems.
- It also supports mounting to the cloud (for example
- Microsoft Azure), including the necessary security features.
+ called SMB2 and SMB3. Use of SMB3 (and later, including SMB3.1.1,
+ the most current dialect) is strongly preferred over using older
+ dialects like CIFS due to security reasons. All modern dialects,
+ including the most recent, SMB3.1.1, are supported by the CIFS VFS
+ module. The SMB3 protocol is implemented and supported by all major
+ file servers such as Windows (including Windows 2019 Server), as
+ well as by Samba (which provides excellent CIFS/SMB2/SMB3 server
+ support and tools for Linux and many other operating systems).
+ Apple systems also support SMB3 well, as do most Network Attached
+ Storage vendors, so this network filesystem client can mount to a
+ wide variety of systems. It also supports mounting to the cloud
+ (for example Microsoft Azure), including the necessary security
+ features.
The intent of this module is to provide the most advanced network
file system function for SMB3 compliant servers, including advanced
@@ -27,8 +27,8 @@ Introduction
POSIX compliance, secure per-user session establishment, encryption,
high performance safe distributed caching (leases/oplocks), optional packet
signing, large files, Unicode support and other internationalization
- improvements. Since both Samba server and this filesystem client support
- the CIFS Unix extensions (and in the future SMB3 POSIX extensions),
+ improvements. Since both Samba server and this filesystem client support the
+ CIFS Unix extensions, and the Linux client also supports SMB3 POSIX extensions,
the combination can provide a reasonable alternative to other network and
cluster file systems for fileserving in some Linux to Linux environments,
not just in Linux to Windows (or Linux to Mac) environments.
diff --git a/Documentation/admin-guide/cifs/todo.rst b/Documentation/admin-guide/cifs/todo.rst
index 25f11576e7b9..2646ed2e2d3e 100644
--- a/Documentation/admin-guide/cifs/todo.rst
+++ b/Documentation/admin-guide/cifs/todo.rst
@@ -13,24 +13,26 @@ is a partial list of the known problems and missing features:
a) SMB3 (and SMB3.1.1) missing optional features:
- - multichannel (started), integration with RDMA
- - directory leases (improved metadata caching), started (root dir only)
+ - multichannel (partially integrated), integration of multichannel with RDMA
+ - directory leases (improved metadata caching). Currently only implemented for root dir
- T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl
currently the only two server side copy mechanisms supported)
b) improved sparse file support (fiemap and SEEK_HOLE are implemented
- but additional features would be supportable by the protocol).
+ but additional features would be supportable by the protocol such
+ as FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE)
c) Directory entry caching relies on a 1 second timer, rather than
using Directory Leases, currently only the root file handle is cached longer
+ by leveraging Directory Leases
-d) quota support (needs minor kernel change since quota calls
- to make it to network filesystems or deviceless filesystems)
+d) quota support (needs minor kernel change since quota calls otherwise
+ won't make it to network filesystems or deviceless filesystems).
e) Additional use cases can be optimized to use "compounding" (e.g.
open/query/close and open/setinfo/close) to reduce the number of
roundtrips to the server and improve performance. Various cases
- (stat, statfs, create, unlink, mkdir) already have been improved by
+ (stat, statfs, create, unlink, mkdir, xattrs) already have been improved by
using compounding but more can be done. In addition we could
significantly reduce redundant opens by using deferred close (with
handle caching leases) and better using reference counters on file
@@ -60,7 +62,9 @@ k) Add tools to take advantage of more smb3 specific ioctls and features
metadata attributes easier from tools (e.g. extending what was done
in smb-info tool).
-l) encrypted file support
+l) encrypted file support (currently the attribute showing the file is
+ encrypted on the server is reported, but changing the attribute is not
+ supported).
m) improved stats gathering tools (perhaps integration with nfsometer?)
to extend and make easier to use what is currently in /proc/fs/cifs/Stats
@@ -69,14 +73,13 @@ n) Add support for claims based ACLs ("DAC")
o) mount helper GUI (to simplify the various configuration options on mount)
-p) Add support for witness protocol (perhaps ioctl to cifs.ko from user space
- tool listening on witness protocol RPC) to allow for notification of share
- move, server failover, and server adapter changes. And also improve other
- failover scenarios, e.g. when client knows multiple DFS entries point to
- different servers, and the server we are connected to has gone down.
+p) Expand support for witness protocol to allow for notification of share
+ moves and server network adapter changes. Currently only notifications by
+ the witness protocol for server moves are supported by the Linux client.
q) Allow mount.cifs to be more verbose in reporting errors with dialect
- or unsupported feature errors.
+ or unsupported feature errors. This would now be easier due to the
+ implementation of the new mount API.
r) updating cifs documentation, and user guide.
@@ -87,11 +90,10 @@ t) split cifs and smb3 support into separate modules so legacy (and less
secure) CIFS dialect can be disabled in environments that don't need it
and simplify the code.
-v) POSIX Extensions for SMB3.1.1 (started, create and mkdir support added
- so far).
+v) Additional testing of POSIX Extensions for SMB3.1.1
w) Add support for additional strong encryption types, and additional spnego
- authentication mechanisms (see MS-SMB2)
+ authentication mechanisms (see MS-SMB2). GCM-256 is now partially implemented.
x) Finish support for SMB3.1.1 compression
diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index b6d9f02bc12b..13783dc68ab7 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -83,7 +83,7 @@ and encrypted shares and stronger signing and authentication algorithms.
There are additional mount options that may be helpful for SMB3 to get
improved POSIX behavior (NB: can use vers=3.0 to force only SMB3, never 2.1):
- ``mfsymlinks`` and ``cifsacl`` and ``idsfromsid``
+ ``mfsymlinks`` and either ``cifsacl`` or ``modefromsid`` (usually with ``idsfromsid``)
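+
+ For example, an illustrative mount invocation combining these options
+ (server, share and mount point below are placeholders)::
+
+   mount -t cifs //server/share /mnt -o vers=3.0,mfsymlinks,modefromsid,idsfromsid
+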
Allowing User Mounts
====================
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bab6a8b01202..04545725f187 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -5182,6 +5182,12 @@
growing up) the main stack are reserved for no other
mapping. Default value is 256 pages.
+ stack_depot_disable= [KNL]
+ Setting this to true via the kernel command line
+ disables the stack depot, thereby saving the static
+ memory consumed by the stack hash table. By default
+ this is set to false.
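+
+ Example: adding stack_depot_disable=1 to the kernel
+ command line disables the stack depot at boot (the
+ parameter is parsed as a boolean).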
+
stacktrace [FTRACE]
Enabled the stack tracer on boot up.
diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst
index 5c4432c96c4b..5307f90738aa 100644
--- a/Documentation/admin-guide/mm/memory-hotplug.rst
+++ b/Documentation/admin-guide/mm/memory-hotplug.rst
@@ -160,16 +160,16 @@ Under each memory block, you can see 5 files:
"online_movable", "online", "offline" command
which will be performed on all sections in the block.
-``phys_device`` read-only: designed to show the name of physical memory
- device. This is not well implemented now.
-``removable`` read-only: contains an integer value indicating
- whether the memory block is removable or not
- removable. A value of 1 indicates that the memory
- block is removable and a value of 0 indicates that
- it is not removable. A memory block is removable only if
- every section in the block is removable.
-``valid_zones`` read-only: designed to show which zones this memory block
- can be onlined to.
+``phys_device`` read-only: legacy interface only ever used on s390x to
+ expose the covered storage increment.
+``removable`` read-only: legacy interface that indicated whether a memory
+ block was likely to be offlineable or not. Newer kernel
+ versions return "1" if and only if the kernel supports
+ memory offlining.
+``valid_zones`` read-only: designed to show by which zone memory provided by
+ a memory block is managed, and to show by which zone memory
+ provided by an offline memory block could be managed when
+ onlining.
The first column shows its default zone.
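+
+ For example, these files can be inspected per memory block
+ (illustrative block number and output; actual zones depend on the
+ system)::
+
+	% cat /sys/devices/system/memory/memory9/removable
+	1
+	% cat /sys/devices/system/memory/memory9/valid_zones
+	Normal Movable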
diff --git a/Documentation/admin-guide/xfs.rst b/Documentation/admin-guide/xfs.rst
index 6178153d3320..5422407a96d7 100644
--- a/Documentation/admin-guide/xfs.rst
+++ b/Documentation/admin-guide/xfs.rst
@@ -284,6 +284,9 @@ The following sysctls are available for the XFS filesystem:
removes unused preallocation from clean inodes and releases
the unused space back to the free pool.
+ fs.xfs.speculative_cow_prealloc_lifetime
+ This is an alias for speculative_prealloc_lifetime.
+
fs.xfs.error_level (Min: 0 Default: 3 Max: 11)
A volume knob for error reporting when internal errors occur.
This will generate detailed messages & backtraces for filesystem
@@ -356,12 +359,13 @@ The following sysctls are available for the XFS filesystem:
Deprecated Sysctls
==================
-=========================== ================
- Name Removal Schedule
-=========================== ================
-fs.xfs.irix_sgid_inherit September 2025
-fs.xfs.irix_symlink_mode September 2025
-=========================== ================
+=========================================== ================
+ Name Removal Schedule
+=========================================== ================
+fs.xfs.irix_sgid_inherit September 2025
+fs.xfs.irix_symlink_mode September 2025
+fs.xfs.speculative_cow_prealloc_lifetime September 2025
+=========================================== ================
Removed Sysctls
diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst
index 19d4d1570cee..66c5a4e54130 100644
--- a/Documentation/block/bfq-iosched.rst
+++ b/Documentation/block/bfq-iosched.rst
@@ -430,13 +430,13 @@ fifo_expire_async
-----------------
This parameter is used to set the timeout of asynchronous requests. Default
-value of this is 248ms.
+value of this is 250ms.
fifo_expire_sync
----------------
This parameter is used to set the timeout of synchronous requests. Default
-value of this is 124ms. In case to favor synchronous requests over asynchronous
+value of this is 125ms. To favor synchronous requests over asynchronous
ones, this value should be decreased relative to fifo_expire_async.
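+
+For example, both timeouts can be tuned at runtime through the scheduler's
+sysfs directory (illustrative values; <disk> is a placeholder)::
+
+	echo 100 > /sys/block/<disk>/queue/iosched/fifo_expire_sync
+	echo 300 > /sys/block/<disk>/queue/iosched/fifo_expire_async
+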
low_latency
diff --git a/Documentation/conf.py b/Documentation/conf.py
index 5bd45d5fb0a0..fd65168c10f8 100644
--- a/Documentation/conf.py
+++ b/Documentation/conf.py
@@ -49,8 +49,7 @@ extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include',
if major >= 3:
sys.stderr.write('''WARNING: The kernel documentation build process
support for Sphinx v3.0 and above is brand new. Be prepared for
- possible issues in the generated output.
- ''')
+ possible issues in the generated output.\n''')
if (major > 3) or (minor > 0 or patch >= 2):
# Sphinx c function parser is more pedantic with regards to type
# checking. Due to that, having macros at c:function cause problems.
diff --git a/Documentation/dev-tools/index.rst b/Documentation/dev-tools/index.rst
index f7809c7b1ba9..1b1cf4f5c9d9 100644
--- a/Documentation/dev-tools/index.rst
+++ b/Documentation/dev-tools/index.rst
@@ -22,6 +22,7 @@ whole; patches welcome!
ubsan
kmemleak
kcsan
+ kfence
gdb-kernel-debugging
kgdb
kselftest
diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index cde14aeefca7..ddf4239a5890 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -155,7 +155,7 @@ Boot parameters
~~~~~~~~~~~~~~~
Hardware tag-based KASAN mode (see the section about various modes below) is
-intended for use in production as a security mitigation. Therefore it supports
+intended for use in production as a security mitigation. Therefore, it supports
boot parameters that allow disabling KASAN completely or otherwise controlling
particular KASAN features.
@@ -165,7 +165,8 @@ particular KASAN features.
traces collection (default: ``on``).
- ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
- report or also panic the kernel (default: ``report``).
+ report or also panic the kernel (default: ``report``). Note that tag
+ checking gets disabled after the first reported bug.
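+
+For example, to keep KASAN enabled but panic on the first report, the
+kernel could be booted with::
+
+	kasan.fault=panic
+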
For developers
~~~~~~~~~~~~~~
@@ -295,6 +296,9 @@ Note, that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
enabled. Even when kasan.mode=off is provided, or when the hardware doesn't
support MTE (but supports TBI).
+Hardware tag-based KASAN reports only the first bug found. After that, MTE tag
+checking gets disabled.
+
What memory accesses are sanitised by KASAN?
--------------------------------------------
diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst
new file mode 100644
index 000000000000..fdf04e741ea5
--- /dev/null
+++ b/Documentation/dev-tools/kfence.rst
@@ -0,0 +1,298 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (C) 2020, Google LLC.
+
+Kernel Electric-Fence (KFENCE)
+==============================
+
+Kernel Electric-Fence (KFENCE) is a low-overhead sampling-based memory safety
+error detector. KFENCE detects heap out-of-bounds access, use-after-free, and
+invalid-free errors.
+
+KFENCE is designed to be enabled in production kernels, and has near zero
+performance overhead. Compared to KASAN, KFENCE trades performance for
+precision. The main motivation behind KFENCE's design is that, with enough
+total uptime, KFENCE will detect bugs in code paths not typically exercised by
+non-production test workloads. One way to quickly achieve a large enough total
+uptime is to deploy the tool across a large fleet of machines.
+
+Usage
+-----
+
+To enable KFENCE, configure the kernel with::
+
+ CONFIG_KFENCE=y
+
+To build a kernel with KFENCE support, but disabled by default (to enable, set
+``kfence.sample_interval`` to non-zero value), configure the kernel with::
+
+ CONFIG_KFENCE=y
+ CONFIG_KFENCE_SAMPLE_INTERVAL=0
+
+KFENCE provides several other configuration options to customize behaviour (see
+the respective help text in ``lib/Kconfig.kfence`` for more info).
+
+Tuning performance
+~~~~~~~~~~~~~~~~~~
+
+The most important parameter is KFENCE's sample interval, which can be set via
+the kernel boot parameter ``kfence.sample_interval`` in milliseconds. The
+sample interval determines the frequency with which heap allocations will be
+guarded by KFENCE. The default is configurable via the Kconfig option
+``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
+disables KFENCE.
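+
+For example, booting with the following (an illustrative value) sets up
+one guarded allocation roughly every 500 milliseconds::
+
+	kfence.sample_interval=500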
+
+The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
+further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
+255), the number of available guarded objects can be controlled. Each object
+requires 2 pages, one for the object itself and the other one used as a guard
+page; object pages are interleaved with guard pages, and every object page is
+therefore surrounded by two guard pages.
+
+The total memory dedicated to the KFENCE memory pool can be computed as::
+
+ ( #objects + 1 ) * 2 * PAGE_SIZE
+
+Using the default config, and assuming a page size of 4 KiB, results in
+dedicating 2 MiB to the KFENCE memory pool.
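+
+That is, with the default 255 objects and 4 KiB pages::
+
+	( 255 + 1 ) * 2 * 4096 bytes = 2097152 bytes = 2 MiB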
+
+Note: On architectures that support huge pages, KFENCE will ensure that the
+pool is using pages of size ``PAGE_SIZE``. This will result in additional page
+tables being allocated.
+
+Error reports
+~~~~~~~~~~~~~
+
+A typical out-of-bounds access looks like this::
+
+ ==================================================================
+ BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa3/0x22b
+
+ Out-of-bounds read at 0xffffffffb672efff (1B left of kfence-#17):
+ test_out_of_bounds_read+0xa3/0x22b
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ kfence-#17 [0xffffffffb672f000-0xffffffffb672f01f, size=32, cache=kmalloc-32] allocated by task 507:
+ test_alloc+0xf3/0x25b
+ test_out_of_bounds_read+0x98/0x22b
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ CPU: 4 PID: 107 Comm: kunit_try_catch Not tainted 5.8.0-rc6+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+ ==================================================================
+
+The header of the report provides a short summary of the function involved in
+the access. It is followed by more detailed information about the access and
+its origin. Note that real kernel addresses are only shown when using the
+kernel command line option ``no_hash_pointers``.
+
+Use-after-free accesses are reported as::
+
+ ==================================================================
+ BUG: KFENCE: use-after-free read in test_use_after_free_read+0xb3/0x143
+
+ Use-after-free read at 0xffffffffb673dfe0 (in kfence-#24):
+ test_use_after_free_read+0xb3/0x143
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ kfence-#24 [0xffffffffb673dfe0-0xffffffffb673dfff, size=32, cache=kmalloc-32] allocated by task 507:
+ test_alloc+0xf3/0x25b
+ test_use_after_free_read+0x76/0x143
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ freed by task 507:
+ test_use_after_free_read+0xa8/0x143
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ CPU: 4 PID: 109 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+ ==================================================================
+
+KFENCE also reports on invalid frees, such as double-frees::
+
+ ==================================================================
+ BUG: KFENCE: invalid free in test_double_free+0xdc/0x171
+
+ Invalid free of 0xffffffffb6741000:
+ test_double_free+0xdc/0x171
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ kfence-#26 [0xffffffffb6741000-0xffffffffb674101f, size=32, cache=kmalloc-32] allocated by task 507:
+ test_alloc+0xf3/0x25b
+ test_double_free+0x76/0x171
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ freed by task 507:
+ test_double_free+0xa8/0x171
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ CPU: 4 PID: 111 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+ ==================================================================
+
+KFENCE also uses pattern-based redzones on the other side of an object's guard
+page, to detect out-of-bounds writes on the unprotected side of the object.
+These are reported on frees::
+
+ ==================================================================
+ BUG: KFENCE: memory corruption in test_kmalloc_aligned_oob_write+0xef/0x184
+
+ Corrupted memory at 0xffffffffb6797ff9 [ 0xac . . . . . . ] (in kfence-#69):
+ test_kmalloc_aligned_oob_write+0xef/0x184
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ kfence-#69 [0xffffffffb6797fb0-0xffffffffb6797ff8, size=73, cache=kmalloc-96] allocated by task 507:
+ test_alloc+0xf3/0x25b
+ test_kmalloc_aligned_oob_write+0x57/0x184
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ CPU: 4 PID: 120 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+ ==================================================================
+
+For such errors, the address where the corruption occurred as well as the
+invalidly written bytes (offset from the address) are shown; in this
+representation, the '.' characters denote untouched bytes. In the example
+above, ``0xac`` is the value written to the invalid address at offset 0, and
+the remaining '.' characters show that no following bytes were touched. Note
+that real values are
+only shown if the kernel was booted with ``no_hash_pointers``; to avoid
+information disclosure otherwise, '!' is used instead to denote invalidly
+written bytes.
+
+And finally, KFENCE may also report on invalid accesses to any protected page
+where it was not possible to determine an associated object, e.g. if adjacent
+object pages had not yet been allocated::
+
+ ==================================================================
+ BUG: KFENCE: invalid read in test_invalid_access+0x26/0xe0
+
+ Invalid read at 0xffffffffb670b00a:
+ test_invalid_access+0x26/0xe0
+ kunit_try_run_case+0x51/0x85
+ kunit_generic_run_threadfn_adapter+0x16/0x30
+ kthread+0x137/0x160
+ ret_from_fork+0x22/0x30
+
+ CPU: 4 PID: 124 Comm: kunit_try_catch Tainted: G W 5.8.0-rc6+ #7
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+ ==================================================================
+
+DebugFS interface
+~~~~~~~~~~~~~~~~~
+
+Some debugging information is exposed via debugfs:
+
+* The file ``/sys/kernel/debug/kfence/stats`` provides runtime statistics.
+
+* The file ``/sys/kernel/debug/kfence/objects`` provides a list of objects
+ allocated via KFENCE, including those already freed but protected.
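+
+For example, the statistics can be read with (the exact set of counters
+may vary by kernel version)::
+
+	cat /sys/kernel/debug/kfence/stats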
+
+Implementation Details
+----------------------
+
+Guarded allocations are set up based on the sample interval. After expiration
+of the sample interval, the next allocation through the main allocator (SLAB or
+SLUB) returns a guarded allocation from the KFENCE object pool (allocation
+sizes up to PAGE_SIZE are supported). At this point, the timer is reset, and
+the next allocation is set up after the expiration of the interval. To "gate" a
+KFENCE allocation through the main allocator's fast-path without overhead,
+KFENCE relies on static branches via the static keys infrastructure. The static
+branch is toggled to redirect the allocation to KFENCE.
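+
+A simplified sketch of that gate, closely following
+``include/linux/kfence.h`` (details omitted)::
+
+	/* Enabled by the sample timer; disabled again once an allocation
+	 * has been gated through to KFENCE. */
+	DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);
+
+	static __always_inline void *kfence_alloc(struct kmem_cache *s,
+						  size_t size, gfp_t flags)
+	{
+		if (static_branch_unlikely(&kfence_allocation_key))
+			return __kfence_alloc(s, size, flags); /* guarded object */
+		return NULL; /* NOP fast path: fall back to the main allocator */
+	}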
+
+KFENCE objects each reside on a dedicated page, at either the left or right
+page boundaries selected at random. The pages to the left and right of the
+object page are "guard pages", whose attributes are changed to a protected
+state, and cause page faults on any attempted access. Such page faults are then
+intercepted by KFENCE, which handles the fault gracefully by reporting an
+out-of-bounds access, and marking the page as accessible so that the faulting
+code can (wrongly) continue executing (set ``panic_on_warn`` to panic instead).
+
+To detect out-of-bounds writes to memory within the object's page itself,
+KFENCE also uses pattern-based redzones. For each object page, a redzone is set
+up for all non-object memory. For typical alignments, the redzone is only
+required on the unguarded side of an object. Because KFENCE must honor the
+cache's requested alignment, special alignments may result in unprotected gaps
+on either side of an object, all of which are redzoned.
+
+The following figure illustrates the page layout::
+
+ ---+-----------+-----------+-----------+-----------+-----------+---
+ | xxxxxxxxx | O : | xxxxxxxxx | : O | xxxxxxxxx |
+ | xxxxxxxxx | B : | xxxxxxxxx | : B | xxxxxxxxx |
+ | x GUARD x | J : RED- | x GUARD x | RED- : J | x GUARD x |
+ | xxxxxxxxx | E : ZONE | xxxxxxxxx | ZONE : E | xxxxxxxxx |
+ | xxxxxxxxx | C : | xxxxxxxxx | : C | xxxxxxxxx |
+ | xxxxxxxxx | T : | xxxxxxxxx | : T | xxxxxxxxx |
+ ---+-----------+-----------+-----------+-----------+-----------+---
+
+Upon deallocation of a KFENCE object, the object's page is again protected and
+the object is marked as freed. Any further access to the object causes a fault
+and KFENCE reports a use-after-free access. Freed objects are inserted at the
+tail of KFENCE's freelist, so that the least recently freed objects are reused
+first, and the chances of detecting use-after-frees of recently freed objects
+is increased.
+
+Interface
+---------
+
+The following describes the functions which are used by allocators as well as
+page handling code to set up and deal with KFENCE allocations.
+
+.. kernel-doc:: include/linux/kfence.h
+ :functions: is_kfence_address
+ kfence_shutdown_cache
+ kfence_alloc kfence_free __kfence_free
+ kfence_ksize kfence_object_start
+ kfence_handle_page_fault
+
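+As a hypothetical illustration of how these combine, a size lookup that
+works for both KFENCE and regular slab objects might look like
+(``object_size()`` is a made-up helper, not a kernel API)::
+
+	static size_t object_size(const void *addr)
+	{
+		if (is_kfence_address(addr))
+			return kfence_ksize(addr); /* object from the KFENCE pool */
+		return ksize(addr);                /* regular slab bookkeeping */
+	}
+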
+Related Tools
+-------------
+
+In userspace, a similar approach is taken by `GWP-ASan
+<http://llvm.org/docs/GwpAsan.html>`_. GWP-ASan also relies on guard pages and
+a sampling strategy to detect memory unsafety bugs at scale. KFENCE's design is
+directly influenced by GWP-ASan, and can be seen as its kernel sibling. Another
+similar but non-sampling approach, that also inspired the name "KFENCE", can be
+found in the userspace `Electric Fence Malloc Debugger
+<https://linux.die.net/man/3/efence>`_.
+
+In the kernel, several tools exist to debug memory access errors, and in
+particular KASAN can detect all bug classes that KFENCE can detect. While KASAN
+is more precise, relying on compiler instrumentation, this comes at a
+performance cost.
+
+It is worth highlighting that KASAN and KFENCE are complementary, with
+different target environments. For instance, KASAN is the better debugging
+aid where test cases or reproducers exist: due to the lower chance of
+detecting the error, debugging with KFENCE would require more effort. Deployments
+at scale
+that cannot afford to enable KASAN, however, would benefit from using KFENCE to
+discover bugs due to code paths not exercised by test cases or fuzzers.
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml b/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml
index 57324a5f0271..a1d5a32660e0 100644
--- a/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml
@@ -109,7 +109,7 @@ required:
- resets
- ddc
-unevaluatedProperties: false
+additionalProperties: false
examples:
- |
diff --git a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
index a0efd8dc2538..c2902aac2514 100644
--- a/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
@@ -13,7 +13,10 @@ maintainers:
properties:
compatible:
items:
- - const: sifive,fu540-c000-gpio
+ - enum:
+ - sifive,fu540-c000-gpio
+ - sifive,fu740-c000-gpio
+ - canaan,k210-gpiohs
- const: sifive,gpio0
reg:
@@ -21,9 +24,9 @@ properties:
interrupts:
description:
- interrupt mapping one per GPIO. Maximum 16 GPIOs.
+ Interrupt mapping, one per GPIO. Maximum 32 GPIOs.
minItems: 1
- maxItems: 16
+ maxItems: 32
interrupt-controller: true
@@ -36,6 +39,14 @@ properties:
"#gpio-cells":
const: 2
+ ngpios:
+ description:
+ The number of GPIOs available on the controller implementation.
+ It is 16 for the SiFive SoCs and 32 for the Canaan K210.
+ minimum: 1
+ maximum: 32
+ default: 16
+
gpio-controller: true
required:
@@ -44,10 +55,20 @@ required:
- interrupts
- interrupt-controller
- "#interrupt-cells"
- - clocks
- "#gpio-cells"
- gpio-controller
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - sifive,fu540-c000-gpio
+ - sifive,fu740-c000-gpio
+then:
+ required:
+ - clocks
+
additionalProperties: false
examples:
diff --git a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
index b9a61c9f7530..08d5a57ce00f 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
@@ -8,10 +8,11 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: SiFive Platform-Level Interrupt Controller (PLIC)
description:
- SiFive SOCs include an implementation of the Platform-Level Interrupt Controller
- (PLIC) high-level specification in the RISC-V Privileged Architecture
- specification. The PLIC connects all external interrupts in the system to all
- hart contexts in the system, via the external interrupt source in each hart.
+ SiFive SoCs and other RISC-V SoCs include an implementation of the
+ Platform-Level Interrupt Controller (PLIC) high-level specification in
+ the RISC-V Privileged Architecture specification. The PLIC connects all
+ external interrupts in the system to all hart contexts in the system, via
+ the external interrupt source in each hart.
A hart context is a privilege mode in a hardware execution thread. For example,
in a 4 core system with 2-way SMT, you have 8 harts and probably at least two
@@ -42,7 +43,9 @@ maintainers:
properties:
compatible:
items:
- - const: sifive,fu540-c000-plic
+ - enum:
+ - sifive,fu540-c000-plic
+ - canaan,k210-plic
- const: sifive,plic-1.0.0
reg:
diff --git a/Documentation/devicetree/bindings/leds/leds-lgm.yaml b/Documentation/devicetree/bindings/leds/leds-lgm.yaml
new file mode 100644
index 000000000000..32bbf146c01d
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/leds-lgm.yaml
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/leds-lgm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Lightning Mountain (LGM) SoC LED Serial Shift Output (SSO) Controller driver
+
+maintainers:
+ - Zhu, Yi Xin <Yixin.zhu@intel.com>
+ - Amireddy Mallikarjuna reddy <mallikarjunax.reddy@intel.com>
+
+properties:
+ compatible:
+ const: intel,lgm-ssoled
+
+ gpio-controller: true
+
+ '#gpio-cells':
+ const: 2
+
+ ngpios:
+ minimum: 0
+ maximum: 32
+ description:
+ Number of GPIOs this controller provides.
+
+ intel,sso-update-rate-hz:
+ description:
+ Blink frequency for SOUTs in Hz.
+
+ led-controller:
+ type: object
+ description:
+ This sub-node must contain a sub-node for each LED.
+
+ additionalProperties: false
+
+ patternProperties:
+ "^led@[0-23]$":
+ type: object
+
+ properties:
+ reg:
+ description: Index of the LED.
+ minimum: 0
+ maximum: 23
+
+ intel,sso-hw-trigger:
+ type: boolean
+ description: This property indicates a hardware-driven/controlled LED.
+
+ intel,sso-hw-blink:
+ type: boolean
+ description: This property enables LED blinking by hardware.
+
+ intel,sso-blink-rate-hz:
+ description: LED HW blink frequency.
+
+ retain-state-suspended:
+ type: boolean
+ description: The suspend state of LED can be retained.
+
+ retain-state-shutdown:
+ type: boolean
+ description: Retain the state of the LED on shutdown.
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - clock-names
+ - "#gpio-cells"
+ - gpio-controller
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/intel,lgm-clk.h>
+ #include <dt-bindings/leds/common.h>
+
+ ssogpio: ssogpio@e0d40000 {
+ compatible = "intel,sso-led";
+ reg = <0xE0D40000 0x2E4>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <32>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_ledc>;
+ clocks = <&cgu0 LGM_GCLK_LEDC0>, <&afeclk>;
+ clock-names = "sso", "fpid";
+ intel,sso-update-rate-hz = <250000>;
+
+ led-controller {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ led@0 {
+ reg = <0>;
+ function = "gphy";
+ color = <LED_COLOR_ID_GREEN>;
+ led-gpio = <&ssogpio 0 0>;
+ };
+
+ led@23 {
+ reg = <23>;
+ function = LED_FUNCTION_POWER;
+ color = <LED_COLOR_ID_GREEN>;
+ led-gpio = <&ssogpio 23 0>;
+ };
+ };
+ };
diff --git a/Documentation/devicetree/bindings/media/i2c/imx258.yaml b/Documentation/devicetree/bindings/media/i2c/imx258.yaml
index eaf77866ed9b..515317eff41a 100644
--- a/Documentation/devicetree/bindings/media/i2c/imx258.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/imx258.yaml
@@ -49,10 +49,14 @@ properties:
# See ../video-interfaces.txt for more details
port:
- type: object
+ $ref: /schemas/graph.yaml#/properties/port
+ additionalProperties: false
+
properties:
endpoint:
- type: object
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
properties:
data-lanes:
oneOf:
@@ -65,11 +69,7 @@ properties:
- const: 1
- const: 2
- link-frequencies:
- allOf:
- - $ref: /schemas/types.yaml#/definitions/uint64-array
- description:
- Allowed data bus frequencies.
+ link-frequencies: true
required:
- data-lanes
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml
index 1ab22e75d3c6..3e5d82df90a2 100644
--- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml
@@ -31,7 +31,8 @@ properties:
maxItems: 1
port:
- $ref: /schemas/graph.yaml#/$defs/port-base
+ $ref: /schemas/graph.yaml#/properties/port
+ additionalProperties: false
properties:
endpoint:
@@ -41,8 +42,6 @@ properties:
properties:
clock-noncontinuous: true
- additionalProperties: false
-
required:
- compatible
- reg
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml
index f8783f77cc54..9149f5685688 100644
--- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml
@@ -44,19 +44,17 @@ properties:
description: Reset Pin GPIO Control (active low)
port:
- type: object
description: MIPI CSI-2 transmitter port
+ $ref: /schemas/graph.yaml#/properties/port
+ additionalProperties: false
properties:
endpoint:
- type: object
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
properties:
- remote-endpoint: true
-
- link-frequencies:
- $ref: /schemas/types.yaml#/definitions/uint64-array
- description: Allowed MIPI CSI-2 link frequencies
+ link-frequencies: true
data-lanes:
minItems: 1
@@ -65,10 +63,6 @@ properties:
required:
- data-lanes
- link-frequencies
- - remote-endpoint
-
- required:
- - endpoint
required:
- compatible
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml
index c0ba28aa30c4..0699c7e4fdeb 100644
--- a/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml
@@ -44,19 +44,17 @@ properties:
description: Reset Pin GPIO Control (active low)
port:
- type: object
description: MIPI CSI-2 transmitter port
+ $ref: /schemas/graph.yaml#/properties/port
+ additionalProperties: false
properties:
endpoint:
- type: object
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
properties:
- remote-endpoint: true
-
- link-frequencies:
- $ref: /schemas/types.yaml#/definitions/uint64-array
- description: Allowed MIPI CSI-2 link frequencies
+ link-frequencies: true
data-lanes:
minItems: 1
@@ -65,10 +63,6 @@ properties:
required:
- data-lanes
- link-frequencies
- - remote-endpoint
-
- required:
- - endpoint
required:
- compatible
diff --git a/Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml b/Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml
index 24e689314bde..27cc5b7ff613 100644
--- a/Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml
@@ -36,18 +36,17 @@ properties:
description: Reference to the GPIO connected to the XCLR pin, if any.
port:
- type: object
additionalProperties: false
$ref: /schemas/graph.yaml#/properties/port
properties:
endpoint:
- type: object
+ $ref: /schemas/media/video-interfaces.yaml#
+ unevaluatedProperties: false
+
properties:
- data-lanes:
- $ref: ../video-interfaces.yaml#/properties/data-lanes
- link-frequencies:
- $ref: ../video-interfaces.yaml#/properties/link-frequencies
+ data-lanes: true
+ link-frequencies: true
required:
- data-lanes
diff --git a/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
new file mode 100644
index 000000000000..c24ad45cabb5
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/canaan,k210-sysctl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan Kendryte K210 System Controller Device Tree Bindings
+
+maintainers:
+ - Damien Le Moal <damien.lemoal@wdc.com>
+
+description:
+ Canaan Inc. Kendryte K210 SoC system controller which provides a
+ register map for controlling the clocks, reset signals and pin power
+ domains of the SoC.
+
+properties:
+ compatible:
+ items:
+ - const: canaan,k210-sysctl
+ - const: syscon
+ - const: simple-mfd
+
+ clocks:
+ maxItems: 1
+ description:
+ System controller Advanced Power Bus (APB) interface clock source.
+
+ clock-names:
+ items:
+ - const: pclk
+
+ reg:
+ maxItems: 1
+
+ clock-controller:
+ # Child node
+ type: object
+ $ref: "../clock/canaan,k210-clk.yaml"
+ description:
+ Clock controller for the SoC clocks. This child node definition
+ should follow the bindings specified in
+ Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml.
+
+ reset-controller:
+ # Child node
+ type: object
+ $ref: "../reset/canaan,k210-rst.yaml"
+ description:
+ Reset controller for the SoC. This child node definition
+ should follow the bindings specified in
+ Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml.
+
+ syscon-reboot:
+ # Child node
+ type: object
+ $ref: "../power/reset/syscon-reboot.yaml"
+ description:
+ Reboot method for the SoC. This child node definition
+ should follow the bindings specified in
+ Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml.
+
+required:
+ - compatible
+ - clocks
+ - reg
+ - clock-controller
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/k210-clk.h>
+ #include <dt-bindings/reset/k210-rst.h>
+
+ clocks {
+ in0: oscillator {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+ };
+ };
+
+ sysctl: syscon@50440000 {
+ compatible = "canaan,k210-sysctl",
+ "syscon", "simple-mfd";
+ reg = <0x50440000 0x100>;
+ clocks = <&sysclk K210_CLK_APB1>;
+ clock-names = "pclk";
+
+ sysclk: clock-controller {
+ #clock-cells = <1>;
+ compatible = "canaan,k210-clk";
+ clocks = <&in0>;
+ };
+
+ sysrst: reset-controller {
+ compatible = "canaan,k210-rst";
+ #reset-cells = <1>;
+ };
+
+ reboot: syscon-reboot {
+ compatible = "syscon-reboot";
+ regmap = <&sysctl>;
+ offset = <48>;
+ mask = <1>;
+ value = <1>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
new file mode 100644
index 000000000000..46fbc73ab26b
--- /dev/null
+++ b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
@@ -0,0 +1,171 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/canaan,k210-fpioa.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan Kendryte K210 FPIOA Device Tree Bindings
+
+maintainers:
+ - Damien Le Moal <damien.lemoal@wdc.com>
+
+description:
+ The Canaan Kendryte K210 SoC Fully Programmable IO Array (FPIOA)
+ controller allows assiging any of 256 possible functions to any of
+ 48 IO pins of the SoC. Pin function configuration is performed on
+ a per-pin basis.
+
+properties:
+ compatible:
+ const: canaan,k210-fpioa
+
+ reg:
+ maxItems: 1
+ description:
+ Address and length of the register set for the FPIOA controller.
+
+ clocks:
+ items:
+ - description: Controller reference clock source
+ - description: APB interface clock source
+
+ clock-names:
+ items:
+ - const: ref
+ - const: pclk
+
+ resets:
+ maxItems: 1
+
+ canaan,k210-sysctl-power:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description: |
+ phandle of the K210 system controller node and offset of its
+ power domain control register.
+
+patternProperties:
+ '-pinmux$':
+ type: object
+ $ref: /schemas/pinctrl/pinmux-node.yaml
+ description:
+ FPIOA client devices use sub-nodes to define the desired pin
+ configuration. Client device sub-nodes use the pinmux property
+ below.
+
+ properties:
+ pinmux:
+ description:
+ List of IO pin alternate functions. The value for each IO
+ pin is a combination of an IO pin number (0 to 47) with the
+ desired function for the IO pin. Functions are defined as
+ macros in include/dt-bindings/pinctrl/k210-fpioa.h.
+ The K210_FPIOA(IO pin, function) macro is provided to
+ facilitate the combination of IO pin numbers and functions.
+
+ required:
+ - pinmux
+
+ additionalProperties: false
+
+ '-pins$':
+ type: object
+ $ref: /schemas/pinctrl/pincfg-node.yaml
+ description:
+ FPIOA client devices use sub-nodes to define the desired
+ configuration of pins. Client device sub-nodes use the
+ properties below.
+
+ properties:
+ pins:
+ description:
+ List of IO pins affected by the properties specified in this
+ subnode. IO pins are identified using the pin names "IO_xx".
+ Pin configuration nodes can also define the power domain to
+ be used for the SoC pin groups A0 (IO pins 0-5),
+ A1 (IO pins 6-11), A2 (IO pins 12-17), B3 (IO pins 18-23),
+ B4 (IO pins 24-29), B5 (IO pins 30-35), C6 (IO pins 36-41)
+ and C7 (IO pins 42-47) using the
+ power-source property.
+ items:
+ anyOf:
+ - pattern: "^(IO_([0-9]*)|A[0-2]|B[3-5]|C[6-7])$"
+ - enum: [ IO_0, IO_1, IO_2, IO_3, IO_4, IO_5, IO_6, IO_7,
+ IO_8, IO_9, IO_10, IO_11, IO_12, IO_13, IO_14,
+ IO_15, IO_16, IO_17, IO_18, IO_19, IO_20, IO_21,
+ IO_22, IO_23, IO_24, IO_25, IO_26, IO_27, IO_28,
+ IO_29, IO_30, IO_31, IO_32, IO_33, IO_34, IO_35,
+ IO_36, IO_37, IO_38, IO_39, IO_40, IO_41, IO_42,
+ IO_43, IO_44, IO_45, IO_46, IO_47,
+ A0, A1, A2, B3, B4, B5, C6, C7 ]
+ bias-disable: true
+
+ bias-pull-down: true
+
+ bias-pull-up: true
+
+ drive-strength: true
+
+ drive-strength-microamp: true
+
+ input-enable: true
+
+ input-disable: true
+
+ input-schmitt-enable: true
+
+ input-schmitt-disable: true
+
+ input-polarity-invert:
+ description:
+ Enable or disable pin input polarity inversion.
+
+ output-enable: true
+
+ output-disable: true
+
+ output-high: true
+
+ output-low: true
+
+ output-polarity-invert:
+ description:
+ Enable or disable pin output polarity inversion.
+
+ slew-rate: true
+
+ power-source: true
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - clocks
+ - canaan,k210-sysctl-power
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/pinctrl/k210-fpioa.h>
+ #include <dt-bindings/clock/k210-clk.h>
+ #include <dt-bindings/reset/k210-rst.h>
+
+ fpioa: pinmux@502B0000 {
+ compatible = "canaan,k210-fpioa";
+ reg = <0x502B0000 0x100>;
+ clocks = <&sysclk K210_CLK_FPIOA>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_FPIOA>;
+ canaan,k210-sysctl-power = <&sysctl 108>;
+ pinctrl-0 = <&jtag_pinctrl>;
+ pinctrl-names = "default";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+ };
diff --git a/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml b/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml
index 5ac25275d8bf..84e66913d042 100644
--- a/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml
+++ b/Documentation/devicetree/bindings/pwm/pwm-sifive.yaml
@@ -25,12 +25,15 @@ description:
properties:
compatible:
items:
- - const: sifive,fu540-c000-pwm
+ - enum:
+ - sifive,fu540-c000-pwm
+ - sifive,fu740-c000-pwm
- const: sifive,pwm0
description:
Should be "sifive,<chip>-pwm" and "sifive,pwm<version>". Supported
- compatible strings are "sifive,fu540-c000-pwm" for the SiFive PWM v0
- as integrated onto the SiFive FU540 chip, and "sifive,pwm0" for the
+ compatible strings are "sifive,fu540-c000-pwm" and
+ "sifive,fu740-c000-pwm" for the SiFive PWM v0 as integrated onto the
+ SiFive FU540 and FU740 chip respectively, and "sifive,pwm0" for the
SiFive PWM v0 IP block with no chip integration tweaks.
Please refer to sifive-blocks-ip-versioning.txt for details.
diff --git a/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
new file mode 100644
index 000000000000..53e4ede9c0bd
--- /dev/null
+++ b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reset/canaan,k210-rst.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan Kendryte K210 Reset Controller Device Tree Bindings
+
+maintainers:
+ - Damien Le Moal <damien.lemoal@wdc.com>
+
+description: |
+ Canaan Kendryte K210 reset controller driver which supports the SoC
+ system controller supplied reset registers for the various peripherals
+ of the SoC. The K210 reset controller node must be defined as a child
+ node of the K210 system controller node.
+
+ See also:
+ - dt-bindings/reset/k210-rst.h
+
+properties:
+ compatible:
+ const: canaan,k210-rst
+
+ '#reset-cells':
+ const: 1
+
+required:
+ - '#reset-cells'
+ - compatible
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/reset/k210-rst.h>
+ sysrst: reset-controller {
+ compatible = "canaan,k210-rst";
+ #reset-cells = <1>;
+ };
diff --git a/Documentation/devicetree/bindings/riscv/canaan.yaml b/Documentation/devicetree/bindings/riscv/canaan.yaml
new file mode 100644
index 000000000000..f8f3f286bd55
--- /dev/null
+++ b/Documentation/devicetree/bindings/riscv/canaan.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/canaan.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan SoC-based boards
+
+maintainers:
+ - Damien Le Moal <damien.lemoal@wdc.com>
+
+description:
+ Canaan Kendryte K210 SoC-based boards
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ oneOf:
+ - items:
+ - const: sipeed,maix-bit
+ - const: sipeed,maix-bitm
+ - const: canaan,kendryte-k210
+
+ - items:
+ - const: sipeed,maix-go
+ - const: canaan,kendryte-k210
+
+ - items:
+ - const: sipeed,maix-dock-m1
+ - const: sipeed,maix-dock-m1w
+ - const: canaan,kendryte-k210
+
+ - items:
+ - const: sipeed,maixduino
+ - const: canaan,kendryte-k210
+
+ - items:
+ - const: canaan,kendryte-kd233
+ - const: canaan,kendryte-k210
+
+ - items:
+ - const: canaan,kendryte-k210
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index c6925e0b16e4..e534f6a7cfa1 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -28,11 +28,18 @@ properties:
- items:
- enum:
- sifive,rocket0
+ - sifive,bullet0
- sifive,e5
+ - sifive,e7
- sifive,e51
+ - sifive,e71
- sifive,u54-mc
+ - sifive,u74-mc
- sifive,u54
+ - sifive,u74
- sifive,u5
+ - sifive,u7
+ - canaan,k210
- const: riscv
- const: riscv # Simulator only
description:
@@ -50,6 +57,7 @@ properties:
- riscv,sv32
- riscv,sv39
- riscv,sv48
+ - riscv,none
riscv,isa:
description:
diff --git a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
index 2ece8630dc68..23b227614366 100644
--- a/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
+++ b/Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
@@ -27,6 +27,7 @@ select:
items:
- enum:
- sifive,fu540-c000-ccache
+ - sifive,fu740-c000-ccache
required:
- compatible
@@ -34,7 +35,9 @@ select:
properties:
compatible:
items:
- - const: sifive,fu540-c000-ccache
+ - enum:
+ - sifive,fu540-c000-ccache
+ - sifive,fu740-c000-ccache
- const: cache
cache-block-size:
@@ -52,10 +55,13 @@ properties:
cache-unified: true
interrupts:
- description: |
- Must contain entries for DirError, DataError and DataFail signals.
minItems: 3
- maxItems: 3
+ maxItems: 4
+ items:
+ - description: DirError interrupt
+ - description: DataError interrupt
+ - description: DataFail interrupt
+ - description: DirFail interrupt
reg:
maxItems: 1
@@ -68,6 +74,26 @@ properties:
The reference to the reserved-memory for the L2 Loosely Integrated Memory region.
The reserved memory node should be defined as per the bindings in reserved-memory.txt.
+if:
+ properties:
+ compatible:
+ contains:
+ const: sifive,fu540-c000-ccache
+
+then:
+ properties:
+ interrupts:
+ description: |
+ Must contain entries for DirError, DataError and DataFail signals.
+ maxItems: 3
+
+else:
+ properties:
+ interrupts:
+ description: |
+ Must contain entries for DirError, DataError, DataFail, DirFail signals.
+ minItems: 4
+
additionalProperties: false
required:
diff --git a/Documentation/devicetree/bindings/riscv/sifive.yaml b/Documentation/devicetree/bindings/riscv/sifive.yaml
index 3a8647d1da4c..ee0a239af4c2 100644
--- a/Documentation/devicetree/bindings/riscv/sifive.yaml
+++ b/Documentation/devicetree/bindings/riscv/sifive.yaml
@@ -17,11 +17,18 @@ properties:
$nodename:
const: '/'
compatible:
- items:
- - enum:
- - sifive,hifive-unleashed-a00
- - const: sifive,fu540-c000
- - const: sifive,fu540
+ oneOf:
+ - items:
+ - enum:
+ - sifive,hifive-unleashed-a00
+ - const: sifive,fu540-c000
+ - const: sifive,fu540
+
+ - items:
+ - enum:
+ - sifive,hifive-unmatched-a00
+ - const: sifive,fu740-c000
+ - const: sifive,fu740
additionalProperties: true
diff --git a/Documentation/devicetree/bindings/serial/sifive-serial.yaml b/Documentation/devicetree/bindings/serial/sifive-serial.yaml
index 3ac5c7ff2758..5fa94dacbba9 100644
--- a/Documentation/devicetree/bindings/serial/sifive-serial.yaml
+++ b/Documentation/devicetree/bindings/serial/sifive-serial.yaml
@@ -20,6 +20,7 @@ properties:
- enum:
- sifive,fu540-c000-uart
- sifive,fu740-c000-uart
+ - canaan,k210-uarths
- const: sifive,uart0
description:
diff --git a/Documentation/devicetree/bindings/timer/sifive,clint.yaml b/Documentation/devicetree/bindings/timer/sifive,clint.yaml
index 2a0e9cd9fbcf..a35952f48742 100644
--- a/Documentation/devicetree/bindings/timer/sifive,clint.yaml
+++ b/Documentation/devicetree/bindings/timer/sifive,clint.yaml
@@ -23,15 +23,19 @@ description:
properties:
compatible:
items:
- - const: sifive,fu540-c000-clint
+ - enum:
+ - sifive,fu540-c000-clint
+ - canaan,k210-clint
- const: sifive,clint0
description:
- Should be "sifive,<chip>-clint" and "sifive,clint<version>".
+ Should be "<vendor>,<chip>-clint" and "sifive,clint<version>".
Supported compatible strings are -
"sifive,fu540-c000-clint" for the SiFive CLINT v0 as integrated
- onto the SiFive FU540 chip, and "sifive,clint0" for the SiFive
- CLINT v0 IP block with no chip integration tweaks.
+ onto the SiFive FU540 chip, "canaan,k210-clint" for the SiFive
+ CLINT v0 as integrated onto the Canaan Kendryte K210 chip, and
+ "sifive,clint0" for the SiFive CLINT v0 IP block with no chip
+ integration tweaks.
Please refer to sifive-blocks-ip-versioning.txt for details
reg:
diff --git a/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml b/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml
index d65faf289a83..d33c9205a909 100644
--- a/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml
@@ -24,6 +24,9 @@ properties:
interrupts:
maxItems: 1
+ resets:
+ maxItems: 1
+
clocks:
minItems: 1
items:
diff --git a/Documentation/features/core/cBPF-JIT/arch-support.txt b/Documentation/features/core/cBPF-JIT/arch-support.txt
index 399935616813..e59b5215402d 100644
--- a/Documentation/features/core/cBPF-JIT/arch-support.txt
+++ b/Documentation/features/core/cBPF-JIT/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | TODO |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/core/eBPF-JIT/arch-support.txt b/Documentation/features/core/eBPF-JIT/arch-support.txt
index 79409bfe0263..dcbd8679f514 100644
--- a/Documentation/features/core/eBPF-JIT/arch-support.txt
+++ b/Documentation/features/core/eBPF-JIT/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt
index 9ea60e416efd..4efcba7b5239 100644
--- a/Documentation/features/core/generic-idle-thread/arch-support.txt
+++ b/Documentation/features/core/generic-idle-thread/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | ok |
diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt
index 894d9693b380..0c801d1bd2da 100644
--- a/Documentation/features/core/jump-labels/arch-support.txt
+++ b/Documentation/features/core/jump-labels/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt
index cd3510e2eedb..af34308fce7f 100644
--- a/Documentation/features/core/tracehook/arch-support.txt
+++ b/Documentation/features/core/tracehook/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | ok |
| csky: | ok |
| h8300: | TODO |
| hexagon: | ok |
diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt
index b2288dc14b72..c244ac7eee26 100644
--- a/Documentation/features/debug/KASAN/arch-support.txt
+++ b/Documentation/features/debug/KASAN/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
index 1c49723e7534..7aff505af706 100644
--- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
+++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | TODO |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt
index 7563a494ddb8..b39c1a5de3f3 100644
--- a/Documentation/features/debug/gcov-profile-all/arch-support.txt
+++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt
@@ -10,14 +10,13 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
| m68k: | TODO |
| microblaze: | ok |
- | mips: | TODO |
+ | mips: | ok |
| nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
diff --git a/Documentation/features/debug/kcov/arch-support.txt b/Documentation/features/debug/kcov/arch-support.txt
index ab0ee1c933c2..7e44013cc320 100644
--- a/Documentation/features/debug/kcov/arch-support.txt
+++ b/Documentation/features/debug/kcov/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt
index bc45bac20442..2cb0576f9180 100644
--- a/Documentation/features/debug/kgdb/arch-support.txt
+++ b/Documentation/features/debug/kgdb/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | ok |
| hexagon: | ok |
diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt
index 2db76807ec6f..e9ac415f8aec 100644
--- a/Documentation/features/debug/kmemleak/arch-support.txt
+++ b/Documentation/features/debug/kmemleak/arch-support.txt
@@ -10,8 +10,7 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
- | csky: | TODO |
+ | csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
index 6225cfe0c5bf..96156e8802a7 100644
--- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
+++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | TODO |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
@@ -23,7 +22,7 @@
| openrisc: | TODO |
| parisc: | ok |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt
index 371f0ac488f5..ee95ed61909a 100644
--- a/Documentation/features/debug/kprobes/arch-support.txt
+++ b/Documentation/features/debug/kprobes/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
@@ -23,7 +22,7 @@
| openrisc: | TODO |
| parisc: | ok |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt
index 38e95251deed..612cb97d47b8 100644
--- a/Documentation/features/debug/kretprobes/arch-support.txt
+++ b/Documentation/features/debug/kretprobes/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
@@ -23,7 +22,7 @@
| openrisc: | TODO |
| parisc: | ok |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt
index 7f4a20e6a12b..d6ff141a6122 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | TODO |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt
index 3329559c8207..ad4de22a71ab 100644
--- a/Documentation/features/debug/stackprotector/arch-support.txt
+++ b/Documentation/features/debug/stackprotector/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt
index 43cac6ee0c68..8bd5548a4485 100644
--- a/Documentation/features/debug/uprobes/arch-support.txt
+++ b/Documentation/features/debug/uprobes/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
@@ -23,7 +22,7 @@
| openrisc: | TODO |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | ok |
diff --git a/Documentation/features/debug/user-ret-profiler/arch-support.txt b/Documentation/features/debug/user-ret-profiler/arch-support.txt
index d636ed0e679f..2a3fe812a5fa 100644
--- a/Documentation/features/debug/user-ret-profiler/arch-support.txt
+++ b/Documentation/features/debug/user-ret-profiler/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | TODO |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt
index dfc93d074e3d..bece89586efa 100644
--- a/Documentation/features/io/dma-contiguous/arch-support.txt
+++ b/Documentation/features/io/dma-contiguous/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/locking/cmpxchg-local/arch-support.txt b/Documentation/features/locking/cmpxchg-local/arch-support.txt
index 1815c7fed06d..52bdda004f5c 100644
--- a/Documentation/features/locking/cmpxchg-local/arch-support.txt
+++ b/Documentation/features/locking/cmpxchg-local/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt
index 940b0bd02957..a8cd163c8b7e 100644
--- a/Documentation/features/locking/lockdep/arch-support.txt
+++ b/Documentation/features/locking/lockdep/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | ok |
diff --git a/Documentation/features/locking/queued-rwlocks/arch-support.txt b/Documentation/features/locking/queued-rwlocks/arch-support.txt
index 4dd5e554873f..8c85949752b3 100644
--- a/Documentation/features/locking/queued-rwlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-rwlocks/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt
index b16d4f71e5ce..5f4e1b3841af 100644
--- a/Documentation/features/locking/queued-spinlocks/arch-support.txt
+++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt
index 04c17c2106a4..78f3fe080f0e 100644
--- a/Documentation/features/perf/kprobes-event/arch-support.txt
+++ b/Documentation/features/perf/kprobes-event/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | ok |
@@ -23,7 +22,7 @@
| openrisc: | TODO |
| parisc: | ok |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | ok |
diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt
index e7450fbb8253..5bf3b1854a1f 100644
--- a/Documentation/features/perf/perf-regs/arch-support.txt
+++ b/Documentation/features/perf/perf-regs/arch-support.txt
@@ -10,14 +10,13 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
- | mips: | TODO |
+ | mips: | ok |
| nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt
index 98e79d128d9b..d88659bb4fc1 100644
--- a/Documentation/features/perf/perf-stackdump/arch-support.txt
+++ b/Documentation/features/perf/perf-stackdump/arch-support.txt
@@ -10,14 +10,13 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
- | mips: | TODO |
+ | mips: | ok |
| nds32: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
index 47e6903f47a5..883d33b265d6 100644
--- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -33,7 +33,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/sched/numa-balancing/arch-support.txt b/Documentation/features/sched/numa-balancing/arch-support.txt
index 964457ad26c1..9affb7c2c500 100644
--- a/Documentation/features/sched/numa-balancing/arch-support.txt
+++ b/Documentation/features/sched/numa-balancing/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | .. |
| arm: | .. |
| arm64: | ok |
- | c6x: | .. |
| csky: | .. |
| h8300: | .. |
| hexagon: | .. |
@@ -23,7 +22,7 @@
| openrisc: | .. |
| parisc: | .. |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | .. |
| sparc: | TODO |
diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
index eb3d74092c61..26eec58ab819 100644
--- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt
+++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
index 4d11cbb3c09b..8639fe8315f5 100644
--- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt
+++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/time/clockevents/arch-support.txt b/Documentation/features/time/clockevents/arch-support.txt
index 6863a3fbddad..9a81cb03b1fd 100644
--- a/Documentation/features/time/clockevents/arch-support.txt
+++ b/Documentation/features/time/clockevents/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | TODO |
| arm64: | ok |
- | c6x: | ok |
| csky: | ok |
| h8300: | ok |
| hexagon: | ok |
diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt
index 52aea275aab7..4ed116c2ec39 100644
--- a/Documentation/features/time/context-tracking/arch-support.txt
+++ b/Documentation/features/time/context-tracking/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt
index 6fc03deb1c38..bc30c15557c7 100644
--- a/Documentation/features/time/irq-time-acct/arch-support.txt
+++ b/Documentation/features/time/irq-time-acct/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt
index e51f3af38e31..050de43bbbb9 100644
--- a/Documentation/features/time/virt-cpuacct/arch-support.txt
+++ b/Documentation/features/time/virt-cpuacct/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | ok |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt
index eccda0732474..99cb6d7f5005 100644
--- a/Documentation/features/vm/ELF-ASLR/arch-support.txt
+++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/vm/PG_uncached/arch-support.txt b/Documentation/features/vm/PG_uncached/arch-support.txt
index c74e3f8040e1..6cde38458596 100644
--- a/Documentation/features/vm/PG_uncached/arch-support.txt
+++ b/Documentation/features/vm/PG_uncached/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | TODO |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt
index 1c0b95f2b40d..e8238cb2a4da 100644
--- a/Documentation/features/vm/THP/arch-support.txt
+++ b/Documentation/features/vm/THP/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | .. |
| csky: | .. |
| h8300: | .. |
| hexagon: | .. |
diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt
index 30f75a79ce01..48a5ca548399 100644
--- a/Documentation/features/vm/TLB/arch-support.txt
+++ b/Documentation/features/vm/TLB/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | TODO |
- | c6x: | .. |
| csky: | TODO |
| h8300: | .. |
| hexagon: | TODO |
diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index c5ff3a427722..439fd9069b8b 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | TODO |
| arm: | TODO |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt
index b5fb37c28cc6..9a0c8783b84d 100644
--- a/Documentation/features/vm/ioremap_prot/arch-support.txt
+++ b/Documentation/features/vm/ioremap_prot/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | TODO |
| arm64: | TODO |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt
index 13d0e1e17001..40b969f3a6bb 100644
--- a/Documentation/features/vm/pte_special/arch-support.txt
+++ b/Documentation/features/vm/pte_special/arch-support.txt
@@ -10,7 +10,6 @@
| arc: | ok |
| arm: | ok |
| arm64: | ok |
- | c6x: | TODO |
| csky: | TODO |
| h8300: | TODO |
| hexagon: | TODO |
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index 633610253f45..0302035781be 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -883,3 +883,10 @@ For bvec based iterators bio_iov_iter_get_pages() now doesn't copy bvecs but
uses the one provided. Anyone issuing kiocb-I/O should ensure that the bvec and
page references stay until I/O has completed, i.e. until ->ki_complete() has
been called or returned with non -EIOCBQUEUED code.
+
+---
+
+**mandatory**
+
+mnt_want_write_file() can now only be paired with mnt_drop_write_file(),
+whereas previously it could be paired with mnt_drop_write() as well.
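A minimal sketch of the new pairing rule, assuming a caller that already holds an open struct file; example_write_op() is a hypothetical caller, while the mnt_*_write_file() helpers are the API named above:

static int example_write_op(struct file *filp)
{
	int err = mnt_want_write_file(filp);

	if (err)
		return err;
	/* ... perform the write-side filesystem change ... */
	mnt_drop_write_file(filp);	/* must be the _file variant now */
	return 0;
}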
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 9abdba17565e..48fbfc336ebf 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -691,6 +691,10 @@ files are there, and which are missing.
number of processes currently runnable (running or on ready queue);
total number of processes in system;
last pid created.
+ All fields are separated by one space except "number of
+ processes currently runnable" and "total number of processes
+ in system", which are separated by a slash ('/'). Example:
+ 0.61 0.61 0.55 3/828 22084
locks Kernel locks
meminfo Memory info
misc Miscellaneous
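As a hedged illustration of the loadavg field layout documented above, a small user-space parser; the variable names are local assumptions, not kernel identifiers:

#include <stdio.h>

int main(void)
{
	double load1, load5, load15;
	int runnable, total, last_pid;
	FILE *f = fopen("/proc/loadavg", "r");

	if (!f)
		return 1;
	/* slash separates runnable from total, per the format above */
	if (fscanf(f, "%lf %lf %lf %d/%d %d",
		   &load1, &load5, &load15, &runnable, &total, &last_pid) == 6)
		printf("%d of %d tasks runnable, last pid %d\n",
		       runnable, total, last_pid);
	fclose(f);
	return 0;
}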
diff --git a/Documentation/filesystems/seq_file.rst b/Documentation/filesystems/seq_file.rst
index 56856481dc8d..a6726082a7c2 100644
--- a/Documentation/filesystems/seq_file.rst
+++ b/Documentation/filesystems/seq_file.rst
@@ -217,6 +217,12 @@ between the calls to start() and stop(), so holding a lock during that time
is a reasonable thing to do. The seq_file code will also avoid taking any
other locks while the iterator is active.
+The iterator value returned by start() or next() is guaranteed to be
+passed to a subsequent next() or stop() call. This allows resources
+such as locks that were taken to be reliably released. There is *no*
+guarantee that the iterator will be passed to show(), though in practice
+it often will be.
+
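A minimal kernel-style sketch of relying on that guarantee, assuming a hypothetical my_first() lookup helper; the lock taken in start() is always released because stop() is always reached:

#include <linux/mutex.h>
#include <linux/seq_file.h>

static DEFINE_MUTEX(my_lock);

static void *my_start(struct seq_file *m, loff_t *pos)
{
	mutex_lock(&my_lock);		/* held for the whole traversal */
	return my_first(*pos);		/* may be NULL; stop() still runs */
}

static void my_stop(struct seq_file *m, void *v)
{
	mutex_unlock(&my_lock);		/* guaranteed to be reached */
}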
Formatted output
================
diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst
index cf9b2857c72a..dabee3729e5a 100644
--- a/Documentation/powerpc/syscall64-abi.rst
+++ b/Documentation/powerpc/syscall64-abi.rst
@@ -46,25 +46,38 @@ stack frame LR and CR save fields are not used.
Register preservation rules
---------------------------
-Register preservation rules match the ELF ABI calling sequence with the
-following differences:
-
-+------------------------------------------------------------------------+
-| For the sc instruction, differences with the ELF ABI |
-+--------------+--------------+------------------------------------------+
-| r0 | Volatile | (System call number.) |
-| r3 | Volatile | (Parameter 1, and return value.) |
-| r4-r8 | Volatile | (Parameters 2-6.) |
-| cr0 | Volatile | (cr0.SO is the return error condition.) |
-| cr1, cr5-7 | Nonvolatile | |
-| lr | Nonvolatile | |
-+--------------+--------------+------------------------------------------+
-| For the scv 0 instruction, differences with the ELF ABI |
-+--------------+--------------+------------------------------------------+
-| r0 | Volatile | (System call number.) |
-| r3 | Volatile | (Parameter 1, and return value.) |
-| r4-r8 | Volatile | (Parameters 2-6.) |
-+--------------+--------------+------------------------------------------+
+Register preservation rules match the ELF ABI calling sequence with some
+differences.
+
+For the sc instruction, the differences from the ELF ABI are as follows:
+
++--------------+--------------------+-----------------------------------------+
+| Register | Preservation Rules | Purpose |
++==============+====================+=========================================+
+| r0 | Volatile | (System call number.) |
++--------------+--------------------+-----------------------------------------+
+| r3 | Volatile | (Parameter 1, and return value.) |
++--------------+--------------------+-----------------------------------------+
+| r4-r8 | Volatile | (Parameters 2-6.) |
++--------------+--------------------+-----------------------------------------+
+| cr0 | Volatile | (cr0.SO is the return error condition.) |
++--------------+--------------------+-----------------------------------------+
+| cr1, cr5-7 | Nonvolatile | |
++--------------+--------------------+-----------------------------------------+
+| lr | Nonvolatile | |
++--------------+--------------------+-----------------------------------------+
+
+For the scv 0 instruction, the differences from the ELF ABI are as follows:
+
++--------------+--------------------+-----------------------------------------+
+| Register | Preservation Rules | Purpose |
++==============+====================+=========================================+
+| r0 | Volatile | (System call number.) |
++--------------+--------------------+-----------------------------------------+
+| r3 | Volatile | (Parameter 1, and return value.) |
++--------------+--------------------+-----------------------------------------+
+| r4-r8 | Volatile | (Parameters 2-6.) |
++--------------+--------------------+-----------------------------------------+
All floating point and vector data registers as well as control and status
registers are nonvolatile.
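For illustration, a hedged user-space sketch of issuing getpid via the sc instruction, with a clobber list that mirrors the sc column of the tables above (r0, r3-r8 and cr0 volatile; lr untouched by sc); treat it as a sketch, not a complete ABI statement:

static long raw_getpid(void)
{
	register long r0 asm("r0") = 20;	/* __NR_getpid on powerpc */
	register long r3 asm("r3");

	asm volatile("sc"
		     : "+r" (r0), "=r" (r3)
		     :
		     : "r4", "r5", "r6", "r7", "r8", "cr0", "memory");
	return r3;			/* cr0.SO would signal an error */
}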
diff --git a/Documentation/process/4.Coding.rst b/Documentation/process/4.Coding.rst
index 0825dc496f22..1f0d81f44e14 100644
--- a/Documentation/process/4.Coding.rst
+++ b/Documentation/process/4.Coding.rst
@@ -242,7 +242,7 @@ and try to avoid "fixes" which make the warning go away without addressing
its cause.
Note that not all compiler warnings are enabled by default. Build the
-kernel with "make EXTRA_CFLAGS=-W" to get the full set.
+kernel with "make KCFLAGS=-W" to get the full set.
The kernel provides several configuration options which turn on debugging
features; most of these are found in the "kernel hacking" submenu. Several
diff --git a/Documentation/process/submit-checklist.rst b/Documentation/process/submit-checklist.rst
index f709beaf02c9..b1bc2d37bd0a 100644
--- a/Documentation/process/submit-checklist.rst
+++ b/Documentation/process/submit-checklist.rst
@@ -96,7 +96,7 @@ and elsewhere regarding submitting Linux kernel patches.
injection might be appropriate.
20) Newly-added code has been compiled with ``gcc -W`` (use
- ``make EXTRA_CFLAGS=-W``). This will generate lots of noise, but is good
+ ``make KCFLAGS=-W``). This will generate lots of noise, but is good
for finding bugs like "warning: comparison between signed and unsigned".
21) Tested after it has been merged into the -mm patchset to make sure
diff --git a/Documentation/translations/it_IT/process/4.Coding.rst b/Documentation/translations/it_IT/process/4.Coding.rst
index a5e36aa60448..8012fe9497ae 100644
--- a/Documentation/translations/it_IT/process/4.Coding.rst
+++ b/Documentation/translations/it_IT/process/4.Coding.rst
@@ -256,7 +256,7 @@ e cercate di evitare le "riparazioni" che fan sparire l'avvertimento senza
però averne trovato la causa.
Tenete a mente che non tutti gli avvertimenti sono disabilitati di default.
-Costruite il kernel con "make EXTRA_CFLAGS=-W" per ottenerli tutti.
+Costruite il kernel con "make KCFLAGS=-W" per ottenerli tutti.
Il kernel fornisce differenti opzioni che abilitano funzionalità di debugging;
molti di queste sono trovano all'interno del sotto menu "kernel hacking".
diff --git a/Documentation/translations/it_IT/process/submit-checklist.rst b/Documentation/translations/it_IT/process/submit-checklist.rst
index 3e575502690f..614fc17d9086 100644
--- a/Documentation/translations/it_IT/process/submit-checklist.rst
+++ b/Documentation/translations/it_IT/process/submit-checklist.rst
@@ -104,7 +104,7 @@ sottomissione delle patch, in particolare
l'iniezione di fallimenti specifici per il sottosistema.
22) Il nuovo codice è stato compilato con ``gcc -W`` (usate
- ``make EXTRA_CFLAGS=-W``). Questo genererà molti avvisi, ma è ottimo
+ ``make KCFLAGS=-W``). Questo genererà molti avvisi, ma è ottimo
per scovare bachi come "warning: comparison between signed and unsigned".
23) La patch è stata verificata dopo essere stata inclusa nella serie di patch
diff --git a/Documentation/translations/zh_CN/process/4.Coding.rst b/Documentation/translations/zh_CN/process/4.Coding.rst
index 959a06ba025c..66cd8ee07606 100644
--- a/Documentation/translations/zh_CN/process/4.Coding.rst
+++ b/Documentation/translations/zh_CN/process/4.Coding.rst
@@ -165,7 +165,7 @@ Linus对这个问题给出了最佳答案:
通常,这些警告都指向真正的问题。提交以供审阅的代码通常不会产生任何编译器警告。
在消除警告时,注意了解真正的原因,并尽量避免“修复”,使警告消失而不解决其原因。
-请注意,并非所有编译器警告都默认启用。使用“make EXTRA_CFLAGS=-W”构建内核以
+请注意,并非所有编译器警告都默认启用。使用“make KCFLAGS=-W”构建内核以
获得完整集合。
内核提供了几个配置选项,可以打开调试功能;大多数配置选项位于“kernel hacking”
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 45fd862ac128..1a2b5210cdbf 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -3856,49 +3856,20 @@ base 2 of the page size in the bottom 6 bits.
-EFAULT if struct kvm_reinject_control cannot be read,
-EINVAL if the supplied shift or flags are invalid,
-ENOMEM if unable to allocate the new HPT,
- -ENOSPC if there was a hash collision
-
-::
-
- struct kvm_ppc_rmmu_info {
- struct kvm_ppc_radix_geom {
- __u8 page_shift;
- __u8 level_bits[4];
- __u8 pad[3];
- } geometries[8];
- __u32 ap_encodings[8];
- };
-
-The geometries[] field gives up to 8 supported geometries for the
-radix page table, in terms of the log base 2 of the smallest page
-size, and the number of bits indexed at each level of the tree, from
-the PTE level up to the PGD level in that order. Any unused entries
-will have 0 in the page_shift field.
-
-The ap_encodings gives the supported page sizes and their AP field
-encodings, encoded with the AP value in the top 3 bits and the log
-base 2 of the page size in the bottom 6 bits.
-
-4.102 KVM_PPC_RESIZE_HPT_PREPARE
---------------------------------
-
-:Capability: KVM_CAP_SPAPR_RESIZE_HPT
-:Architectures: powerpc
-:Type: vm ioctl
-:Parameters: struct kvm_ppc_resize_hpt (in)
-:Returns: 0 on successful completion,
- >0 if a new HPT is being prepared, the value is an estimated
- number of milliseconds until preparation is complete,
- -EFAULT if struct kvm_reinject_control cannot be read,
- -EINVAL if the supplied shift or flags are invalid,when moving existing
- HPT entries to the new HPT,
- -EIO on other error conditions
Used to implement the PAPR extension for runtime resizing of a guest's
Hashed Page Table (HPT). Specifically this starts, stops or monitors
the preparation of a new potential HPT for the guest, essentially
implementing the H_RESIZE_HPT_PREPARE hypercall.
+::
+
+ struct kvm_ppc_resize_hpt {
+ __u64 flags;
+ __u32 shift;
+ __u32 pad;
+ };
+
If called with shift > 0 when there is no pending HPT for the guest,
this begins preparation of a new pending HPT of size 2^(shift) bytes.
It then returns a positive integer with the estimated number of
@@ -3926,14 +3897,6 @@ Normally this will be called repeatedly with the same parameters until
it returns <= 0. The first call will initiate preparation, subsequent
ones will monitor preparation until it completes or fails.
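A hedged user-space sketch of that retry loop, assuming vm_fd is an open KVM VM file descriptor; the polling interval is illustrative:

#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

static int resize_hpt(int vm_fd, __u32 shift)
{
	struct kvm_ppc_resize_hpt rh = { .flags = 0, .shift = shift };
	int ret;

	do {
		ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rh);
		if (ret > 0)
			usleep(ret * 1000);	/* estimated ms until ready */
	} while (ret > 0);

	if (ret < 0)
		return ret;			/* preparation failed */
	return ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rh);
}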
-::
-
- struct kvm_ppc_resize_hpt {
- __u64 flags;
- __u32 shift;
- __u32 pad;
- };
-
4.103 KVM_PPC_RESIZE_HPT_COMMIT
-------------------------------
@@ -3956,6 +3919,14 @@ Hashed Page Table (HPT). Specifically this requests that the guest be
transferred to working with the new HPT, essentially implementing the
H_RESIZE_HPT_COMMIT hypercall.
+::
+
+ struct kvm_ppc_resize_hpt {
+ __u64 flags;
+ __u32 shift;
+ __u32 pad;
+ };
+
This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
returned 0 with the same parameters. In other cases
KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
@@ -3971,14 +3942,6 @@ HPT and the previous HPT will be discarded.
On failure, the guest will still be operating on its previous HPT.
-::
-
- struct kvm_ppc_resize_hpt {
- __u64 flags;
- __u32 shift;
- __u32 pad;
- };
-
4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
-----------------------------------
@@ -4519,6 +4482,7 @@ KVM_GET_SUPPORTED_CPUID ioctl because some of them intersect with KVM feature
leaves (0x40000000, 0x40000001).
Currently, the following list of CPUID leaves are returned:
+
- HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS
- HYPERV_CPUID_INTERFACE
- HYPERV_CPUID_VERSION
@@ -4543,6 +4507,7 @@ userspace should not expect to get any particular value there.
Note, vcpu version of KVM_GET_SUPPORTED_HV_CPUID is currently deprecated. Unlike
system ioctl which exposes all supported feature bits unconditionally, vcpu
version has the following quirks:
+
- HYPERV_CPUID_NESTED_FEATURES leaf and HV_X64_ENLIGHTENED_VMCS_RECOMMENDED
feature bit are only exposed when Enlightened VMCS was previously enabled
on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
@@ -4913,6 +4878,14 @@ see KVM_XEN_HVM_SET_ATTR above.
union {
__u64 gpa;
__u64 pad[4];
+ struct {
+ __u64 state;
+ __u64 state_entry_time;
+ __u64 time_running;
+ __u64 time_runnable;
+ __u64 time_blocked;
+ __u64 time_offline;
+ } runstate;
} u;
};
@@ -4925,6 +4898,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
Sets the guest physical address of an additional pvclock structure
for a given vCPU. This is typically used for guest vsyscall support.
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
+ Sets the guest physical address of the vcpu_runstate_info for a given
+ vCPU. This is how a Xen guest tracks CPU state such as steal time.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
+ Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
+ the given vCPU from the .u.runstate.state member of the structure.
+ KVM automatically accounts running and runnable time but blocked
+ and offline states are only entered explicitly.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA
+ Sets all fields of the vCPU runstate data from the .u.runstate member
+ of the structure, including the current runstate. The state_entry_time
+ must equal the sum of the other four times.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
+ This *adds* the contents of the .u.runstate members of the structure
+ to the corresponding members of the given vCPU's runstate data, thus
+ permitting atomic adjustments to the runstate times. The adjustment
+ to the state_entry_time must equal the sum of the adjustments to the
+ other four times. The state field must be set to -1, or to a valid
+ runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked
+ or RUNSTATE_offline) to set the current accounted state as of the
+ adjusted state_entry_time.
+
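A brief fragment, assuming vcpu_fd and runstate_gpa have been set up by the VMM, showing how the runstate area might be registered with the attribute described above:

struct kvm_xen_vcpu_attr attr = {
	.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
	.u.gpa = runstate_gpa,	/* guest-physical vcpu_runstate_info */
};

if (ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &attr))
	perror("KVM_XEN_VCPU_SET_ATTR");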
4.130 KVM_XEN_VCPU_GET_ATTR
---------------------------
@@ -4937,6 +4935,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
Allows Xen vCPU attributes to be read. For the structure and types,
see KVM_XEN_VCPU_SET_ATTR above.
+The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
+with the KVM_XEN_VCPU_GET_ATTR ioctl.
+
5. The kvm_run structure
========================
@@ -4998,7 +4999,8 @@ local APIC is not used.
__u16 flags;
More architecture-specific flags detailing state of the VCPU that may
-affect the device's behavior. Current defined flags:
+affect the device's behavior. Currently defined flags::
+
/* x86, set if the VCPU is in system management mode */
#define KVM_RUN_X86_SMM (1 << 0)
/* x86, set if bus lock detected in VM */
@@ -6215,7 +6217,7 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit
notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
KVM_RUN_BUS_LOCK flag is used to distinguish between them.
-7.22 KVM_CAP_PPC_DAWR1
+7.23 KVM_CAP_PPC_DAWR1
----------------------
:Architectures: ppc
@@ -6700,6 +6702,7 @@ PVHVM guests. Valid flags are::
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+ #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
ioctl is available, for the guest to set its hypercall page.
@@ -6714,3 +6717,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
vcpu_info is set.
+
+The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
+features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
+supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
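A hedged sketch of gating on that flag before using the runstate attributes; kvm_fd is assumed to be an open /dev/kvm descriptor:

int flags = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_XEN_HVM);

if (flags > 0 && (flags & KVM_XEN_HVM_CONFIG_RUNSTATE)) {
	/* RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST attributes are available */
}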
diff --git a/MAINTAINERS b/MAINTAINERS
index 47ae27ff6b4b..f2e34ede1ab3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -261,6 +261,8 @@ ABI/API
L: linux-api@vger.kernel.org
F: include/linux/syscalls.h
F: kernel/sys_ni.c
+F: include/uapi/
+F: arch/*/include/uapi/
ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
M: Hans de Goede <hdegoede@redhat.com>
@@ -2982,7 +2984,7 @@ F: include/uapi/linux/audit.h
F: kernel/audit*
AUXILIARY DISPLAY DRIVERS
-M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
+M: Miguel Ojeda <ojeda@kernel.org>
S: Maintained
F: drivers/auxdisplay/
F: include/linux/cfag12864b.h
@@ -3853,6 +3855,29 @@ W: https://github.com/Cascoda/ca8210-linux.git
F: Documentation/devicetree/bindings/net/ieee802154/ca8210.txt
F: drivers/net/ieee802154/ca8210.c
+CANAAN/KENDRYTE K210 SOC FPIOA DRIVER
+M: Damien Le Moal <damien.lemoal@wdc.com>
+L: linux-riscv@lists.infradead.org
+L: linux-gpio@vger.kernel.org (pinctrl driver)
+S: Maintained
+F: Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
+F: drivers/pinctrl/pinctrl-k210.c
+
+CANAAN/KENDRYTE K210 SOC RESET CONTROLLER DRIVER
+M: Damien Le Moal <damien.lemoal@wdc.com>
+L: linux-kernel@vger.kernel.org
+L: linux-riscv@lists.infradead.org
+S: Maintained
+F: Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
+F: drivers/reset/reset-k210.c
+
+CANAAN/KENDRYTE K210 SOC SYSTEM CONTROLLER DRIVER
+M: Damien Le Moal <damien.lemoal@wdc.com>
+L: linux-riscv@lists.infradead.org
+S: Maintained
+F: Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
+F: drivers/soc/canaan/
+F: include/soc/canaan/
+
CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS
M: David Howells <dhowells@redhat.com>
L: linux-cachefs@redhat.com (moderated for non-subscribers)
@@ -4128,13 +4153,13 @@ F: scripts/extract-cert.c
F: scripts/sign-file.c
CFAG12864B LCD DRIVER
-M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
+M: Miguel Ojeda <ojeda@kernel.org>
S: Maintained
F: drivers/auxdisplay/cfag12864b.c
F: include/linux/cfag12864b.h
CFAG12864BFB LCD FRAMEBUFFER DRIVER
-M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
+M: Miguel Ojeda <ojeda@kernel.org>
S: Maintained
F: drivers/auxdisplay/cfag12864bfb.c
F: include/linux/cfag12864b.h
@@ -4304,7 +4329,7 @@ S: Supported
F: drivers/infiniband/hw/usnic/
CLANG-FORMAT FILE
-M: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+M: Miguel Ojeda <ojeda@kernel.org>
S: Maintained
F: .clang-format
@@ -4444,7 +4469,7 @@ S: Maintained
F: drivers/platform/x86/compal-laptop.c
COMPILER ATTRIBUTES
-M: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+M: Miguel Ojeda <ojeda@kernel.org>
S: Maintained
F: include/linux/compiler_attributes.h
@@ -9867,6 +9892,18 @@ F: include/linux/keyctl.h
F: include/uapi/linux/keyctl.h
F: security/keys/
+KFENCE
+M: Alexander Potapenko <glider@google.com>
+M: Marco Elver <elver@google.com>
+R: Dmitry Vyukov <dvyukov@google.com>
+L: kasan-dev@googlegroups.com
+S: Maintained
+F: Documentation/dev-tools/kfence.rst
+F: arch/*/include/asm/kfence.h
+F: include/linux/kfence.h
+F: lib/Kconfig.kfence
+F: mm/kfence/
+
KFIFO
M: Stefani Seibold <stefani@seibold.net>
S: Maintained
@@ -9927,7 +9964,7 @@ F: include/linux/kprobes.h
F: kernel/kprobes.c
KS0108 LCD CONTROLLER DRIVER
-M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
+M: Miguel Ojeda <ojeda@kernel.org>
S: Maintained
F: Documentation/admin-guide/auxdisplay/ks0108.rst
F: drivers/auxdisplay/ks0108.c
diff --git a/Makefile b/Makefile
index 6ecd0d22e608..31dcdb3d61fa 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 5
-PATCHLEVEL = 11
+PATCHLEVEL = 12
SUBLEVEL = 0
-EXTRAVERSION =
-NAME = 💕 Valentine's Day Edition 💕
+EXTRAVERSION = -rc2
+NAME = Frozen Wasteland
# *DOCUMENTATION*
# To see a list of typical targets execute "make help"
@@ -96,6 +96,7 @@ endif
ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),)
quiet=silent_
+ KBUILD_VERBOSE = 0
endif
export quiet Q KBUILD_VERBOSE
@@ -1283,10 +1284,10 @@ endef
define filechk_version.h
if [ $(SUBLEVEL) -gt 255 ]; then \
echo \#define LINUX_VERSION_CODE $(shell \
- expr $(VERSION) \* 65536 + 0$(PATCHLEVEL) \* 256 + 255); \
+ expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + 255); \
else \
echo \#define LINUX_VERSION_CODE $(shell \
- expr $(VERSION) \* 65536 + 0$(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \
+ expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \
fi; \
echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + \
((c) > 255 ? 255 : (c)))'; \
@@ -1295,6 +1296,8 @@ define filechk_version.h
echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL)
endef
+$(version_h): PATCHLEVEL := $(if $(PATCHLEVEL), $(PATCHLEVEL), 0)
+$(version_h): SUBLEVEL := $(if $(SUBLEVEL), $(SUBLEVEL), 0)
$(version_h): FORCE
$(call filechk,version.h)
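A worked check of the encoding that filechk_version.h emits; KERNEL_VERSION() clamps the sublevel at 255, so out-of-range stable releases still compare correctly:

#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + \
				 ((c) > 255 ? 255 : (c)))

_Static_assert(KERNEL_VERSION(5, 12, 0) == 0x050c00,
	       "5.12.0 encodes as 0x050c00");
_Static_assert(KERNEL_VERSION(4, 9, 300) == KERNEL_VERSION(4, 9, 255),
	       "sublevels above 255 are clamped");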
@@ -1510,7 +1513,7 @@ endif # CONFIG_MODULES
# Directories & files removed with 'make clean'
CLEAN_FILES += include/ksym vmlinux.symvers \
modules.builtin modules.builtin.modinfo modules.nsdeps \
- compile_commands.json
+ compile_commands.json .thinlto-cache
# Directories & files removed with 'make mrproper'
MRPROPER_FILES += include/config include/generated \
@@ -1524,7 +1527,7 @@ MRPROPER_FILES += include/config include/generated \
*.spec
# Directories & files removed with 'make distclean'
-DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS .thinlto-cache
+DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS
# clean - Delete most, but leave enough to build external modules
#
diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig
index 6293675db164..724c4075df40 100644
--- a/arch/alpha/configs/defconfig
+++ b/arch/alpha/configs/defconfig
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_LOG_BUF_SHIFT=14
diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
index 6c71554206cc..5112ab996394 100644
--- a/arch/alpha/kernel/process.c
+++ b/arch/alpha/kernel/process.c
@@ -249,7 +249,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childti->pcb.ksp = (unsigned long) childstack;
childti->pcb.flags = 1; /* set FEN, clear everything else */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* kernel thread */
memset(childstack, 0,
sizeof(struct switch_stack) + sizeof(struct pt_regs));
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index 37f724ad5e39..d838d0d57696 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -191,7 +191,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childksp[0] = 0; /* fp */
childksp[1] = (unsigned long)ret_from_fork; /* blink */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(c_regs, 0, sizeof(struct pt_regs));
c_callee->r13 = kthread_arg;
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index ee3aee69e444..5199a2bb4111 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -243,7 +243,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
thread->cpu_domain = get_domain();
#endif
- if (likely(!(p->flags & PF_KTHREAD))) {
+ if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
*childregs = *current_pt_regs();
childregs->ARM_r0 = 0;
if (stack_start)
diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c
index fd6e3aafe272..acb464547a54 100644
--- a/arch/arm/xen/p2m.c
+++ b/arch/arm/xen/p2m.c
@@ -93,12 +93,39 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
int i;
for (i = 0; i < count; i++) {
+ struct gnttab_unmap_grant_ref unmap;
+ int rc;
+
if (map_ops[i].status)
continue;
- if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
- map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) {
- return -ENOMEM;
- }
+ if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
+ map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT)))
+ continue;
+
+ /*
+ * Signal an error for this slot. This in turn requires
+ * immediate unmapping.
+ */
+ map_ops[i].status = GNTST_general_error;
+ unmap.host_addr = map_ops[i].host_addr;
+ unmap.handle = map_ops[i].handle;
+ map_ops[i].handle = ~0;
+ if (map_ops[i].flags & GNTMAP_device_map)
+ unmap.dev_bus_addr = map_ops[i].dev_bus_addr;
+ else
+ unmap.dev_bus_addr = 0;
+
+ /*
+ * Pre-populate the status field, to be recognizable in
+ * the log message below.
+ */
+ unmap.status = 1;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ &unmap, 1);
+ if (rc || unmap.status != GNTST_okay)
+ pr_err_once("gnttab unmap failed: rc=%d st=%d\n",
+ rc, unmap.status);
}
return 0;
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index fc0ce2a1f3bf..1f212b47a48a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -140,6 +140,7 @@ config ARM64
select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
+ select HAVE_ARCH_KFENCE
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
@@ -998,6 +999,7 @@ config HOTPLUG_CPU
# Common NUMA Features
config NUMA
bool "NUMA Memory Allocation and Scheduler Support"
+ select GENERIC_ARCH_NUMA
select ACPI_NUMA if ACPI
select OF_NUMA
help
diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h
index 77cbbe3625f2..a074459f8f2f 100644
--- a/arch/arm64/include/asm/cache.h
+++ b/arch/arm64/include/asm/cache.h
@@ -6,7 +6,6 @@
#define __ASM_CACHE_H
#include <asm/cputype.h>
-#include <asm/mte-kasan.h>
#define CTR_L1IP_SHIFT 14
#define CTR_L1IP_MASK 3
diff --git a/arch/arm64/include/asm/kasan.h b/arch/arm64/include/asm/kasan.h
index 0aaf9044cd6a..12d5f47f7dbe 100644
--- a/arch/arm64/include/asm/kasan.h
+++ b/arch/arm64/include/asm/kasan.h
@@ -6,6 +6,7 @@
#include <linux/linkage.h>
#include <asm/memory.h>
+#include <asm/mte-kasan.h>
#include <asm/pgtable-types.h>
#define arch_kasan_set_tag(addr, tag) __tag_set(addr, tag)
diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
new file mode 100644
index 000000000000..d061176d57ea
--- /dev/null
+++ b/arch/arm64/include/asm/kfence.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm64 KFENCE support.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef __ASM_KFENCE_H
+#define __ASM_KFENCE_H
+
+#include <asm/cacheflush.h>
+
+static inline bool arch_kfence_init_pool(void) { return true; }
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ set_memory_valid(addr, 1, !protect);
+
+ return true;
+}
+
+#endif /* __ASM_KFENCE_H */
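To illustrate the hook's semantics (not an actual call site), a hedged sketch of how the KFENCE core toggles protection on a single pool page via this helper:

static void toggle_guard(unsigned long addr)
{
	/* addr: a page-aligned address inside the KFENCE pool */
	kfence_protect_page(addr, true);	/* accesses now fault */
	kfence_protect_page(addr, false);	/* accesses allowed again */
}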
diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h
index 691f15af788e..810045628c66 100644
--- a/arch/arm64/include/asm/module.lds.h
+++ b/arch/arm64/include/asm/module.lds.h
@@ -1,7 +1,7 @@
#ifdef CONFIG_ARM64_MODULE_PLTS
SECTIONS {
- .plt (NOLOAD) : { BYTE(0) }
- .init.plt (NOLOAD) : { BYTE(0) }
- .text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
+ .plt 0 (NOLOAD) : { BYTE(0) }
+ .init.plt 0 (NOLOAD) : { BYTE(0) }
+ .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) }
}
#endif
diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h
index 2d73a1612f09..cf241b0f0a42 100644
--- a/arch/arm64/include/asm/mte-def.h
+++ b/arch/arm64/include/asm/mte-def.h
@@ -11,4 +11,6 @@
#define MTE_TAG_SIZE 4
#define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
+#define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
+
#endif /* __ASM_MTE_DEF_H */
diff --git a/arch/arm64/include/asm/mte-kasan.h b/arch/arm64/include/asm/mte-kasan.h
index 3748d5bb88c0..7ab500e2ad17 100644
--- a/arch/arm64/include/asm/mte-kasan.h
+++ b/arch/arm64/include/asm/mte-kasan.h
@@ -11,11 +11,14 @@
#include <linux/types.h>
+#ifdef CONFIG_ARM64_MTE
+
/*
- * The functions below are meant to be used only for the
- * KASAN_HW_TAGS interface defined in asm/memory.h.
+ * These functions are meant to be used only from the KASAN runtime through
+ * the arch_*() interface defined in asm/memory.h.
+ * These functions don't include system_supports_mte() checks,
+ * as KASAN only calls them when MTE is supported and enabled.
*/
-#ifdef CONFIG_ARM64_MTE
static inline u8 mte_get_ptr_tag(void *ptr)
{
@@ -25,9 +28,54 @@ static inline u8 mte_get_ptr_tag(void *ptr)
return tag;
}
-u8 mte_get_mem_tag(void *addr);
-u8 mte_get_random_tag(void);
-void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag);
+/* Get allocation tag for the address. */
+static inline u8 mte_get_mem_tag(void *addr)
+{
+ asm(__MTE_PREAMBLE "ldg %0, [%0]"
+ : "+r" (addr));
+
+ return mte_get_ptr_tag(addr);
+}
+
+/* Generate a random tag. */
+static inline u8 mte_get_random_tag(void)
+{
+ void *addr;
+
+ asm(__MTE_PREAMBLE "irg %0, %0"
+ : "=r" (addr));
+
+ return mte_get_ptr_tag(addr);
+}
+
+/*
+ * Assign allocation tags for a region of memory based on the pointer tag.
+ * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
+ * size must be non-zero and MTE_GRANULE_SIZE aligned.
+ */
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
+{
+ u64 curr, end;
+
+ if (!size)
+ return;
+
+ curr = (u64)__tag_set(addr, tag);
+ end = curr + size;
+
+ do {
+ /*
+ * 'asm volatile' is required to prevent the compiler from moving
+ * the statement outside of the loop.
+ */
+ asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
+ :
+ : "r" (curr)
+ : "memory");
+
+ curr += MTE_GRANULE_SIZE;
+ } while (curr != end);
+}
void mte_enable_kernel(void);
void mte_init_tags(u64 max_tag);
@@ -46,13 +94,14 @@ static inline u8 mte_get_mem_tag(void *addr)
{
return 0xFF;
}
+
static inline u8 mte_get_random_tag(void)
{
return 0xFF;
}
-static inline void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
+
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
{
- return addr;
}
static inline void mte_enable_kernel(void)
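A hedged sketch of how a HW-tags allocator path might combine the inlined helpers above; tag_alloc() and its use of __tag_set() are assumptions for illustration, not kernel code:

static void *tag_alloc(void *addr, size_t size)
{
	u8 tag = mte_get_random_tag();

	/* size rounded up to the MTE granule, as the helper requires */
	mte_set_mem_tag_range(addr, ALIGN(size, MTE_GRANULE_SIZE), tag);
	return (void *)__tag_set(addr, tag);	/* pointer tag matches memory */
}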
diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index d02aff9f493d..9b557a457f24 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -8,8 +8,6 @@
#include <asm/compiler.h>
#include <asm/mte-def.h>
-#define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
-
#ifndef __ASSEMBLY__
#include <linux/bitfield.h>
diff --git a/arch/arm64/include/asm/numa.h b/arch/arm64/include/asm/numa.h
index dd870390d639..8c8cf4297cc3 100644
--- a/arch/arm64/include/asm/numa.h
+++ b/arch/arm64/include/asm/numa.h
@@ -3,52 +3,6 @@
#define __ASM_NUMA_H
#include <asm/topology.h>
-
-#ifdef CONFIG_NUMA
-
-#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
-
-int __node_distance(int from, int to);
-#define node_distance(a, b) __node_distance(a, b)
-
-extern nodemask_t numa_nodes_parsed __initdata;
-
-extern bool numa_off;
-
-/* Mappings between node number and cpus on that node. */
-extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
-void numa_clear_node(unsigned int cpu);
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-const struct cpumask *cpumask_of_node(int node);
-#else
-/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-static inline const struct cpumask *cpumask_of_node(int node)
-{
- if (node == NUMA_NO_NODE)
- return cpu_all_mask;
-
- return node_to_cpumask_map[node];
-}
-#endif
-
-void __init arm64_numa_init(void);
-int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-void __init numa_set_distance(int from, int to, int distance);
-void __init numa_free_distance(void);
-void __init early_map_cpu_to_node(unsigned int cpu, int nid);
-void numa_store_cpu_info(unsigned int cpu);
-void numa_add_cpu(unsigned int cpu);
-void numa_remove_cpu(unsigned int cpu);
-
-#else /* CONFIG_NUMA */
-
-static inline void numa_store_cpu_info(unsigned int cpu) { }
-static inline void numa_add_cpu(unsigned int cpu) { }
-static inline void numa_remove_cpu(unsigned int cpu) { }
-static inline void arm64_numa_init(void) { }
-static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
-
-#endif /* CONFIG_NUMA */
+#include <asm-generic/numa.h>
#endif /* __ASM_NUMA_H */
diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c
index 7ff800045434..fdfecf0991ce 100644
--- a/arch/arm64/kernel/acpi_numa.c
+++ b/arch/arm64/kernel/acpi_numa.c
@@ -118,15 +118,3 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
node_set(node, numa_nodes_parsed);
}
-int __init arm64_acpi_numa_init(void)
-{
- int ret;
-
- ret = acpi_numa_init();
- if (ret) {
- pr_info("Failed to initialise from firmware\n");
- return ret;
- }
-
- return srat_disabled() ? -EINVAL : 0;
-}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 1e30b5550d2a..66b0e0b66e31 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -837,6 +837,7 @@ SYM_FUNC_START_LOCAL(__primary_switch)
tlbi vmalle1 // Remove any stale TLB entries
dsb nsh
+ isb
set_sctlr_el1 x19 // re-enable the MMU
diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S
index 678cd2c618ee..5eccbd62fec8 100644
--- a/arch/arm64/kernel/hyp-stub.S
+++ b/arch/arm64/kernel/hyp-stub.S
@@ -75,9 +75,6 @@ SYM_CODE_END(el1_sync)
// nVHE? No way! Give me the real thing!
SYM_CODE_START_LOCAL(mutate_to_vhe)
- // Be prepared to fail
- mov_q x0, HVC_STUB_ERR
-
// Sanity check: MMU *must* be off
mrs x1, sctlr_el2
tbnz x1, #0, 1f
@@ -96,8 +93,11 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
cmp x1, xzr
and x2, x2, x1
csinv x2, x2, xzr, ne
- cbz x2, 1f
+ cbnz x2, 2f
+1: mov_q x0, HVC_STUB_ERR
+ eret
+2:
// Engage the VHE magic!
mov_q x0, HCR_HOST_VHE_FLAGS
msr hcr_el2, x0
@@ -131,9 +131,28 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
msr mair_el1, x0
isb
+ // Hack the exception return to stay at EL2
+ mrs x0, spsr_el1
+ and x0, x0, #~PSR_MODE_MASK
+ mov x1, #PSR_MODE_EL2h
+ orr x0, x0, x1
+ msr spsr_el1, x0
+
+ b enter_vhe
+SYM_CODE_END(mutate_to_vhe)
+
+ // At the point where we reach enter_vhe(), we run with
+ // the MMU off (which is enforced by mutate_to_vhe()).
+ // We thus need to be in the idmap, or everything will
+ // explode when enabling the MMU.
+
+ .pushsection .idmap.text, "ax"
+
+SYM_CODE_START_LOCAL(enter_vhe)
// Invalidate TLBs before enabling the MMU
tlbi vmalle1
dsb nsh
+ isb
// Enable the EL2 S1 MMU, as set up from EL1
mrs_s x0, SYS_SCTLR_EL12
@@ -143,17 +162,12 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
mov_q x0, INIT_SCTLR_EL1_MMU_OFF
msr_s SYS_SCTLR_EL12, x0
- // Hack the exception return to stay at EL2
- mrs x0, spsr_el1
- and x0, x0, #~PSR_MODE_MASK
- mov x1, #PSR_MODE_EL2h
- orr x0, x0, x1
- msr spsr_el1, x0
-
mov x0, xzr
-1: eret
-SYM_CODE_END(mutate_to_vhe)
+ eret
+SYM_CODE_END(enter_vhe)
+
+ .popsection
.macro invalid_vector label
SYM_CODE_START_LOCAL(\label)
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 03210f644790..0cde47a63beb 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -182,8 +182,10 @@ static int create_dtb(struct kimage *image,
/* duplicate a device tree blob */
ret = fdt_open_into(initial_boot_params, buf, buf_size);
- if (ret)
+ if (ret) {
+ vfree(buf);
return -EINVAL;
+ }
ret = setup_dtb(image, initrd_load_addr, initrd_len,
cmdline, buf);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 2cfc850809ce..b3c70a612c7a 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -19,7 +19,6 @@
#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/mte.h>
-#include <asm/mte-kasan.h>
#include <asm/ptrace.h>
#include <asm/sysreg.h>
@@ -88,51 +87,6 @@ int memcmp_pages(struct page *page1, struct page *page2)
return ret;
}
-u8 mte_get_mem_tag(void *addr)
-{
- if (!system_supports_mte())
- return 0xFF;
-
- asm(__MTE_PREAMBLE "ldg %0, [%0]"
- : "+r" (addr));
-
- return mte_get_ptr_tag(addr);
-}
-
-u8 mte_get_random_tag(void)
-{
- void *addr;
-
- if (!system_supports_mte())
- return 0xFF;
-
- asm(__MTE_PREAMBLE "irg %0, %0"
- : "+r" (addr));
-
- return mte_get_ptr_tag(addr);
-}
-
-void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
-{
- void *ptr = addr;
-
- if ((!system_supports_mte()) || (size == 0))
- return addr;
-
- /* Make sure that size is MTE granule aligned. */
- WARN_ON(size & (MTE_GRANULE_SIZE - 1));
-
- /* Make sure that the address is MTE granule aligned. */
- WARN_ON((u64)addr & (MTE_GRANULE_SIZE - 1));
-
- tag = 0xF0 | tag;
- ptr = (void *)__tag_set(ptr, tag);
-
- mte_assign_mem_tag_range(ptr, size);
-
- return ptr;
-}
-
void mte_init_tags(u64 max_tag)
{
static bool gcr_kernel_excl_initialized;
diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c
index a412d8edbcd2..2c247634552b 100644
--- a/arch/arm64/kernel/probes/uprobes.c
+++ b/arch/arm64/kernel/probes/uprobes.c
@@ -38,7 +38,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
/* TODO: Currently we do not support AARCH32 instruction probing */
if (mm->context.flags & MMCF_AARCH32)
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
return -EINVAL;
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 4cc1ccc8d6ab..325c83b1a24d 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -398,7 +398,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
ptrauth_thread_init_kernel(p);
- if (likely(!(p->flags & PF_KTHREAD))) {
+ if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
*childregs = *current_pt_regs();
childregs->regs[0] = 0;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index 3d5c8afca75b..170f42fd6101 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -1797,7 +1797,7 @@ int syscall_trace_enter(struct pt_regs *regs)
if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
- if (!in_syscall(regs) || (flags & _TIF_SYSCALL_EMU))
+ if (flags & _TIF_SYSCALL_EMU)
return NO_SYSCALL;
}
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 0fb42129b469..ad20981dfda4 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -46,7 +46,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
/* Terminal record; nothing to unwind */
if (!fp)
- return -EINVAL;
+ return -ENOENT;
if (fp & 0xf)
return -EINVAL;
diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index a67b37a7a47e..d7564891ffe1 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c
@@ -119,7 +119,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
if (!ret)
ret = -EOPNOTSUPP;
} else {
- __cpu_suspend_exit();
+ RCU_NONIDLE(__cpu_suspend_exit());
}
unpause_graph_tracing();
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index d179056e1af8..5f49df4ffdd8 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -119,7 +119,7 @@ el2_error:
.macro invalid_vector label, target = __guest_exit_panic
.align 2
-SYM_CODE_START(\label)
+SYM_CODE_START_LOCAL(\label)
b \target
SYM_CODE_END(\label)
.endm
diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
index 9e1a12e10053..351537c12f36 100644
--- a/arch/arm64/lib/mte.S
+++ b/arch/arm64/lib/mte.S
@@ -149,19 +149,3 @@ SYM_FUNC_START(mte_restore_page_tags)
ret
SYM_FUNC_END(mte_restore_page_tags)
-
-/*
- * Assign allocation tags for a region of memory based on the pointer tag
- * x0 - source pointer
- * x1 - size
- *
- * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
- * size must be non-zero and MTE_GRANULE_SIZE aligned.
- */
-SYM_FUNC_START(mte_assign_mem_tag_range)
-1: stg x0, [x0]
- add x0, x0, #MTE_GRANULE_SIZE
- subs x1, x1, #MTE_GRANULE_SIZE
- b.gt 1b
- ret
-SYM_FUNC_END(mte_assign_mem_tag_range)
diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile
index 77222d92667a..f188c9092696 100644
--- a/arch/arm64/mm/Makefile
+++ b/arch/arm64/mm/Makefile
@@ -7,7 +7,6 @@ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
obj-$(CONFIG_TRANS_TABLE) += trans_pgd.o
-obj-$(CONFIG_NUMA) += numa.o
obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o
obj-$(CONFIG_ARM64_MTE) += mteswap.o
KASAN_SANITIZE_physaddr.o := n
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index dc9f96442edc..f37d4e3830b7 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -10,6 +10,7 @@
#include <linux/acpi.h>
#include <linux/bitfield.h>
#include <linux/extable.h>
+#include <linux/kfence.h>
#include <linux/signal.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
@@ -389,6 +390,9 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
} else if (addr < PAGE_SIZE) {
msg = "NULL pointer dereference";
} else {
+ if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
+ return;
+
msg = "paging request";
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 709d98fea90c..0ace5e68efba 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -416,10 +416,10 @@ void __init bootmem_init(void)
max_pfn = max_low_pfn = max;
min_low_pfn = min;
- arm64_numa_init();
+ arch_numa_init();
/*
- * must be done after arm64_numa_init() which calls numa_init() to
+ * must be done after arch_numa_init() which calls numa_init() to
* initialize node_online_map that gets used in hugetlb_cma_reserve()
* while allocating required CMA size across online nodes.
*/
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 25af183e4bed..3802cfbdd20d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1155,7 +1155,7 @@ void vmemmap_free(unsigned long start, unsigned long end,
}
#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-static inline pud_t * fixmap_pud(unsigned long addr)
+static inline pud_t *fixmap_pud(unsigned long addr)
{
pgd_t *pgdp = pgd_offset_k(addr);
p4d_t *p4dp = p4d_offset(pgdp, addr);
@@ -1166,7 +1166,7 @@ static inline pud_t * fixmap_pud(unsigned long addr)
return pud_offset_kimg(p4dp, addr);
}
-static inline pmd_t * fixmap_pmd(unsigned long addr)
+static inline pmd_t *fixmap_pmd(unsigned long addr)
{
pud_t *pudp = fixmap_pud(addr);
pud_t pud = READ_ONCE(*pudp);
@@ -1176,7 +1176,7 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
return pmd_offset_kimg(pudp, addr);
}
-static inline pte_t * fixmap_pte(unsigned long addr)
+static inline pte_t *fixmap_pte(unsigned long addr)
{
return &bm_pte[pte_index(addr)];
}
@@ -1444,16 +1444,19 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);
}
-static bool inside_linear_region(u64 start, u64 size)
+struct range arch_get_mappable_range(void)
{
+ struct range mhp_range;
+
/*
* Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)]
* accommodating both its ends but excluding PAGE_END. Max physical
* range which can be mapped inside this linear mapping range, must
* also be derived from its end points.
*/
- return start >= __pa(_PAGE_OFFSET(vabits_actual)) &&
- (start + size - 1) <= __pa(PAGE_END - 1);
+ mhp_range.start = __pa(_PAGE_OFFSET(vabits_actual));
+ mhp_range.end = __pa(PAGE_END - 1);
+ return mhp_range;
}
int arch_add_memory(int nid, u64 start, u64 size,
@@ -1461,12 +1464,14 @@ int arch_add_memory(int nid, u64 start, u64 size,
{
int ret, flags = 0;
- if (!inside_linear_region(start, size)) {
- pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size);
- return -EINVAL;
- }
+ VM_BUG_ON(!mhp_range_allowed(start, size, true));
- if (rodata_full || debug_pagealloc_enabled())
+ /*
+ * KFENCE requires linear map to be mapped at page granularity, so that
+ * it is possible to protect/unprotect single pages in the KFENCE pool.
+ */
+ if (rodata_full || debug_pagealloc_enabled() ||
+ IS_ENABLED(CONFIG_KFENCE))
flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 89dd2fcf38fa..34e91224adc3 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -7,7 +7,7 @@ config CSKY
select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
select ARCH_USE_BUILTIN_BSWAP
- select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2
+ select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_FRAME_POINTERS if !CPU_CK610
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select COMMON_CLK
@@ -35,6 +35,9 @@ config CSKY
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_SCHED_CLOCK
select GENERIC_SMP_IDLE_THREAD
+ select GENERIC_TIME_VSYSCALL
+ select GENERIC_VDSO_32
+ select GENERIC_GETTIMEOFDAY
select GX6605S_TIMER if CPU_CK610
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_AUDITSYSCALL
@@ -43,11 +46,14 @@ config CSKY
select HAVE_CONTEXT_TRACKING
select HAVE_VIRT_CPU_ACCOUNTING_GEN
select HAVE_DEBUG_BUGVERBOSE
+ select HAVE_DEBUG_KMEMLEAK
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select HAVE_GENERIC_VDSO
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
+ select HAVE_FUTEX_CMPXCHG if FUTEX && SMP
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZO
@@ -192,6 +198,22 @@ config CPU_CK860
endchoice
choice
+ prompt "PAGE OFFSET"
+ default PAGE_OFFSET_80000000
+
+config PAGE_OFFSET_80000000
+ bool "PAGE OFFSET 2G (user:kernel = 2:2)"
+
+config PAGE_OFFSET_A0000000
+ bool "PAGE OFFSET 2.5G (user:kernel = 2.5:1.5)"
+endchoice
+
+config PAGE_OFFSET
+ hex
+ default 0x80000000 if PAGE_OFFSET_80000000
+ default 0xa0000000 if PAGE_OFFSET_A0000000
+choice
+
prompt "C-SKY PMU type"
depends on PERF_EVENTS
depends on CPU_CK807 || CPU_CK810 || CPU_CK860
diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h
index d3e04208d53c..6cab7afae962 100644
--- a/arch/csky/abiv1/inc/abi/cacheflush.h
+++ b/arch/csky/abiv1/inc/abi/cacheflush.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ABI_CSKY_CACHEFLUSH_H
#define __ABI_CSKY_CACHEFLUSH_H
diff --git a/arch/csky/abiv1/inc/abi/ckmmu.h b/arch/csky/abiv1/inc/abi/ckmmu.h
index ba8eb5870835..416b30c57983 100644
--- a/arch/csky/abiv1/inc/abi/ckmmu.h
+++ b/arch/csky/abiv1/inc/abi/ckmmu.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_CKMMUV1_H
#define __ASM_CSKY_CKMMUV1_H
@@ -89,13 +88,14 @@ static inline void tlb_invalid_indexed(void)
cpwcr("cpcr8", 0x02000000);
}
-static inline void setup_pgd(unsigned long pgd, bool kernel)
+static inline void setup_pgd(pgd_t *pgd, int asid)
{
- cpwcr("cpcr29", pgd | BIT(0));
+ cpwcr("cpcr29", __pa(pgd) | BIT(0));
+ write_mmu_entryhi(asid);
}
-static inline unsigned long get_pgd(void)
+static inline pgd_t *get_pgd(void)
{
- return cprcr("cpcr29") & ~BIT(0);
+ return __va(cprcr("cpcr29") & ~BIT(0));
}
#endif /* __ASM_CSKY_CKMMUV1_H */
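setup_pgd() now takes the kernel-virtual pgd pointer and hides the
__pa()/BIT(0) packing itself, while get_pgd() undoes it. The packing
relies only on the pgd being page aligned, so bit 0 is free for the
valid flag, as this standalone sketch checks:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t pgd_pa = 0x8123f000;	/* page aligned, so bit 0 is clear */
	uint32_t reg = pgd_pa | 1u;	/* what setup_pgd() writes (BIT(0) = valid) */

	assert((reg & ~1u) == pgd_pa);	/* what get_pgd() recovers */
	return 0;
}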
diff --git a/arch/csky/abiv1/inc/abi/entry.h b/arch/csky/abiv1/inc/abi/entry.h
index 13c23e2c707c..b6a2109b895e 100644
--- a/arch/csky/abiv1/inc/abi/entry.h
+++ b/arch/csky/abiv1/inc/abi/entry.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_ENTRY_H
#define __ASM_CSKY_ENTRY_H
diff --git a/arch/csky/abiv1/inc/abi/page.h b/arch/csky/abiv1/inc/abi/page.h
index c864519117c7..2d2159933b76 100644
--- a/arch/csky/abiv1/inc/abi/page.h
+++ b/arch/csky/abiv1/inc/abi/page.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#include <asm/shmparam.h>
diff --git a/arch/csky/abiv1/inc/abi/pgtable-bits.h b/arch/csky/abiv1/inc/abi/pgtable-bits.h
index d605445aad9a..752c8b3f9194 100644
--- a/arch/csky/abiv1/inc/abi/pgtable-bits.h
+++ b/arch/csky/abiv1/inc/abi/pgtable-bits.h
@@ -1,37 +1,49 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_PGTABLE_BITS_H
#define __ASM_CSKY_PGTABLE_BITS_H
/* implemented in software */
-#define _PAGE_ACCESSED (1<<3)
-#define PAGE_ACCESSED_BIT (3)
-
+#define _PAGE_PRESENT (1<<0)
#define _PAGE_READ (1<<1)
#define _PAGE_WRITE (1<<2)
-#define _PAGE_PRESENT (1<<0)
-
+#define _PAGE_ACCESSED (1<<3)
#define _PAGE_MODIFIED (1<<4)
-#define PAGE_MODIFIED_BIT (4)
/* implemented in hardware */
#define _PAGE_GLOBAL (1<<6)
-
#define _PAGE_VALID (1<<7)
-#define PAGE_VALID_BIT (7)
-
#define _PAGE_DIRTY (1<<8)
-#define PAGE_DIRTY_BIT (8)
#define _PAGE_CACHE (3<<9)
#define _PAGE_UNCACHE (2<<9)
#define _PAGE_SO _PAGE_UNCACHE
-
#define _CACHE_MASK (7<<9)
-#define _CACHE_CACHED (_PAGE_VALID | _PAGE_CACHE)
-#define _CACHE_UNCACHED (_PAGE_VALID | _PAGE_UNCACHE)
+#define _CACHE_CACHED _PAGE_CACHE
+#define _CACHE_UNCACHED _PAGE_UNCACHE
+
+#define _PAGE_PROT_NONE _PAGE_READ
+
+/*
+ * Encode and decode a swap entry
+ *
+ * Format of swap PTE:
+ * bit 0: _PAGE_PRESENT (zero)
+ * bit 1: _PAGE_READ (zero)
+ * bit 2 - 5: swap type[0 - 3]
+ * bit 6: _PAGE_GLOBAL (zero)
+ * bit 7: _PAGE_VALID (zero)
+ * bit 8: swap type[4]
+ * bit 9 - 31: swap offset
+ */
+#define __swp_type(x) ((((x).val >> 2) & 0xf) | \
+ (((x).val >> 4) & 0x10))
+#define __swp_offset(x) ((x).val >> 9)
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+ ((type & 0xf) << 2) | \
+ ((type & 0x10) << 4) | \
+ ((offset) << 9)})
#define HAVE_ARCH_UNMAPPED_AREA
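A standalone round-trip check of the swap-PTE layout documented above:
the five type bits are split across PTE bits 2-5 and bit 8 so the
hardware-interpreted bits stay zero, and the offset occupies bits 9-31:

#include <assert.h>
#include <stdint.h>

static uint32_t swp_entry(uint32_t type, uint32_t offset)
{
	return ((type & 0xf) << 2) | ((type & 0x10) << 4) | (offset << 9);
}

static uint32_t swp_type(uint32_t val)
{
	return ((val >> 2) & 0xf) | ((val >> 4) & 0x10);
}

static uint32_t swp_offset(uint32_t val)
{
	return val >> 9;
}

int main(void)
{
	for (uint32_t type = 0; type < 32; type++) {
		uint32_t val = swp_entry(type, 0x123456);

		assert(swp_type(val) == type);
		assert(swp_offset(val) == 0x123456);
		/* PRESENT (0), READ (1), GLOBAL (6), VALID (7) stay zero */
		assert(!(val & ((1u << 0) | (1u << 1) | (1u << 6) | (1u << 7))));
	}
	return 0;
}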
diff --git a/arch/csky/abiv1/inc/abi/reg_ops.h b/arch/csky/abiv1/inc/abi/reg_ops.h
index a153bd3918f7..abd01a243388 100644
--- a/arch/csky/abiv1/inc/abi/reg_ops.h
+++ b/arch/csky/abiv1/inc/abi/reg_ops.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ABI_REG_OPS_H
#define __ABI_REG_OPS_H
diff --git a/arch/csky/abiv1/inc/abi/regdef.h b/arch/csky/abiv1/inc/abi/regdef.h
index 104707fbdcc1..7b386fd67070 100644
--- a/arch/csky/abiv1/inc/abi/regdef.h
+++ b/arch/csky/abiv1/inc/abi/regdef.h
@@ -1,10 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_REGDEF_H
#define __ASM_CSKY_REGDEF_H
+#ifdef __ASSEMBLY__
#define syscallid r1
+#else
+#define syscallid "r1"
+#endif
+
#define regs_syscallid(regs) regs->regs[9]
#define regs_fp(regs) regs->regs[2]
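The new #ifdef __ASSEMBLY__ split lets the same macro serve both
worlds: .S files get the bare register, C code gets the string form
needed for register-asm declarations (here "r1"). A minimal sketch of
the C-side pattern, mirroring the vDSO fallback routines added later
in this patch:

static inline long trap0_syscall0(long number)
{
	register long ret asm("a0");			/* return-value register */
	register long nr asm(syscallid) = number;	/* expands to "r1" on abiv1 */

	asm volatile ("trap 0\n"
		      : "=r" (ret)
		      : "r" (nr)
		      : "memory");
	return ret;
}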
diff --git a/arch/csky/abiv1/inc/abi/string.h b/arch/csky/abiv1/inc/abi/string.h
index 0cd43384f8d2..9d95594b0feb 100644
--- a/arch/csky/abiv1/inc/abi/string.h
+++ b/arch/csky/abiv1/inc/abi/string.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ABI_CSKY_STRING_H
#define __ABI_CSKY_STRING_H
diff --git a/arch/csky/abiv1/inc/abi/switch_context.h b/arch/csky/abiv1/inc/abi/switch_context.h
index 17c82686498e..ec73fd7c9f87 100644
--- a/arch/csky/abiv1/inc/abi/switch_context.h
+++ b/arch/csky/abiv1/inc/abi/switch_context.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ABI_CSKY_PTRACE_H
#define __ABI_CSKY_PTRACE_H
diff --git a/arch/csky/abiv1/inc/abi/vdso.h b/arch/csky/abiv1/inc/abi/vdso.h
index 14352f524f1d..9e6d0a2fdd2b 100644
--- a/arch/csky/abiv1/inc/abi/vdso.h
+++ b/arch/csky/abiv1/inc/abi/vdso.h
@@ -1,17 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#include <linux/uaccess.h>
+#ifndef __ABI_CSKY_VDSO_H
+#define __ABI_CSKY_VDSO_H
-static inline int setup_vdso_page(unsigned short *ptr)
-{
- int err = 0;
+/* movi r1, 127; addi r1, (139 - 127) */
+#define SET_SYSCALL_ID .long 0x20b167f1
- /* movi r1, 127 */
- err |= __put_user(0x67f1, ptr + 0);
- /* addi r1, (139 - 127) */
- err |= __put_user(0x20b1, ptr + 1);
- /* trap 0 */
- err |= __put_user(0x0008, ptr + 2);
-
- return err;
-}
+#endif /* __ABI_CSKY_VDSO_H */
diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c
index 790f1ebfba44..39c51399dd81 100644
--- a/arch/csky/abiv2/cacheflush.c
+++ b/arch/csky/abiv2/cacheflush.c
@@ -12,6 +12,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
unsigned long addr;
struct page *page;
+ if (!pfn_valid(pte_pfn(*pte)))
+ return;
+
page = pfn_to_page(pte_pfn(*pte));
if (page == ZERO_PAGE(0))
return;
diff --git a/arch/csky/abiv2/inc/abi/ckmmu.h b/arch/csky/abiv2/inc/abi/ckmmu.h
index 73ded7c72482..64215f2380f1 100644
--- a/arch/csky/abiv2/inc/abi/ckmmu.h
+++ b/arch/csky/abiv2/inc/abi/ckmmu.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_CKMMUV2_H
#define __ASM_CSKY_CKMMUV2_H
@@ -78,8 +77,13 @@ static inline void tlb_read(void)
static inline void tlb_invalid_all(void)
{
#ifdef CONFIG_CPU_HAS_TLBI
- asm volatile("tlbi.alls\n":::"memory");
sync_is();
+ asm volatile(
+ "tlbi.alls \n"
+ "sync.i \n"
+ :
+ :
+ : "memory");
#else
mtcr("cr<8, 15>", 0x04000000);
#endif
@@ -88,8 +92,13 @@ static inline void tlb_invalid_all(void)
static inline void local_tlb_invalid_all(void)
{
#ifdef CONFIG_CPU_HAS_TLBI
- asm volatile("tlbi.all\n":::"memory");
sync_is();
+ asm volatile(
+ "tlbi.all \n"
+ "sync.i \n"
+ :
+ :
+ : "memory");
#else
tlb_invalid_all();
#endif
@@ -100,16 +109,31 @@ static inline void tlb_invalid_indexed(void)
mtcr("cr<8, 15>", 0x02000000);
}
-static inline void setup_pgd(unsigned long pgd, bool kernel)
+#define NOP32 ".long 0x4820c400\n"
+
+static inline void setup_pgd(pgd_t *pgd, int asid)
{
- if (kernel)
- mtcr("cr<28, 15>", pgd | BIT(0));
- else
- mtcr("cr<29, 15>", pgd | BIT(0));
+#ifdef CONFIG_CPU_HAS_TLBI
+ sync_is();
+#else
+ mb();
+#endif
+ asm volatile(
+#ifdef CONFIG_CPU_HAS_TLBI
+ "mtcr %1, cr<28, 15> \n"
+#endif
+ "mtcr %1, cr<29, 15> \n"
+ "mtcr %0, cr< 4, 15> \n"
+ ".rept 64 \n"
+ NOP32
+ ".endr \n"
+ :
+ :"r"(asid), "r"(__pa(pgd) | BIT(0))
+ :"memory");
}
-static inline unsigned long get_pgd(void)
+static inline pgd_t *get_pgd(void)
{
- return mfcr("cr<29, 15>") & ~BIT(0);
+ return __va(mfcr("cr<29, 15>") & ~BIT(0));
}
#endif /* __ASM_CSKY_CKMMUV2_H */
diff --git a/arch/csky/abiv2/inc/abi/entry.h b/arch/csky/abiv2/inc/abi/entry.h
index bedcc6f06bba..cca63e699b58 100644
--- a/arch/csky/abiv2/inc/abi/entry.h
+++ b/arch/csky/abiv2/inc/abi/entry.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_ENTRY_H
#define __ASM_CSKY_ENTRY_H
@@ -26,6 +25,9 @@
stw tls, (sp, 0)
stw lr, (sp, 4)
+ RD_MEH lr
+ WR_MEH lr
+
mfcr lr, epc
movi tls, \epc_inc
add lr, tls
@@ -231,6 +233,16 @@
mtcr \rx, cr<8, 15>
.endm
+#ifdef CONFIG_PAGE_OFFSET_80000000
+#define MSA_SET cr<30, 15>
+#define MSA_CLR cr<31, 15>
+#endif
+
+#ifdef CONFIG_PAGE_OFFSET_A0000000
+#define MSA_SET cr<31, 15>
+#define MSA_CLR cr<30, 15>
+#endif
+
.macro SETUP_MMU
/* Init psr and enable ee */
lrw r6, DEFAULT_PSR_VALUE
@@ -281,15 +293,15 @@
* 31 - 29 | 28 - 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
* BA Reserved SH WA B SO SEC C D V
*/
- mfcr r6, cr<30, 15> /* Get MSA0 */
+ mfcr r6, MSA_SET /* Get MSA */
2:
lsri r6, 29
lsli r6, 29
addi r6, 0x1ce
- mtcr r6, cr<30, 15> /* Set MSA0 */
+ mtcr r6, MSA_SET /* Set MSA */
movi r6, 0
- mtcr r6, cr<31, 15> /* Clr MSA1 */
+ mtcr r6, MSA_CLR /* Clr MSA */
/* enable MMU */
mfcr r6, cr18
diff --git a/arch/csky/abiv2/inc/abi/fpu.h b/arch/csky/abiv2/inc/abi/fpu.h
index 09e2700a3693..aabb79355013 100644
--- a/arch/csky/abiv2/inc/abi/fpu.h
+++ b/arch/csky/abiv2/inc/abi/fpu.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_FPU_H
#define __ASM_CSKY_FPU_H
diff --git a/arch/csky/abiv2/inc/abi/page.h b/arch/csky/abiv2/inc/abi/page.h
index 0a70cb553dca..cf005f13cd15 100644
--- a/arch/csky/abiv2/inc/abi/page.h
+++ b/arch/csky/abiv2/inc/abi/page.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
static inline void clear_user_page(void *addr, unsigned long vaddr,
struct page *page)
diff --git a/arch/csky/abiv2/inc/abi/pgtable-bits.h b/arch/csky/abiv2/inc/abi/pgtable-bits.h
index 137f7932c83b..7e7f389f546f 100644
--- a/arch/csky/abiv2/inc/abi/pgtable-bits.h
+++ b/arch/csky/abiv2/inc/abi/pgtable-bits.h
@@ -1,37 +1,48 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_PGTABLE_BITS_H
#define __ASM_CSKY_PGTABLE_BITS_H
/* implemented in software */
#define _PAGE_ACCESSED (1<<7)
-#define PAGE_ACCESSED_BIT (7)
-
#define _PAGE_READ (1<<8)
#define _PAGE_WRITE (1<<9)
#define _PAGE_PRESENT (1<<10)
-
#define _PAGE_MODIFIED (1<<11)
-#define PAGE_MODIFIED_BIT (11)
/* implemented in hardware */
#define _PAGE_GLOBAL (1<<0)
-
#define _PAGE_VALID (1<<1)
-#define PAGE_VALID_BIT (1)
-
#define _PAGE_DIRTY (1<<2)
-#define PAGE_DIRTY_BIT (2)
#define _PAGE_SO (1<<5)
#define _PAGE_BUF (1<<6)
-
#define _PAGE_CACHE (1<<3)
-
#define _CACHE_MASK _PAGE_CACHE
-#define _CACHE_CACHED (_PAGE_VALID | _PAGE_CACHE | _PAGE_BUF)
-#define _CACHE_UNCACHED (_PAGE_VALID)
+#define _CACHE_CACHED (_PAGE_CACHE | _PAGE_BUF)
+#define _CACHE_UNCACHED (0)
+
+#define _PAGE_PROT_NONE _PAGE_WRITE
+
+/*
+ * Encode and decode a swap entry
+ *
+ * Format of swap PTE:
+ * bit 0: _PAGE_GLOBAL (zero)
+ * bit 1: _PAGE_VALID (zero)
+ * bit 2 - 6: swap type
+ * bit 7 - 8: swap offset[0 - 1]
+ * bit 9: _PAGE_WRITE (zero)
+ * bit 10: _PAGE_PRESENT (zero)
+ * bit 11 - 31: swap offset[2 - 22]
+ */
+#define __swp_type(x) (((x).val >> 2) & 0x1f)
+#define __swp_offset(x) ((((x).val >> 7) & 0x3) | \
+ (((x).val >> 9) & 0x7ffffc))
+#define __swp_entry(type, offset) ((swp_entry_t) { \
+ ((type & 0x1f) << 2) | \
+ ((offset & 0x3) << 7) | \
+ ((offset & 0x7ffffc) << 9)})
#endif /* __ASM_CSKY_PGTABLE_BITS_H */
diff --git a/arch/csky/abiv2/inc/abi/reg_ops.h b/arch/csky/abiv2/inc/abi/reg_ops.h
index ae82c3f26a6b..49ba18a64751 100644
--- a/arch/csky/abiv2/inc/abi/reg_ops.h
+++ b/arch/csky/abiv2/inc/abi/reg_ops.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ABI_REG_OPS_H
#define __ABI_REG_OPS_H
diff --git a/arch/csky/abiv2/inc/abi/regdef.h b/arch/csky/abiv2/inc/abi/regdef.h
index d7328bbc1ce7..0933addbc27b 100644
--- a/arch/csky/abiv2/inc/abi/regdef.h
+++ b/arch/csky/abiv2/inc/abi/regdef.h
@@ -1,10 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_REGDEF_H
#define __ASM_CSKY_REGDEF_H
+#ifdef __ASSEMBLY__
#define syscallid r7
+#else
+#define syscallid "r7"
+#endif
+
#define regs_syscallid(regs) regs->regs[3]
#define regs_fp(regs) regs->regs[4]
diff --git a/arch/csky/abiv2/inc/abi/switch_context.h b/arch/csky/abiv2/inc/abi/switch_context.h
index 73a81245a3b3..5dd5c3f4ee7e 100644
--- a/arch/csky/abiv2/inc/abi/switch_context.h
+++ b/arch/csky/abiv2/inc/abi/switch_context.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ABI_CSKY_PTRACE_H
#define __ABI_CSKY_PTRACE_H
diff --git a/arch/csky/abiv2/inc/abi/vdso.h b/arch/csky/abiv2/inc/abi/vdso.h
index b60d4a070326..40fd10d893ff 100644
--- a/arch/csky/abiv2/inc/abi/vdso.h
+++ b/arch/csky/abiv2/inc/abi/vdso.h
@@ -3,21 +3,7 @@
#ifndef __ABI_CSKY_VDSO_H
#define __ABI_CSKY_VDSO_H
-#include <linux/uaccess.h>
+/* movi r7, 173 */
+#define SET_SYSCALL_ID .long 0x008bea07
-static inline int setup_vdso_page(unsigned short *ptr)
-{
- int err = 0;
-
- /* movi r7, 173 */
- err |= __put_user(0xea07, ptr);
- err |= __put_user(0x008b, ptr+1);
-
- /* trap 0 */
- err |= __put_user(0xc000, ptr+2);
- err |= __put_user(0x2020, ptr+3);
-
- return err;
-}
-
-#endif /* __ABI_CSKY_STRING_H */
+#endif /* __ABI_CSKY_VDSO_H */
diff --git a/arch/csky/abiv2/sysdep.h b/arch/csky/abiv2/sysdep.h
index bbbedfd34777..61abe9201c50 100644
--- a/arch/csky/abiv2/sysdep.h
+++ b/arch/csky/abiv2/sysdep.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __SYSDEP_H
#define __SYSDEP_H
diff --git a/arch/csky/include/asm/addrspace.h b/arch/csky/include/asm/addrspace.h
index d1c2ede692ed..6fc05d44536c 100644
--- a/arch/csky/include/asm/addrspace.h
+++ b/arch/csky/include/asm/addrspace.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_ADDRSPACE_H
#define __ASM_CSKY_ADDRSPACE_H
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
deleted file mode 100644
index e369d73b13e3..000000000000
--- a/arch/csky/include/asm/atomic.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __ASM_CSKY_ATOMIC_H
-#define __ASM_CSKY_ATOMIC_H
-
-#include <linux/version.h>
-#include <asm/cmpxchg.h>
-#include <asm/barrier.h>
-
-#ifdef CONFIG_CPU_HAS_LDSTEX
-
-#define __atomic_add_unless __atomic_add_unless
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
- unsigned long tmp, ret;
-
- smp_mb();
-
- asm volatile (
- "1: ldex.w %0, (%3) \n"
- " mov %1, %0 \n"
- " cmpne %0, %4 \n"
- " bf 2f \n"
- " add %0, %2 \n"
- " stex.w %0, (%3) \n"
- " bez %0, 1b \n"
- "2: \n"
- : "=&r" (tmp), "=&r" (ret)
- : "r" (a), "r"(&v->counter), "r"(u)
- : "memory");
-
- if (ret != u)
- smp_mb();
-
- return ret;
-}
-
-#define ATOMIC_OP(op, c_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- unsigned long tmp; \
- \
- asm volatile ( \
- "1: ldex.w %0, (%2) \n" \
- " " #op " %0, %1 \n" \
- " stex.w %0, (%2) \n" \
- " bez %0, 1b \n" \
- : "=&r" (tmp) \
- : "r" (i), "r"(&v->counter) \
- : "memory"); \
-}
-
-#define ATOMIC_OP_RETURN(op, c_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned long tmp, ret; \
- \
- smp_mb(); \
- asm volatile ( \
- "1: ldex.w %0, (%3) \n" \
- " " #op " %0, %2 \n" \
- " mov %1, %0 \n" \
- " stex.w %0, (%3) \n" \
- " bez %0, 1b \n" \
- : "=&r" (tmp), "=&r" (ret) \
- : "r" (i), "r"(&v->counter) \
- : "memory"); \
- smp_mb(); \
- \
- return ret; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned long tmp, ret; \
- \
- smp_mb(); \
- asm volatile ( \
- "1: ldex.w %0, (%3) \n" \
- " mov %1, %0 \n" \
- " " #op " %0, %2 \n" \
- " stex.w %0, (%3) \n" \
- " bez %0, 1b \n" \
- : "=&r" (tmp), "=&r" (ret) \
- : "r" (i), "r"(&v->counter) \
- : "memory"); \
- smp_mb(); \
- \
- return ret; \
-}
-
-#else /* CONFIG_CPU_HAS_LDSTEX */
-
-#include <linux/irqflags.h>
-
-#define __atomic_add_unless __atomic_add_unless
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
- unsigned long tmp, ret, flags;
-
- raw_local_irq_save(flags);
-
- asm volatile (
- " ldw %0, (%3) \n"
- " mov %1, %0 \n"
- " cmpne %0, %4 \n"
- " bf 2f \n"
- " add %0, %2 \n"
- " stw %0, (%3) \n"
- "2: \n"
- : "=&r" (tmp), "=&r" (ret)
- : "r" (a), "r"(&v->counter), "r"(u)
- : "memory");
-
- raw_local_irq_restore(flags);
-
- return ret;
-}
-
-#define ATOMIC_OP(op, c_op) \
-static inline void atomic_##op(int i, atomic_t *v) \
-{ \
- unsigned long tmp, flags; \
- \
- raw_local_irq_save(flags); \
- \
- asm volatile ( \
- " ldw %0, (%2) \n" \
- " " #op " %0, %1 \n" \
- " stw %0, (%2) \n" \
- : "=&r" (tmp) \
- : "r" (i), "r"(&v->counter) \
- : "memory"); \
- \
- raw_local_irq_restore(flags); \
-}
-
-#define ATOMIC_OP_RETURN(op, c_op) \
-static inline int atomic_##op##_return(int i, atomic_t *v) \
-{ \
- unsigned long tmp, ret, flags; \
- \
- raw_local_irq_save(flags); \
- \
- asm volatile ( \
- " ldw %0, (%3) \n" \
- " " #op " %0, %2 \n" \
- " stw %0, (%3) \n" \
- " mov %1, %0 \n" \
- : "=&r" (tmp), "=&r" (ret) \
- : "r" (i), "r"(&v->counter) \
- : "memory"); \
- \
- raw_local_irq_restore(flags); \
- \
- return ret; \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op) \
-static inline int atomic_fetch_##op(int i, atomic_t *v) \
-{ \
- unsigned long tmp, ret, flags; \
- \
- raw_local_irq_save(flags); \
- \
- asm volatile ( \
- " ldw %0, (%3) \n" \
- " mov %1, %0 \n" \
- " " #op " %0, %2 \n" \
- " stw %0, (%3) \n" \
- : "=&r" (tmp), "=&r" (ret) \
- : "r" (i), "r"(&v->counter) \
- : "memory"); \
- \
- raw_local_irq_restore(flags); \
- \
- return ret; \
-}
-
-#endif /* CONFIG_CPU_HAS_LDSTEX */
-
-#define atomic_add_return atomic_add_return
-ATOMIC_OP_RETURN(add, +)
-#define atomic_sub_return atomic_sub_return
-ATOMIC_OP_RETURN(sub, -)
-
-#define atomic_fetch_add atomic_fetch_add
-ATOMIC_FETCH_OP(add, +)
-#define atomic_fetch_sub atomic_fetch_sub
-ATOMIC_FETCH_OP(sub, -)
-#define atomic_fetch_and atomic_fetch_and
-ATOMIC_FETCH_OP(and, &)
-#define atomic_fetch_or atomic_fetch_or
-ATOMIC_FETCH_OP(or, |)
-#define atomic_fetch_xor atomic_fetch_xor
-ATOMIC_FETCH_OP(xor, ^)
-
-#define atomic_and atomic_and
-ATOMIC_OP(and, &)
-#define atomic_or atomic_or
-ATOMIC_OP(or, |)
-#define atomic_xor atomic_xor
-ATOMIC_OP(xor, ^)
-
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
-#include <asm-generic/atomic.h>
-
-#endif /* __ASM_CSKY_ATOMIC_H */
diff --git a/arch/csky/include/asm/barrier.h b/arch/csky/include/asm/barrier.h
index a430e7fddf35..84fc600c8b45 100644
--- a/arch/csky/include/asm/barrier.h
+++ b/arch/csky/include/asm/barrier.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_BARRIER_H
#define __ASM_CSKY_BARRIER_H
@@ -8,6 +7,61 @@
#define nop() asm volatile ("nop\n":::"memory")
+#ifdef CONFIG_SMP
+
+/*
+ * bar.brwarws: ordering barrier for all load/store instructions
+ * before/after
+ *
+ * |31|30 26|25 21|20 16|15 10|9 5|4 0|
+ * 1 10000 00000 00000 100001 00001 0 bw br aw ar
+ *
+ * b: before
+ * a: after
+ * r: read
+ * w: write
+ *
+ * Here are all combinations:
+ *
+ * bar.brw
+ * bar.br
+ * bar.bw
+ * bar.arw
+ * bar.ar
+ * bar.aw
+ * bar.brwarw
+ * bar.brarw
+ * bar.bwarw
+ * bar.brwar
+ * bar.brwaw
+ * bar.brar
+ * bar.bwaw
+ */
+#define __bar_brw() asm volatile (".long 0x842cc000\n":::"memory")
+#define __bar_br() asm volatile (".long 0x8424c000\n":::"memory")
+#define __bar_bw() asm volatile (".long 0x8428c000\n":::"memory")
+#define __bar_arw() asm volatile (".long 0x8423c000\n":::"memory")
+#define __bar_ar() asm volatile (".long 0x8421c000\n":::"memory")
+#define __bar_aw() asm volatile (".long 0x8422c000\n":::"memory")
+#define __bar_brwarw() asm volatile (".long 0x842fc000\n":::"memory")
+#define __bar_brarw() asm volatile (".long 0x8427c000\n":::"memory")
+#define __bar_bwarw() asm volatile (".long 0x842bc000\n":::"memory")
+#define __bar_brwar() asm volatile (".long 0x842dc000\n":::"memory")
+#define __bar_brwaw() asm volatile (".long 0x842ec000\n":::"memory")
+#define __bar_brar() asm volatile (".long 0x8425c000\n":::"memory")
+#define __bar_bwaw() asm volatile (".long 0x842ac000\n":::"memory")
+
+#define __smp_mb() __bar_brwarw()
+#define __smp_rmb() __bar_brar()
+#define __smp_wmb() __bar_bwaw()
+
+#define ACQUIRE_FENCE ".long 0x8427c000\n"
+#define __smp_acquire_fence() __bar_brarw()
+#define __smp_release_fence() __bar_brwaw()
+
+#endif /* CONFIG_SMP */
+
/*
* sync: completion barrier, all sync.xx instructions
* guarantee the last response received by bus transaction
@@ -15,31 +69,14 @@
* sync.s: inherit from sync, but also shareable to other cores
* sync.i: inherit from sync, but also flush cpu pipeline
* sync.is: the same with sync.i + sync.s
- *
- * bar.brwarw: ordering barrier for all load/store instructions before it
- * bar.brwarws: ordering barrier for all load/store instructions before it
- * and shareable to other cores
- * bar.brar: ordering barrier for all load instructions before it
- * bar.brars: ordering barrier for all load instructions before it
- * and shareable to other cores
- * bar.bwaw: ordering barrier for all store instructions before it
- * bar.bwaws: ordering barrier for all store instructions before it
- * and shareable to other cores
*/
+#define mb() asm volatile ("sync\n":::"memory")
#ifdef CONFIG_CPU_HAS_CACHEV2
-#define mb() asm volatile ("sync.s\n":::"memory")
-
-#ifdef CONFIG_SMP
-#define __smp_mb() asm volatile ("bar.brwarws\n":::"memory")
-#define __smp_rmb() asm volatile ("bar.brars\n":::"memory")
-#define __smp_wmb() asm volatile ("bar.bwaws\n":::"memory")
-#endif /* CONFIG_SMP */
-
-#define sync_is() asm volatile ("sync.is\n":::"memory")
-
-#else /* !CONFIG_CPU_HAS_CACHEV2 */
-#define mb() asm volatile ("sync\n":::"memory")
+/*
+ * Use three sync.is instructions to prevent a speculative page table walk (PTW)
+ */
+#define sync_is() asm volatile ("sync.is\nsync.is\nsync.is\n":::"memory")
#endif
#include <asm-generic/barrier.h>
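The opcode words follow directly from the bit layout in the comment:
starting from a base word of 0x8420c000, bits 16-19 select ar, aw, br
and bw respectively. A standalone check of a few of the encodings:

#include <assert.h>

#define BAR(bw, br, aw, ar) \
	(0x8420c000u | ((bw) << 19) | ((br) << 18) | ((aw) << 17) | ((ar) << 16))

int main(void)
{
	assert(BAR(1, 1, 1, 1) == 0x842fc000u);	/* __bar_brwarw (__smp_mb) */
	assert(BAR(1, 1, 0, 0) == 0x842cc000u);	/* __bar_brw */
	assert(BAR(0, 1, 0, 1) == 0x8425c000u);	/* __bar_brar (__smp_rmb) */
	assert(BAR(0, 1, 1, 1) == 0x8427c000u);	/* __bar_brarw (ACQUIRE_FENCE) */
	assert(BAR(1, 0, 1, 0) == 0x842ac000u);	/* __bar_bwaw (__smp_wmb) */
	return 0;
}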
diff --git a/arch/csky/include/asm/bitops.h b/arch/csky/include/asm/bitops.h
index 43b9838bff63..91818787d860 100644
--- a/arch/csky/include/asm/bitops.h
+++ b/arch/csky/include/asm/bitops.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_BITOPS_H
#define __ASM_CSKY_BITOPS_H
diff --git a/arch/csky/include/asm/bug.h b/arch/csky/include/asm/bug.h
index 33ebd16b9c78..03f1a5f9184a 100644
--- a/arch/csky/include/asm/bug.h
+++ b/arch/csky/include/asm/bug.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_BUG_H
#define __ASM_CSKY_BUG_H
@@ -21,6 +20,8 @@ do { \
struct pt_regs;
void die(struct pt_regs *regs, const char *str);
+void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
+
void show_regs(struct pt_regs *regs);
void show_code(struct pt_regs *regs);
diff --git a/arch/csky/include/asm/cacheflush.h b/arch/csky/include/asm/cacheflush.h
index f0b8f25429a2..d0f9eafe8988 100644
--- a/arch/csky/include/asm/cacheflush.h
+++ b/arch/csky/include/asm/cacheflush.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_CACHEFLUSH_H
#define __ASM_CSKY_CACHEFLUSH_H
diff --git a/arch/csky/include/asm/checksum.h b/arch/csky/include/asm/checksum.h
index 7685824291b1..aa12ef4b9080 100644
--- a/arch/csky/include/asm/checksum.h
+++ b/arch/csky/include/asm/checksum.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_CHECKSUM_H
#define __ASM_CSKY_CHECKSUM_H
diff --git a/arch/csky/include/asm/clocksource.h b/arch/csky/include/asm/clocksource.h
new file mode 100644
index 000000000000..54da0e49efa1
--- /dev/null
+++ b/arch/csky/include/asm/clocksource.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_CSKY_CLOCKSOURCE_H
+#define __ASM_VDSO_CSKY_CLOCKSOURCE_H
+
+#include <asm/vdso/clocksource.h>
+
+#endif
diff --git a/arch/csky/include/asm/cmpxchg.h b/arch/csky/include/asm/cmpxchg.h
index 89224530a0ee..dabc8e46ce7b 100644
--- a/arch/csky/include/asm/cmpxchg.h
+++ b/arch/csky/include/asm/cmpxchg.h
@@ -3,12 +3,12 @@
#ifndef __ASM_CSKY_CMPXCHG_H
#define __ASM_CSKY_CMPXCHG_H
-#ifdef CONFIG_CPU_HAS_LDSTEX
+#ifdef CONFIG_SMP
#include <asm/barrier.h>
extern void __bad_xchg(void);
-#define __xchg(new, ptr, size) \
+#define __xchg_relaxed(new, ptr, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
@@ -16,7 +16,6 @@ extern void __bad_xchg(void);
unsigned long tmp; \
switch (size) { \
case 4: \
- smp_mb(); \
asm volatile ( \
"1: ldex.w %0, (%3) \n" \
" mov %1, %2 \n" \
@@ -25,7 +24,6 @@ extern void __bad_xchg(void);
: "=&r" (__ret), "=&r" (tmp) \
: "r" (__new), "r"(__ptr) \
:); \
- smp_mb(); \
break; \
default: \
__bad_xchg(); \
@@ -33,9 +31,10 @@ extern void __bad_xchg(void);
__ret; \
})
-#define xchg(ptr, x) (__xchg((x), (ptr), sizeof(*(ptr))))
+#define xchg_relaxed(ptr, x) \
+ (__xchg_relaxed((x), (ptr), sizeof(*(ptr))))
-#define __cmpxchg(ptr, old, new, size) \
+#define __cmpxchg_relaxed(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
@@ -44,7 +43,6 @@ extern void __bad_xchg(void);
__typeof__(*(ptr)) __ret; \
switch (size) { \
case 4: \
- smp_mb(); \
asm volatile ( \
"1: ldex.w %0, (%3) \n" \
" cmpne %0, %4 \n" \
@@ -56,7 +54,6 @@ extern void __bad_xchg(void);
: "=&r" (__ret), "=&r" (__tmp) \
: "r" (__new), "r"(__ptr), "r"(__old) \
:); \
- smp_mb(); \
break; \
default: \
__bad_xchg(); \
@@ -64,8 +61,18 @@ extern void __bad_xchg(void);
__ret; \
})
-#define cmpxchg(ptr, o, n) \
- (__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
+#define cmpxchg_relaxed(ptr, o, n) \
+ (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
+
+#define cmpxchg(ptr, o, n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ __smp_release_fence(); \
+ __ret = cmpxchg_relaxed(ptr, o, n); \
+ __smp_acquire_fence(); \
+ __ret; \
+})
+
#else
#include <asm-generic/cmpxchg.h>
#endif
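cmpxchg() is now composed as release fence + relaxed operation +
acquire fence. For illustration only (this patch leaves xchg()
relaxed), a fully ordered xchg() would follow the same shape:

/* Hypothetical sketch, not part of the patch: */
#define xchg(ptr, x)					\
({							\
	__typeof__(*(ptr)) __ret;			\
	__smp_release_fence();				\
	__ret = xchg_relaxed(ptr, x);			\
	__smp_acquire_fence();				\
	__ret;						\
})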
diff --git a/arch/csky/include/asm/elf.h b/arch/csky/include/asm/elf.h
index eb2cc5a673b5..48b83e283ed4 100644
--- a/arch/csky/include/asm/elf.h
+++ b/arch/csky/include/asm/elf.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_ELF_H
#define __ASM_CSKY_ELF_H
diff --git a/arch/csky/include/asm/fixmap.h b/arch/csky/include/asm/fixmap.h
index 4b589cc20900..49a77cbbe2a9 100644
--- a/arch/csky/include/asm/fixmap.h
+++ b/arch/csky/include/asm/fixmap.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_FIXMAP_H
#define __ASM_CSKY_FIXMAP_H
diff --git a/arch/csky/include/asm/ftrace.h b/arch/csky/include/asm/ftrace.h
index fae72b0b1374..9b86341731b6 100644
--- a/arch/csky/include/asm/ftrace.h
+++ b/arch/csky/include/asm/ftrace.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_FTRACE_H
#define __ASM_CSKY_FTRACE_H
diff --git a/arch/csky/include/asm/futex.h b/arch/csky/include/asm/futex.h
new file mode 100644
index 000000000000..6cfd312723fa
--- /dev/null
+++ b/arch/csky/include/asm/futex.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_FUTEX_H
+#define __ASM_CSKY_FUTEX_H
+
+#ifndef CONFIG_SMP
+#include <asm-generic/futex.h>
+#else
+#include <linux/atomic.h>
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
+{ \
+ u32 tmp; \
+ \
+ __atomic_pre_full_fence(); \
+ \
+ __asm__ __volatile__ ( \
+ "1: ldex.w %[ov], %[u] \n" \
+ " "insn" \n" \
+ "2: stex.w %[t], %[u] \n" \
+ " bez %[t], 1b \n" \
+ " br 4f \n" \
+ "3: mov %[r], %[e] \n" \
+ "4: \n" \
+ " .section __ex_table,\"a\" \n" \
+ " .balign 4 \n" \
+ " .long 1b, 3b \n" \
+ " .long 2b, 3b \n" \
+ " .previous \n" \
+ : [r] "+r" (ret), [ov] "=&r" (oldval), \
+ [u] "+m" (*uaddr), [t] "=&r" (tmp) \
+ : [op] "Jr" (oparg), [e] "jr" (-EFAULT) \
+ : "memory"); \
+ \
+ __atomic_post_full_fence(); \
+}
+
+static inline int
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
+{
+ int oldval = 0, ret = 0;
+
+ if (!access_ok(uaddr, sizeof(u32)))
+ return -EFAULT;
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ __futex_atomic_op("mov %[t], %[ov]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ADD:
+ __futex_atomic_op("add %[t], %[ov], %[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_OR:
+ __futex_atomic_op("or %[t], %[ov], %[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ case FUTEX_OP_ANDN:
+ __futex_atomic_op("and %[t], %[ov], %[op]",
+ ret, oldval, uaddr, ~oparg);
+ break;
+ case FUTEX_OP_XOR:
+ __futex_atomic_op("xor %[t], %[ov], %[op]",
+ ret, oldval, uaddr, oparg);
+ break;
+ default:
+ ret = -ENOSYS;
+ }
+
+ if (!ret)
+ *oval = oldval;
+
+ return ret;
+}
+
+
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+ u32 oldval, u32 newval)
+{
+ int ret = 0;
+ u32 val, tmp;
+
+ if (!access_ok(uaddr, sizeof(u32)))
+ return -EFAULT;
+
+ __atomic_pre_full_fence();
+
+ __asm__ __volatile__ (
+ "1: ldex.w %[v], %[u] \n"
+ " cmpne %[v], %[ov] \n"
+ " bt 4f \n"
+ " mov %[t], %[nv] \n"
+ "2: stex.w %[t], %[u] \n"
+ " bez %[t], 1b \n"
+ " br 4f \n"
+ "3: mov %[r], %[e] \n"
+ "4: \n"
+ " .section __ex_table,\"a\" \n"
+ " .balign 4 \n"
+ " .long 1b, 3b \n"
+ " .long 2b, 3b \n"
+ " .previous \n"
+ : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr),
+ [t] "=&r" (tmp)
+ : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "Jr" (-EFAULT)
+ : "memory");
+
+ __atomic_post_full_fence();
+
+ *uval = val;
+ return ret;
+}
+
+#endif /* CONFIG_SMP */
+#endif /* __ASM_CSKY_FUTEX_H */
diff --git a/arch/csky/include/asm/highmem.h b/arch/csky/include/asm/highmem.h
index 1f4ed3f4c0d9..1ed810effb3d 100644
--- a/arch/csky/include/asm/highmem.h
+++ b/arch/csky/include/asm/highmem.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_HIGHMEM_H
#define __ASM_CSKY_HIGHMEM_H
diff --git a/arch/csky/include/asm/io.h b/arch/csky/include/asm/io.h
index e909587f24c5..f82654053dc0 100644
--- a/arch/csky/include/asm/io.h
+++ b/arch/csky/include/asm/io.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_IO_H
#define __ASM_CSKY_IO_H
diff --git a/arch/csky/include/asm/memory.h b/arch/csky/include/asm/memory.h
index a65c6759f537..d12179801ae3 100644
--- a/arch/csky/include/asm/memory.h
+++ b/arch/csky/include/asm/memory.h
@@ -10,7 +10,7 @@
#define FIXADDR_TOP _AC(0xffffc000, UL)
#define PKMAP_BASE _AC(0xff800000, UL)
-#define VMALLOC_START _AC(0xc0008000, UL)
+#define VMALLOC_START (PAGE_OFFSET + LOWMEM_LIMIT + (PAGE_SIZE * 8))
#define VMALLOC_END (PKMAP_BASE - (PAGE_SIZE * 2))
#ifdef CONFIG_HAVE_TCM
diff --git a/arch/csky/include/asm/mmu.h b/arch/csky/include/asm/mmu.h
index 26fbb1d15df0..d78321901d06 100644
--- a/arch/csky/include/asm/mmu.h
+++ b/arch/csky/include/asm/mmu.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_MMU_H
#define __ASM_CSKY_MMU_H
diff --git a/arch/csky/include/asm/mmu_context.h b/arch/csky/include/asm/mmu_context.h
index b227d29393a8..95d99b30792c 100644
--- a/arch/csky/include/asm/mmu_context.h
+++ b/arch/csky/include/asm/mmu_context.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_MMU_CONTEXT_H
#define __ASM_CSKY_MMU_CONTEXT_H
@@ -14,12 +13,6 @@
#include <linux/sched.h>
#include <abi/ckmmu.h>
-#define TLBMISS_HANDLER_SETUP_PGD(pgd) \
- setup_pgd(__pa(pgd), false)
-
-#define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \
- setup_pgd(__pa(pgd), true)
-
#define ASID_MASK ((1 << CONFIG_CPU_ASID_BITS) - 1)
#define cpu_asid(mm) (atomic64_read(&mm->context.asid) & ASID_MASK)
@@ -36,8 +29,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (prev != next)
check_and_switch_context(next, cpu);
- TLBMISS_HANDLER_SETUP_PGD(next->pgd);
- write_mmu_entryhi(next->context.asid.counter);
+ setup_pgd(next->pgd, next->context.asid.counter);
flush_icache_deferred(next);
}
diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h
index 9b98bf31d57c..3b91fc3cf36f 100644
--- a/arch/csky/include/asm/page.h
+++ b/arch/csky/include/asm/page.h
@@ -24,7 +24,7 @@
* address region. We use them mapping kernel 1GB direct-map address area and
* for more than 1GB of memory we use highmem.
*/
-#define PAGE_OFFSET 0x80000000
+#define PAGE_OFFSET CONFIG_PAGE_OFFSET
#define SSEG_SIZE 0x20000000
#define LOWMEM_LIMIT (SSEG_SIZE * 2)
diff --git a/arch/csky/include/asm/perf_event.h b/arch/csky/include/asm/perf_event.h
index 572093e11001..249905d8a4e8 100644
--- a/arch/csky/include/asm/perf_event.h
+++ b/arch/csky/include/asm/perf_event.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_PERF_EVENT_H
#define __ASM_CSKY_PERF_EVENT_H
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index d58d8146b729..cd211aabbefd 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_PGALLOC_H
#define __ASM_CSKY_PGALLOC_H
@@ -71,7 +70,7 @@ do { \
} while (0)
extern void pagetable_init(void);
-extern void pre_mmu_init(void);
+extern void mmu_init(unsigned long min_pfn, unsigned long max_pfn);
extern void pre_trap_init(void);
#endif /* __ASM_CSKY_PGALLOC_H */
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 2002cb7f1053..0d60367b6bfa 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_PGTABLE_H
#define __ASM_CSKY_PGTABLE_H
@@ -14,7 +13,7 @@
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-#define USER_PTRS_PER_PGD (0x80000000UL/PGDIR_SIZE)
+#define USER_PTRS_PER_PGD (PAGE_OFFSET/PGDIR_SIZE)
#define FIRST_USER_ADDRESS 0UL
/*
@@ -34,23 +33,13 @@
#define pmd_page(pmd) (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
#define pte_clear(mm, addr, ptep) set_pte((ptep), \
- (((unsigned int) addr & PAGE_OFFSET) ? __pte(_PAGE_GLOBAL) : __pte(0)))
+ (((unsigned int) addr >= PAGE_OFFSET) ? __pte(_PAGE_GLOBAL) : __pte(0)))
#define pte_none(pte) (!(pte_val(pte) & ~_PAGE_GLOBAL))
#define pte_present(pte) (pte_val(pte) & _PAGE_PRESENT)
#define pte_pfn(x) ((unsigned long)((x).pte_low >> PAGE_SHIFT))
#define pfn_pte(pfn, prot) __pte(((unsigned long long)(pfn) << PAGE_SHIFT) \
| pgprot_val(prot))
-#define __READABLE (_PAGE_READ | _PAGE_VALID | _PAGE_ACCESSED)
-#define __WRITEABLE (_PAGE_WRITE | _PAGE_DIRTY | _PAGE_MODIFIED)
-
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED | \
- _CACHE_MASK)
-
-#define __swp_type(x) (((x).val >> 4) & 0xff)
-#define __swp_offset(x) ((x).val >> 12)
-#define __swp_entry(type, offset) ((swp_entry_t) {((type) << 4) | \
- ((offset) << 12) })
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
@@ -59,41 +48,52 @@
pgprot_val(pgprot))
/*
- * CSKY can't do page protection for execute, and considers that the same like
- * read. Also, write permissions imply read permissions. This is the closest
- * we can get by reasonable means..
+ * C-SKY only has VALID and DIRTY bits in hardware, so we use these two
+ * bits to emulate PRESENT, READ, WRITE, EXEC, MODIFIED and ACCESSED.
*/
-#define PAGE_NONE __pgprot(_PAGE_PRESENT | _CACHE_CACHED)
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED)
+
+#define PAGE_NONE __pgprot(_PAGE_PROT_NONE)
+#define PAGE_READ __pgprot(_PAGE_BASE | _PAGE_READ | \
_CACHE_CACHED)
-#define PAGE_COPY __pgprot(_PAGE_PRESENT | _PAGE_READ | _CACHE_CACHED)
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_READ | _CACHE_CACHED)
-#define PAGE_KERNEL __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
- _PAGE_GLOBAL | _CACHE_CACHED)
-#define PAGE_USERIO __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define PAGE_WRITE __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE | \
_CACHE_CACHED)
+#define PAGE_SHARED PAGE_WRITE
+
+#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_VALID | \
+ _PAGE_WRITE | _PAGE_DIRTY | _PAGE_MODIFIED | \
+ _PAGE_GLOBAL | \
+ _CACHE_CACHED)
+
+#define _PAGE_IOREMAP (_PAGE_BASE | _PAGE_READ | _PAGE_VALID | \
+ _PAGE_WRITE | _PAGE_DIRTY | _PAGE_MODIFIED | \
+ _PAGE_GLOBAL | \
+ _CACHE_UNCACHED | _PAGE_SO)
+
+#define _PAGE_CHG_MASK (~(unsigned long) \
+ (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+ _CACHE_MASK | _PAGE_GLOBAL))
-#define _PAGE_IOREMAP \
- (_PAGE_PRESENT | __READABLE | __WRITEABLE | _PAGE_GLOBAL | \
- _CACHE_UNCACHED | _PAGE_SO)
+#define MAX_SWAPFILES_CHECK() \
+ BUILD_BUG_ON(MAX_SWAPFILES_SHIFT != 5)
#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY
-#define __P101 PAGE_READONLY
-#define __P110 PAGE_COPY
-#define __P111 PAGE_COPY
+#define __P001 PAGE_READ
+#define __P010 PAGE_READ
+#define __P011 PAGE_READ
+#define __P100 PAGE_READ
+#define __P101 PAGE_READ
+#define __P110 PAGE_READ
+#define __P111 PAGE_READ
#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY
-#define __S101 PAGE_READONLY
-#define __S110 PAGE_SHARED
-#define __S111 PAGE_SHARED
+#define __S001 PAGE_READ
+#define __S010 PAGE_WRITE
+#define __S011 PAGE_WRITE
+#define __S100 PAGE_READ
+#define __S101 PAGE_READ
+#define __S110 PAGE_WRITE
+#define __S111 PAGE_WRITE
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
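For context, a sketch (not part of the patch) of roughly how generic
mm consumes the __P/__S tables above; with this change every private
mapping resolves to PAGE_READ or PAGE_NONE, and writable private pages
are only established through the write-fault (copy-on-write) path:

/* Roughly vm_get_page_prot() from mm/mmap.c of this era; details
 * differ across kernel versions. */
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	return protection_map[vm_flags &
			      (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
}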
diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h
index 4800f6563abb..9e933021fe8e 100644
--- a/arch/csky/include/asm/processor.h
+++ b/arch/csky/include/asm/processor.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_PROCESSOR_H
#define __ASM_CSKY_PROCESSOR_H
@@ -28,7 +27,7 @@ extern struct cpuinfo_csky cpu_data[];
* for a 64 bit kernel expandable to 8192EB, of which the current CSKY
* implementations will "only" be able to use 1TB ...
*/
-#define TASK_SIZE 0x7fff8000UL
+#define TASK_SIZE (PAGE_OFFSET - (PAGE_SIZE * 8))
#ifdef __KERNEL__
#define STACK_TOP TASK_SIZE
diff --git a/arch/csky/include/asm/ptrace.h b/arch/csky/include/asm/ptrace.h
index 91ceb1b454c9..4202aab6df42 100644
--- a/arch/csky/include/asm/ptrace.h
+++ b/arch/csky/include/asm/ptrace.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_PTRACE_H
#define __ASM_CSKY_PTRACE_H
diff --git a/arch/csky/include/asm/segment.h b/arch/csky/include/asm/segment.h
index 79ede9b1a646..589e8321dc14 100644
--- a/arch/csky/include/asm/segment.h
+++ b/arch/csky/include/asm/segment.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_SEGMENT_H
#define __ASM_CSKY_SEGMENT_H
@@ -10,7 +9,7 @@ typedef struct {
#define KERNEL_DS ((mm_segment_t) { 0xFFFFFFFF })
-#define USER_DS ((mm_segment_t) { 0x80000000UL })
+#define USER_DS ((mm_segment_t) { PAGE_OFFSET })
#define get_fs() (current_thread_info()->addr_limit)
#define set_fs(x) (current_thread_info()->addr_limit = (x))
#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
diff --git a/arch/csky/include/asm/shmparam.h b/arch/csky/include/asm/shmparam.h
index efafe4c79fed..2fe6cea0dae9 100644
--- a/arch/csky/include/asm/shmparam.h
+++ b/arch/csky/include/asm/shmparam.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_SHMPARAM_H
#define __ASM_CSKY_SHMPARAM_H
diff --git a/arch/csky/include/asm/spinlock.h b/arch/csky/include/asm/spinlock.h
index 7cf3f2b34cea..69f5aa249c5f 100644
--- a/arch/csky/include/asm/spinlock.h
+++ b/arch/csky/include/asm/spinlock.h
@@ -6,8 +6,6 @@
#include <linux/spinlock_types.h>
#include <asm/barrier.h>
-#ifdef CONFIG_QUEUED_RWLOCKS
-
/*
* Ticket-based spin-locking.
*/
@@ -88,169 +86,4 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
#include <asm/qrwlock.h>
-/* See include/linux/spinlock.h */
-#define smp_mb__after_spinlock() smp_mb()
-
-#else /* CONFIG_QUEUED_RWLOCKS */
-
-/*
- * Test-and-set spin-locking.
- */
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
- u32 *p = &lock->lock;
- u32 tmp;
-
- asm volatile (
- "1: ldex.w %0, (%1) \n"
- " bnez %0, 1b \n"
- " movi %0, 1 \n"
- " stex.w %0, (%1) \n"
- " bez %0, 1b \n"
- : "=&r" (tmp)
- : "r"(p)
- : "cc");
- smp_mb();
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
- smp_mb();
- WRITE_ONCE(lock->lock, 0);
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
- u32 *p = &lock->lock;
- u32 tmp;
-
- asm volatile (
- "1: ldex.w %0, (%1) \n"
- " bnez %0, 2f \n"
- " movi %0, 1 \n"
- " stex.w %0, (%1) \n"
- " bez %0, 1b \n"
- " movi %0, 0 \n"
- "2: \n"
- : "=&r" (tmp)
- : "r"(p)
- : "cc");
-
- if (!tmp)
- smp_mb();
-
- return !tmp;
-}
-
-#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
-
-/*
- * read lock/unlock/trylock
- */
-static inline void arch_read_lock(arch_rwlock_t *lock)
-{
- u32 *p = &lock->lock;
- u32 tmp;
-
- asm volatile (
- "1: ldex.w %0, (%1) \n"
- " blz %0, 1b \n"
- " addi %0, 1 \n"
- " stex.w %0, (%1) \n"
- " bez %0, 1b \n"
- : "=&r" (tmp)
- : "r"(p)
- : "cc");
- smp_mb();
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *lock)
-{
- u32 *p = &lock->lock;
- u32 tmp;
-
- smp_mb();
- asm volatile (
- "1: ldex.w %0, (%1) \n"
- " subi %0, 1 \n"
- " stex.w %0, (%1) \n"
- " bez %0, 1b \n"
- : "=&r" (tmp)
- : "r"(p)
- : "cc");
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *lock)
-{
- u32 *p = &lock->lock;
- u32 tmp;
-
- asm volatile (
- "1: ldex.w %0, (%1) \n"
- " blz %0, 2f \n"
- " addi %0, 1 \n"
- " stex.w %0, (%1) \n"
- " bez %0, 1b \n"
- " movi %0, 0 \n"
- "2: \n"
- : "=&r" (tmp)
- : "r"(p)
- : "cc");
-
- if (!tmp)
- smp_mb();
-
- return !tmp;
-}
-
-/*
- * write lock/unlock/trylock
- */
-static inline void arch_write_lock(arch_rwlock_t *lock)
-{
- u32 *p = &lock->lock;
- u32 tmp;
-
- asm volatile (
- "1: ldex.w %0, (%1) \n"
- " bnez %0, 1b \n"
- " subi %0, 1 \n"
- " stex.w %0, (%1) \n"
- " bez %0, 1b \n"
- : "=&r" (tmp)
- : "r"(p)
- : "cc");
- smp_mb();
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *lock)
-{
- smp_mb();
- WRITE_ONCE(lock->lock, 0);
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *lock)
-{
- u32 *p = &lock->lock;
- u32 tmp;
-
- asm volatile (
- "1: ldex.w %0, (%1) \n"
- " bnez %0, 2f \n"
- " subi %0, 1 \n"
- " stex.w %0, (%1) \n"
- " bez %0, 1b \n"
- " movi %0, 0 \n"
- "2: \n"
- : "=&r" (tmp)
- : "r"(p)
- : "cc");
-
- if (!tmp)
- smp_mb();
-
- return !tmp;
-}
-
-#endif /* CONFIG_QUEUED_RWLOCKS */
#endif /* __ASM_CSKY_SPINLOCK_H */
diff --git a/arch/csky/include/asm/spinlock_types.h b/arch/csky/include/asm/spinlock_types.h
index 88b82438b182..8ff0f6ff3a00 100644
--- a/arch/csky/include/asm/spinlock_types.h
+++ b/arch/csky/include/asm/spinlock_types.h
@@ -22,16 +22,6 @@ typedef struct {
#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } }
-#ifdef CONFIG_QUEUED_RWLOCKS
#include <asm-generic/qrwlock_types.h>
-#else /* CONFIG_NR_CPUS > 2 */
-
-typedef struct {
- u32 lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED { 0 }
-
-#endif /* CONFIG_QUEUED_RWLOCKS */
#endif /* __ASM_CSKY_SPINLOCK_TYPES_H */
diff --git a/arch/csky/include/asm/string.h b/arch/csky/include/asm/string.h
index 73142de18355..a0d81e9d6b8f 100644
--- a/arch/csky/include/asm/string.h
+++ b/arch/csky/include/asm/string.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef _CSKY_STRING_MM_H_
#define _CSKY_STRING_MM_H_
diff --git a/arch/csky/include/asm/switch_to.h b/arch/csky/include/asm/switch_to.h
index 35a39e88933d..731e466415e2 100644
--- a/arch/csky/include/asm/switch_to.h
+++ b/arch/csky/include/asm/switch_to.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_SWITCH_TO_H
#define __ASM_CSKY_SWITCH_TO_H
diff --git a/arch/csky/include/asm/syscalls.h b/arch/csky/include/asm/syscalls.h
index 5d48e5e0082e..ea9ce6138b9b 100644
--- a/arch/csky/include/asm/syscalls.h
+++ b/arch/csky/include/asm/syscalls.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_SYSCALLS_H
#define __ASM_CSKY_SYSCALLS_H
diff --git a/arch/csky/include/asm/thread_info.h b/arch/csky/include/asm/thread_info.h
index 21456a3737c2..8c349a8f904d 100644
--- a/arch/csky/include/asm/thread_info.h
+++ b/arch/csky/include/asm/thread_info.h
@@ -1,12 +1,10 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef _ASM_CSKY_THREAD_INFO_H
#define _ASM_CSKY_THREAD_INFO_H
#ifndef __ASSEMBLY__
-#include <linux/version.h>
#include <asm/types.h>
#include <asm/page.h>
#include <asm/processor.h>
diff --git a/arch/csky/include/asm/tlb.h b/arch/csky/include/asm/tlb.h
index fdff9b8d70c8..3498e65f59f8 100644
--- a/arch/csky/include/asm/tlb.h
+++ b/arch/csky/include/asm/tlb.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_TLB_H
#define __ASM_CSKY_TLB_H
diff --git a/arch/csky/include/asm/tlbflush.h b/arch/csky/include/asm/tlbflush.h
index 6845b0667703..407160b4fde7 100644
--- a/arch/csky/include/asm/tlbflush.h
+++ b/arch/csky/include/asm/tlbflush.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_TLBFLUSH_H
#define __ASM_TLBFLUSH_H
diff --git a/arch/csky/include/asm/traps.h b/arch/csky/include/asm/traps.h
index 1c081805b962..421a4195e2fe 100644
--- a/arch/csky/include/asm/traps.h
+++ b/arch/csky/include/asm/traps.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_TRAPS_H
#define __ASM_CSKY_TRAPS_H
diff --git a/arch/csky/include/asm/uaccess.h b/arch/csky/include/asm/uaccess.h
index 1633ffe5ae15..3dec272e1fa3 100644
--- a/arch/csky/include/asm/uaccess.h
+++ b/arch/csky/include/asm/uaccess.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_UACCESS_H
#define __ASM_CSKY_UACCESS_H
diff --git a/arch/csky/include/asm/unistd.h b/arch/csky/include/asm/unistd.h
index da7a18295615..9cf97de9a26d 100644
--- a/arch/csky/include/asm/unistd.h
+++ b/arch/csky/include/asm/unistd.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#include <uapi/asm/unistd.h>
diff --git a/arch/csky/include/asm/vdso.h b/arch/csky/include/asm/vdso.h
index d963d691f3a1..eb5142f9c564 100644
--- a/arch/csky/include/asm/vdso.h
+++ b/arch/csky/include/asm/vdso.h
@@ -3,10 +3,25 @@
#ifndef __ASM_CSKY_VDSO_H
#define __ASM_CSKY_VDSO_H
-#include <abi/vdso.h>
+#include <linux/types.h>
-struct csky_vdso {
- unsigned short rt_signal_retcode[4];
+#ifndef GENERIC_TIME_VSYSCALL
+struct vdso_data {
};
+#endif
+
+/*
+ * The VDSO symbols are mapped into Linux so we can just use regular symbol
+ * addressing to get their offsets in userspace. The symbols are mapped at an
+ * offset of 0, but since the linker must support setting weak undefined
+ * symbols to the absolute address 0 it also happens to support other low
+ * addresses even when the code model suggests those low addresses would not
+ * otherwise be available.
+ */
+#define VDSO_SYMBOL(base, name) \
+({ \
+ extern const char __vdso_##name[]; \
+ (void __user *)((unsigned long)(base) + __vdso_##name); \
+})
#endif /* __ASM_CSKY_VDSO_H */
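A sketch of the intended use, assuming the vDSO image exports an
__vdso_rt_sigreturn symbol for the signal trampoline:

/* vdso_base is wherever the vDSO got mapped for this task. */
void __user *trampoline = VDSO_SYMBOL(vdso_base, rt_sigreturn);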
diff --git a/arch/csky/include/asm/vdso/clocksource.h b/arch/csky/include/asm/vdso/clocksource.h
new file mode 100644
index 000000000000..dfca7b4724b7
--- /dev/null
+++ b/arch/csky/include/asm/vdso/clocksource.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_CSKY_CLOCKSOURCE_H
+#define __ASM_VDSO_CSKY_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES \
+ VDSO_CLOCKMODE_ARCHTIMER
+
+#endif /* __ASM_VDSO_CSKY_CLOCKSOURCE_H */
diff --git a/arch/csky/include/asm/vdso/gettimeofday.h b/arch/csky/include/asm/vdso/gettimeofday.h
new file mode 100644
index 000000000000..6c4f1446944f
--- /dev/null
+++ b/arch/csky/include/asm/vdso/gettimeofday.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_CSKY_GETTIMEOFDAY_H
+#define __ASM_VDSO_CSKY_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/barrier.h>
+#include <asm/unistd.h>
+#include <abi/regdef.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES 1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+ struct timezone *_tz)
+{
+ register struct __kernel_old_timeval *tv asm("a0") = _tv;
+ register struct timezone *tz asm("a1") = _tz;
+ register long ret asm("a0");
+ register long nr asm(syscallid) = __NR_gettimeofday;
+
+ asm volatile ("trap 0\n"
+ : "=r" (ret)
+ : "r"(tv), "r"(tz), "r"(nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register clockid_t clkid asm("a0") = _clkid;
+ register struct __kernel_timespec *ts asm("a1") = _ts;
+ register long ret asm("a0");
+ register long nr asm(syscallid) = __NR_clock_gettime64;
+
+ asm volatile ("trap 0\n"
+ : "=r" (ret)
+ : "r"(clkid), "r"(ts), "r"(nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ register clockid_t clkid asm("a0") = _clkid;
+ register struct old_timespec32 *ts asm("a1") = _ts;
+ register long ret asm("a0");
+ register long nr asm(syscallid) = __NR_clock_gettime;
+
+ asm volatile ("trap 0\n"
+ : "=r" (ret)
+ : "r"(clkid), "r"(ts), "r"(nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+ register clockid_t clkid asm("a0") = _clkid;
+ register struct __kernel_timespec *ts asm("a1") = _ts;
+ register long ret asm("a0");
+ register long nr asm(syscallid) = __NR_clock_getres_time64;
+
+ asm volatile ("trap 0\n"
+ : "=r" (ret)
+ : "r"(clkid), "r"(ts), "r"(nr)
+ : "memory");
+
+ return ret;
+}
+
+static __always_inline
+int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+ register clockid_t clkid asm("a0") = _clkid;
+ register struct old_timespec32 *ts asm("a1") = _ts;
+ register long ret asm("a0");
+ register long nr asm(syscallid) = __NR_clock_getres;
+
+ asm volatile ("trap 0\n"
+ : "=r" (ret)
+ : "r"(clkid), "r"(ts), "r"(nr)
+ : "memory");
+
+ return ret;
+}
+
+uint64_t csky_pmu_read_cc(void);
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+ const struct vdso_data *vd)
+{
+#ifdef CONFIG_CSKY_PMU_V1
+ return csky_pmu_read_cc();
+#else
+ return 0;
+#endif
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+ return _vdso_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_CSKY_GETTIMEOFDAY_H */
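With these fallbacks plus GENERIC_GETTIMEOFDAY, the generic vDSO
library can satisfy clock_gettime() without trapping into the kernel.
A standalone userspace check (it cannot tell whether the vDSO path or
the trap was taken, but the call must succeed either way):

#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	if (clock_gettime(CLOCK_MONOTONIC, &ts))
		return 1;
	printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}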
diff --git a/arch/csky/include/asm/vdso/processor.h b/arch/csky/include/asm/vdso/processor.h
new file mode 100644
index 000000000000..39a6b561d0cc
--- /dev/null
+++ b/arch/csky/include/asm/vdso/processor.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_VDSO_CSKY_PROCESSOR_H
+#define __ASM_VDSO_CSKY_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+#define cpu_relax() barrier()
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_CSKY_PROCESSOR_H */
diff --git a/arch/csky/include/asm/vdso/vsyscall.h b/arch/csky/include/asm/vdso/vsyscall.h
new file mode 100644
index 000000000000..c276211a7c4d
--- /dev/null
+++ b/arch/csky/include/asm/vdso/vsyscall.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_CSKY_VSYSCALL_H
+#define __ASM_VDSO_CSKY_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <vdso/datapage.h>
+
+extern struct vdso_data *vdso_data;
+
+static __always_inline struct vdso_data *__csky_get_k_vdso_data(void)
+{
+ return vdso_data;
+}
+#define __arch_get_k_vdso_data __csky_get_k_vdso_data
+
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_CSKY_VSYSCALL_H */
diff --git a/arch/csky/include/uapi/asm/byteorder.h b/arch/csky/include/uapi/asm/byteorder.h
index d150cd664873..1aedd513b65a 100644
--- a/arch/csky/include/uapi/asm/byteorder.h
+++ b/arch/csky/include/uapi/asm/byteorder.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_BYTEORDER_H
#define __ASM_CSKY_BYTEORDER_H
diff --git a/arch/csky/include/uapi/asm/perf_regs.h b/arch/csky/include/uapi/asm/perf_regs.h
index 49d4e147a559..d0a8ac6a1b77 100644
--- a/arch/csky/include/uapi/asm/perf_regs.h
+++ b/arch/csky/include/uapi/asm/perf_regs.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef _ASM_CSKY_PERF_REGS_H
#define _ASM_CSKY_PERF_REGS_H
diff --git a/arch/csky/include/uapi/asm/ptrace.h b/arch/csky/include/uapi/asm/ptrace.h
index 66b2268e324e..3be9c14334a6 100644
--- a/arch/csky/include/uapi/asm/ptrace.h
+++ b/arch/csky/include/uapi/asm/ptrace.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef _CSKY_PTRACE_H
#define _CSKY_PTRACE_H
diff --git a/arch/csky/include/uapi/asm/sigcontext.h b/arch/csky/include/uapi/asm/sigcontext.h
index 670c020f2cb8..859afb602477 100644
--- a/arch/csky/include/uapi/asm/sigcontext.h
+++ b/arch/csky/include/uapi/asm/sigcontext.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_SIGCONTEXT_H
#define __ASM_CSKY_SIGCONTEXT_H
diff --git a/arch/csky/include/uapi/asm/unistd.h b/arch/csky/include/uapi/asm/unistd.h
index ba4018929733..7ff6a2466af1 100644
--- a/arch/csky/include/uapi/asm/unistd.h
+++ b/arch/csky/include/uapi/asm/unistd.h
@@ -1,5 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#define __ARCH_WANT_STAT64
#define __ARCH_WANT_NEW_STAT
diff --git a/arch/csky/kernel/Makefile b/arch/csky/kernel/Makefile
index 37f37c0e934a..6c0f36010ed0 100644
--- a/arch/csky/kernel/Makefile
+++ b/arch/csky/kernel/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
extra-y := head.o vmlinux.lds
-obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o
+obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
obj-y += power.o syscall.o syscall_table.o setup.o
obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
obj-y += probes/
diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S
index 3821ef9b7567..e73e548f7855 100644
--- a/arch/csky/kernel/atomic.S
+++ b/arch/csky/kernel/atomic.S
@@ -14,6 +14,10 @@
*/
ENTRY(csky_cmpxchg)
USPTOKSP
+
+ RD_MEH a3
+ WR_MEH a3
+
mfcr a3, epc
addi a3, TRAP0_SIZE
@@ -36,11 +40,11 @@ ENTRY(csky_cmpxchg)
2:
sync.is
#else
-1:
+GLOBAL(csky_cmpxchg_ldw)
ldw a3, (a2)
cmpne a0, a3
bt16 3f
-2:
+GLOBAL(csky_cmpxchg_stw)
stw a1, (a2)
3:
#endif
@@ -55,19 +59,3 @@ ENTRY(csky_cmpxchg)
KSPTOUSP
rte
END(csky_cmpxchg)
-
-#ifndef CONFIG_CPU_HAS_LDSTEX
-/*
- * Called from tlbmodified exception
- */
-ENTRY(csky_cmpxchg_fixup)
- mfcr a0, epc
- lrw a1, 2b
- cmpne a1, a0
- bt 1f
- subi a1, (2b - 1b)
- stw a1, (sp, LSAVE_PC)
-1:
- rts
-END(csky_cmpxchg_fixup)
-#endif
diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S
index 5a5cabd076e1..c1bd7a6b4ab6 100644
--- a/arch/csky/kernel/entry.S
+++ b/arch/csky/kernel/entry.S
@@ -13,10 +13,6 @@
#include <asm/page.h>
#include <asm/thread_info.h>
-#define PTE_INDX_MSK 0xffc
-#define PTE_INDX_SHIFT 10
-#define _PGDIR_SHIFT 22
-
.macro zero_fp
#ifdef CONFIG_STACKTRACE
movi r8, 0
@@ -41,108 +37,15 @@
#endif
.endm
-.macro tlbop_begin name, val0, val1, val2
-ENTRY(csky_\name)
- mtcr a3, ss2
- mtcr r6, ss3
- mtcr a2, ss4
-
- RD_PGDR r6
- RD_MEH a3
-#ifdef CONFIG_CPU_HAS_TLBI
- tlbi.vaas a3
- sync.is
-
- btsti a3, 31
- bf 1f
- RD_PGDR_K r6
-1:
-#else
- bgeni a2, 31
- WR_MCIR a2
- bgeni a2, 25
- WR_MCIR a2
-#endif
- bclri r6, 0
- lrw a2, va_pa_offset
- ld.w a2, (a2, 0)
- subu r6, a2
- bseti r6, 31
-
- mov a2, a3
- lsri a2, _PGDIR_SHIFT
- lsli a2, 2
- addu r6, a2
- ldw r6, (r6)
-
- lrw a2, va_pa_offset
- ld.w a2, (a2, 0)
- subu r6, a2
- bseti r6, 31
-
- lsri a3, PTE_INDX_SHIFT
- lrw a2, PTE_INDX_MSK
- and a3, a2
- addu r6, a3
- ldw a3, (r6)
-
- movi a2, (_PAGE_PRESENT | \val0)
- and a3, a2
- cmpne a3, a2
- bt \name
-
- /* First read/write the page, just update the flags */
- ldw a3, (r6)
- bgeni a2, PAGE_VALID_BIT
- bseti a2, PAGE_ACCESSED_BIT
- bseti a2, \val1
- bseti a2, \val2
- or a3, a2
- stw a3, (r6)
-
- /* Some cpu tlb-hardrefill bypass the cache */
-#ifdef CONFIG_CPU_NEED_TLBSYNC
- movi a2, 0x22
- bseti a2, 6
- mtcr r6, cr22
- mtcr a2, cr17
- sync
-#endif
-
- mfcr a3, ss2
- mfcr r6, ss3
- mfcr a2, ss4
- rte
-\name:
- mfcr a3, ss2
- mfcr r6, ss3
- mfcr a2, ss4
+.text
+ENTRY(csky_pagefault)
SAVE_ALL 0
-.endm
-.macro tlbop_end is_write
zero_fp
context_tracking
- RD_MEH a2
- psrset ee, ie
+ psrset ee
mov a0, sp
- movi a1, \is_write
jbsr do_page_fault
jmpi ret_from_exception
-.endm
-
-.text
-
-tlbop_begin tlbinvalidl, _PAGE_READ, PAGE_VALID_BIT, PAGE_ACCESSED_BIT
-tlbop_end 0
-
-tlbop_begin tlbinvalids, _PAGE_WRITE, PAGE_DIRTY_BIT, PAGE_MODIFIED_BIT
-tlbop_end 1
-
-tlbop_begin tlbmodified, _PAGE_WRITE, PAGE_DIRTY_BIT, PAGE_MODIFIED_BIT
-#ifndef CONFIG_CPU_HAS_LDSTEX
-jbsr csky_cmpxchg_fixup
-#endif
-tlbop_end 1
ENTRY(csky_systemcall)
SAVE_ALL TRAP0_SIZE
@@ -314,6 +217,9 @@ ENTRY(csky_trap)
ENTRY(csky_get_tls)
USPTOKSP
+ RD_MEH a0
+ WR_MEH a0
+
/* increase epc for continue */
mfcr a0, epc
addi a0, TRAP0_SIZE
diff --git a/arch/csky/kernel/head.S b/arch/csky/kernel/head.S
index 17ed9d250480..7e3e4f15b052 100644
--- a/arch/csky/kernel/head.S
+++ b/arch/csky/kernel/head.S
@@ -21,10 +21,16 @@ END(_start)
ENTRY(_start_smp_secondary)
SETUP_MMU
- /* copy msa1 from CPU0 */
- lrw r6, secondary_msa1
+#ifdef CONFIG_PAGE_OFFSET_80000000
+ lrw r6, secondary_msa1
ld.w r6, (r6, 0)
mtcr r6, cr<31, 15>
+#endif
+
+ lrw r6, secondary_pgd
+ ld.w r6, (r6, 0)
+ mtcr r6, cr<28, 15>
+ mtcr r6, cr<29, 15>
/* set stack point */
lrw r6, secondary_stack
diff --git a/arch/csky/kernel/perf_event.c b/arch/csky/kernel/perf_event.c
index 1a29f1157449..e5f18420ce64 100644
--- a/arch/csky/kernel/perf_event.c
+++ b/arch/csky/kernel/perf_event.c
@@ -87,7 +87,7 @@ static int csky_pmu_irq;
})
/* cycle counter */
-static uint64_t csky_pmu_read_cc(void)
+uint64_t csky_pmu_read_cc(void)
{
uint32_t lo, hi, tmp;
uint64_t result;
@@ -1319,7 +1319,7 @@ int csky_pmu_device_probe(struct platform_device *pdev,
pr_notice("[perf] PMU request irq fail!\n");
}
- ret = cpuhp_setup_state(CPUHP_AP_PERF_ONLINE, "AP_PERF_ONLINE",
+ ret = cpuhp_setup_state(CPUHP_AP_PERF_CSKY_ONLINE, "AP_PERF_ONLINE",
csky_pmu_starting_cpu,
csky_pmu_dying_cpu);
if (ret) {
diff --git a/arch/csky/kernel/probes/simulate-insn.c b/arch/csky/kernel/probes/simulate-insn.c
index 4e464fed52ec..d6e8d092c9b7 100644
--- a/arch/csky/kernel/probes/simulate-insn.c
+++ b/arch/csky/kernel/probes/simulate-insn.c
@@ -274,9 +274,9 @@ void __kprobes
simulate_bnezad32(u32 opcode, long addr, struct pt_regs *regs)
{
unsigned long tmp = opcode & 0x1f;
- unsigned long val;
+ long val;
- csky_insn_reg_get_val(regs, tmp, &val);
+ csky_insn_reg_get_val(regs, tmp, (unsigned long *)&val);
val -= 1;
@@ -286,7 +286,7 @@ simulate_bnezad32(u32 opcode, long addr, struct pt_regs *regs)
} else
instruction_pointer_set(regs, addr + 4);
- csky_insn_reg_set_val(regs, tmp, val);
+ csky_insn_reg_set_val(regs, tmp, (unsigned long)val);
}
void __kprobes
@@ -297,13 +297,11 @@ simulate_bhsz32(u32 opcode, long addr, struct pt_regs *regs)
csky_insn_reg_get_val(regs, tmp, &val);
- if (val >= 0) {
+ if ((long) val >= 0) {
instruction_pointer_set(regs,
addr + sign_extend32((opcode & 0xffff0000) >> 15, 15));
} else
instruction_pointer_set(regs, addr + 4);
-
- csky_insn_reg_set_val(regs, tmp, val);
}
void __kprobes
@@ -314,13 +312,11 @@ simulate_bhz32(u32 opcode, long addr, struct pt_regs *regs)
csky_insn_reg_get_val(regs, tmp, &val);
- if (val > 0) {
+ if ((long) val > 0) {
instruction_pointer_set(regs,
addr + sign_extend32((opcode & 0xffff0000) >> 15, 15));
} else
instruction_pointer_set(regs, addr + 4);
-
- csky_insn_reg_set_val(regs, tmp, val);
}
void __kprobes
@@ -331,13 +327,11 @@ simulate_blsz32(u32 opcode, long addr, struct pt_regs *regs)
csky_insn_reg_get_val(regs, tmp, &val);
- if (val <= 0) {
+ if ((long) val <= 0) {
instruction_pointer_set(regs,
addr + sign_extend32((opcode & 0xffff0000) >> 15, 15));
} else
instruction_pointer_set(regs, addr + 4);
-
- csky_insn_reg_set_val(regs, tmp, val);
}
void __kprobes
@@ -348,13 +342,11 @@ simulate_blz32(u32 opcode, long addr, struct pt_regs *regs)
csky_insn_reg_get_val(regs, tmp, &val);
- if (val < 0) {
+ if ((long) val < 0) {
instruction_pointer_set(regs,
addr + sign_extend32((opcode & 0xffff0000) >> 15, 15));
} else
instruction_pointer_set(regs, addr + 4);
-
- csky_insn_reg_set_val(regs, tmp, val);
}
void __kprobes
diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
index 69af6bc87e64..3d0ca22cd0e2 100644
--- a/arch/csky/kernel/process.c
+++ b/arch/csky/kernel/process.c
@@ -49,7 +49,7 @@ int copy_thread(unsigned long clone_flags,
/* setup thread.sp for switch_to !!! */
p->thread.sp = (unsigned long)childstack;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
childstack->r15 = (unsigned long) ret_from_kernel_thread;
childstack->r10 = kthread_arg;
diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c
index d822144906ac..0105ac81b432 100644
--- a/arch/csky/kernel/ptrace.c
+++ b/arch/csky/kernel/ptrace.c
@@ -22,6 +22,7 @@
#include <asm/asm-offsets.h>
#include <abi/regdef.h>
+#include <abi/ckmmu.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -83,7 +84,7 @@ static int gpr_get(struct task_struct *target,
/* Abiv1 regs->tls is fake and we need sync here. */
regs->tls = task_thread_info(target)->tp_value;
- return membuf_write(&to, regs, sizeof(regs));
+ return membuf_write(&to, regs, sizeof(*regs));
}
static int gpr_set(struct task_struct *target,
@@ -343,6 +344,124 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs)
trace_sys_exit(regs, syscall_get_return_value(current, regs));
}
+#ifdef CONFIG_CPU_CK860
+static void show_iutlb(void)
+{
+ int entry, i;
+ unsigned long flags;
+ unsigned long oldpid;
+ unsigned long entryhi[16], entrylo0[16], entrylo1[16];
+
+ oldpid = read_mmu_entryhi();
+
+ entry = 0x8000;
+
+ local_irq_save(flags);
+
+ for (i = 0; i < 16; i++) {
+ write_mmu_index(entry);
+ tlb_read();
+ entryhi[i] = read_mmu_entryhi();
+ entrylo0[i] = read_mmu_entrylo0();
+ entrylo1[i] = read_mmu_entrylo1();
+
+ entry++;
+ }
+
+ local_irq_restore(flags);
+
+ write_mmu_entryhi(oldpid);
+
+ printk("\n\n\n");
+ for (i = 0; i < 16; i++)
+ printk("iutlb[%d]: entryhi - 0x%lx; entrylo0 - 0x%lx;"
+ " entrylo1 - 0x%lx\n",
+ i, entryhi[i], entrylo0[i], entrylo1[i]);
+ printk("\n\n\n");
+}
+
+static void show_dutlb(void)
+{
+ int entry, i;
+ unsigned long flags;
+ unsigned long oldpid;
+ unsigned long entryhi[16], entrylo0[16], entrylo1[16];
+
+ oldpid = read_mmu_entryhi();
+
+ entry = 0x4000;
+
+ local_irq_save(flags);
+
+ for (i = 0; i < 16; i++) {
+ write_mmu_index(entry);
+ tlb_read();
+ entryhi[i] = read_mmu_entryhi();
+ entrylo0[i] = read_mmu_entrylo0();
+ entrylo1[i] = read_mmu_entrylo1();
+
+ entry++;
+ }
+
+ local_irq_restore(flags);
+
+ write_mmu_entryhi(oldpid);
+
+ printk("\n\n\n");
+ for (i = 0; i < 16; i++)
+ printk("dutlb[%d]: entryhi - 0x%lx; entrylo0 - 0x%lx;"
+ " entrylo1 - 0x%lx\n",
+ i, entryhi[i], entrylo0[i], entrylo1[i]);
+ printk("\n\n\n");
+}
+
+static unsigned long entryhi[1024], entrylo0[1024], entrylo1[1024];
+static void show_jtlb(void)
+{
+ int entry;
+ unsigned long flags;
+ unsigned long oldpid;
+
+ oldpid = read_mmu_entryhi();
+
+ entry = 0;
+
+ local_irq_save(flags);
+ while (entry < 1024) {
+ write_mmu_index(entry);
+ tlb_read();
+ entryhi[entry] = read_mmu_entryhi();
+ entrylo0[entry] = read_mmu_entrylo0();
+ entrylo1[entry] = read_mmu_entrylo1();
+
+ entry++;
+ }
+ local_irq_restore(flags);
+
+ write_mmu_entryhi(oldpid);
+
+ printk("\n\n\n");
+
+ for (entry = 0; entry < 1024; entry++)
+ printk("jtlb[%x]: entryhi - 0x%lx; entrylo0 - 0x%lx;"
+ " entrylo1 - 0x%lx\n",
+ entry, entryhi[entry], entrylo0[entry], entrylo1[entry]);
+ printk("\n\n\n");
+}
+
+static void show_tlb(void)
+{
+ show_iutlb();
+ show_dutlb();
+ show_jtlb();
+}
+#else
+static void show_tlb(void)
+{
+ return;
+}
+#endif
+
void show_regs(struct pt_regs *fp)
{
pr_info("\nCURRENT PROCESS:\n\n");
@@ -363,9 +482,10 @@ void show_regs(struct pt_regs *fp)
pr_info("PC: 0x%08lx (%pS)\n", (long)fp->pc, (void *)fp->pc);
pr_info("LR: 0x%08lx (%pS)\n", (long)fp->lr, (void *)fp->lr);
- pr_info("SP: 0x%08lx\n", (long)fp);
- pr_info("orig_a0: 0x%08lx\n", fp->orig_a0);
+ pr_info("SP: 0x%08lx\n", (long)fp->usp);
pr_info("PSR: 0x%08lx\n", (long)fp->sr);
+ pr_info("orig_a0: 0x%08lx\n", fp->orig_a0);
+ pr_info("PT_REGS: 0x%08lx\n", (long)fp);
pr_info(" a0: 0x%08lx a1: 0x%08lx a2: 0x%08lx a3: 0x%08lx\n",
fp->a0, fp->a1, fp->a2, fp->a3);
@@ -395,5 +515,7 @@ void show_regs(struct pt_regs *fp)
fp->regs[8], fp->regs[9]);
#endif
+ show_tlb();
+
return;
}
diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c
index e4cab16056d6..e93bc6f74432 100644
--- a/arch/csky/kernel/setup.c
+++ b/arch/csky/kernel/setup.c
@@ -45,13 +45,17 @@ static void __init csky_memblock_init(void)
if (size >= lowmem_size) {
max_low_pfn = min_low_pfn + lowmem_size;
+#ifdef CONFIG_PAGE_OFFSET_80000000
write_mmu_msa1(read_mmu_msa0() + SSEG_SIZE);
+#endif
} else if (size > sseg_size) {
max_low_pfn = min_low_pfn + sseg_size;
}
max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
+ mmu_init(min_low_pfn, max_low_pfn);
+
#ifdef CONFIG_HIGHMEM
max_zone_pfn[ZONE_HIGHMEM] = max_pfn;
@@ -101,16 +105,26 @@ void __init setup_arch(char **cmdline_p)
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
+static inline unsigned long read_mmu_msa(void)
+{
+#ifdef CONFIG_PAGE_OFFSET_80000000
+ return read_mmu_msa0();
+#endif
+
+#ifdef CONFIG_PAGE_OFFSET_A0000000
+ return read_mmu_msa1();
+#endif
+}
+
asmlinkage __visible void __init csky_start(unsigned int unused,
void *dtb_start)
{
/* Clean up bss section */
memset(__bss_start, 0, __bss_stop - __bss_start);
- va_pa_offset = read_mmu_msa0() & ~(SSEG_SIZE - 1);
+ va_pa_offset = read_mmu_msa() & ~(SSEG_SIZE - 1);
pre_trap_init();
- pre_mmu_init();
if (dtb_start == NULL)
early_init_dt_scan(__dtb_start);
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index 37ea64ed3c12..312f046d452d 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -134,7 +134,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
{
struct rt_sigframe *frame;
int err = 0;
- struct csky_vdso *vdso = current->mm->context.vdso;
frame = get_sigframe(ksig, regs, sizeof(*frame));
if (!access_ok(frame, sizeof(*frame)))
@@ -152,7 +151,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
return -EFAULT;
/* Set up to return from userspace. */
- regs->lr = (unsigned long)(vdso->rt_signal_retcode);
+ regs->lr = (unsigned long)VDSO_SYMBOL(
+ current->mm->context.vdso, rt_sigreturn);
/*
* Set up registers for signal handler.
diff --git a/arch/csky/kernel/smp.c b/arch/csky/kernel/smp.c
index 041d0de6a1b6..0f9f5eef9338 100644
--- a/arch/csky/kernel/smp.c
+++ b/arch/csky/kernel/smp.c
@@ -203,8 +203,8 @@ volatile unsigned int secondary_hint;
volatile unsigned int secondary_hint2;
volatile unsigned int secondary_ccr;
volatile unsigned int secondary_stack;
-
-unsigned long secondary_msa1;
+volatile unsigned int secondary_msa1;
+volatile unsigned int secondary_pgd;
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
@@ -216,6 +216,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
secondary_hint2 = mfcr("cr<21, 1>");
secondary_ccr = mfcr("cr18");
secondary_msa1 = read_mmu_msa1();
+ secondary_pgd = mfcr("cr<29, 15>");
/*
* Because other CPUs are in reset status, we must flush data
@@ -262,8 +263,6 @@ void csky_start_secondary(void)
flush_tlb_all();
write_mmu_pagemask(0);
- TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir);
- TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir);
#ifdef CONFIG_CPU_HAS_FPU
init_fpu();
diff --git a/arch/csky/kernel/traps.c b/arch/csky/kernel/traps.c
index 959a917c989d..e5fbf8653a21 100644
--- a/arch/csky/kernel/traps.c
+++ b/arch/csky/kernel/traps.c
@@ -39,9 +39,7 @@ asmlinkage void csky_cmpxchg(void);
asmlinkage void csky_get_tls(void);
asmlinkage void csky_irq(void);
-asmlinkage void csky_tlbinvalidl(void);
-asmlinkage void csky_tlbinvalids(void);
-asmlinkage void csky_tlbmodified(void);
+asmlinkage void csky_pagefault(void);
/* Defined in head.S */
asmlinkage void _start_smp_secondary(void);
@@ -66,9 +64,9 @@ void __init trap_init(void)
VEC_INIT(VEC_TRAP3, csky_get_tls);
/* setup MMU TLB exception */
- VEC_INIT(VEC_TLBINVALIDL, csky_tlbinvalidl);
- VEC_INIT(VEC_TLBINVALIDS, csky_tlbinvalids);
- VEC_INIT(VEC_TLBMODIFIED, csky_tlbmodified);
+ VEC_INIT(VEC_TLBINVALIDL, csky_pagefault);
+ VEC_INIT(VEC_TLBINVALIDS, csky_pagefault);
+ VEC_INIT(VEC_TLBMODIFIED, csky_pagefault);
#ifdef CONFIG_CPU_HAS_FPU
init_fpu();
diff --git a/arch/csky/kernel/vdso.c b/arch/csky/kernel/vdso.c
index abc3dbc658d4..16c20d64d165 100644
--- a/arch/csky/kernel/vdso.c
+++ b/arch/csky/kernel/vdso.c
@@ -1,86 +1,107 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/init.h>
#include <linux/binfmts.h>
#include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/unistd.h>
-#include <linux/uaccess.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+#ifdef GENERIC_TIME_VSYSCALL
+#include <vdso/datapage.h>
+#else
#include <asm/vdso.h>
-#include <asm/cacheflush.h>
+#endif
-static struct page *vdso_page;
+extern char vdso_start[], vdso_end[];
-static int __init init_vdso(void)
-{
- struct csky_vdso *vdso;
- int err = 0;
-
- vdso_page = alloc_page(GFP_KERNEL);
- if (!vdso_page)
- panic("Cannot allocate vdso");
+static unsigned int vdso_pages;
+static struct page **vdso_pagelist;
- vdso = vmap(&vdso_page, 1, 0, PAGE_KERNEL);
- if (!vdso)
- panic("Cannot map vdso");
+/*
+ * The vDSO data page.
+ */
+static union {
+ struct vdso_data data;
+ u8 page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = &vdso_data_store.data;
- clear_page(vdso);
-
- err = setup_vdso_page(vdso->rt_signal_retcode);
- if (err)
- panic("Cannot set signal return code, err: %x.", err);
+static int __init vdso_init(void)
+{
+ unsigned int i;
+
+ vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
+ vdso_pagelist =
+ kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL);
+ if (unlikely(vdso_pagelist == NULL)) {
+ pr_err("vdso: pagelist allocation failed\n");
+ return -ENOMEM;
+ }
- dcache_wb_range((unsigned long)vdso, (unsigned long)vdso + 16);
+ for (i = 0; i < vdso_pages; i++) {
+ struct page *pg;
- vunmap(vdso);
+ pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
+ vdso_pagelist[i] = pg;
+ }
+ vdso_pagelist[i] = virt_to_page(vdso_data);
return 0;
}
-subsys_initcall(init_vdso);
+arch_initcall(vdso_init);
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+ int uses_interp)
{
- int ret;
- unsigned long addr;
struct mm_struct *mm = current->mm;
+ unsigned long vdso_base, vdso_len;
+ int ret;
- mmap_write_lock(mm);
+ vdso_len = (vdso_pages + 1) << PAGE_SHIFT;
- addr = get_unmapped_area(NULL, STACK_TOP, PAGE_SIZE, 0, 0);
- if (IS_ERR_VALUE(addr)) {
- ret = addr;
- goto up_fail;
+ mmap_write_lock(mm);
+ vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+ if (IS_ERR_VALUE(vdso_base)) {
+ ret = vdso_base;
+ goto end;
}
- ret = install_special_mapping(
- mm,
- addr,
- PAGE_SIZE,
- VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
- &vdso_page);
- if (ret)
- goto up_fail;
+ /*
+ * Put vDSO base into mm struct. We need to do this before calling
+ * install_special_mapping or the perf counter mmap tracking code
+ * will fail to recognise it as a vDSO (since arch_vma_name fails).
+ */
+ mm->context.vdso = (void *)vdso_base;
+
+ ret =
+ install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
+ vdso_pagelist);
+
+ if (unlikely(ret)) {
+ mm->context.vdso = NULL;
+ goto end;
+ }
- mm->context.vdso = (void *)addr;
+ vdso_base += (vdso_pages << PAGE_SHIFT);
+ ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+ (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
-up_fail:
+ if (unlikely(ret))
+ mm->context.vdso = NULL;
+end:
mmap_write_unlock(mm);
return ret;
}
const char *arch_vma_name(struct vm_area_struct *vma)
{
- if (vma->vm_mm == NULL)
- return NULL;
-
- if (vma->vm_start == (long)vma->vm_mm->context.vdso)
+ if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
return "[vdso]";
- else
- return NULL;
+ if (vma->vm_mm && (vma->vm_start ==
+ (long)vma->vm_mm->context.vdso + PAGE_SIZE))
+ return "[vdso_data]";
+ return NULL;
}
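
Once both mappings are installed they appear as adjacent regions named by arch_vma_name() above. A hedged user-space check (plain /proc parsing, nothing csky-specific) can confirm the layout:

	/* Print the vDSO mappings; matches both "[vdso]" and "[vdso_data]". */
	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/self/maps", "r");

		if (!f)
			return 1;
		while (fgets(line, sizeof(line), f))
			if (strstr(line, "[vdso"))
				fputs(line, stdout);
		fclose(f);
		return 0;
	}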
diff --git a/arch/csky/kernel/vdso/.gitignore b/arch/csky/kernel/vdso/.gitignore
new file mode 100644
index 000000000000..3a19def868ec
--- /dev/null
+++ b/arch/csky/kernel/vdso/.gitignore
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+vdso.lds
+*.tmp
+vdso-syms.S
diff --git a/arch/csky/kernel/vdso/Makefile b/arch/csky/kernel/vdso/Makefile
new file mode 100644
index 000000000000..0b6909f10667
--- /dev/null
+++ b/arch/csky/kernel/vdso/Makefile
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of generic Makefile.
+ARCH_REL_TYPE_ABS := R_CKCORE_ADDR32|R_CKCORE_JUMP_SLOT
+include $(srctree)/lib/vdso/Makefile
+
+# Symbols present in the vdso
+vdso-syms += rt_sigreturn
+vdso-syms += vgettimeofday
+
+# Files to link into the vdso
+obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+
+ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
+endif
+
+ccflags-y := -fno-stack-protector -DBUILD_VDSO32
+
+# Build rules
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+obj-y += vdso.o vdso-syms.o
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
+
+# Force dependency
+$(obj)/vdso.o: $(obj)/vdso.so
+
+SYSCFLAGS_vdso.so.dbg = $(c_flags)
+$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE
+ $(call if_changed,vdsold)
+SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
+ -Wl,--build-id=sha1 -Wl,--hash-style=both
+
+$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE
+ $(call if_changed,so2s)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+ $(call if_changed,objcopy)
+
+# actual build commands
+# The DSO images are built using a special linker script
+# Make sure only to export the intended __vdso_xxx symbol offsets.
+quiet_cmd_vdsold = VDSOLD $@
+ cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \
+ -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \
+ $(CROSS_COMPILE)objcopy \
+ $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
+ rm $@.tmp
+
+# Extracts symbol offsets from the VDSO, converting them into an assembly file
+# that contains the same symbols at the same offsets.
+quiet_cmd_so2s = SO2S $@
+ cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+ cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso.so: $(obj)/vdso.so.dbg
+ @mkdir -p $(MODLIB)/vdso
+ $(call cmd,vdso_install)
+
+vdso_install: vdso.so
diff --git a/arch/csky/kernel/vdso/note.S b/arch/csky/kernel/vdso/note.S
new file mode 100644
index 000000000000..2a956c942211
--- /dev/null
+++ b/arch/csky/kernel/vdso/note.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/elfnote.h>
+#include <linux/version.h>
+
+ELFNOTE_START(Linux, 0, "a")
+ .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/csky/kernel/vdso/rt_sigreturn.S b/arch/csky/kernel/vdso/rt_sigreturn.S
new file mode 100644
index 000000000000..0a6bd1216118
--- /dev/null
+++ b/arch/csky/kernel/vdso/rt_sigreturn.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+#include <abi/vdso.h>
+
+ .text
+ENTRY(__vdso_rt_sigreturn)
+ .cfi_startproc
+ .cfi_signal_frame
+ SET_SYSCALL_ID
+ trap 0
+ .cfi_endproc
+ENDPROC(__vdso_rt_sigreturn)
diff --git a/arch/csky/kernel/vdso/so2s.sh b/arch/csky/kernel/vdso/so2s.sh
new file mode 100755
index 000000000000..69da3d529c6d
--- /dev/null
+++ b/arch/csky/kernel/vdso/so2s.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+
+sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_5.10\)*!.global \2\n.set \2,0x\1!' \
+| grep '^\.'
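
so2s.sh reads `nm -D` output on stdin (the Makefile's cmd_so2s pipes it in) and keeps only the rewritten lines. Assuming a hypothetical dynamic-symbol line such as

	00000800 T __vdso_rt_sigreturn@@LINUX_5.10

the sed expression emits

	.global __vdso_rt_sigreturn
	.set __vdso_rt_sigreturn,0x00000800

and the trailing grep discards everything that was not rewritten into a line starting with '.'.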
diff --git a/arch/csky/kernel/vdso/vdso.S b/arch/csky/kernel/vdso/vdso.S
new file mode 100644
index 000000000000..5162ca069494
--- /dev/null
+++ b/arch/csky/kernel/vdso/vdso.S
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+ __PAGE_ALIGNED_DATA
+
+ .globl vdso_start, vdso_end
+ .balign PAGE_SIZE
+vdso_start:
+ .incbin "arch/csky/kernel/vdso/vdso.so"
+ .balign PAGE_SIZE
+vdso_end:
+
+ .previous
diff --git a/arch/csky/kernel/vdso/vdso.lds.S b/arch/csky/kernel/vdso/vdso.lds.S
new file mode 100644
index 000000000000..590a6c79fff7
--- /dev/null
+++ b/arch/csky/kernel/vdso/vdso.lds.S
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/page.h>
+
+OUTPUT_ARCH(csky)
+
+SECTIONS
+{
+ PROVIDE(_vdso_data = . + PAGE_SIZE);
+ . = SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .gnu.hash : { *(.gnu.hash) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ .note : { *(.note.*) } :text :note
+ .dynamic : { *(.dynamic) } :text :dynamic
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+
+ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+
+ . = 0x800;
+ .text : { *(.text .text.*) } :text
+
+ .data : {
+ *(.got.plt) *(.got)
+ *(.data .data.* .gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ }
+}
+
+PHDRS
+{
+ text PT_LOAD FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ note PT_NOTE FLAGS(4); /* PF_R */
+ eh_frame_hdr PT_GNU_EH_FRAME;
+}
+
+VERSION
+{
+ LINUX_5.10 {
+ global:
+ __vdso_rt_sigreturn;
+ __vdso_clock_gettime;
+ __vdso_clock_gettime64;
+ __vdso_gettimeofday;
+ __vdso_clock_getres;
+ local: *;
+ };
+}
diff --git a/arch/csky/kernel/vdso/vgettimeofday.c b/arch/csky/kernel/vdso/vgettimeofday.c
new file mode 100644
index 000000000000..da491832c098
--- /dev/null
+++ b/arch/csky/kernel/vdso/vgettimeofday.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __vdso_clock_gettime(clockid_t clock,
+ struct old_timespec32 *ts)
+{
+ return __cvdso_clock_gettime32(clock, ts);
+}
+
+int __vdso_clock_gettime64(clockid_t clock,
+ struct __kernel_timespec *ts)
+{
+ return __cvdso_clock_gettime(clock, ts);
+}
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+ struct timezone *tz)
+{
+ return __cvdso_gettimeofday(tv, tz);
+}
+
+int __vdso_clock_getres(clockid_t clock_id,
+ struct old_timespec32 *res)
+{
+ return __cvdso_clock_getres_time32(clock_id, res);
+}
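
These four entry points are thin wrappers around the generic vDSO implementation pulled in via c-gettimeofday-y. Nothing new is needed from user space; a vDSO-aware libc can service an ordinary call like the sketch below through __vdso_clock_gettime64 without entering the kernel:

	/* Hedged sketch: a plain libc call the vDSO can satisfy. */
	#include <stdio.h>
	#include <time.h>

	int main(void)
	{
		struct timespec ts;

		if (clock_gettime(CLOCK_MONOTONIC, &ts))
			return 1;
		printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
		return 0;
	}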
diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S
index f03033e17c29..e8b1a4a49798 100644
--- a/arch/csky/kernel/vmlinux.lds.S
+++ b/arch/csky/kernel/vmlinux.lds.S
@@ -33,6 +33,7 @@ SECTIONS
.text : AT(ADDR(.text) - LOAD_OFFSET) {
_text = .;
+ VBR_BASE
IRQENTRY_TEXT
SOFTIRQENTRY_TEXT
TEXT_TEXT
@@ -104,7 +105,6 @@ SECTIONS
EXCEPTION_TABLE(L1_CACHE_BYTES)
BSS_SECTION(L1_CACHE_BYTES, PAGE_SIZE, L1_CACHE_BYTES)
- VBR_BASE
_end = . ;
STABS_DEBUG
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index 081b178b41b1..1482de56f4f7 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -1,29 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
-#include <linux/signal.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/version.h>
-#include <linux/vt_kern.h>
#include <linux/extable.h>
-#include <linux/uaccess.h>
-#include <linux/perf_event.h>
#include <linux/kprobes.h>
-
-#include <asm/hardirq.h>
-#include <asm/mmu_context.h>
-#include <asm/traps.h>
-#include <asm/page.h>
+#include <linux/mmu_context.h>
+#include <linux/perf_event.h>
int fixup_exception(struct pt_regs *regs)
{
@@ -39,180 +20,287 @@ int fixup_exception(struct pt_regs *regs)
return 0;
}
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
- unsigned long mmu_meh)
+static inline bool is_write(struct pt_regs *regs)
{
- struct vm_area_struct *vma = NULL;
- struct task_struct *tsk = current;
- struct mm_struct *mm = tsk->mm;
- int si_code;
- int fault;
- unsigned long address = mmu_meh & PAGE_MASK;
+ switch (trap_no(regs)) {
+ case VEC_TLBINVALIDS:
+ return true;
+ case VEC_TLBMODIFIED:
+ return true;
+ }
- if (kprobe_page_fault(regs, tsk->thread.trap_no))
+ return false;
+}
+
+#ifdef CONFIG_CPU_HAS_LDSTEX
+static inline void csky_cmpxchg_fixup(struct pt_regs *regs)
+{
+ return;
+}
+#else
+extern unsigned long csky_cmpxchg_ldw;
+extern unsigned long csky_cmpxchg_stw;
+static inline void csky_cmpxchg_fixup(struct pt_regs *regs)
+{
+ if (trap_no(regs) != VEC_TLBMODIFIED)
return;
- si_code = SEGV_MAPERR;
+ if (instruction_pointer(regs) == csky_cmpxchg_stw)
+ instruction_pointer_set(regs, csky_cmpxchg_ldw);
+ return;
+}
+#endif
+
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
+{
+ current->thread.trap_no = trap_no(regs);
+
+ /* Are we prepared to handle this kernel fault? */
+ if (fixup_exception(regs))
+ return;
-#ifndef CONFIG_CPU_HAS_TLBI
/*
- * We fault-in kernel-space virtual memory on-demand. The
- * 'reference' page table is init_mm.pgd.
- *
- * NOTE! We MUST NOT take any locks for this case. We may
- * be in an interrupt or a critical region, and should
- * only copy the information from the master page table,
- * nothing more.
+ * Oops. The kernel tried to access some bad page. We'll have to
+ * terminate things with extreme prejudice.
*/
- if (unlikely(address >= VMALLOC_START) &&
- unlikely(address <= VMALLOC_END)) {
- /*
- * Synchronize this task's top level page-table
- * with the 'reference' page table.
- *
- * Do _not_ use "tsk" here. We might be inside
- * an interrupt in the middle of a task switch..
- */
- int offset = pgd_index(address);
- pgd_t *pgd, *pgd_k;
- pud_t *pud, *pud_k;
- pmd_t *pmd, *pmd_k;
- pte_t *pte_k;
+ bust_spinlocks(1);
+ pr_alert("Unable to handle kernel paging request at virtual "
+ "addr 0x%08lx, pc: 0x%08lx\n", addr, regs->pc);
+ die(regs, "Oops");
+ do_exit(SIGKILL);
+}
- unsigned long pgd_base;
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+ current->thread.trap_no = trap_no(regs);
- pgd_base = (unsigned long)__va(get_pgd());
- pgd = (pgd_t *)pgd_base + offset;
- pgd_k = init_mm.pgd + offset;
+ if (fault & VM_FAULT_OOM) {
+ /*
+		 * We ran out of memory; call the OOM killer and return to
+		 * userspace (which will retry the fault, or kill us if we got oom-killed).
+ */
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+ pagefault_out_of_memory();
+ return;
+ } else if (fault & VM_FAULT_SIGBUS) {
+ /* Kernel mode? Handle exceptions or die */
+ if (!user_mode(regs)) {
+ no_context(regs, addr);
+ return;
+ }
+ do_trap(regs, SIGBUS, BUS_ADRERR, addr);
+ return;
+ }
+ BUG();
+}
- if (!pgd_present(*pgd_k))
- goto no_context;
- set_pgd(pgd, *pgd_k);
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+ /*
+ * Something tried to access memory that isn't in our memory map.
+ * Fix it, but check if it's kernel or user first.
+ */
+ mmap_read_unlock(mm);
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs)) {
+ do_trap(regs, SIGSEGV, code, addr);
+ return;
+ }
- pud = (pud_t *)pgd;
- pud_k = (pud_t *)pgd_k;
- if (!pud_present(*pud_k))
- goto no_context;
+ no_context(regs, addr);
+}
- pmd = pmd_offset(pud, address);
- pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- goto no_context;
- set_pmd(pmd, *pmd_k);
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+ pgd_t *pgd, *pgd_k;
+ pud_t *pud, *pud_k;
+ pmd_t *pmd, *pmd_k;
+ pte_t *pte_k;
+ int offset;
- pte_k = pte_offset_kernel(pmd_k, address);
- if (!pte_present(*pte_k))
- goto no_context;
+ /* User mode accesses just cause a SIGSEGV */
+ if (user_mode(regs)) {
+ do_trap(regs, SIGSEGV, code, addr);
return;
}
-#endif
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
/*
- * If we're in an interrupt or have no user
- * context, we must not take the fault..
+ * Synchronize this task's top level page-table
+ * with the 'reference' page table.
+ *
+ * Do _not_ use "tsk" here. We might be inside
+ * an interrupt in the middle of a task switch..
*/
- if (in_atomic() || !mm)
- goto bad_area_nosemaphore;
+ offset = pgd_index(addr);
- mmap_read_lock(mm);
- vma = find_vma(mm, address);
- if (!vma)
- goto bad_area;
- if (vma->vm_start <= address)
- goto good_area;
- if (!(vma->vm_flags & VM_GROWSDOWN))
- goto bad_area;
- if (expand_stack(vma, address))
- goto bad_area;
- /*
- * Ok, we have a good vm_area for this memory access, so
- * we can handle it..
- */
-good_area:
- si_code = SEGV_ACCERR;
+ pgd = get_pgd() + offset;
+ pgd_k = init_mm.pgd + offset;
- if (write) {
+ if (!pgd_present(*pgd_k)) {
+ no_context(regs, addr);
+ return;
+ }
+ set_pgd(pgd, *pgd_k);
+
+ pud = (pud_t *)pgd;
+ pud_k = (pud_t *)pgd_k;
+ if (!pud_present(*pud_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ pmd = pmd_offset(pud, addr);
+ pmd_k = pmd_offset(pud_k, addr);
+ if (!pmd_present(*pmd_k)) {
+ no_context(regs, addr);
+ return;
+ }
+ set_pmd(pmd, *pmd_k);
+
+ pte_k = pte_offset_kernel(pmd_k, addr);
+ if (!pte_present(*pte_k)) {
+ no_context(regs, addr);
+ return;
+ }
+
+ flush_tlb_one(addr);
+}
+
+static inline bool access_error(struct pt_regs *regs, struct vm_area_struct *vma)
+{
+ if (is_write(regs)) {
if (!(vma->vm_flags & VM_WRITE))
- goto bad_area;
+ return true;
} else {
if (unlikely(!vma_is_accessible(vma)))
- goto bad_area;
+ return true;
}
+ return false;
+}
+
+/*
+ * This routine handles page faults. It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs)
+{
+ struct task_struct *tsk;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ unsigned long addr = read_mmu_entryhi() & PAGE_MASK;
+ unsigned int flags = FAULT_FLAG_DEFAULT;
+ int code = SEGV_MAPERR;
+ vm_fault_t fault;
+
+ tsk = current;
+ mm = tsk->mm;
+
+ csky_cmpxchg_fixup(regs);
+
+ if (kprobe_page_fault(regs, tsk->thread.trap_no))
+ return;
/*
- * If for any reason at all we couldn't handle the fault,
- * make sure we exit gracefully rather than endlessly redo
- * the fault.
+ * Fault-in kernel-space virtual memory on-demand.
+ * The 'reference' page table is init_mm.pgd.
+ *
+ * NOTE! We MUST NOT take any locks for this case. We may
+ * be in an interrupt or a critical region, and should
+ * only copy the information from the master page table,
+ * nothing more.
*/
- fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0,
- regs);
- if (unlikely(fault & VM_FAULT_ERROR)) {
- if (fault & VM_FAULT_OOM)
- goto out_of_memory;
- else if (fault & VM_FAULT_SIGBUS)
- goto do_sigbus;
- else if (fault & VM_FAULT_SIGSEGV)
- goto bad_area;
- BUG();
+ if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+ vmalloc_fault(regs, code, addr);
+ return;
}
- mmap_read_unlock(mm);
- return;
+
+ /* Enable interrupts if they were enabled in the parent context. */
+ if (likely(regs->sr & BIT(6)))
+ local_irq_enable();
/*
- * Something tried to access memory that isn't in our memory map..
- * Fix it, but check if it's kernel or user first..
+ * If we're in an interrupt, have no user context, or are running
+ * in an atomic region, then we must not take the fault.
*/
-bad_area:
- mmap_read_unlock(mm);
-
-bad_area_nosemaphore:
- /* User mode accesses just cause a SIGSEGV */
- if (user_mode(regs)) {
- tsk->thread.trap_no = trap_no(regs);
- force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+ if (unlikely(faulthandler_disabled() || !mm)) {
+ no_context(regs, addr);
return;
}
-no_context:
- tsk->thread.trap_no = trap_no(regs);
+ if (user_mode(regs))
+ flags |= FAULT_FLAG_USER;
- /* Are we prepared to handle this kernel fault? */
- if (fixup_exception(regs))
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
+ if (is_write(regs))
+ flags |= FAULT_FLAG_WRITE;
+retry:
+ mmap_read_lock(mm);
+ vma = find_vma(mm, addr);
+ if (unlikely(!vma)) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
+ if (likely(vma->vm_start <= addr))
+ goto good_area;
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ bad_area(regs, mm, code, addr);
return;
+ }
+ if (unlikely(expand_stack(vma, addr))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
/*
- * Oops. The kernel tried to access some bad page. We'll have to
- * terminate things with extreme prejudice.
+ * Ok, we have a good vm_area for this memory access, so
+ * we can handle it.
*/
- bust_spinlocks(1);
- pr_alert("Unable to handle kernel paging request at virtual "
- "address 0x%08lx, pc: 0x%08lx\n", address, regs->pc);
- die(regs, "Oops");
+good_area:
+ code = SEGV_ACCERR;
+
+ if (unlikely(access_error(regs, vma))) {
+ bad_area(regs, mm, code, addr);
+ return;
+ }
-out_of_memory:
- tsk->thread.trap_no = trap_no(regs);
+ /*
+ * If for any reason at all we could not handle the fault,
+ * make sure we exit gracefully rather than endlessly redo
+ * the fault.
+ */
+ fault = handle_mm_fault(vma, addr, flags, regs);
/*
- * We ran out of memory, call the OOM killer, and return the userspace
- * (which will retry the fault, or kill us if we got oom-killed).
+ * If we need to retry but a fatal signal is pending, handle the
+ * signal first. We do not need to release the mmap_lock because it
+ * would already be released in __lock_page_or_retry in mm/filemap.c.
*/
- pagefault_out_of_memory();
- return;
+ if (fault_signal_pending(fault, regs)) {
+ if (!user_mode(regs))
+ no_context(regs, addr);
+ return;
+ }
-do_sigbus:
- tsk->thread.trap_no = trap_no(regs);
+ if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+ flags |= FAULT_FLAG_TRIED;
- mmap_read_unlock(mm);
+ /*
+ * No need to mmap_read_unlock(mm) as we would
+ * have already released it in __lock_page_or_retry
+ * in mm/filemap.c.
+ */
+ goto retry;
+ }
- /* Kernel mode? Handle exceptions or die */
- if (!user_mode(regs))
- goto no_context;
+ mmap_read_unlock(mm);
- force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
+ if (unlikely(fault & VM_FAULT_ERROR)) {
+ mm_fault_error(regs, addr, fault);
+ return;
+ }
+ return;
}
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index af627128314f..894050a8ce09 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -28,9 +28,15 @@
#include <asm/mmu_context.h>
#include <asm/sections.h>
#include <asm/tlb.h>
+#include <asm/cacheflush.h>
+
+#define PTRS_KERN_TABLE \
+ ((PTRS_PER_PGD - USER_PTRS_PER_PGD) * PTRS_PER_PTE)
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
+pte_t kernel_pte_tables[PTRS_KERN_TABLE] __page_aligned_bss;
+
EXPORT_SYMBOL(invalid_pte_table);
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
@@ -80,9 +86,9 @@ void __init mem_init(void)
#ifdef CONFIG_HIGHMEM
unsigned long tmp;
- max_mapnr = highend_pfn;
+ set_max_mapnr(highend_pfn - ARCH_PFN_OFFSET);
#else
- max_mapnr = max_low_pfn;
+ set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
#endif
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
@@ -104,24 +110,9 @@ void __init mem_init(void)
mem_init_print_info(NULL);
}
-extern char __init_begin[], __init_end[];
-
void free_initmem(void)
{
- unsigned long addr;
-
- addr = (unsigned long) &__init_begin;
-
- while (addr < (unsigned long) &__init_end) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages_inc();
- addr += PAGE_SIZE;
- }
-
- pr_info("Freeing unused kernel memory: %dk freed\n",
- ((unsigned int)&__init_end - (unsigned int)&__init_begin) >> 10);
+ free_initmem_default(-1);
}
void pgd_init(unsigned long *p)
@@ -130,20 +121,35 @@ void pgd_init(unsigned long *p)
for (i = 0; i < PTRS_PER_PGD; i++)
p[i] = __pa(invalid_pte_table);
+
+ flush_tlb_all();
+ local_icache_inv_all(NULL);
}
-void __init pre_mmu_init(void)
+void __init mmu_init(unsigned long min_pfn, unsigned long max_pfn)
{
- /*
- * Setup page-table and enable TLB-hardrefill
- */
+ int i;
+
+ for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ swapper_pg_dir[i].pgd = __pa(invalid_pte_table);
+
+ for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++)
+ swapper_pg_dir[i].pgd =
+ __pa(kernel_pte_tables + (PTRS_PER_PTE * (i - USER_PTRS_PER_PGD)));
+
+ for (i = 0; i < PTRS_KERN_TABLE; i++)
+ set_pte(&kernel_pte_tables[i], __pte(_PAGE_GLOBAL));
+
+ for (i = min_pfn; i < max_pfn; i++)
+ set_pte(&kernel_pte_tables[i - PFN_DOWN(va_pa_offset)], pfn_pte(i, PAGE_KERNEL));
+
flush_tlb_all();
- pgd_init((unsigned long *)swapper_pg_dir);
- TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir);
- TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir);
+ local_icache_inv_all(NULL);
/* Setup page mask to 4k */
write_mmu_pagemask(0);
+
+ setup_pgd(swapper_pg_dir, 0);
}
void __init fixrange_init(unsigned long start, unsigned long end,
diff --git a/arch/csky/mm/tlb.c b/arch/csky/mm/tlb.c
index ed1512381112..9234c5e5ceaf 100644
--- a/arch/csky/mm/tlb.c
+++ b/arch/csky/mm/tlb.c
@@ -24,7 +24,13 @@ void flush_tlb_all(void)
void flush_tlb_mm(struct mm_struct *mm)
{
#ifdef CONFIG_CPU_HAS_TLBI
- asm volatile("tlbi.asids %0"::"r"(cpu_asid(mm)));
+ sync_is();
+ asm volatile(
+ "tlbi.asids %0 \n"
+ "sync.i \n"
+ :
+ : "r" (cpu_asid(mm))
+ : "memory");
#else
tlb_invalid_all();
#endif
@@ -53,11 +59,17 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
end &= TLB_ENTRY_SIZE_MASK;
#ifdef CONFIG_CPU_HAS_TLBI
+ sync_is();
while (start < end) {
- asm volatile("tlbi.vas %0"::"r"(start | newpid));
+ asm volatile(
+ "tlbi.vas %0 \n"
+ :
+ : "r" (start | newpid)
+ : "memory");
+
start += 2*PAGE_SIZE;
}
- sync_is();
+ asm volatile("sync.i\n");
#else
{
unsigned long flags, oldpid;
@@ -87,11 +99,17 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
end &= TLB_ENTRY_SIZE_MASK;
#ifdef CONFIG_CPU_HAS_TLBI
+ sync_is();
while (start < end) {
- asm volatile("tlbi.vaas %0"::"r"(start));
+ asm volatile(
+ "tlbi.vaas %0 \n"
+ :
+ : "r" (start)
+ : "memory");
+
start += 2*PAGE_SIZE;
}
- sync_is();
+ asm volatile("sync.i\n");
#else
{
unsigned long flags, oldpid;
@@ -121,8 +139,13 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
addr &= TLB_ENTRY_SIZE_MASK;
#ifdef CONFIG_CPU_HAS_TLBI
- asm volatile("tlbi.vas %0"::"r"(addr | newpid));
sync_is();
+ asm volatile(
+ "tlbi.vas %0 \n"
+ "sync.i \n"
+ :
+ : "r" (addr | newpid)
+ : "memory");
#else
{
int oldpid, idx;
@@ -147,8 +170,13 @@ void flush_tlb_one(unsigned long addr)
addr &= TLB_ENTRY_SIZE_MASK;
#ifdef CONFIG_CPU_HAS_TLBI
- asm volatile("tlbi.vaas %0"::"r"(addr));
sync_is();
+ asm volatile(
+ "tlbi.vaas %0 \n"
+ "sync.i \n"
+ :
+ : "r" (addr)
+ : "memory");
#else
{
int oldpid, idx;
diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
index bc1364db58fe..46b1342ce515 100644
--- a/arch/h8300/kernel/process.c
+++ b/arch/h8300/kernel/process.c
@@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
childregs->retpc = (unsigned long) ret_from_kernel_thread;
childregs->er4 = topstk; /* arg */
diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
index 6a980cba7b29..c61165c99ae0 100644
--- a/arch/hexagon/kernel/process.c
+++ b/arch/hexagon/kernel/process.c
@@ -73,7 +73,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
sizeof(*ss));
ss->lr = (unsigned long)ret_from_fork;
p->thread.switch_sp = ss;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
/* r24 <- fn, r25 <- arg */
ss->r24 = usp;
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 3e9da5e6c3bd..467b7e7f967c 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -14,7 +14,6 @@
KBUILD_DEFCONFIG := generic_defconfig
NM := $(CROSS_COMPILE)nm -B
-READELF := $(CROSS_COMPILE)readelf
CHECKFLAGS += -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 4ebbfa076a26..7e1a1525e202 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -338,7 +338,7 @@ copy_thread(unsigned long clone_flags, unsigned long user_stack_base,
ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
if (unlikely(!user_stack_base)) {
/* fork_idle() called us */
return 0;
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index e67b22fc3c60..c1b299760bf7 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
* need to push through a forced SIGSEGV.
*/
while (1) {
- get_signal(&ksig);
+ if (!get_signal(&ksig))
+ break;
/*
* get_signal() may have run a debugger (via notify_parent())
diff --git a/arch/m68k/coldfire/clk.c b/arch/m68k/coldfire/clk.c
index 7bc666e482eb..076a9caa9557 100644
--- a/arch/m68k/coldfire/clk.c
+++ b/arch/m68k/coldfire/clk.c
@@ -90,6 +90,10 @@ EXPORT_SYMBOL(clk_get);
int clk_enable(struct clk *clk)
{
unsigned long flags;
+
+ if (!clk)
+ return -EINVAL;
+
spin_lock_irqsave(&clk_lock, flags);
if ((clk->enabled++ == 0) && clk->clk_ops)
clk->clk_ops->enable(clk);
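
The new check matters because ColdFire clk_get() can return NULL for clocks a platform does not provide; enabling such a clock previously dereferenced a NULL pointer inside the spinlocked section. A hedged sketch of the caller pattern this protects (names hypothetical):

	#include <linux/clk.h>
	#include <linux/printk.h>

	static void demo_optional_clock(struct clk *clk)
	{
		/* clk may be NULL when the platform lacks this clock;
		 * clk_enable() now returns -EINVAL instead of crashing. */
		if (clk_enable(clk))
			pr_warn("optional clock unavailable\n");
	}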
diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 7f5912af2a52..9e8f0cc30a2c 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -171,7 +171,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
#include <asm-generic/memory_model.h>
#endif
-#define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
+#define virt_addr_valid(kaddr) ((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)
#define pfn_valid(pfn) virt_addr_valid(pfn_to_virt(pfn))
#endif /* __ASSEMBLY__ */
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index 6bbe52025de3..8d0f862ee9d7 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -30,8 +30,8 @@ extern unsigned long memory_end;
#define page_to_pfn(page) virt_to_pfn(page_to_virt(page))
#define pfn_valid(pfn) ((pfn) < max_mapnr)
-#define virt_addr_valid(kaddr) (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
- ((void *)(kaddr) < (void *)memory_end))
+#define virt_addr_valid(kaddr) (((unsigned long)(kaddr) >= PAGE_OFFSET) && \
+ ((unsigned long)(kaddr) < memory_end))
#endif /* __ASSEMBLY__ */
diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
index 08359a6e058f..da83cc83e791 100644
--- a/arch/m68k/kernel/process.c
+++ b/arch/m68k/kernel/process.c
@@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
*/
p->thread.fs = get_fs().seg;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* kernel thread */
memset(frame, 0, sizeof(struct fork_frame));
frame->regs.sr = PS_S;
diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
index 657c2beb665e..62aa237180b6 100644
--- a/arch/microblaze/kernel/process.c
+++ b/arch/microblaze/kernel/process.c
@@ -59,7 +59,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
struct pt_regs *childregs = task_pt_regs(p);
struct thread_info *ti = task_thread_info(p);
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* if we're creating a new kernel thread then just zeroing all
* the registers. That's OK for a brand new thread.*/
memset(childregs, 0, sizeof(struct pt_regs));
diff --git a/arch/mips/boot/compressed/decompress.c b/arch/mips/boot/compressed/decompress.c
index e3946b06e840..3d70d15ada28 100644
--- a/arch/mips/boot/compressed/decompress.c
+++ b/arch/mips/boot/compressed/decompress.c
@@ -14,6 +14,7 @@
#include <asm/addrspace.h>
#include <asm/unaligned.h>
+#include <asm-generic/vmlinux.lds.h>
/*
* These two variables specify the free mem region
@@ -120,6 +121,13 @@ void decompress_kernel(unsigned long boot_heap_start)
/* last four bytes is always image size in little endian */
image_size = get_unaligned_le32((void *)&__image_end - 4);
+ /* The device tree's address must be properly aligned */
+ image_size = ALIGN(image_size, STRUCT_ALIGNMENT);
+
+ puts("Copy device tree to address ");
+ puthex(VMLINUX_LOAD_ADDRESS_ULL + image_size);
+ puts("\n");
+
/* copy dtb to where the booted kernel will expect it */
memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size,
__appended_dtb, dtb_size);
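
ALIGN() here is the standard power-of-two round-up, and STRUCT_ALIGNMENT (from asm-generic/vmlinux.lds.h) is the alignment the copied device tree must start on. A self-contained demo of the arithmetic, using 32 as an assumed STRUCT_ALIGNMENT value:

	#include <stdio.h>

	#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		unsigned long image_size = 0x500e39;	/* hypothetical size */

		/* prints: 0x500e39 -> 0x500e40 */
		printf("0x%lx -> 0x%lx\n", image_size, ALIGN(image_size, 32UL));
		return 0;
	}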
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index 8e1deaf00e0c..5e4105cccf9f 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
poly1305-mips-y := poly1305-core.o poly1305-glue.o
-perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
-perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
+perlasm-flavour-$(CONFIG_32BIT) := o32
+perlasm-flavour-$(CONFIG_64BIT) := 64
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h
index 6aa8f126a43d..b710e76c9c65 100644
--- a/arch/mips/include/asm/traps.h
+++ b/arch/mips/include/asm/traps.h
@@ -24,8 +24,11 @@ extern void (*board_ebase_setup)(void);
extern void (*board_cache_error_setup)(void);
extern int register_nmi_notifier(struct notifier_block *nb);
+extern void reserve_exception_space(phys_addr_t addr, unsigned long size);
extern char except_vec_nmi[];
+#define VECTORSPACING 0x100 /* for EI/VI mode */
+
#define nmi_notifier(fn, pri) \
({ \
static struct notifier_block fn##_nb = { \
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 9a89637b4ecf..b71892064f27 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -26,6 +26,7 @@
#include <asm/elf.h>
#include <asm/pgtable-bits.h>
#include <asm/spram.h>
+#include <asm/traps.h>
#include <linux/uaccess.h>
#include "fpu-probe.h"
@@ -1628,6 +1629,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
c->cputype = CPU_BMIPS3300;
__cpu_name[cpu] = "Broadcom BMIPS3300";
set_elf_platform(cpu, "bmips3300");
+ reserve_exception_space(0x400, VECTORSPACING * 64);
break;
case PRID_IMP_BMIPS43XX: {
int rev = c->processor_id & PRID_REV_MASK;
@@ -1638,6 +1640,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
__cpu_name[cpu] = "Broadcom BMIPS4380";
set_elf_platform(cpu, "bmips4380");
c->options |= MIPS_CPU_RIXI;
+ reserve_exception_space(0x400, VECTORSPACING * 64);
} else {
c->cputype = CPU_BMIPS4350;
__cpu_name[cpu] = "Broadcom BMIPS4350";
@@ -1654,6 +1657,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
__cpu_name[cpu] = "Broadcom BMIPS5000";
set_elf_platform(cpu, "bmips5000");
c->options |= MIPS_CPU_ULRI | MIPS_CPU_RIXI;
+ reserve_exception_space(0x1000, VECTORSPACING * 64);
break;
}
}
@@ -2133,6 +2137,8 @@ void cpu_probe(void)
if (cpu == 0)
__ua_limit = ~((1ull << cpu_vmbits) - 1);
#endif
+
+ reserve_exception_space(0, 0x1000);
}
void cpu_report(void)
diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c
index abdbbe8c5a43..af654771918c 100644
--- a/arch/mips/kernel/cpu-r3k-probe.c
+++ b/arch/mips/kernel/cpu-r3k-probe.c
@@ -21,6 +21,7 @@
#include <asm/fpu.h>
#include <asm/mipsregs.h>
#include <asm/elf.h>
+#include <asm/traps.h>
#include "fpu-probe.h"
@@ -158,6 +159,8 @@ void cpu_probe(void)
cpu_set_fpu_opts(c);
else
cpu_set_nofpu_opts(c);
+
+ reserve_exception_space(0, 0x400);
}
void cpu_report(void)
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index af4c862ec5ff..7efa0d1a4c2b 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -120,7 +120,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* kernel thread */
unsigned long status = p->thread.cp0_status;
memset(childregs, 0, sizeof(struct pt_regs));
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index e0352958e2f7..808b8b61ded1 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -2009,13 +2009,16 @@ void __noreturn nmi_exception_handler(struct pt_regs *regs)
nmi_exit();
}
-#define VECTORSPACING 0x100 /* for EI/VI mode */
-
unsigned long ebase;
EXPORT_SYMBOL_GPL(ebase);
unsigned long exception_handlers[32];
unsigned long vi_handlers[64];
+void reserve_exception_space(phys_addr_t addr, unsigned long size)
+{
+ memblock_reserve(addr, size);
+}
+
void __init *set_except_vector(int n, void *addr)
{
unsigned long handler = (unsigned long) addr;
@@ -2367,10 +2370,7 @@ void __init trap_init(void)
if (!cpu_has_mips_r2_r6) {
ebase = CAC_BASE;
- ebase_pa = virt_to_phys((void *)ebase);
vec_size = 0x400;
-
- memblock_reserve(ebase_pa, vec_size);
} else {
if (cpu_has_veic || cpu_has_vint)
vec_size = 0x200 + VECTORSPACING*64;
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index c1c345be04ff..1234834cc4c4 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -145,6 +145,7 @@ SECTIONS
}
#ifdef CONFIG_MIPS_ELF_APPENDED_DTB
+ STRUCT_ALIGN();
.appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) {
*(.appended_dtb)
KEEP(*(.appended_dtb))
@@ -172,6 +173,11 @@ SECTIONS
#endif
#ifdef CONFIG_MIPS_RAW_APPENDED_DTB
+ .fill : {
+ FILL(0);
+ BYTE(0);
+ . = ALIGN(8);
+ }
__appended_dtb = .;
/* leave space for appended DTB */
. += 0x100000;
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 1754498b0717..7719d632df8d 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -157,29 +157,31 @@ unsigned long _page_cachable_default;
EXPORT_SYMBOL(_page_cachable_default);
#define PM(p) __pgprot(_page_cachable_default | (p))
+#define PVA(p) PM(_PAGE_VALID | _PAGE_ACCESSED | (p))
static inline void setup_protection_map(void)
{
protection_map[0] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
- protection_map[1] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
- protection_map[2] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
- protection_map[3] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
- protection_map[4] = PM(_PAGE_PRESENT);
- protection_map[5] = PM(_PAGE_PRESENT);
- protection_map[6] = PM(_PAGE_PRESENT);
- protection_map[7] = PM(_PAGE_PRESENT);
+ protection_map[1] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
+ protection_map[2] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+ protection_map[3] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
+ protection_map[4] = PVA(_PAGE_PRESENT);
+ protection_map[5] = PVA(_PAGE_PRESENT);
+ protection_map[6] = PVA(_PAGE_PRESENT);
+ protection_map[7] = PVA(_PAGE_PRESENT);
protection_map[8] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
- protection_map[9] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
- protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
+ protection_map[9] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
+ protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
_PAGE_NO_READ);
- protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
- protection_map[12] = PM(_PAGE_PRESENT);
- protection_map[13] = PM(_PAGE_PRESENT);
- protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE);
- protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE);
+ protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
+ protection_map[12] = PVA(_PAGE_PRESENT);
+ protection_map[13] = PVA(_PAGE_PRESENT);
+	protection_map[14] = PVA(_PAGE_PRESENT | _PAGE_WRITE);
+	protection_map[15] = PVA(_PAGE_PRESENT | _PAGE_WRITE);
}
+#undef PVA
#undef PM
void cpu_cache_init(void)
diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
index e01ad5d17224..c1327e552ec6 100644
--- a/arch/nds32/kernel/process.c
+++ b/arch/nds32/kernel/process.c
@@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
/* kernel thread fn */
p->thread.cpu_context.r6 = stack_start;
diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
index 50b4eb19a6cc..c5f916ca6845 100644
--- a/arch/nios2/kernel/process.c
+++ b/arch/nios2/kernel/process.c
@@ -109,7 +109,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
struct switch_stack *childstack =
((struct switch_stack *)childregs) - 1;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(childstack, 0,
sizeof(struct switch_stack) + sizeof(struct pt_regs));
diff --git a/arch/openrisc/Kbuild b/arch/openrisc/Kbuild
new file mode 100644
index 000000000000..4234b4c03e72
--- /dev/null
+++ b/arch/openrisc/Kbuild
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += lib/ kernel/ mm/
+obj-y += boot/dts/
diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile
index bf10141c7426..410e7abfac69 100644
--- a/arch/openrisc/Makefile
+++ b/arch/openrisc/Makefile
@@ -24,6 +24,10 @@ LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
KBUILD_CFLAGS += -pipe -ffixed-r10 -D__linux__
+all: vmlinux.bin
+
+boot := arch/$(ARCH)/boot
+
ifeq ($(CONFIG_OPENRISC_HAVE_INST_MUL),y)
KBUILD_CFLAGS += $(call cc-option,-mhard-mul)
else
@@ -38,14 +42,13 @@ endif
head-y := arch/openrisc/kernel/head.o
-core-y += arch/openrisc/lib/ \
- arch/openrisc/kernel/ \
- arch/openrisc/mm/
+core-y += arch/openrisc/
libs-y += $(LIBGCC)
-ifneq '$(CONFIG_OPENRISC_BUILTIN_DTB)' '""'
-BUILTIN_DTB := y
-else
-BUILTIN_DTB := n
-endif
-core-$(BUILTIN_DTB) += arch/openrisc/boot/dts/
+PHONY += vmlinux.bin
+
+vmlinux.bin: vmlinux
+ $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+archclean:
+ $(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/openrisc/boot/.gitignore b/arch/openrisc/boot/.gitignore
new file mode 100644
index 000000000000..007d6fea3145
--- /dev/null
+++ b/arch/openrisc/boot/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+vmlinux.bin
diff --git a/arch/openrisc/boot/Makefile b/arch/openrisc/boot/Makefile
new file mode 100644
index 000000000000..5b28538f4dd1
--- /dev/null
+++ b/arch/openrisc/boot/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for bootable kernel images
+#
+
+targets += vmlinux.bin
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary
+$(obj)/vmlinux.bin: vmlinux FORCE
+ $(call if_changed,objcopy)
diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
index 3c98728cce24..eb62429681fc 100644
--- a/arch/openrisc/kernel/process.c
+++ b/arch/openrisc/kernel/process.c
@@ -34,6 +34,7 @@
#include <linux/init_task.h>
#include <linux/mqueue.h>
#include <linux/fs.h>
+#include <linux/reboot.h>
#include <linux/uaccess.h>
#include <asm/io.h>
@@ -49,10 +50,16 @@
*/
struct thread_info *current_thread_info_set[NR_CPUS] = { &init_thread_info, };
-void machine_restart(void)
+void machine_restart(char *cmd)
{
- printk(KERN_INFO "*** MACHINE RESTART ***\n");
- __asm__("l.nop 1");
+ do_kernel_restart(cmd);
+
+ /* Give the restart handlers a 1 second grace period to take effect */
+ mdelay(1000);
+
+ /* Whoops - the platform was unable to reboot. Tell the user! */
+ pr_emerg("Reboot failed -- System halted\n");
+ while (1);
}
/*
@@ -167,7 +174,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
sp -= sizeof(struct pt_regs);
kregs = (struct pt_regs *)sp;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(kregs, 0, sizeof(struct pt_regs));
kregs->gpr[20] = usp; /* fn, kernel thread */
kregs->gpr[22] = arg;
diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c
index 29c82ef2e207..48e1092a64de 100644
--- a/arch/openrisc/kernel/smp.c
+++ b/arch/openrisc/kernel/smp.c
@@ -16,6 +16,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/irq.h>
+#include <linux/of.h>
#include <asm/cpuinfo.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -60,22 +61,32 @@ void __init smp_prepare_boot_cpu(void)
void __init smp_init_cpus(void)
{
- int i;
+ struct device_node *cpu;
+ u32 cpu_id;
- for (i = 0; i < NR_CPUS; i++)
- set_cpu_possible(i, true);
+ for_each_of_cpu_node(cpu) {
+ if (of_property_read_u32(cpu, "reg", &cpu_id)) {
+ pr_warn("%s missing reg property", cpu->full_name);
+ continue;
+ }
+
+ if (cpu_id < NR_CPUS)
+ set_cpu_possible(cpu_id, true);
+ }
}
void __init smp_prepare_cpus(unsigned int max_cpus)
{
- int i;
+ unsigned int cpu;
/*
* Initialise the present map, which describes the set of CPUs
* actually populated at the present time.
*/
- for (i = 0; i < max_cpus; i++)
- set_cpu_present(i, true);
+ for_each_possible_cpu(cpu) {
+ if (cpu < max_cpus)
+ set_cpu_present(cpu, true);
+ }
}
void __init smp_cpus_done(unsigned int max_cpus)
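The smp_init_cpus() rewrite above stops claiming every one of NR_CPUS as
possible and instead marks only the CPUs the device tree actually
describes; smp_prepare_cpus() then derives the present map from the
possible map rather than from a bare index loop. A hedged sketch of the
effect (reg values invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    #define NR_CPUS 4

    int main(void)
    {
    	bool possible[NR_CPUS] = { false };
    	/* reg properties read from the /cpus nodes; only two CPUs exist */
    	unsigned int dt_cpu_regs[] = { 0, 1 };
    	unsigned int i;

    	for (i = 0; i < sizeof(dt_cpu_regs) / sizeof(dt_cpu_regs[0]); i++)
    		if (dt_cpu_regs[i] < NR_CPUS)
    			possible[dt_cpu_regs[i]] = true;

    	for (i = 0; i < NR_CPUS; i++)
    		printf("cpu%u possible: %s\n", i, possible[i] ? "yes" : "no");
    	return 0;
    }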
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 07fbe1a82f3e..afc3b8d03572 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -61,6 +61,7 @@ config PARISC
select HAVE_KRETPROBES
select HAVE_DYNAMIC_FTRACE if $(cc-option,-fpatchable-function-entry=1,1)
select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE
+ select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
select HAVE_KPROBES_ON_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
@@ -202,9 +203,12 @@ config PREFETCH
def_bool y
depends on PA8X00 || PA7200
+config PARISC_HUGE_KERNEL
+ def_bool y if !MODULES || UBSAN || FTRACE || COMPILE_TEST
+
config MLONGCALLS
- def_bool y if !MODULES || UBSAN || FTRACE
- bool "Enable the -mlong-calls compiler option for big kernels" if MODULES && !UBSAN && !FTRACE
+ def_bool y if PARISC_HUGE_KERNEL
+ bool "Enable the -mlong-calls compiler option for big kernels" if !PARISC_HUGE_KERNEL
depends on PA8X00
help
If you configure the kernel to include many drivers built-in instead
diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
index fda1c1a6a444..b144fbe29bc1 100644
--- a/arch/parisc/kernel/process.c
+++ b/arch/parisc/kernel/process.c
@@ -200,7 +200,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
extern void * const ret_from_kernel_thread;
extern void * const child_return;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* kernel thread */
memset(cregs, 0, sizeof(struct pt_regs));
if (!usp) /* idle thread */
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 2127974982df..65de6c4c9354 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -567,8 +567,6 @@ static const struct user_regset_view user_parisc_native_view = {
};
#ifdef CONFIG_64BIT
-#include <linux/compat.h>
-
static int gpr32_get(struct task_struct *target,
const struct user_regset *regset,
struct membuf to)
diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h
index 7141ccea8c94..a92059964579 100644
--- a/arch/powerpc/include/asm/dcr-native.h
+++ b/arch/powerpc/include/asm/dcr-native.h
@@ -53,8 +53,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val)
#define mfdcr(rn) \
({unsigned int rval; \
if (__builtin_constant_p(rn) && rn < 1024) \
- asm volatile("mfdcr %0," __stringify(rn) \
- : "=r" (rval)); \
+ asm volatile("mfdcr %0, %1" : "=r" (rval) \
+ : "n" (rn)); \
else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \
rval = mfdcrx(rn); \
else \
@@ -64,8 +64,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val)
#define mtdcr(rn, v) \
do { \
if (__builtin_constant_p(rn) && rn < 1024) \
- asm volatile("mtdcr " __stringify(rn) ",%0" \
- : : "r" (v)); \
+ asm volatile("mtdcr %0, %1" \
+ : : "n" (rn), "r" (v)); \
else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \
mtdcrx(rn, v); \
else \
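The mfdcr()/mtdcr() hunk above replaces hand-built mnemonics
(__stringify(rn) pasted into the template) with an "n" operand, which tells
the compiler the argument is a compile-time integer constant to substitute
into the instruction itself. A minimal user-space demonstration of the
constraint, on x86-64 purely for illustration since DCR registers cannot be
touched from user space:

    #include <stdio.h>

    /* "n" requires a compile-time constant; the compiler emits it as an
     * immediate ($1234 here), exactly as the rewritten mfdcr/mtdcr macros
     * now emit the DCR number. */
    static inline unsigned int load_const(void)
    {
    	unsigned int v;

    	asm volatile("movl %1, %0" : "=r" (v) : "n" (1234));
    	return v;
    }

    int main(void)
    {
    	printf("%u\n", load_const());	/* prints 1234 */
    	return 0;
    }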
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index 80b27f5d9648..607168b1aef4 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -228,7 +228,7 @@ enum {
#define MMU_FTRS_ALWAYS 0
#endif
-static inline bool early_mmu_has_feature(unsigned long feature)
+static __always_inline bool early_mmu_has_feature(unsigned long feature)
{
if (MMU_FTRS_ALWAYS & feature)
return true;
@@ -286,7 +286,7 @@ static inline void mmu_feature_keys_init(void)
}
-static inline bool mmu_has_feature(unsigned long feature)
+static __always_inline bool mmu_has_feature(unsigned long feature)
{
return early_mmu_has_feature(feature);
}
diff --git a/arch/powerpc/include/asm/vio.h b/arch/powerpc/include/asm/vio.h
index 0cf52746531b..721c0d6715ac 100644
--- a/arch/powerpc/include/asm/vio.h
+++ b/arch/powerpc/include/asm/vio.h
@@ -113,7 +113,7 @@ struct vio_driver {
const char *name;
const struct vio_device_id *id_table;
int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
- int (*remove)(struct vio_dev *dev);
+ void (*remove)(struct vio_dev *dev);
/* A driver must have a get_desired_dma() function to
* be loaded in a CMO environment if it uses DMA.
*/
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 727fdab557c9..565e84e20a72 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -457,11 +457,12 @@ InstructionTLBMiss:
cmplw 0,r1,r3
#endif
mfspr r2, SPRN_SDR1
- li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER
rlwinm r2, r2, 28, 0xfffff000
#ifdef CONFIG_MODULES
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
#endif
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
@@ -520,10 +521,11 @@ DataLoadTLBMiss:
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
mfspr r2, SPRN_SDR1
- li r1, _PAGE_PRESENT | _PAGE_ACCESSED
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
rlwinm r2, r2, 28, 0xfffff000
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1, _PAGE_PRESENT | _PAGE_ACCESSED
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
@@ -597,10 +599,11 @@ DataStoreTLBMiss:
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
mfspr r2, SPRN_SDR1
- li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
rlwinm r2, r2, 28, 0xfffff000
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
+ li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */
lwz r2,0(r2) /* get pmd entry */
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 398cd86b6ada..2ef3c4051bb9 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -149,7 +149,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
* enabled when the interrupt handler returns (indicating a process-context /
* synchronous interrupt) then irqs_enabled should be true.
*/
-static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
+static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
{
/* This must be done with RI=1 because tracing may touch vmaps */
trace_hardirqs_on();
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 924d023dad0a..3231c2df9e26 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1670,7 +1670,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
/* Copy registers */
sp -= sizeof(struct pt_regs);
childregs = (struct pt_regs *) sp;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gpr[1] = sp + sizeof(struct pt_regs);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index c77f2d4f44ca..bb6773594cf8 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -591,7 +591,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
} else {
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
- writing, &write_ok);
+ writing, &write_ok, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bb35490400e9..e603de7ade52 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
/* Call KVM generic code to do the slow-path check */
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
- writing, upgrade_p);
+ writing, upgrade_p, NULL);
if (is_error_noslot_pfn(pfn))
return -EFAULT;
page = NULL;
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index bb5c20d4ca91..c6aebc149d14 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -904,7 +904,7 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
return -EFAULT;
- nr_vsx_regs = size / sizeof(__vector128);
+ nr_vsx_regs = max(1ul, size / sizeof(__vector128));
emulate_vsx_load(op, buf, mem, cross_endian);
preempt_disable();
if (reg < 32) {
@@ -951,7 +951,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
if (!address_ok(regs, ea, size))
return -EFAULT;
- nr_vsx_regs = size / sizeof(__vector128);
+ nr_vsx_regs = max(1ul, size / sizeof(__vector128));
preempt_disable();
if (reg < 32) {
/* FP regs + extensions */
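Both hunks above guard the same integer-division pitfall: for a VSX access
narrower than one 16-byte vector register (a 4- or 8-byte scalar load or
store), size / sizeof(__vector128) truncates to zero, so no register at all
was copied in or out. A self-contained demonstration of the arithmetic (the
struct is a toy stand-in for powerpc's 16-byte __vector128):

    #include <stdio.h>

    typedef struct { unsigned long w[2]; } vector128;	/* 16 bytes on LP64 */

    int main(void)
    {
    	unsigned long size = 8;	/* e.g. an 8-byte VSX scalar access */
    	unsigned long nr_old = size / sizeof(vector128);
    	unsigned long nr_new = nr_old > 1 ? nr_old : 1;	/* max(1ul, ...) */

    	printf("old: %lu regs copied, fixed: %lu\n", nr_old, nr_new);
    	return 0;
    }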
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6817331e22ff..766f064f00fb 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -222,7 +222,7 @@ static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
- if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
+ if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel)
*addrp = 0;
}
@@ -507,7 +507,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *
* addresses, hence include a check before filtering code
*/
if (!(ppmu->flags & PPMU_ARCH_31) &&
- is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
+ is_kernel_addr(addr) && event->attr.exclude_kernel)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c
index b3ac2455faad..637300330507 100644
--- a/arch/powerpc/platforms/pseries/msi.c
+++ b/arch/powerpc/platforms/pseries/msi.c
@@ -4,6 +4,7 @@
* Copyright 2006-2007 Michael Ellerman, IBM Corp.
*/
+#include <linux/crash_dump.h>
#include <linux/device.h>
#include <linux/irq.h>
#include <linux/msi.h>
@@ -458,8 +459,28 @@ again:
return hwirq;
}
- virq = irq_create_mapping_affinity(NULL, hwirq,
- entry->affinity);
+ /*
+ * Depending on the number of online CPUs in the original
+ * kernel, it is likely for CPU #0 to be offline in a kdump
+ * kernel. The associated IRQs in the affinity mappings
+ * provided by irq_create_affinity_masks() are thus not
+ * started by irq_startup(), as per-design for managed IRQs.
+ * This can be a problem with multi-queue block devices driven
+ * by blk-mq : such a non-started IRQ is very likely paired
+ * with the single queue enforced by blk-mq during kdump (see
+ * blk_mq_alloc_tag_set()). This causes the device to remain
+ * silent and likely hangs the guest at some point.
+ *
+ * We don't really care for fine-grained affinity when doing
+ * kdump, actually: simply ignore the pre-computed affinity
+ * masks in this case and let the default mask with all CPUs
+ * be used when creating the IRQ mappings.
+ */
+ if (is_kdump_kernel())
+ virq = irq_create_mapping(NULL, hwirq);
+ else
+ virq = irq_create_mapping_affinity(NULL, hwirq,
+ entry->affinity);
if (!virq) {
pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index b2797cfe4e2b..9cb4fc839fd5 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1261,7 +1261,6 @@ static int vio_bus_remove(struct device *dev)
struct vio_dev *viodev = to_vio_dev(dev);
struct vio_driver *viodrv = to_vio_driver(dev->driver);
struct device *devptr;
- int ret = 1;
/*
* Hold a reference to the device after the remove function is called
@@ -1270,13 +1269,13 @@ static int vio_bus_remove(struct device *dev)
devptr = get_device(dev);
if (viodrv->remove)
- ret = viodrv->remove(viodev);
+ viodrv->remove(viodev);
- if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+ if (firmware_has_feature(FW_FEATURE_CMO))
vio_cmo_bus_remove(viodev);
put_device(devptr);
- return ret;
+ return 0;
}
/**
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index e0a34eb5ed3b..85d626b8ce5e 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -57,6 +57,7 @@ config RISCV
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if MMU && 64BIT
+ select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
select HAVE_ARCH_KGDB
select HAVE_ARCH_KGDB_QXFER_PKT
select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -67,14 +68,19 @@ config RISCV
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_EBPF_JIT if MMU
+ select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_GCC_PLUGINS
select HAVE_GENERIC_VDSO if MMU && 64BIT
select HAVE_IRQ_TIME_ACCOUNTING
+ select HAVE_KPROBES
+ select HAVE_KPROBES_ON_FTRACE
+ select HAVE_KRETPROBES
select HAVE_PCI
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
+ select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
@@ -143,7 +149,7 @@ config PAGE_OFFSET
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
config ARCH_FLATMEM_ENABLE
- def_bool y
+ def_bool !NUMA
config ARCH_SPARSEMEM_ENABLE
def_bool y
@@ -156,6 +162,9 @@ config ARCH_SELECT_MEMORY_MODEL
config ARCH_WANT_GENERAL_HUGETLB
def_bool y
+config ARCH_SUPPORTS_UPROBES
+ def_bool y
+
config SYS_SUPPORTS_HUGETLBFS
depends on MMU
def_bool y
@@ -302,6 +311,36 @@ config TUNE_GENERIC
endchoice
+# Common NUMA Features
+config NUMA
+ bool "NUMA Memory Allocation and Scheduler Support"
+ depends on SMP
+ select GENERIC_ARCH_NUMA
+ select OF_NUMA
+ select ARCH_SUPPORTS_NUMA_BALANCING
+ help
+ Enable NUMA (Non-Uniform Memory Access) support.
+
+ The kernel will try to allocate memory used by a CPU on the
+ local memory of the CPU and add some more NUMA awareness to the kernel.
+
+config NODES_SHIFT
+ int "Maximum NUMA Nodes (as a power of 2)"
+ range 1 10
+ default "2"
+ depends on NEED_MULTIPLE_NODES
+ help
+ Specify the maximum number of NUMA Nodes available on the target
+ system. Increases memory reserved to accommodate various tables.
+
+config USE_PERCPU_NUMA_NODE_ID
+ def_bool y
+ depends on NUMA
+
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+ def_bool y
+ depends on NUMA
+
config RISCV_ISA_C
bool "Emit compressed instructions when building Linux"
default y
@@ -416,11 +455,17 @@ config EFI
allow the kernel to be booted as an EFI application. This
is only useful on systems that have UEFI firmware.
+config CC_HAVE_STACKPROTECTOR_TLS
+ def_bool $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=tp -mstack-protector-guard-offset=0)
+
+config STACKPROTECTOR_PER_TASK
+ def_bool y
+ depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
+
endmenu
config BUILTIN_DTB
def_bool n
- depends on RISCV_M_MODE
depends on OF
menu "Power management options"
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 3284d5c291be..7efcece8896c 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -22,30 +22,41 @@ config SOC_VIRT
help
This enables support for QEMU Virt Machine.
-config SOC_KENDRYTE
- bool "Kendryte K210 SoC"
+config SOC_CANAAN
+ bool "Canaan Kendryte K210 SoC"
depends on !MMU
select CLINT_TIMER if RISCV_M_MODE
select SERIAL_SIFIVE if TTY
select SERIAL_SIFIVE_CONSOLE if TTY
select SIFIVE_PLIC
+ select ARCH_HAS_RESET_CONTROLLER
+ select PINCTRL
help
- This enables support for Kendryte K210 SoC platform hardware.
+ This enables support for Canaan Kendryte K210 SoC platform hardware.
-config SOC_KENDRYTE_K210_DTB
- def_bool y
- depends on SOC_KENDRYTE_K210_DTB_BUILTIN
+if SOC_CANAAN
-config SOC_KENDRYTE_K210_DTB_BUILTIN
- bool "Builtin device tree for the Kendryte K210"
- depends on SOC_KENDRYTE
+config SOC_CANAAN_K210_DTB_BUILTIN
+ bool "Builtin device tree for the Canaan Kendryte K210"
+ depends on SOC_CANAAN
default y
select OF
select BUILTIN_DTB
- select SOC_KENDRYTE_K210_DTB
help
- Builds a device tree for the Kendryte K210 into the Linux image.
+ Build a device tree for the Kendryte K210 into the Linux image.
This option should be selected if no bootloader is being used.
If unsure, say Y.
+config SOC_CANAAN_K210_DTB_SOURCE
+ string "Source file for the Canaan Kendryte K210 builtin DTB"
+ depends on SOC_CANAAN
+ depends on SOC_CANAAN_K210_DTB_BUILTIN
+ default "k210_generic"
+ help
+ Base name (without suffix, relative to arch/riscv/boot/dts/canaan)
+ for the DTS file that will be used to produce the DTB linked into the
+ kernel.
+
+endif
+
endmenu
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 8c29e553ef7f..1368d943f1f3 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -12,6 +12,8 @@ OBJCOPYFLAGS := -O binary
LDFLAGS_vmlinux :=
ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=8
endif
ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
@@ -65,6 +67,16 @@ KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
# architectures. It's faster to have GCC emit only aligned accesses.
KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
+ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
+prepare: stack_protector_prepare
+stack_protector_prepare: prepare0
+ $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls \
+ -mstack-protector-guard-reg=tp \
+ -mstack-protector-guard-offset=$(shell \
+ awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \
+ include/generated/asm-offsets.h))
+endif
+
# arch specific predefines for sparse
CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)
@@ -83,7 +95,7 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
-ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_KENDRYTE),yy)
+ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
KBUILD_IMAGE := $(boot)/loader.bin
else
KBUILD_IMAGE := $(boot)/Image.gz
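The stack-protector hunk above wires up GCC's per-task canary support:
instead of comparing every function's canary against the single global
__stack_chk_guard, the compiler is told to load it from tp (the RISC-V
thread pointer) at the TSK_STACK_CANARY offset scraped out of the generated
asm-offsets.h by the awk one-liner. A toy model of the offset being
extracted (the structure layout is invented; only the offsetof idea
matters):

    #include <stddef.h>
    #include <stdio.h>

    /* Stand-in for struct task_struct; the real TSK_STACK_CANARY is
     * offsetof(struct task_struct, stack_canary) emitted by asm-offsets. */
    struct task_struct_toy {
    	char other_fields[64];
    	unsigned long stack_canary;
    };

    int main(void)
    {
    	printf("TSK_STACK_CANARY would be %zu\n",
    	       offsetof(struct task_struct_toy, stack_canary));
    	return 0;
    }

With -mstack-protector-guard=tls and -mstack-protector-guard-reg=tp, each
task then gets its own canary value rather than one shared kernel-wide.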
diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile
index ca1f8cbd78c0..7ffd502e3e7b 100644
--- a/arch/riscv/boot/dts/Makefile
+++ b/arch/riscv/boot/dts/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
subdir-y += sifive
-subdir-y += kendryte
+subdir-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += canaan
obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y))
diff --git a/arch/riscv/boot/dts/canaan/Makefile b/arch/riscv/boot/dts/canaan/Makefile
new file mode 100644
index 000000000000..9ee7156c0c31
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+ifneq ($(CONFIG_SOC_CANAAN_K210_DTB_SOURCE),"")
+dtb-y += $(strip $(shell echo $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE))).dtb
+obj-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y))
+endif
diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
new file mode 100644
index 000000000000..039b92abf046
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+ model = "Kendryte KD233";
+ compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led0 {
+ gpios = <&gpio0 8 GPIO_ACTIVE_LOW>;
+ };
+
+ led1 {
+ gpios = <&gpio0 9 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ key0 {
+ label = "KEY0";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&fpioa {
+ pinctrl-0 = <&jtag_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(6, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(7, K210_PCF_SPI0_SCLK)>, /* wr */
+ <K210_FPIOA(8, K210_PCF_GPIOHS21)>; /* dc */
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(9, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(10, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(11, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(12, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(13, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(14, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(15, K210_PCF_DVP_PCLK)>,
+ <K210_FPIOA(17, K210_PCF_DVP_HSYNC)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+ <K210_FPIOA(20, K210_PCF_GPIOHS4)>, /* Rot. dip sw line 8 */
+ <K210_FPIOA(21, K210_PCF_GPIOHS5)>, /* Rot. dip sw line 4 */
+ <K210_FPIOA(22, K210_PCF_GPIOHS6)>, /* Rot. dip sw line 2 */
+ <K210_FPIOA(23, K210_PCF_GPIOHS7)>, /* Rot. dip sw line 1 */
+ <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+ <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+ <K210_FPIOA(26, K210_PCF_GPIOHS10)>;
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(29, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(30, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(31, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(32, K210_PCF_GPIOHS16)>; /* cs */
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(33, K210_PCF_I2S0_IN_D0)>,
+ <K210_FPIOA(34, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(35, K210_PCF_I2S0_SCLK)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "ilitek,ili9341";
+ reg = <0>;
+ dc-gpios = <&gpio0 21 GPIO_ACTIVE_HIGH>;
+ spi-max-frequency = <15000000>;
+ status = "disabled";
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 16 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
new file mode 100644
index 000000000000..5e8ca8142482
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/k210.dtsi
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#include <dt-bindings/clock/k210-clk.h>
+#include <dt-bindings/pinctrl/k210-fpioa.h>
+#include <dt-bindings/reset/k210-rst.h>
+
+/ {
+ /*
+ * Although the K210 is a 64-bit CPU, the address bus is only 32 bits
+ * wide, and the upper half of all addresses is ignored.
+ */
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "canaan,kendryte-k210";
+
+ aliases {
+ serial0 = &uarths0;
+ serial1 = &uart1;
+ serial2 = &uart2;
+ serial3 = &uart3;
+ };
+
+ /*
+ * The K210 has an sv39 MMU following the privileged specification v1.9.
+ * Since this is a non-ratified draft specification, the kernel does not
+ * support it, and K210 support is enabled only for the !MMU case.
+ * Be consistent with this by setting the CPUs' MMU type to "none".
+ */
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ timebase-frequency = <7800000>;
+ cpu0: cpu@0 {
+ device_type = "cpu";
+ compatible = "canaan,k210", "riscv";
+ reg = <0>;
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,none";
+ i-cache-block-size = <64>;
+ i-cache-size = <0x8000>;
+ d-cache-block-size = <64>;
+ d-cache-size = <0x8000>;
+ cpu0_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "riscv,cpu-intc";
+ };
+ };
+ cpu1: cpu@1 {
+ device_type = "cpu";
+ compatible = "canaan,k210", "riscv";
+ reg = <1>;
+ riscv,isa = "rv64imafdc";
+ mmu-type = "riscv,none";
+ i-cache-block-size = <64>;
+ i-cache-size = <0x8000>;
+ d-cache-block-size = <64>;
+ d-cache-size = <0x8000>;
+ cpu1_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ compatible = "riscv,cpu-intc";
+ };
+ };
+ };
+
+ sram: memory@80000000 {
+ device_type = "memory";
+ compatible = "canaan,k210-sram";
+ reg = <0x80000000 0x400000>,
+ <0x80400000 0x200000>,
+ <0x80600000 0x200000>;
+ reg-names = "sram0", "sram1", "aisram";
+ clocks = <&sysclk K210_CLK_SRAM0>,
+ <&sysclk K210_CLK_SRAM1>,
+ <&sysclk K210_CLK_AI>;
+ clock-names = "sram0", "sram1", "aisram";
+ };
+
+ clocks {
+ in0: oscillator {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <26000000>;
+ };
+ };
+
+ soc {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-bus";
+ ranges;
+ interrupt-parent = <&plic0>;
+
+ rom0: nvmem@1000 {
+ reg = <0x1000 0x1000>;
+ read-only;
+ };
+
+ clint0: timer@2000000 {
+ compatible = "canaan,k210-clint", "sifive,clint0";
+ reg = <0x2000000 0xC000>;
+ interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
+ &cpu1_intc 3 &cpu1_intc 7>;
+ };
+
+ plic0: interrupt-controller@c000000 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ compatible = "canaan,k210-plic", "sifive,plic-1.0.0";
+ reg = <0xC000000 0x4000000>;
+ interrupt-controller;
+ interrupts-extended = <&cpu0_intc 11 &cpu1_intc 11>;
+ riscv,ndev = <65>;
+ };
+
+ uarths0: serial@38000000 {
+ compatible = "canaan,k210-uarths", "sifive,uart0";
+ reg = <0x38000000 0x1000>;
+ interrupts = <33>;
+ clocks = <&sysclk K210_CLK_CPU>;
+ };
+
+ gpio0: gpio-controller@38001000 {
+ #interrupt-cells = <2>;
+ #gpio-cells = <2>;
+ compatible = "canaan,k210-gpiohs", "sifive,gpio0";
+ reg = <0x38001000 0x1000>;
+ interrupt-controller;
+ interrupts = <34 35 36 37 38 39 40 41
+ 42 43 44 45 46 47 48 49
+ 50 51 52 53 54 55 56 57
+ 58 59 60 61 62 63 64 65>;
+ gpio-controller;
+ ngpios = <32>;
+ };
+
+ dmac0: dma-controller@50000000 {
+ compatible = "snps,axi-dma-1.01a";
+ reg = <0x50000000 0x1000>;
+ interrupts = <27 28 29 30 31 32>;
+ #dma-cells = <1>;
+ clocks = <&sysclk K210_CLK_DMA>, <&sysclk K210_CLK_DMA>;
+ clock-names = "core-clk", "cfgr-clk";
+ resets = <&sysrst K210_RST_DMA>;
+ dma-channels = <6>;
+ snps,dma-masters = <2>;
+ snps,priority = <0 1 2 3 4 5>;
+ snps,data-width = <5>;
+ snps,block-size = <0x200000 0x200000 0x200000
+ 0x200000 0x200000 0x200000>;
+ snps,axi-max-burst-len = <256>;
+ };
+
+ apb0: bus@50200000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-pm-bus";
+ ranges;
+ clocks = <&sysclk K210_CLK_APB0>;
+
+ gpio1: gpio@50200000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dw-apb-gpio";
+ reg = <0x50200000 0x80>;
+ clocks = <&sysclk K210_CLK_APB0>,
+ <&sysclk K210_CLK_GPIO>;
+ clock-names = "bus", "db";
+ resets = <&sysrst K210_RST_GPIO>;
+
+ gpio1_0: gpio-port@0 {
+ #gpio-cells = <2>;
+ #interrupt-cells = <2>;
+ compatible = "snps,dw-apb-gpio-port";
+ reg = <0>;
+ interrupt-controller;
+ interrupts = <23>;
+ gpio-controller;
+ ngpios = <8>;
+ };
+ };
+
+ uart1: serial@50210000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x50210000 0x100>;
+ interrupts = <11>;
+ clocks = <&sysclk K210_CLK_UART1>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&sysrst K210_RST_UART1>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ dcd-override;
+ dsr-override;
+ cts-override;
+ ri-override;
+ };
+
+ uart2: serial@50220000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x50220000 0x100>;
+ interrupts = <12>;
+ clocks = <&sysclk K210_CLK_UART2>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&sysrst K210_RST_UART2>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ dcd-override;
+ dsr-override;
+ cts-override;
+ ri-override;
+ };
+
+ uart3: serial@50230000 {
+ compatible = "snps,dw-apb-uart";
+ reg = <0x50230000 0x100>;
+ interrupts = <13>;
+ clocks = <&sysclk K210_CLK_UART3>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "baudclk", "apb_pclk";
+ resets = <&sysrst K210_RST_UART3>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ dcd-override;
+ dsr-override;
+ cts-override;
+ ri-override;
+ };
+
+ spi2: spi@50240000 {
+ compatible = "canaan,k210-spi";
+ spi-slave;
+ reg = <0x50240000 0x100>;
+ #address-cells = <0>;
+ #size-cells = <0>;
+ interrupts = <3>;
+ clocks = <&sysclk K210_CLK_SPI2>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ssi_clk", "pclk";
+ resets = <&sysrst K210_RST_SPI2>;
+ spi-max-frequency = <25000000>;
+ };
+
+ i2s0: i2s@50250000 {
+ compatible = "snps,designware-i2s";
+ reg = <0x50250000 0x200>;
+ interrupts = <5>;
+ clocks = <&sysclk K210_CLK_I2S0>;
+ clock-names = "i2sclk";
+ resets = <&sysrst K210_RST_I2S0>;
+ };
+
+ i2s1: i2s@50260000 {
+ compatible = "snps,designware-i2s";
+ reg = <0x50260000 0x200>;
+ interrupts = <6>;
+ clocks = <&sysclk K210_CLK_I2S1>;
+ clock-names = "i2sclk";
+ resets = <&sysrst K210_RST_I2S1>;
+ };
+
+ i2s2: i2s@50270000 {
+ compatible = "snps,designware-i2s";
+ reg = <0x50270000 0x200>;
+ interrupts = <7>;
+ clocks = <&sysclk K210_CLK_I2S2>;
+ clock-names = "i2sclk";
+ resets = <&sysrst K210_RST_I2S2>;
+ };
+
+ i2c0: i2c@50280000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x50280000 0x100>;
+ interrupts = <8>;
+ clocks = <&sysclk K210_CLK_I2C0>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_I2C0>;
+ };
+
+ i2c1: i2c@50290000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x50290000 0x100>;
+ interrupts = <9>;
+ clocks = <&sysclk K210_CLK_I2C1>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_I2C1>;
+ };
+
+ i2c2: i2c@502a0000 {
+ compatible = "snps,designware-i2c";
+ reg = <0x502A0000 0x100>;
+ interrupts = <10>;
+ clocks = <&sysclk K210_CLK_I2C2>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_I2C2>;
+ };
+
+ fpioa: pinmux@502b0000 {
+ compatible = "canaan,k210-fpioa";
+ reg = <0x502B0000 0x100>;
+ clocks = <&sysclk K210_CLK_FPIOA>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "ref", "pclk";
+ resets = <&sysrst K210_RST_FPIOA>;
+ canaan,k210-sysctl-power = <&sysctl 108>;
+ };
+
+ timer0: timer@502d0000 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0x502D0000 0x100>;
+ interrupts = <14 15>;
+ clocks = <&sysclk K210_CLK_TIMER0>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "timer", "pclk";
+ resets = <&sysrst K210_RST_TIMER0>;
+ };
+
+ timer1: timer@502e0000 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0x502E0000 0x100>;
+ interrupts = <16 17>;
+ clocks = <&sysclk K210_CLK_TIMER1>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "timer", "pclk";
+ resets = <&sysrst K210_RST_TIMER1>;
+ };
+
+ timer2: timer@502f0000 {
+ compatible = "snps,dw-apb-timer";
+ reg = <0x502F0000 0x100>;
+ interrupts = <18 19>;
+ clocks = <&sysclk K210_CLK_TIMER2>,
+ <&sysclk K210_CLK_APB0>;
+ clock-names = "timer", "pclk";
+ resets = <&sysrst K210_RST_TIMER2>;
+ };
+ };
+
+ apb1: bus@50400000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-pm-bus";
+ ranges;
+ clocks = <&sysclk K210_CLK_APB1>;
+
+ wdt0: watchdog@50400000 {
+ compatible = "snps,dw-wdt";
+ reg = <0x50400000 0x100>;
+ interrupts = <21>;
+ clocks = <&sysclk K210_CLK_WDT0>,
+ <&sysclk K210_CLK_APB1>;
+ clock-names = "tclk", "pclk";
+ resets = <&sysrst K210_RST_WDT0>;
+ };
+
+ wdt1: watchdog@50410000 {
+ compatible = "snps,dw-wdt";
+ reg = <0x50410000 0x100>;
+ interrupts = <22>;
+ clocks = <&sysclk K210_CLK_WDT1>,
+ <&sysclk K210_CLK_APB1>;
+ clock-names = "tclk", "pclk";
+ resets = <&sysrst K210_RST_WDT1>;
+ };
+
+ sysctl: syscon@50440000 {
+ compatible = "canaan,k210-sysctl",
+ "syscon", "simple-mfd";
+ reg = <0x50440000 0x100>;
+ clocks = <&sysclk K210_CLK_APB1>;
+ clock-names = "pclk";
+
+ sysclk: clock-controller {
+ #clock-cells = <1>;
+ compatible = "canaan,k210-clk";
+ clocks = <&in0>;
+ };
+
+ sysrst: reset-controller {
+ compatible = "canaan,k210-rst";
+ #reset-cells = <1>;
+ };
+
+ reboot: syscon-reboot {
+ compatible = "syscon-reboot";
+ regmap = <&sysctl>;
+ offset = <48>;
+ mask = <1>;
+ value = <1>;
+ };
+ };
+ };
+
+ apb2: bus@52000000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ compatible = "simple-pm-bus";
+ ranges;
+ clocks = <&sysclk K210_CLK_APB2>;
+
+ spi0: spi@52000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "canaan,k210-spi";
+ reg = <0x52000000 0x100>;
+ interrupts = <1>;
+ clocks = <&sysclk K210_CLK_SPI0>,
+ <&sysclk K210_CLK_APB2>;
+ clock-names = "ssi_clk", "pclk";
+ resets = <&sysrst K210_RST_SPI0>;
+ reset-names = "spi";
+ spi-max-frequency = <25000000>;
+ num-cs = <4>;
+ reg-io-width = <4>;
+ };
+
+ spi1: spi@53000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "canaan,k210-spi";
+ reg = <0x53000000 0x100>;
+ interrupts = <2>;
+ clocks = <&sysclk K210_CLK_SPI1>,
+ <&sysclk K210_CLK_APB2>;
+ clock-names = "ssi_clk", "pclk";
+ resets = <&sysrst K210_RST_SPI1>;
+ reset-names = "spi";
+ spi-max-frequency = <25000000>;
+ num-cs = <4>;
+ reg-io-width = <4>;
+ };
+
+ spi3: spi@54000000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "snps,dwc-ssi-1.01a";
+ reg = <0x54000000 0x200>;
+ interrupts = <4>;
+ clocks = <&sysclk K210_CLK_SPI3>,
+ <&sysclk K210_CLK_APB2>;
+ clock-names = "ssi_clk", "pclk";
+ resets = <&sysrst K210_RST_SPI3>;
+ reset-names = "spi";
+ /* Could possibly go up to 200 MHz */
+ spi-max-frequency = <100000000>;
+ num-cs = <4>;
+ reg-io-width = <4>;
+ };
+ };
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts
new file mode 100644
index 000000000000..396c8ca4d24d
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/k210_generic.dts
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+ model = "Kendryte K210 generic";
+ compatible = "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+};
+
+&fpioa {
+ pinctrl-0 = <&jtag_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ jtag_pins: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pins: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pins>;
+ pinctrl-names = "default";
+ status = "okay";
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
new file mode 100644
index 000000000000..0bcaf35045e7
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ model = "SiPeed MAIX BiT";
+ compatible = "sipeed,maix-bit", "sipeed,maix-bitm",
+ "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led0 {
+ color = <LED_COLOR_ID_GREEN>;
+ label = "green";
+ gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+ };
+
+ led1 {
+ color = <LED_COLOR_ID_RED>;
+ label = "red";
+ gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+ };
+
+ led2 {
+ color = <LED_COLOR_ID_BLUE>;
+ label = "blue";
+ gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ boot {
+ label = "BOOT";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&fpioa {
+ pinctrl-names = "default";
+ pinctrl-0 = <&jtag_pinctrl>;
+ status = "okay";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+
+ gpio_pinctrl: gpio-pinmux {
+ pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+ <K210_FPIOA(9, K210_PCF_GPIO1)>,
+ <K210_FPIOA(10, K210_PCF_GPIO2)>,
+ <K210_FPIOA(11, K210_PCF_GPIO3)>,
+ <K210_FPIOA(12, K210_PCF_GPIO4)>,
+ <K210_FPIOA(13, K210_PCF_GPIO5)>,
+ <K210_FPIOA(14, K210_PCF_GPIO6)>,
+ <K210_FPIOA(15, K210_PCF_GPIO7)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+ <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+ <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+ <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+ <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+ <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+ <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+ <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+ <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+ <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+ <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+ <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+ <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(37, K210_PCF_GPIOHS21)>, /* rst */
+ <K210_FPIOA(38, K210_PCF_GPIOHS22)>, /* dc */
+ <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+ };
+
+ i2c1_pinctrl: i2c1-pinmux {
+ pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>,
+ <K210_FPIOA(31, K210_PCF_I2C1_SDA)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio1 {
+ pinctrl-0 = <&gpio_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pinctrl>;
+ pinctrl-names = "default";
+ clock-frequency = <400000>;
+ status = "okay";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+ dc-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+ spi-max-frequency = <15000000>;
+ spi-cs-high;
+ status = "disabled";
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
+
+&spi3 {
+ spi-flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
new file mode 100644
index 000000000000..ac8a03f5867a
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ model = "SiPeed MAIX Dock";
+ compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w",
+ "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ /*
+ * Note: the board wiring drawing documents green on
+ * gpio #4, red on gpio #5 and blue on gpio #6. However,
+ * the board is actually wired differently as defined here.
+ */
+ led0 {
+ color = <LED_COLOR_ID_BLUE>;
+ label = "blue";
+ gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+ };
+
+ led1 {
+ color = <LED_COLOR_ID_GREEN>;
+ label = "green";
+ gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+ };
+
+ led2 {
+ color = <LED_COLOR_ID_RED>;
+ label = "red";
+ gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ boot {
+ label = "BOOT";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&fpioa {
+ pinctrl-0 = <&jtag_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+
+ gpio_pinctrl: gpio-pinmux {
+ pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+ <K210_FPIOA(11, K210_PCF_GPIO3)>,
+ <K210_FPIOA(12, K210_PCF_GPIO4)>,
+ <K210_FPIOA(13, K210_PCF_GPIO5)>,
+ <K210_FPIOA(14, K210_PCF_GPIO6)>,
+ <K210_FPIOA(15, K210_PCF_GPIO7)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+ <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+ <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+ <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+ <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+ <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+ <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+ <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+ <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+ <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+ <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+ <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+ <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(37, K210_PCF_GPIOHS21)>, /* rst */
+ <K210_FPIOA(38, K210_PCF_GPIOHS22)>, /* dc */
+ <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+ };
+
+ i2c1_pinctrl: i2c1-pinmux {
+ pinmux = <K210_FPIOA(9, K210_PCF_I2C1_SCLK)>,
+ <K210_FPIOA(10, K210_PCF_I2C1_SDA)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio1 {
+ pinctrl-0 = <&gpio_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pinctrl>;
+ pinctrl-names = "default";
+ clock-frequency = <400000>;
+ status = "okay";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+ dc-gpios = <&gpio0 22 0>;
+ spi-max-frequency = <15000000>;
+ status = "disabled";
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
+
+&spi3 {
+ spi-flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
new file mode 100644
index 000000000000..623998194bc1
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+ model = "SiPeed MAIX GO";
+ compatible = "sipeed,maix-go", "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-leds {
+ compatible = "gpio-leds";
+
+ led0 {
+ color = <LED_COLOR_ID_GREEN>;
+ label = "green";
+ gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+ };
+
+ led1 {
+ color = <LED_COLOR_ID_RED>;
+ label = "red";
+ gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+ };
+
+ led2 {
+ color = <LED_COLOR_ID_BLUE>;
+ label = "blue";
+ gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ up {
+ label = "UP";
+ linux,code = <BTN_1>;
+ gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>;
+ };
+
+ press {
+ label = "PRESS";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+ };
+
+ down {
+ label = "DOWN";
+ linux,code = <BTN_2>;
+ gpios = <&gpio0 1 GPIO_ACTIVE_LOW>;
+ };
+ };
+};
+
+&fpioa {
+ pinctrl-0 = <&jtag_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+
+ jtag_pinctrl: jtag-pinmux {
+ pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+ <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+ <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+ <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+ };
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+ };
+
+ gpio_pinctrl: gpio-pinmux {
+ pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+ <K210_FPIOA(9, K210_PCF_GPIO1)>,
+ <K210_FPIOA(10, K210_PCF_GPIO2)>,
+ <K210_FPIOA(11, K210_PCF_GPIO3)>,
+ <K210_FPIOA(12, K210_PCF_GPIO4)>,
+ <K210_FPIOA(13, K210_PCF_GPIO5)>,
+ <K210_FPIOA(14, K210_PCF_GPIO6)>,
+ <K210_FPIOA(15, K210_PCF_GPIO7)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+ <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+ <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+ <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+ <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+ <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+ <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+ <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+ <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+ <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+ <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+ <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+ <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(37, K210_PCF_GPIOHS21)>, /* rst */
+ <K210_FPIOA(38, K210_PCF_GPIOHS22)>, /* dc */
+ <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+ };
+
+ i2c1_pinctrl: i2c1-pinmux {
+ pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>,
+ <K210_FPIOA(31, K210_PCF_I2C1_SDA)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio1 {
+ pinctrl-0 = <&gpio_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pinctrl>;
+ pinctrl-names = "default";
+ clock-frequency = <400000>;
+ status = "okay";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+ dc-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+ spi-max-frequency = <15000000>;
+ status = "disabled";
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
+
+&spi3 {
+ spi-flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
new file mode 100644
index 000000000000..cf605ba0d67e
--- /dev/null
+++ b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+ model = "SiPeed MAIXDUINO";
+ compatible = "sipeed,maixduino", "canaan,kendryte-k210";
+
+ chosen {
+ bootargs = "earlycon console=ttySIF0";
+ stdout-path = "serial0:115200n8";
+ };
+
+ gpio-keys {
+ compatible = "gpio-keys";
+
+ boot {
+ label = "BOOT";
+ linux,code = <BTN_0>;
+ gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+ vcc_3v3: regulator-3v3 {
+ compatible = "regulator-fixed";
+ regulator-name = "3v3";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+};
+
+&fpioa {
+ status = "okay";
+
+ uarths_pinctrl: uarths-pinmux {
+ pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>, /* Header "0" */
+ <K210_FPIOA(5, K210_PCF_UARTHS_TX)>; /* Header "1" */
+ };
+
+ gpio_pinctrl: gpio-pinmux {
+ pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+ <K210_FPIOA(9, K210_PCF_GPIO1)>;
+ };
+
+ gpiohs_pinctrl: gpiohs-pinmux {
+ pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>, /* BOOT */
+ <K210_FPIOA(21, K210_PCF_GPIOHS2)>, /* Header "2" */
+ <K210_FPIOA(22, K210_PCF_GPIOHS3)>, /* Header "3" */
+ <K210_FPIOA(23, K210_PCF_GPIOHS4)>, /* Header "4" */
+ <K210_FPIOA(24, K210_PCF_GPIOHS5)>, /* Header "5" */
+ <K210_FPIOA(32, K210_PCF_GPIOHS6)>, /* Header "6" */
+ <K210_FPIOA(15, K210_PCF_GPIOHS7)>, /* Header "7" */
+ <K210_FPIOA(14, K210_PCF_GPIOHS8)>, /* Header "8" */
+ <K210_FPIOA(13, K210_PCF_GPIOHS9)>, /* Header "9" */
+ <K210_FPIOA(12, K210_PCF_GPIOHS10)>, /* Header "10" */
+ <K210_FPIOA(11, K210_PCF_GPIOHS11)>, /* Header "11" */
+ <K210_FPIOA(10, K210_PCF_GPIOHS12)>, /* Header "12" */
+ <K210_FPIOA(3, K210_PCF_GPIOHS13)>; /* Header "13" */
+ };
+
+ i2s0_pinctrl: i2s0-pinmux {
+ pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+ <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+ <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+ };
+
+ spi1_pinctrl: spi1-pinmux {
+ pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+ <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+ <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+ <K210_FPIOA(29, K210_PCF_GPIO2)>; /* cs */
+ };
+
+ i2c1_pinctrl: i2c1-pinmux {
+ pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>, /* Header "scl" */
+ <K210_FPIOA(31, K210_PCF_I2C1_SDA)>; /* Header "sda" */
+ };
+
+ i2s1_pinctrl: i2s1-pinmux {
+ pinmux = <K210_FPIOA(33, K210_PCF_I2S1_WS)>,
+ <K210_FPIOA(34, K210_PCF_I2S1_IN_D0)>,
+ <K210_FPIOA(35, K210_PCF_I2S1_SCLK)>;
+ };
+
+ spi0_pinctrl: spi0-pinmux {
+ pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>, /* cs */
+ <K210_FPIOA(37, K210_PCF_GPIOHS21)>, /* rst */
+ <K210_FPIOA(38, K210_PCF_GPIOHS22)>, /* dc */
+ <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+ };
+
+ dvp_pinctrl: dvp-pinmux {
+ pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+ <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+ <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+ <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+ <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+ <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+ <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+ <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+ };
+};
+
+&uarths0 {
+ pinctrl-0 = <&uarths_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio0 {
+ pinctrl-0 = <&gpiohs_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&gpio1 {
+ pinctrl-0 = <&gpio_pinctrl>;
+ pinctrl-names = "default";
+ status = "okay";
+};
+
+&i2s0 {
+ #sound-dai-cells = <1>;
+ pinctrl-0 = <&i2s0_pinctrl>;
+ pinctrl-names = "default";
+};
+
+&i2c1 {
+ pinctrl-0 = <&i2c1_pinctrl>;
+ pinctrl-names = "default";
+ clock-frequency = <400000>;
+ status = "okay";
+};
+
+&spi0 {
+ pinctrl-0 = <&spi0_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+ panel@0 {
+ compatible = "sitronix,st7789v";
+ reg = <0>;
+ reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+ dc-gpios = <&gpio0 22 0>;
+ spi-max-frequency = <15000000>;
+ power-supply = <&vcc_3v3>;
+ };
+};
+
+&spi1 {
+ pinctrl-0 = <&spi1_pinctrl>;
+ pinctrl-names = "default";
+ num-cs = <1>;
+ cs-gpios = <&gpio1_0 2 GPIO_ACTIVE_LOW>;
+ status = "okay";
+
+ slot@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ voltage-ranges = <3300 3300>;
+ spi-max-frequency = <25000000>;
+ broken-cd;
+ };
+};
+
+&spi3 {
+ spi-flash@0 {
+ compatible = "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ m25p,fast-read;
+ broken-flash-reset;
+ };
+};
diff --git a/arch/riscv/boot/dts/kendryte/Makefile b/arch/riscv/boot/dts/kendryte/Makefile
deleted file mode 100644
index 1a88e616f18e..000000000000
--- a/arch/riscv/boot/dts/kendryte/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-dtb-$(CONFIG_SOC_KENDRYTE_K210_DTB) += k210.dtb
-
-obj-$(CONFIG_SOC_KENDRYTE_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/kendryte/k210.dts b/arch/riscv/boot/dts/kendryte/k210.dts
deleted file mode 100644
index 0d1f28fce6b2..000000000000
--- a/arch/riscv/boot/dts/kendryte/k210.dts
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2020 Western Digital Corporation or its affiliates.
- */
-
-/dts-v1/;
-
-#include "k210.dtsi"
-
-/ {
- model = "Kendryte K210 generic";
- compatible = "kendryte,k210";
-
- chosen {
- bootargs = "earlycon console=ttySIF0";
- stdout-path = "serial0";
- };
-};
-
-&uarths0 {
- status = "okay";
-};
-
diff --git a/arch/riscv/boot/dts/kendryte/k210.dtsi b/arch/riscv/boot/dts/kendryte/k210.dtsi
deleted file mode 100644
index d2d0ff645632..000000000000
--- a/arch/riscv/boot/dts/kendryte/k210.dtsi
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2019 Sean Anderson <seanga2@gmail.com>
- * Copyright (C) 2020 Western Digital Corporation or its affiliates.
- */
-#include <dt-bindings/clock/k210-clk.h>
-
-/ {
- /*
- * Although the K210 is a 64-bit CPU, the address bus is only 32-bits
- * wide, and the upper half of all addresses is ignored.
- */
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "kendryte,k210";
-
- aliases {
- serial0 = &uarths0;
- };
-
- /*
- * The K210 has an sv39 MMU following the privilege specification v1.9.
- * Since this is a non-ratified draft specification, the kernel does not
- * support it, and K210 support is enabled only for the !MMU case.
- * Be consistent with this by setting the CPUs MMU type to "none".
- */
- cpus {
- #address-cells = <1>;
- #size-cells = <0>;
- timebase-frequency = <7800000>;
- cpu0: cpu@0 {
- device_type = "cpu";
- reg = <0>;
- compatible = "kendryte,k210", "sifive,rocket0", "riscv";
- riscv,isa = "rv64imafdc";
- mmu-type = "none";
- i-cache-size = <0x8000>;
- i-cache-block-size = <64>;
- d-cache-size = <0x8000>;
- d-cache-block-size = <64>;
- clocks = <&sysctl K210_CLK_CPU>;
- clock-frequency = <390000000>;
- cpu0_intc: interrupt-controller {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "riscv,cpu-intc";
- };
- };
- cpu1: cpu@1 {
- device_type = "cpu";
- reg = <1>;
- compatible = "kendryte,k210", "sifive,rocket0", "riscv";
- riscv,isa = "rv64imafdc";
- mmu-type = "none";
- i-cache-size = <0x8000>;
- i-cache-block-size = <64>;
- d-cache-size = <0x8000>;
- d-cache-block-size = <64>;
- clocks = <&sysctl K210_CLK_CPU>;
- clock-frequency = <390000000>;
- cpu1_intc: interrupt-controller {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "riscv,cpu-intc";
- };
- };
- };
-
- sram: memory@80000000 {
- device_type = "memory";
- reg = <0x80000000 0x400000>,
- <0x80400000 0x200000>,
- <0x80600000 0x200000>;
- reg-names = "sram0", "sram1", "aisram";
- };
-
- clocks {
- in0: oscillator {
- compatible = "fixed-clock";
- #clock-cells = <0>;
- clock-frequency = <26000000>;
- };
- };
-
- soc {
- #address-cells = <1>;
- #size-cells = <1>;
- compatible = "kendryte,k210-soc", "simple-bus";
- ranges;
- interrupt-parent = <&plic0>;
-
- sysctl: sysctl@50440000 {
- compatible = "kendryte,k210-sysctl", "simple-mfd";
- reg = <0x50440000 0x1000>;
- #clock-cells = <1>;
- };
-
- clint0: clint@2000000 {
- #interrupt-cells = <1>;
- compatible = "riscv,clint0";
- reg = <0x2000000 0xC000>;
- interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
- &cpu1_intc 3 &cpu1_intc 7>;
- clocks = <&sysctl K210_CLK_ACLK>;
- };
-
- plic0: interrupt-controller@c000000 {
- #interrupt-cells = <1>;
- interrupt-controller;
- compatible = "kendryte,k210-plic0", "riscv,plic0";
- reg = <0xC000000 0x4000000>;
- interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 0xffffffff>,
- <&cpu1_intc 11>, <&cpu1_intc 0xffffffff>;
- riscv,ndev = <65>;
- riscv,max-priority = <7>;
- };
-
- uarths0: serial@38000000 {
- compatible = "kendryte,k210-uarths", "sifive,uart0";
- reg = <0x38000000 0x1000>;
- interrupts = <33>;
- clocks = <&sysctl K210_CLK_CPU>;
- };
- };
-};
diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile
index 6d6189e6e4af..74c47fe9fc22 100644
--- a/arch/riscv/boot/dts/sifive/Makefile
+++ b/arch/riscv/boot/dts/sifive/Makefile
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb
+dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb \
+ hifive-unmatched-a00.dtb
diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
new file mode 100644
index 000000000000..eeb4f8c3e0e7
--- /dev/null
+++ b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020 SiFive, Inc */
+
+/dts-v1/;
+
+#include <dt-bindings/clock/sifive-fu740-prci.h>
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ compatible = "sifive,fu740-c000", "sifive,fu740";
+
+ aliases {
+ serial0 = &uart0;
+ serial1 = &uart1;
+ ethernet0 = &eth0;
+ };
+
+ chosen {
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ cpu0: cpu@0 {
+ compatible = "sifive,bullet0", "riscv";
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <16384>;
+ next-level-cache = <&ccache>;
+ reg = <0x0>;
+ riscv,isa = "rv64imac";
+ status = "disabled";
+ cpu0_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ cpu1: cpu@1 {
+ compatible = "sifive,bullet0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <40>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <40>;
+ mmu-type = "riscv,sv39";
+ next-level-cache = <&ccache>;
+ reg = <0x1>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ cpu1_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ cpu2: cpu@2 {
+ compatible = "sifive,bullet0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <40>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <40>;
+ mmu-type = "riscv,sv39";
+ next-level-cache = <&ccache>;
+ reg = <0x2>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ cpu2_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ cpu3: cpu@3 {
+ compatible = "sifive,bullet0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <40>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <40>;
+ mmu-type = "riscv,sv39";
+ next-level-cache = <&ccache>;
+ reg = <0x3>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ cpu3_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ cpu4: cpu@4 {
+ compatible = "sifive,bullet0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <40>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <40>;
+ mmu-type = "riscv,sv39";
+ next-level-cache = <&ccache>;
+ reg = <0x4>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ cpu4_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ };
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ compatible = "simple-bus";
+ ranges;
+ plic0: interrupt-controller@c000000 {
+ #interrupt-cells = <1>;
+ #address-cells = <0>;
+ compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
+ reg = <0x0 0xc000000 0x0 0x4000000>;
+ riscv,ndev = <69>;
+ interrupt-controller;
+ interrupts-extended = <
+ &cpu0_intc 0xffffffff
+ &cpu1_intc 0xffffffff &cpu1_intc 9
+ &cpu2_intc 0xffffffff &cpu2_intc 9
+ &cpu3_intc 0xffffffff &cpu3_intc 9
+ &cpu4_intc 0xffffffff &cpu4_intc 9>;
+ };
+ prci: clock-controller@10000000 {
+ compatible = "sifive,fu740-c000-prci";
+ reg = <0x0 0x10000000 0x0 0x1000>;
+ clocks = <&hfclk>, <&rtcclk>;
+ #clock-cells = <1>;
+ };
+ uart0: serial@10010000 {
+ compatible = "sifive,fu740-c000-uart", "sifive,uart0";
+ reg = <0x0 0x10010000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <39>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ status = "disabled";
+ };
+ uart1: serial@10011000 {
+ compatible = "sifive,fu740-c000-uart", "sifive,uart0";
+ reg = <0x0 0x10011000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <40>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ status = "disabled";
+ };
+ i2c0: i2c@10030000 {
+ compatible = "sifive,fu740-c000-i2c", "sifive,i2c0";
+ reg = <0x0 0x10030000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <52>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ reg-shift = <2>;
+ reg-io-width = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ i2c1: i2c@10031000 {
+ compatible = "sifive,fu740-c000-i2c", "sifive,i2c0";
+ reg = <0x0 0x10031000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <53>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ reg-shift = <2>;
+ reg-io-width = <1>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ qspi0: spi@10040000 {
+ compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+ reg = <0x0 0x10040000 0x0 0x1000>,
+ <0x0 0x20000000 0x0 0x10000000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <41>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ qspi1: spi@10041000 {
+ compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+ reg = <0x0 0x10041000 0x0 0x1000>,
+ <0x0 0x30000000 0x0 0x10000000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <42>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ spi0: spi@10050000 {
+ compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+ reg = <0x0 0x10050000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <43>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ eth0: ethernet@10090000 {
+ compatible = "sifive,fu540-c000-gem";
+ interrupt-parent = <&plic0>;
+ interrupts = <55>;
+ reg = <0x0 0x10090000 0x0 0x2000>,
+ <0x0 0x100a0000 0x0 0x1000>;
+ local-mac-address = [00 00 00 00 00 00];
+ clock-names = "pclk", "hclk";
+ clocks = <&prci PRCI_CLK_GEMGXLPLL>,
+ <&prci PRCI_CLK_GEMGXLPLL>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+ pwm0: pwm@10020000 {
+ compatible = "sifive,fu740-c000-pwm", "sifive,pwm0";
+ reg = <0x0 0x10020000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <44>, <45>, <46>, <47>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+ pwm1: pwm@10021000 {
+ compatible = "sifive,fu740-c000-pwm", "sifive,pwm0";
+ reg = <0x0 0x10021000 0x0 0x1000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <48>, <49>, <50>, <51>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ #pwm-cells = <3>;
+ status = "disabled";
+ };
+ ccache: cache-controller@2010000 {
+ compatible = "sifive,fu740-c000-ccache", "cache";
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-sets = <2048>;
+ cache-size = <2097152>;
+ cache-unified;
+ interrupt-parent = <&plic0>;
+ interrupts = <19 20 21 22>;
+ reg = <0x0 0x2010000 0x0 0x1000>;
+ };
+ gpio: gpio@10060000 {
+ compatible = "sifive,fu740-c000-gpio", "sifive,gpio0";
+ interrupt-parent = <&plic0>;
+ interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
+ <30>, <31>, <32>, <33>, <34>, <35>, <36>,
+ <37>, <38>;
+ reg = <0x0 0x10060000 0x0 0x1000>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ clocks = <&prci PRCI_CLK_PCLK>;
+ status = "disabled";
+ };
+ };
+};
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
new file mode 100644
index 000000000000..b1c3c596578f
--- /dev/null
+++ b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020 SiFive, Inc */
+
+#include "fu740-c000.dtsi"
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/* Clock frequency (in Hz) of the PCB crystal for rtcclk */
+#define RTCCLK_FREQ 1000000
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ model = "SiFive HiFive Unmatched A00";
+ compatible = "sifive,hifive-unmatched-a00", "sifive,fu740-c000",
+ "sifive,fu740";
+
+ chosen {
+ stdout-path = "serial0";
+ };
+
+ cpus {
+ timebase-frequency = <RTCCLK_FREQ>;
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x2 0x00000000>;
+ };
+
+ soc {
+ };
+
+ hfclk: hfclk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <26000000>;
+ clock-output-names = "hfclk";
+ };
+
+ rtcclk: rtcclk {
+ #clock-cells = <0>;
+ compatible = "fixed-clock";
+ clock-frequency = <RTCCLK_FREQ>;
+ clock-output-names = "rtcclk";
+ };
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&uart1 {
+ status = "okay";
+};
+
+&i2c0 {
+ status = "okay";
+
+ temperature-sensor@4c {
+ compatible = "ti,tmp451";
+ reg = <0x4c>;
+ interrupt-parent = <&gpio>;
+ interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+ };
+
+ pmic@58 {
+ compatible = "dlg,da9063";
+ reg = <0x58>;
+ interrupt-parent = <&gpio>;
+ interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+ interrupt-controller;
+
+ regulators {
+ vdd_bcore1: bcore1 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <900000>;
+ regulator-min-microamp = <5000000>;
+ regulator-max-microamp = <5000000>;
+ regulator-always-on;
+ };
+
+ vdd_bcore2: bcore2 {
+ regulator-min-microvolt = <900000>;
+ regulator-max-microvolt = <900000>;
+ regulator-min-microamp = <5000000>;
+ regulator-max-microamp = <5000000>;
+ regulator-always-on;
+ };
+
+ vdd_bpro: bpro {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <2500000>;
+ regulator-max-microamp = <2500000>;
+ regulator-always-on;
+ };
+
+ vdd_bperi: bperi {
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-min-microamp = <1500000>;
+ regulator-max-microamp = <1500000>;
+ regulator-always-on;
+ };
+
+ vdd_bmem: bmem {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-min-microamp = <3000000>;
+ regulator-max-microamp = <3000000>;
+ regulator-always-on;
+ };
+
+ vdd_bio: bio {
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
+ regulator-min-microamp = <3000000>;
+ regulator-max-microamp = <3000000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo1: ldo1 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <100000>;
+ regulator-max-microamp = <100000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo2: ldo2 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <200000>;
+ regulator-max-microamp = <200000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo3: ldo3 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <200000>;
+ regulator-max-microamp = <200000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo4: ldo4 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <200000>;
+ regulator-max-microamp = <200000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo5: ldo5 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <100000>;
+ regulator-max-microamp = <100000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo6: ldo6 {
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ regulator-min-microamp = <200000>;
+ regulator-max-microamp = <200000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo7: ldo7 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <200000>;
+ regulator-max-microamp = <200000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo8: ldo8 {
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ regulator-min-microamp = <200000>;
+ regulator-max-microamp = <200000>;
+ regulator-always-on;
+ };
+
+ vdd_ldo9: ldo9 {
+ regulator-min-microvolt = <1050000>;
+ regulator-max-microvolt = <1050000>;
+ regulator-min-microamp = <200000>;
+ regulator-max-microamp = <200000>;
+ };
+
+ vdd_ldo10: ldo10 {
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ regulator-min-microamp = <300000>;
+ regulator-max-microamp = <300000>;
+ };
+
+ vdd_ldo11: ldo11 {
+ regulator-min-microvolt = <2500000>;
+ regulator-max-microvolt = <2500000>;
+ regulator-min-microamp = <300000>;
+ regulator-max-microamp = <300000>;
+ regulator-always-on;
+ };
+ };
+ };
+};
+
+&qspi0 {
+ status = "okay";
+ flash@0 {
+ compatible = "issi,is25wp256", "jedec,spi-nor";
+ reg = <0>;
+ spi-max-frequency = <50000000>;
+ m25p,fast-read;
+ spi-tx-bus-width = <4>;
+ spi-rx-bus-width = <4>;
+ };
+};
+
+&spi0 {
+ status = "okay";
+ mmc@0 {
+ compatible = "mmc-spi-slot";
+ reg = <0>;
+ spi-max-frequency = <20000000>;
+ voltage-ranges = <3300 3300>;
+ disable-wp;
+ };
+};
+
+&eth0 {
+ status = "okay";
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+};
+
+&pwm0 {
+ status = "okay";
+};
+
+&pwm1 {
+ status = "okay";
+};
+
+&gpio {
+ status = "okay";
+};
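
For the always-on supplies declared above, here is a consumer-side sketch using the standard regulator API; the device and the "vdd" supply name are hypothetical.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/regulator/consumer.h>

/* Hypothetical consumer: look up a supply and report its voltage. */
static int demo_get_supply(struct device *dev)
{
        struct regulator *reg = devm_regulator_get(dev, "vdd");

        if (IS_ERR(reg))
                return PTR_ERR(reg);

        dev_info(dev, "vdd is %d uV\n", regulator_get_voltage(reg));
        return 0;
}
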
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 8c3d1e451703..6c0625aa96c7 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -17,6 +17,7 @@ CONFIG_BPF_SYSCALL=y
CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig
index cd1df62b13c7..b16a2a12c82a 100644
--- a/arch/riscv/configs/nommu_k210_defconfig
+++ b/arch/riscv/configs/nommu_k210_defconfig
@@ -1,17 +1,19 @@
# CONFIG_CPU_ISOLATION is not set
-CONFIG_LOG_BUF_SHIFT=15
+CONFIG_LOG_BUF_SHIFT=13
CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_FORCE=y
+# CONFIG_RD_GZIP is not set
# CONFIG_RD_BZIP2 is not set
# CONFIG_RD_LZMA is not set
# CONFIG_RD_XZ is not set
# CONFIG_RD_LZO is not set
# CONFIG_RD_LZ4 is not set
+# CONFIG_RD_ZSTD is not set
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
# CONFIG_SYSFS_SYSCALL is not set
# CONFIG_FHANDLE is not set
# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
# CONFIG_EPOLL is not set
# CONFIG_SIGNALFD is not set
# CONFIG_TIMERFD is not set
@@ -25,15 +27,17 @@ CONFIG_EMBEDDED=y
# CONFIG_VM_EVENT_COUNTERS is not set
# CONFIG_COMPAT_BRK is not set
CONFIG_SLOB=y
-# CONFIG_SLAB_MERGE_DEFAULT is not set
# CONFIG_MMU is not set
-CONFIG_SOC_KENDRYTE=y
+CONFIG_SOC_CANAAN=y
+CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
CONFIG_MAXPHYSMEM_2GB=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
CONFIG_CMDLINE="earlycon console=ttySIF0"
CONFIG_CMDLINE_FORCE=y
-CONFIG_JUMP_LABEL=y
+# CONFIG_SECCOMP is not set
+# CONFIG_STACKPROTECTOR is not set
+# CONFIG_GCC_PLUGINS is not set
# CONFIG_BLOCK is not set
CONFIG_BINFMT_FLAT=y
# CONFIG_COREDUMP is not set
@@ -41,23 +45,47 @@ CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
# CONFIG_FW_LOADER is not set
# CONFIG_ALLOW_DEV_COREDUMP is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_LDISC_AUTOLOAD is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_DEVMEM is not set
+CONFIG_I2C=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_SPI=y
+# CONFIG_SPI_MEM is not set
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
+# CONFIG_GPIO_CDEV_V1 is not set
+CONFIG_GPIO_DWAPB=y
+CONFIG_GPIO_SIFIVE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_USER=y
# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
+# CONFIG_SURFACE_PLATFORMS is not set
+# CONFIG_FILE_LOCKING is not set
# CONFIG_DNOTIFY is not set
# CONFIG_INOTIFY_USER is not set
# CONFIG_MISC_FILESYSTEMS is not set
CONFIG_LSM="[]"
CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
+# CONFIG_FRAME_POINTER is not set
# CONFIG_DEBUG_MISC is not set
CONFIG_PANIC_ON_OOPS=y
# CONFIG_SCHED_DEBUG is not set
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
new file mode 100644
index 000000000000..61f887f65419
--- /dev/null
+++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig
@@ -0,0 +1,92 @@
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_FHANDLE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
+# CONFIG_IO_URING is not set
+# CONFIG_ADVISE_SYSCALLS is not set
+# CONFIG_MEMBARRIER is not set
+# CONFIG_KALLSYMS is not set
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLOB=y
+# CONFIG_MMU is not set
+CONFIG_SOC_CANAAN=y
+CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
+CONFIG_MAXPHYSMEM_2GB=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
+CONFIG_CMDLINE_FORCE=y
+# CONFIG_SECCOMP is not set
+# CONFIG_STACKPROTECTOR is not set
+# CONFIG_GCC_PLUGINS is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+CONFIG_BINFMT_FLAT=y
+# CONFIG_COREDUMP is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_LDISC_AUTOLOAD is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_DEVMEM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_SPI=y
+# CONFIG_SPI_MEM is not set
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
+# CONFIG_GPIO_CDEV_V1 is not set
+CONFIG_GPIO_DWAPB=y
+CONFIG_GPIO_SIFIVE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+CONFIG_MMC_SPI=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_USER=y
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
+# CONFIG_SURFACE_PLATFORMS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_FILE_LOCKING is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_LSM="[]"
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_MISC is not set
+CONFIG_PANIC_ON_OOPS=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_RCU_TRACE is not set
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 2c2cda6cc1c5..8dd02b842fef 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -18,6 +18,7 @@ CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
CONFIG_ARCH_RV32I=y
CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h
index d6f1ec08d97b..d3804a2f9aad 100644
--- a/arch/riscv/include/asm/bug.h
+++ b/arch/riscv/include/asm/bug.h
@@ -85,6 +85,7 @@ do { \
struct pt_regs;
struct task_struct;
+void __show_regs(struct pt_regs *regs);
void die(struct pt_regs *regs, const char *str);
void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index cec462e198ce..caadfc1d7487 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -41,10 +41,16 @@
#define SATP_PPN _AC(0x003FFFFF, UL)
#define SATP_MODE_32 _AC(0x80000000, UL)
#define SATP_MODE SATP_MODE_32
+#define SATP_ASID_BITS 9
+#define SATP_ASID_SHIFT 22
+#define SATP_ASID_MASK _AC(0x1FF, UL)
#else
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
#define SATP_MODE SATP_MODE_39
+#define SATP_ASID_BITS 16
+#define SATP_ASID_SHIFT 44
+#define SATP_ASID_MASK _AC(0xFFFF, UL)
#endif
/* Exception cause high bit - is an interrupt if set */
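
A hedged sketch of how the new SATP_ASID_* fields combine into a satp value (Sv39 case shown); make_satp() and its arguments are illustrative, not kernel functions.

/* Illustrative only: compose mode, ASID and root page-table PPN. */
static inline unsigned long make_satp(unsigned long ppn, unsigned long asid)
{
        return SATP_MODE_39 |
               ((asid & SATP_ASID_MASK) << SATP_ASID_SHIFT) |
               (ppn & SATP_PPN);
}
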
diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h
index b04028c6218c..a2b3d9cdbc86 100644
--- a/arch/riscv/include/asm/kasan.h
+++ b/arch/riscv/include/asm/kasan.h
@@ -8,12 +8,28 @@
#ifdef CONFIG_KASAN
+/*
+ * The following comment was copied from arm64:
+ * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
+ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses,
+ * where N = (1 << KASAN_SHADOW_SCALE_SHIFT).
+ *
+ * KASAN_SHADOW_OFFSET:
+ * This value is used to map an address to the corresponding shadow
+ * address by the following formula:
+ * shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
+ *
+ * (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in range
+ * [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64-bits of virtual
+ * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation:
+ * KASAN_SHADOW_OFFSET = KASAN_SHADOW_END -
+ * (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT))
+ */
#define KASAN_SHADOW_SCALE_SHIFT 3
-#define KASAN_SHADOW_SIZE (UL(1) << (38 - KASAN_SHADOW_SCALE_SHIFT))
-#define KASAN_SHADOW_START KERN_VIRT_START /* 2^64 - 2^38 */
+#define KASAN_SHADOW_SIZE (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
+#define KASAN_SHADOW_START KERN_VIRT_START
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
-
#define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \
(64 - KASAN_SHADOW_SCALE_SHIFT)))
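
The formula in the comment above is what the generic kasan_mem_to_shadow() helper computes; as a sketch of the mapping, with KASAN_SHADOW_SCALE_SHIFT == 3 each shadow byte covers 8 bytes of address space:

/* Mirrors include/linux/kasan.h: shadow = (addr >> scale) + offset. */
static inline void *example_mem_to_shadow(const void *addr)
{
        return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
                + KASAN_SHADOW_OFFSET;
}
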
diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index 56a98ea30731..4647d38018f6 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -11,4 +11,44 @@
#include <asm-generic/kprobes.h>
+#ifdef CONFIG_KPROBES
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE 2
+
+#define flush_insn_slot(p) do { } while (0)
+#define kretprobe_blacklist_size 0
+
+#include <asm/probes.h>
+
+struct prev_kprobe {
+ struct kprobe *kp;
+ unsigned int status;
+};
+
+/* Single step context for kprobe */
+struct kprobe_step_ctx {
+ unsigned long ss_pending;
+ unsigned long match_addr;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+ unsigned int kprobe_status;
+ unsigned long saved_status;
+ struct prev_kprobe prev_kprobe;
+ struct kprobe_step_ctx ss_ctx;
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr);
+bool kprobe_breakpoint_handler(struct pt_regs *regs);
+bool kprobe_single_step_handler(struct pt_regs *regs);
+void kretprobe_trampoline(void);
+void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
#endif /* _ASM_RISCV_KPROBES_H */
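
With this header in place, the generic kprobes API becomes usable on riscv. A minimal registration sketch in the style of samples/kprobes follows; the probed symbol is an arbitrary example.

#include <linux/kprobes.h>

static int demo_pre(struct kprobe *p, struct pt_regs *regs)
{
        pr_info("pre-handler hit at %pS\n", (void *)instruction_pointer(regs));
        return 0;       /* let execution continue normally */
}

static struct kprobe demo_kp = {
        .symbol_name = "do_sys_open",   /* arbitrary example target */
        .pre_handler = demo_pre,
};

/* register_kprobe(&demo_kp) arms the probe; unregister_kprobe() disarms it. */
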
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index dabcf2cfb3dc..0099dc116168 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -12,6 +12,8 @@
typedef struct {
#ifndef CONFIG_MMU
unsigned long end_brk;
+#else
+ atomic_long_t id;
#endif
void *vdso;
#ifdef CONFIG_SMP
diff --git a/arch/riscv/include/asm/mmu_context.h b/arch/riscv/include/asm/mmu_context.h
index 250defa06f3a..b0659413a080 100644
--- a/arch/riscv/include/asm/mmu_context.h
+++ b/arch/riscv/include/asm/mmu_context.h
@@ -23,6 +23,16 @@ static inline void activate_mm(struct mm_struct *prev,
switch_mm(prev, next, NULL);
}
+#define init_new_context init_new_context
+static inline int init_new_context(struct task_struct *tsk,
+ struct mm_struct *mm)
+{
+#ifdef CONFIG_MMU
+ atomic_long_set(&mm->context.id, 0);
+#endif
+ return 0;
+}
+
#include <asm-generic/mmu_context.h>
#endif /* _ASM_RISCV_MMU_CONTEXT_H */
diff --git a/arch/riscv/include/asm/mmzone.h b/arch/riscv/include/asm/mmzone.h
new file mode 100644
index 000000000000..fa17e01d9ab2
--- /dev/null
+++ b/arch/riscv/include/asm/mmzone.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMZONE_H
+#define __ASM_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+#include <asm/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid) (node_data[(nid)])
+
+#endif /* CONFIG_NUMA */
+#endif /* __ASM_MMZONE_H */
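
Usage sketch for the NODE_DATA() accessor above; the loop body is illustrative, while for_each_online_node() and node_spanned_pages are standard kernel interfaces.

#include <linux/mmzone.h>
#include <linux/nodemask.h>
#include <linux/printk.h>

static void demo_dump_nodes(void)
{
        int nid;

        for_each_online_node(nid)
                pr_info("node %d spans %lu pages\n", nid,
                        NODE_DATA(nid)->node_spanned_pages);
}
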
diff --git a/arch/riscv/include/asm/numa.h b/arch/riscv/include/asm/numa.h
new file mode 100644
index 000000000000..8c8cf4297cc3
--- /dev/null
+++ b/arch/riscv/include/asm/numa.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_NUMA_H
+#define __ASM_NUMA_H
+
+#include <asm/topology.h>
+#include <asm-generic/numa.h>
+
+#endif /* __ASM_NUMA_H */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 64a675c5c30a..adc9d26f3d75 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -97,9 +97,6 @@ extern unsigned long pfn_base;
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
-extern unsigned long max_low_pfn;
-extern unsigned long min_low_pfn;
-
#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h
index 1c473a1bd986..658e112c3ce7 100644
--- a/arch/riscv/include/asm/pci.h
+++ b/arch/riscv/include/asm/pci.h
@@ -32,6 +32,20 @@ static inline int pci_proc_domain(struct pci_bus *bus)
/* always show the domain in /proc */
return 1;
}
+
+#ifdef CONFIG_NUMA
+
+static inline int pcibus_to_node(struct pci_bus *bus)
+{
+ return dev_to_node(&bus->dev);
+}
+#ifndef cpumask_of_pcibus
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \
+ cpu_all_mask : \
+ cpumask_of_node(pcibus_to_node(bus)))
+#endif
+#endif /* CONFIG_NUMA */
+
#endif /* CONFIG_PCI */
#endif /* _ASM_RISCV_PCI_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 251e1db088fa..ebf817c1bdf4 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -186,6 +186,11 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PAGE_PFN_SHIFT);
}
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+ return __pte(pmd_val(pmd));
+}
+
/* Yields the page frame number (PFN) of a page table entry */
static inline unsigned long pte_pfn(pte_t pte)
{
@@ -289,6 +294,21 @@ static inline pte_t pte_mkhuge(pte_t pte)
return pte;
}
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * See the comment in include/asm-generic/pgtable.h
+ */
+static inline int pte_protnone(pte_t pte)
+{
+ return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) == _PAGE_PROT_NONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+ return pte_protnone(pmd_pte(pmd));
+}
+#endif
+
/* Modify page protection bits */
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
@@ -468,6 +488,7 @@ extern void *dtb_early_va;
extern uintptr_t dtb_early_pa;
void setup_bootmem(void);
void paging_init(void);
+void misc_mem_init(void);
#define FIRST_USER_ADDRESS 0
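
The pte_protnone() test above reads as "PROT_NONE bit set, hardware present bit clear", which is what separates a NUMA-balancing hint fault from a genuinely unmapped page. A hedged fault-path sketch (the helper is hypothetical):

/* Hypothetical helper: true if the fault is an autonuma hint fault. */
static bool demo_is_numa_hint_fault(pte_t pte)
{
        /* Mapped, but access was revoked only to trigger this fault. */
        return pte_protnone(pte);
}
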
diff --git a/arch/riscv/include/asm/probes.h b/arch/riscv/include/asm/probes.h
new file mode 100644
index 000000000000..a787e6d537b9
--- /dev/null
+++ b/arch/riscv/include/asm/probes.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_PROBES_H
+#define _ASM_RISCV_PROBES_H
+
+typedef u32 probe_opcode_t;
+typedef bool (probes_handler_t) (u32 opcode, unsigned long addr, struct pt_regs *);
+
+/* architecture specific copy of original instruction */
+struct arch_probe_insn {
+ probe_opcode_t *insn;
+ probes_handler_t *handler;
+ /* restore address after simulation */
+ unsigned long restore;
+};
+
+#ifdef CONFIG_KPROBES
+typedef u32 kprobe_opcode_t;
+struct arch_specific_insn {
+ struct arch_probe_insn api;
+};
+#endif
+
+#endif /* _ASM_RISCV_PROBES_H */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index bdddcd5c1b71..3a240037bde2 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -34,6 +34,7 @@ struct thread_struct {
unsigned long sp; /* Kernel mode stack */
unsigned long s[12]; /* s[0]: frame pointer */
struct __riscv_d_ext_state fstate;
+ unsigned long bad_cause;
};
#define INIT_THREAD { \
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index ee49f80c9533..cb4abb639e8d 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -8,6 +8,7 @@
#include <uapi/asm/ptrace.h>
#include <asm/csr.h>
+#include <linux/compiler.h>
#ifndef __ASSEMBLY__
@@ -60,6 +61,7 @@ struct pt_regs {
#define user_mode(regs) (((regs)->status & SR_PP) == 0)
+#define MAX_REG_OFFSET offsetof(struct pt_regs, orig_a0)
/* Helpers for working with the instruction pointer */
static inline unsigned long instruction_pointer(struct pt_regs *regs)
@@ -85,6 +87,12 @@ static inline void user_stack_pointer_set(struct pt_regs *regs,
regs->sp = val;
}
+/* Valid only for Kernel mode traps. */
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+ return regs->sp;
+}
+
/* Helpers for working with the frame pointer */
static inline unsigned long frame_pointer(struct pt_regs *regs)
{
@@ -101,6 +109,33 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
return regs->a0;
}
+static inline void regs_set_return_value(struct pt_regs *regs,
+ unsigned long val)
+{
+ regs->a0 = val;
+}
+
+extern int regs_query_register_offset(const char *name);
+extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+ unsigned int n);
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs: pt_regs from which the register value is read
+ * @offset: offset of the register within struct pt_regs.
+ *
+ * regs_get_register() returns the value of the register found at @offset
+ * within @regs. If @offset is larger than MAX_REG_OFFSET, it returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+ unsigned int offset)
+{
+ if (unlikely(offset > MAX_REG_OFFSET))
+ return 0;
+
+ return *(unsigned long *)((unsigned long)regs + offset);
+}
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PTRACE_H */
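
A usage sketch for the accessors above, e.g. from a hypothetical kprobe handler; regs_query_register_offset() and regs_get_register() are the interfaces declared in this header.

/* Hypothetical: fetch a register by ABI name, e.g. "a0". */
static unsigned long read_named_reg(struct pt_regs *regs, const char *name)
{
        int off = regs_query_register_offset(name);

        if (off < 0)
                return 0;
        return regs_get_register(regs, (unsigned int)off);
}
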
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 653edb25d495..99895d9c3bdd 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -89,7 +89,7 @@ struct sbiret {
long value;
};
-int sbi_init(void);
+void sbi_init(void);
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg1, unsigned long arg2,
unsigned long arg3, unsigned long arg4,
@@ -100,13 +100,13 @@ int sbi_console_getchar(void);
void sbi_set_timer(uint64_t stime_value);
void sbi_shutdown(void);
void sbi_clear_ipi(void);
-void sbi_send_ipi(const unsigned long *hart_mask);
-void sbi_remote_fence_i(const unsigned long *hart_mask);
-void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_send_ipi(const unsigned long *hart_mask);
+int sbi_remote_fence_i(const unsigned long *hart_mask);
+int sbi_remote_sfence_vma(const unsigned long *hart_mask,
unsigned long start,
unsigned long size);
-void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
unsigned long start,
unsigned long size,
unsigned long asid);
@@ -147,11 +147,7 @@ static inline unsigned long sbi_minor_version(void)
int sbi_err_map_linux_errno(int err);
#else /* CONFIG_RISCV_SBI */
-/* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
-void sbi_set_timer(uint64_t stime_value);
-void sbi_clear_ipi(void);
-void sbi_send_ipi(const unsigned long *hart_mask);
-void sbi_remote_fence_i(const unsigned long *hart_mask);
-void sbi_init(void);
+static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; }
+static inline void sbi_init(void) {}
#endif /* CONFIG_RISCV_SBI */
#endif /* _ASM_RISCV_SBI_H */
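
Because the remote-fence helpers now return an int (and the !CONFIG_RISCV_SBI stub returns -1), callers can propagate failures. A hedged sketch; passing a NULL hart_mask to mean "all harts" follows existing kernel usage such as flush_icache_all().

static void demo_fence_all(void)
{
        if (sbi_remote_fence_i(NULL) < 0)
                pr_warn("SBI remote fence.i failed\n");
}
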
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index 8b80c80c7f1a..6887b3d9f371 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -22,7 +22,7 @@ static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
-static inline void protect_kernel_text_data(void) {};
+static inline void protect_kernel_text_data(void) {}
static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
#endif
diff --git a/arch/riscv/include/asm/soc.h b/arch/riscv/include/asm/soc.h
index 6c8363b1f327..f494066051a2 100644
--- a/arch/riscv/include/asm/soc.h
+++ b/arch/riscv/include/asm/soc.h
@@ -21,42 +21,4 @@ void soc_early_init(void);
extern unsigned long __soc_early_init_table_start;
extern unsigned long __soc_early_init_table_end;
-/*
- * Allows Linux to provide a device tree, which is necessary for SOCs that
- * don't provide a useful one on their own.
- */
-struct soc_builtin_dtb {
- unsigned long vendor_id;
- unsigned long arch_id;
- unsigned long imp_id;
- void *(*dtb_func)(void);
-};
-
-/*
- * The argument name must specify a valid DTS file name without the dts
- * extension.
- */
-#define SOC_BUILTIN_DTB_DECLARE(name, vendor, arch, impl) \
- extern void *__dtb_##name##_begin; \
- \
- static __init __used \
- void *__soc_builtin_dtb_f__##name(void) \
- { \
- return (void *)&__dtb_##name##_begin; \
- } \
- \
- static const struct soc_builtin_dtb __soc_builtin_dtb__##name \
- __used __section("__soc_builtin_dtb_table") = \
- { \
- .vendor_id = vendor, \
- .arch_id = arch, \
- .imp_id = impl, \
- .dtb_func = __soc_builtin_dtb_f__##name, \
- }
-
-extern unsigned long __soc_builtin_dtb_table_start;
-extern unsigned long __soc_builtin_dtb_table_end;
-
-void *soc_lookup_builtin_dtb(void);
-
#endif
diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h
index 5962f8891f06..09093af46565 100644
--- a/arch/riscv/include/asm/stackprotector.h
+++ b/arch/riscv/include/asm/stackprotector.h
@@ -24,6 +24,7 @@ static __always_inline void boot_init_stack_canary(void)
canary &= CANARY_MASK;
current->stack_canary = canary;
- __stack_chk_guard = current->stack_canary;
+ if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
+ __stack_chk_guard = current->stack_canary;
}
#endif /* _ASM_RISCV_STACKPROTECTOR_H */
diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h
index 470a65c4ccdc..3450c1912afd 100644
--- a/arch/riscv/include/asm/stacktrace.h
+++ b/arch/riscv/include/asm/stacktrace.h
@@ -13,5 +13,7 @@ struct stackframe {
extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
bool (*fn)(void *, unsigned long), void *arg);
+extern void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
+ const char *loglvl);
#endif /* _ASM_RISCV_STACKTRACE_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 97bf5a1575d2..0e549a3089b3 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -75,6 +75,7 @@ struct thread_info {
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
#define TIF_SECCOMP 8 /* syscall secure computing */
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
+#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -84,10 +85,11 @@ struct thread_info {
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_WORK_MASK \
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
- _TIF_NOTIFY_SIGNAL)
+ _TIF_NOTIFY_SIGNAL | _TIF_UPROBE)
#define _TIF_SYSCALL_WORK \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h
new file mode 100644
index 000000000000..f2183e00fdd2
--- /dev/null
+++ b/arch/riscv/include/asm/uprobes.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_RISCV_UPROBES_H
+#define _ASM_RISCV_UPROBES_H
+
+#include <asm/probes.h>
+#include <asm/patch.h>
+#include <asm/bug.h>
+
+#define MAX_UINSN_BYTES 8
+
+#ifdef CONFIG_RISCV_ISA_C
+#define UPROBE_SWBP_INSN __BUG_INSN_16
+#define UPROBE_SWBP_INSN_SIZE 2
+#else
+#define UPROBE_SWBP_INSN __BUG_INSN_32
+#define UPROBE_SWBP_INSN_SIZE 4
+#endif
+#define UPROBE_XOL_SLOT_BYTES MAX_UINSN_BYTES
+
+typedef u32 uprobe_opcode_t;
+
+struct arch_uprobe_task {
+ unsigned long saved_cause;
+};
+
+struct arch_uprobe {
+ union {
+ u8 insn[MAX_UINSN_BYTES];
+ u8 ixol[MAX_UINSN_BYTES];
+ };
+ struct arch_probe_insn api;
+ unsigned long insn_size;
+ bool simulate;
+};
+
+bool uprobe_breakpoint_handler(struct pt_regs *regs);
+bool uprobe_single_step_handler(struct pt_regs *regs);
+
+#endif /* _ASM_RISCV_UPROBES_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index f6caf4d9ca15..3dc0abde988a 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -4,8 +4,9 @@
#
ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_patch.o = -pg
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
endif
extra-y += head.o
@@ -29,6 +30,7 @@ obj-y += riscv_ksyms.o
obj-y += stacktrace.o
obj-y += cacheinfo.o
obj-y += patch.o
+obj-y += probes/
obj-$(CONFIG_MMU) += vdso.o vdso/
obj-$(CONFIG_RISCV_M_MODE) += traps_misaligned.o
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index b79ffa3561fd..9ef33346853c 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -68,6 +68,9 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_F30, task_struct, thread.fstate.f[30]);
OFFSET(TASK_THREAD_F31, task_struct, thread.fstate.f[31]);
OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(TSK_STACK_CANARY, task_struct, stack_canary);
+#endif
DEFINE(PT_SIZE, sizeof(struct pt_regs));
OFFSET(PT_EPC, pt_regs, epc);
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 765b62434f30..7f1e5203de88 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -72,29 +72,56 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
return 0;
}
+/*
+ * Put 4 instructions (16 bytes) at the front of the function, within
+ * the patchable function entry nops' area:
+ *
+ * 0: REG_S ra, -SZREG(sp)
+ * 1: auipc ra, 0x?
+ * 2: jalr -?(ra)
+ * 3: REG_L ra, -SZREG(sp)
+ *
+ * So the opcodes are:
+ * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+ * 1: 0x???????? -> auipc
+ * 2: 0x???????? -> jalr
+ * 3: 0xff813083 (ld)/0xffc12083 (lw)
+ */
+#if __riscv_xlen == 64
+#define INSN0 0xfe113c23
+#define INSN3 0xff813083
+#elif __riscv_xlen == 32
+#define INSN0 0xfe112e23
+#define INSN3 0xffc12083
+#endif
+
+#define FUNC_ENTRY_SIZE 16
+#define FUNC_ENTRY_JMP 4
+
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
- int ret = ftrace_check_current_call(rec->ip, NULL);
+ unsigned int call[4] = {INSN0, 0, 0, INSN3};
+ unsigned long target = addr;
+ unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
- if (ret)
- return ret;
+ call[1] = to_auipc_insn((unsigned int)(target - caller));
+ call[2] = to_jalr_insn((unsigned int)(target - caller));
- return __ftrace_modify_call(rec->ip, addr, true);
+ if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
+ return -EPERM;
+
+ return 0;
}
int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
unsigned long addr)
{
- unsigned int call[2];
- int ret;
+ unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
- make_call(rec->ip, addr, call);
- ret = ftrace_check_current_call(rec->ip, call);
-
- if (ret)
- return ret;
+ if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
+ return -EPERM;
- return __ftrace_modify_call(rec->ip, addr, false);
+ return 0;
}
@@ -139,15 +166,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
unsigned int call[2];
+ unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
int ret;
- make_call(rec->ip, old_addr, call);
- ret = ftrace_check_current_call(rec->ip, call);
+ make_call(caller, old_addr, call);
+ ret = ftrace_check_current_call(caller, call);
if (ret)
return ret;
- return __ftrace_modify_call(rec->ip, addr, true);
+ return __ftrace_modify_call(caller, addr, true);
}
#endif
@@ -176,53 +204,30 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_graph_call(void);
+extern void ftrace_graph_regs_call(void);
int ftrace_enable_ftrace_graph_caller(void)
{
- unsigned int call[2];
- static int init_graph = 1;
int ret;
- make_call(&ftrace_graph_call, &ftrace_stub, call);
-
- /*
- * When enabling graph tracer for the first time, ftrace_graph_call
- * should contain a call to ftrace_stub. Once it has been disabled,
- * the 8 bytes at that position become NOPs.
- */
- if (init_graph) {
- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
- call);
- init_graph = 0;
- } else {
- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
- NULL);
- }
-
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+ (unsigned long)&prepare_ftrace_return, true);
if (ret)
return ret;
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
(unsigned long)&prepare_ftrace_return, true);
}
int ftrace_disable_ftrace_graph_caller(void)
{
- unsigned int call[2];
int ret;
- make_call(&ftrace_graph_call, &prepare_ftrace_return, call);
-
- /*
- * This is to make sure that ftrace_enable_ftrace_graph_caller
- * did the right thing.
- */
- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
- call);
-
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+ (unsigned long)&prepare_ftrace_return, false);
if (ret)
return ret;
- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
(unsigned long)&prepare_ftrace_return, false);
}
#endif /* CONFIG_DYNAMIC_FTRACE */
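
For reference, the auipc/jalr pair patched in above splits one PC-relative offset into an upper 20-bit and a signed lower 12-bit immediate; the kernel's to_auipc_insn()/to_jalr_insn() macros encode the equivalent. A sketch of the split (the helper name is hypothetical):

static void split_call_offset(unsigned int offset,
                              unsigned int *hi20, unsigned int *lo12)
{
        *lo12 = offset & 0xfff;
        /* jalr sign-extends its immediate, so when the low 12 bits would
         * read back negative, round the auipc part up to compensate. */
        *hi20 = (offset + 0x800) & 0xfffff000;
}
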
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 16e9941900c4..f5a9bad86e58 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -260,7 +260,11 @@ clear_bss_done:
/* Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
+#ifdef CONFIG_BUILTIN_DTB
+ la a0, __dtb_start
+#else
mv a0, s1
+#endif /* CONFIG_BUILTIN_DTB */
call setup_vm
#ifdef CONFIG_MMU
la a0, early_pg_dir
diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h
index 8c212efb37a6..71a76a623257 100644
--- a/arch/riscv/kernel/image-vars.h
+++ b/arch/riscv/kernel/image-vars.h
@@ -3,7 +3,7 @@
* Copyright (C) 2020 Western Digital Corporation or its affiliates.
* Linker script variables to be set after section resolution, as
* ld.lld does not like variables assigned before SECTIONS is processed.
- * Based on arch/arm64/kerne/image-vars.h
+ * Based on arch/arm64/kernel/image-vars.h
*/
#ifndef __RISCV_KERNEL_IMAGE_VARS_H
#define __RISCV_KERNEL_IMAGE_VARS_H
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 35a6ed76cb8b..d171eca623b6 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -13,224 +13,186 @@
.text
- .macro SAVE_ABI_STATE
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- addi sp, sp, -48
- sd s0, 32(sp)
- sd ra, 40(sp)
- addi s0, sp, 48
- sd t0, 24(sp)
- sd t1, 16(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- sd t2, 8(sp)
-#endif
-#else
- addi sp, sp, -16
- sd s0, 0(sp)
- sd ra, 8(sp)
- addi s0, sp, 16
-#endif
+#define FENTRY_RA_OFFSET 12
+#define ABI_SIZE_ON_STACK 72
+#define ABI_A0 0
+#define ABI_A1 8
+#define ABI_A2 16
+#define ABI_A3 24
+#define ABI_A4 32
+#define ABI_A5 40
+#define ABI_A6 48
+#define ABI_A7 56
+#define ABI_RA 64
+
+ .macro SAVE_ABI
+ addi sp, sp, -SZREG
+ addi sp, sp, -ABI_SIZE_ON_STACK
+
+ REG_S a0, ABI_A0(sp)
+ REG_S a1, ABI_A1(sp)
+ REG_S a2, ABI_A2(sp)
+ REG_S a3, ABI_A3(sp)
+ REG_S a4, ABI_A4(sp)
+ REG_S a5, ABI_A5(sp)
+ REG_S a6, ABI_A6(sp)
+ REG_S a7, ABI_A7(sp)
+ REG_S ra, ABI_RA(sp)
.endm
- .macro RESTORE_ABI_STATE
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- ld s0, 32(sp)
- ld ra, 40(sp)
- addi sp, sp, 48
-#else
- ld ra, 8(sp)
- ld s0, 0(sp)
- addi sp, sp, 16
-#endif
+ .macro RESTORE_ABI
+ REG_L a0, ABI_A0(sp)
+ REG_L a1, ABI_A1(sp)
+ REG_L a2, ABI_A2(sp)
+ REG_L a3, ABI_A3(sp)
+ REG_L a4, ABI_A4(sp)
+ REG_L a5, ABI_A5(sp)
+ REG_L a6, ABI_A6(sp)
+ REG_L a7, ABI_A7(sp)
+ REG_L ra, ABI_RA(sp)
+
+ addi sp, sp, ABI_SIZE_ON_STACK
+ addi sp, sp, SZREG
.endm
- .macro RESTORE_GRAPH_ARGS
- ld a0, 24(sp)
- ld a1, 16(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- ld a2, 8(sp)
-#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ .macro SAVE_ALL
+ addi sp, sp, -SZREG
+ addi sp, sp, -PT_SIZE_ON_STACK
+
+ REG_S x1, PT_EPC(sp)
+ addi sp, sp, PT_SIZE_ON_STACK
+ REG_L x1, (sp)
+ addi sp, sp, -PT_SIZE_ON_STACK
+ REG_S x1, PT_RA(sp)
+ REG_L x1, PT_EPC(sp)
+
+ REG_S x2, PT_SP(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+ REG_S x5, PT_T0(sp)
+ REG_S x6, PT_T1(sp)
+ REG_S x7, PT_T2(sp)
+ REG_S x8, PT_S0(sp)
+ REG_S x9, PT_S1(sp)
+ REG_S x10, PT_A0(sp)
+ REG_S x11, PT_A1(sp)
+ REG_S x12, PT_A2(sp)
+ REG_S x13, PT_A3(sp)
+ REG_S x14, PT_A4(sp)
+ REG_S x15, PT_A5(sp)
+ REG_S x16, PT_A6(sp)
+ REG_S x17, PT_A7(sp)
+ REG_S x18, PT_S2(sp)
+ REG_S x19, PT_S3(sp)
+ REG_S x20, PT_S4(sp)
+ REG_S x21, PT_S5(sp)
+ REG_S x22, PT_S6(sp)
+ REG_S x23, PT_S7(sp)
+ REG_S x24, PT_S8(sp)
+ REG_S x25, PT_S9(sp)
+ REG_S x26, PT_S10(sp)
+ REG_S x27, PT_S11(sp)
+ REG_S x28, PT_T3(sp)
+ REG_S x29, PT_T4(sp)
+ REG_S x30, PT_T5(sp)
+ REG_S x31, PT_T6(sp)
.endm
-ENTRY(ftrace_graph_caller)
- addi sp, sp, -16
- sd s0, 0(sp)
- sd ra, 8(sp)
- addi s0, sp, 16
-ftrace_graph_call:
- .global ftrace_graph_call
- /*
- * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the
- * call below. Check ftrace_modify_all_code for details.
- */
- call ftrace_stub
- ld ra, 8(sp)
- ld s0, 0(sp)
- addi sp, sp, 16
- ret
-ENDPROC(ftrace_graph_caller)
+ .macro RESTORE_ALL
+ REG_L x1, PT_RA(sp)
+ addi sp, sp, PT_SIZE_ON_STACK
+ REG_S x1, (sp)
+ addi sp, sp, -PT_SIZE_ON_STACK
+ REG_L x1, PT_EPC(sp)
+ REG_L x2, PT_SP(sp)
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+ REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+ REG_L x9, PT_S1(sp)
+ REG_L x10, PT_A0(sp)
+ REG_L x11, PT_A1(sp)
+ REG_L x12, PT_A2(sp)
+ REG_L x13, PT_A3(sp)
+ REG_L x14, PT_A4(sp)
+ REG_L x15, PT_A5(sp)
+ REG_L x16, PT_A6(sp)
+ REG_L x17, PT_A7(sp)
+ REG_L x18, PT_S2(sp)
+ REG_L x19, PT_S3(sp)
+ REG_L x20, PT_S4(sp)
+ REG_L x21, PT_S5(sp)
+ REG_L x22, PT_S6(sp)
+ REG_L x23, PT_S7(sp)
+ REG_L x24, PT_S8(sp)
+ REG_L x25, PT_S9(sp)
+ REG_L x26, PT_S10(sp)
+ REG_L x27, PT_S11(sp)
+ REG_L x28, PT_T3(sp)
+ REG_L x29, PT_T4(sp)
+ REG_L x30, PT_T5(sp)
+ REG_L x31, PT_T6(sp)
+
+ addi sp, sp, PT_SIZE_ON_STACK
+ addi sp, sp, SZREG
+ .endm
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
ENTRY(ftrace_caller)
- /*
- * a0: the address in the caller when calling ftrace_caller
- * a1: the caller's return address
- * a2: the address of global variable function_trace_op
- */
- ld a1, -8(s0)
- addi a0, ra, -MCOUNT_INSN_SIZE
- la t5, function_trace_op
- ld a2, 0(t5)
+ SAVE_ABI
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /*
- * the graph tracer (specifically, prepare_ftrace_return) needs these
- * arguments but for now the function tracer occupies the regs, so we
- * save them in temporary regs to recover later.
- */
- addi t0, s0, -8
- mv t1, a0
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- ld t2, -16(s0)
-#endif
-#endif
+ addi a0, ra, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+ REG_L a1, ABI_SIZE_ON_STACK(sp)
+ mv a3, sp
- SAVE_ABI_STATE
ftrace_call:
.global ftrace_call
- /*
- * For the dynamic ftrace to work, here we should reserve at least
- * 8 bytes for a functional auipc-jalr pair. The following call
- * serves this purpose.
- *
- * Calling ftrace_update_ftrace_func would overwrite the nops below.
- * Check ftrace_modify_all_code for details.
- */
call ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- RESTORE_GRAPH_ARGS
- call ftrace_graph_caller
+ addi a0, sp, ABI_SIZE_ON_STACK
+ REG_L a1, ABI_RA(sp)
+ addi a1, a1, -FENTRY_RA_OFFSET
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a2, s0
#endif
-
- RESTORE_ABI_STATE
+ftrace_graph_call:
+ .global ftrace_graph_call
+ call ftrace_stub
+#endif
+ RESTORE_ABI
ret
ENDPROC(ftrace_caller)
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
- .macro SAVE_ALL
- addi sp, sp, -(PT_SIZE_ON_STACK+16)
- sd s0, (PT_SIZE_ON_STACK)(sp)
- sd ra, (PT_SIZE_ON_STACK+8)(sp)
- addi s0, sp, (PT_SIZE_ON_STACK+16)
-
- sd x1, PT_RA(sp)
- sd x2, PT_SP(sp)
- sd x3, PT_GP(sp)
- sd x4, PT_TP(sp)
- sd x5, PT_T0(sp)
- sd x6, PT_T1(sp)
- sd x7, PT_T2(sp)
- sd x8, PT_S0(sp)
- sd x9, PT_S1(sp)
- sd x10, PT_A0(sp)
- sd x11, PT_A1(sp)
- sd x12, PT_A2(sp)
- sd x13, PT_A3(sp)
- sd x14, PT_A4(sp)
- sd x15, PT_A5(sp)
- sd x16, PT_A6(sp)
- sd x17, PT_A7(sp)
- sd x18, PT_S2(sp)
- sd x19, PT_S3(sp)
- sd x20, PT_S4(sp)
- sd x21, PT_S5(sp)
- sd x22, PT_S6(sp)
- sd x23, PT_S7(sp)
- sd x24, PT_S8(sp)
- sd x25, PT_S9(sp)
- sd x26, PT_S10(sp)
- sd x27, PT_S11(sp)
- sd x28, PT_T3(sp)
- sd x29, PT_T4(sp)
- sd x30, PT_T5(sp)
- sd x31, PT_T6(sp)
- .endm
-
- .macro RESTORE_ALL
- ld x1, PT_RA(sp)
- ld x2, PT_SP(sp)
- ld x3, PT_GP(sp)
- ld x4, PT_TP(sp)
- ld x5, PT_T0(sp)
- ld x6, PT_T1(sp)
- ld x7, PT_T2(sp)
- ld x8, PT_S0(sp)
- ld x9, PT_S1(sp)
- ld x10, PT_A0(sp)
- ld x11, PT_A1(sp)
- ld x12, PT_A2(sp)
- ld x13, PT_A3(sp)
- ld x14, PT_A4(sp)
- ld x15, PT_A5(sp)
- ld x16, PT_A6(sp)
- ld x17, PT_A7(sp)
- ld x18, PT_S2(sp)
- ld x19, PT_S3(sp)
- ld x20, PT_S4(sp)
- ld x21, PT_S5(sp)
- ld x22, PT_S6(sp)
- ld x23, PT_S7(sp)
- ld x24, PT_S8(sp)
- ld x25, PT_S9(sp)
- ld x26, PT_S10(sp)
- ld x27, PT_S11(sp)
- ld x28, PT_T3(sp)
- ld x29, PT_T4(sp)
- ld x30, PT_T5(sp)
- ld x31, PT_T6(sp)
-
- ld s0, (PT_SIZE_ON_STACK)(sp)
- ld ra, (PT_SIZE_ON_STACK+8)(sp)
- addi sp, sp, (PT_SIZE_ON_STACK+16)
- .endm
-
- .macro RESTORE_GRAPH_REG_ARGS
- ld a0, PT_T0(sp)
- ld a1, PT_T1(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- ld a2, PT_T2(sp)
-#endif
- .endm
-
-/*
- * Most of the contents are the same as ftrace_caller.
- */
ENTRY(ftrace_regs_caller)
- /*
- * a3: the address of all registers in the stack
- */
- ld a1, -8(s0)
- addi a0, ra, -MCOUNT_INSN_SIZE
- la t5, function_trace_op
- ld a2, 0(t5)
- addi a3, sp, -(PT_SIZE_ON_STACK+16)
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- addi t0, s0, -8
- mv t1, a0
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
- ld t2, -16(s0)
-#endif
-#endif
SAVE_ALL
+ addi a0, ra, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+ REG_L a1, PT_SIZE_ON_STACK(sp)
+ mv a3, sp
+
ftrace_regs_call:
.global ftrace_regs_call
call ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- RESTORE_GRAPH_REG_ARGS
- call ftrace_graph_caller
+ addi a0, sp, PT_RA
+ REG_L a1, PT_EPC(sp)
+ addi a1, a1, -FENTRY_RA_OFFSET
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a2, s0
+#endif
+ftrace_graph_regs_call:
+ .global ftrace_graph_regs_call
+ call ftrace_stub
#endif
RESTORE_ALL
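
For readers following the new SAVE_ABI/RESTORE_ABI flow above: with -fpatchable-function-entry, each traced function begins with a pair of nops that dynamic ftrace patches into an auipc+jalr pair targeting ftrace_caller, so on entry ra points just past that pair. A minimal C sketch of the address arithmetic, assuming only the FENTRY_RA_OFFSET constant introduced by this patch:

	/* Sketch, not kernel code: recover the traced function's ip from ra. */
	static unsigned long ftrace_call_ip(unsigned long ra)
	{
		/* ra was left by the patched auipc+jalr at the function entry */
		return ra - FENTRY_RA_OFFSET;	/* matches: addi a0, ra, -FENTRY_RA_OFFSET */
	}
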
diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
index 3fe7a5296aa5..0b552873a577 100644
--- a/arch/riscv/kernel/patch.c
+++ b/arch/riscv/kernel/patch.c
@@ -20,7 +20,12 @@ struct patch_insn {
};
#ifdef CONFIG_MMU
-static void *patch_map(void *addr, int fixmap)
+/*
+ * fix_to_virt(, idx) needs a const value (not a dynamic variable held in
+ * a register) or the BUILD_BUG_ON fails with "idx >= __end_of_fixed_addresses".
+ * So use '__always_inline' and 'const unsigned int fixmap' here.
+ */
+static __always_inline void *patch_map(void *addr, const unsigned int fixmap)
{
uintptr_t uintaddr = (uintptr_t) addr;
struct page *page;
@@ -37,7 +42,6 @@ static void *patch_map(void *addr, int fixmap)
return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
(uintaddr & ~PAGE_MASK));
}
-NOKPROBE_SYMBOL(patch_map);
static void patch_unmap(int fixmap)
{
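
The __always_inline change above works because fix_to_virt() performs a compile-time bounds check on its index; once patch_map() is inlined with a const fixmap argument, the check can resolve. Roughly, as in asm-generic/fixmap.h (shown as a hedged paraphrase, not the exact upstream definition):

	static __always_inline unsigned long fix_to_virt(const unsigned int idx)
	{
		/* only resolvable when idx is a compile-time constant */
		BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
		return __fix_to_virt(idx);
	}
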
diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile
new file mode 100644
index 000000000000..7f0840dcc31b
--- /dev/null
+++ b/arch/riscv/kernel/probes/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o
+obj-$(CONFIG_KPROBES) += kprobes_trampoline.o
+obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o
+obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o
+CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
new file mode 100644
index 000000000000..0ed043acc882
--- /dev/null
+++ b/arch/riscv/kernel/probes/decode-insn.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <asm/sections.h>
+
+#include "decode-insn.h"
+#include "simulate-insn.h"
+
+/* Return:
+ * INSN_REJECTED     if the instruction is not allowed to be probed,
+ * INSN_GOOD_NO_SLOT if the instruction is supported but doesn't use its slot.
+ */
+enum probe_insn __kprobes
+riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
+{
+ probe_opcode_t insn = *addr;
+
+ /*
+ * Reject instructions list:
+ */
+ RISCV_INSN_REJECTED(system, insn);
+ RISCV_INSN_REJECTED(fence, insn);
+
+ /*
+ * Simulate instructions list:
+ * TODO: the REJECTED ones below need to be implemented
+ */
+#ifdef CONFIG_RISCV_ISA_C
+ RISCV_INSN_REJECTED(c_j, insn);
+ RISCV_INSN_REJECTED(c_jr, insn);
+ RISCV_INSN_REJECTED(c_jal, insn);
+ RISCV_INSN_REJECTED(c_jalr, insn);
+ RISCV_INSN_REJECTED(c_beqz, insn);
+ RISCV_INSN_REJECTED(c_bnez, insn);
+ RISCV_INSN_REJECTED(c_ebreak, insn);
+#endif
+
+ RISCV_INSN_REJECTED(auipc, insn);
+ RISCV_INSN_REJECTED(branch, insn);
+
+ RISCV_INSN_SET_SIMULATE(jal, insn);
+ RISCV_INSN_SET_SIMULATE(jalr, insn);
+
+ return INSN_GOOD;
+}
diff --git a/arch/riscv/kernel/probes/decode-insn.h b/arch/riscv/kernel/probes/decode-insn.h
new file mode 100644
index 000000000000..42269a7d676d
--- /dev/null
+++ b/arch/riscv/kernel/probes/decode-insn.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _RISCV_KERNEL_KPROBES_DECODE_INSN_H
+#define _RISCV_KERNEL_KPROBES_DECODE_INSN_H
+
+#include <asm/sections.h>
+#include <asm/kprobes.h>
+
+enum probe_insn {
+ INSN_REJECTED,
+ INSN_GOOD_NO_SLOT,
+ INSN_GOOD,
+};
+
+enum probe_insn __kprobes
+riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *asi);
+
+#endif /* _RISCV_KERNEL_KPROBES_DECODE_INSN_H */
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
new file mode 100644
index 000000000000..e6372490aa0b
--- /dev/null
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kprobes.h>
+
+/* Ftrace callback handler for kprobes -- called with preemption disabled */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+ struct ftrace_ops *ops, struct ftrace_regs *regs)
+{
+ struct kprobe *p;
+ struct kprobe_ctlblk *kcb;
+
+ p = get_kprobe((kprobe_opcode_t *)ip);
+ if (unlikely(!p) || kprobe_disabled(p))
+ return;
+
+ kcb = get_kprobe_ctlblk();
+ if (kprobe_running()) {
+ kprobes_inc_nmissed_count(p);
+ } else {
+ unsigned long orig_ip = instruction_pointer(&(regs->regs));
+
+ instruction_pointer_set(&(regs->regs), ip);
+
+ __this_cpu_write(current_kprobe, p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+ if (!p->pre_handler || !p->pre_handler(p, &(regs->regs))) {
+ /*
+ * Emulate singlestep (and also recover regs->pc)
+ * as if there is a nop
+ */
+ instruction_pointer_set(&(regs->regs),
+ (unsigned long)p->addr + MCOUNT_INSN_SIZE);
+ if (unlikely(p->post_handler)) {
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ p->post_handler(p, &(regs->regs), 0);
+ }
+ instruction_pointer_set(&(regs->regs), orig_ip);
+ }
+
+ /*
+ * If pre_handler returns !0, it changes regs->pc. We have to
+ * skip emulating post_handler.
+ */
+ __this_cpu_write(current_kprobe, NULL);
+ }
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+ p->ainsn.api.insn = NULL;
+ return 0;
+}
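
With this handler in place, a kprobe registered at a traced function's entry is serviced via ftrace rather than an ebreak. A minimal module sketch exercising it; the target symbol is only an example:

	#include <linux/kprobes.h>
	#include <linux/module.h>

	static int my_pre(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("hit %s at %lx\n", p->symbol_name, instruction_pointer(regs));
		return 0;	/* 0 = continue with normal processing */
	}

	static struct kprobe kp = {
		.symbol_name = "kernel_clone",	/* example target, adjust as needed */
		.pre_handler = my_pre,
	};

	static int __init kpft_init(void)
	{
		return register_kprobe(&kp);	/* entry probes go via ftrace here */
	}

	static void __exit kpft_exit(void)
	{
		unregister_kprobe(&kp);
	}

	module_init(kpft_init);
	module_exit(kpft_exit);
	MODULE_LICENSE("GPL");
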
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
new file mode 100644
index 000000000000..a2ec18662fee
--- /dev/null
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/kprobes.h>
+#include <linux/extable.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <asm/ptrace.h>
+#include <linux/uaccess.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/bug.h>
+#include <asm/patch.h>
+
+#include "decode-insn.h"
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
+
+static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
+{
+ unsigned long offset = GET_INSN_LENGTH(p->opcode);
+
+ p->ainsn.api.restore = (unsigned long)p->addr + offset;
+
+ patch_text(p->ainsn.api.insn, p->opcode);
+ patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset),
+ __BUG_INSN_32);
+}
+
+static void __kprobes arch_prepare_simulate(struct kprobe *p)
+{
+ p->ainsn.api.restore = 0;
+}
+
+static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if (p->ainsn.api.handler)
+ p->ainsn.api.handler((u32)p->opcode,
+ (unsigned long)p->addr, regs);
+
+ post_kprobe_handler(kcb, regs);
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+ unsigned long probe_addr = (unsigned long)p->addr;
+
+ if (probe_addr & 0x1) {
+ pr_warn("Address not aligned.\n");
+
+ return -EINVAL;
+ }
+
+ /* copy instruction */
+ p->opcode = *p->addr;
+
+ /* decode instruction */
+ switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) {
+ case INSN_REJECTED: /* insn not supported */
+ return -EINVAL;
+
+ case INSN_GOOD_NO_SLOT: /* insn need simulation */
+ p->ainsn.api.insn = NULL;
+ break;
+
+ case INSN_GOOD: /* instruction uses slot */
+ p->ainsn.api.insn = get_insn_slot();
+ if (!p->ainsn.api.insn)
+ return -ENOMEM;
+ break;
+ }
+
+ /* prepare the instruction */
+ if (p->ainsn.api.insn)
+ arch_prepare_ss_slot(p);
+ else
+ arch_prepare_simulate(p);
+
+ return 0;
+}
+
+/* install breakpoint in text */
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+ if ((p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32)
+ patch_text(p->addr, __BUG_INSN_32);
+ else
+ patch_text(p->addr, __BUG_INSN_16);
+}
+
+/* remove breakpoint from text */
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+ patch_text(p->addr, p->opcode);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ kcb->prev_kprobe.kp = kprobe_running();
+ kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+ __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+ kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+ __this_cpu_write(current_kprobe, p);
+}
+
+/*
+ * Interrupts need to be disabled before single-step mode is set, and not
+ * reenabled until after single-step mode ends.
+ * Without disabling interrupts on the local CPU, there is a chance of an
+ * interrupt occurring between the exception return and the start of the
+ * out-of-line single-step, which would result in wrongly single-stepping
+ * into the interrupt handler.
+ */
+static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs)
+{
+ kcb->saved_status = regs->status;
+ regs->status &= ~SR_SPIE;
+}
+
+static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
+ struct pt_regs *regs)
+{
+ regs->status = kcb->saved_status;
+}
+
+static void __kprobes
+set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr, struct kprobe *p)
+{
+ unsigned long offset = GET_INSN_LENGTH(p->opcode);
+
+ kcb->ss_ctx.ss_pending = true;
+ kcb->ss_ctx.match_addr = addr + offset;
+}
+
+static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
+{
+ kcb->ss_ctx.ss_pending = false;
+ kcb->ss_ctx.match_addr = 0;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb, int reenter)
+{
+ unsigned long slot;
+
+ if (reenter) {
+ save_previous_kprobe(kcb);
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_REENTER;
+ } else {
+ kcb->kprobe_status = KPROBE_HIT_SS;
+ }
+
+ if (p->ainsn.api.insn) {
+ /* prepare for single stepping */
+ slot = (unsigned long)p->ainsn.api.insn;
+
+ set_ss_context(kcb, slot, p); /* mark pending ss */
+
+ /* IRQs and single stepping do not mix well. */
+ kprobes_save_local_irqflag(kcb, regs);
+
+ instruction_pointer_set(regs, slot);
+ } else {
+ /* insn simulation */
+ arch_simulate_insn(p, regs);
+ }
+}
+
+static int __kprobes reenter_kprobe(struct kprobe *p,
+ struct pt_regs *regs,
+ struct kprobe_ctlblk *kcb)
+{
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SSDONE:
+ case KPROBE_HIT_ACTIVE:
+ kprobes_inc_nmissed_count(p);
+ setup_singlestep(p, regs, kcb, 1);
+ break;
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ pr_warn("Unrecoverable kprobe detected.\n");
+ dump_kprobe(p);
+ BUG();
+ break;
+ default:
+ WARN_ON(1);
+ return 0;
+ }
+
+ return 1;
+}
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+{
+ struct kprobe *cur = kprobe_running();
+
+ if (!cur)
+ return;
+
+ /* restore the return address if this was a non-branching insn */
+ if (cur->ainsn.api.restore != 0)
+ regs->epc = cur->ainsn.api.restore;
+
+ /* restore back original saved kprobe variables and continue */
+ if (kcb->kprobe_status == KPROBE_REENTER) {
+ restore_previous_kprobe(kcb);
+ return;
+ }
+
+ /* call post handler */
+ kcb->kprobe_status = KPROBE_HIT_SSDONE;
+ if (cur->post_handler) {
+ /* post_handler can hit a breakpoint and single step
+ * again, triggering a recursive exception.
+ */
+ cur->post_handler(cur, regs, 0);
+ }
+
+ reset_current_kprobe();
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
+{
+ struct kprobe *cur = kprobe_running();
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ switch (kcb->kprobe_status) {
+ case KPROBE_HIT_SS:
+ case KPROBE_REENTER:
+ /*
+ * We are here because the instruction being single
+ * stepped caused a page fault. We reset the current
+ * kprobe and the ip points back to the probe address
+ * and allow the page fault handler to continue as a
+ * normal page fault.
+ */
+ regs->epc = (unsigned long) cur->addr;
+ if (!instruction_pointer(regs))
+ BUG();
+
+ if (kcb->kprobe_status == KPROBE_REENTER)
+ restore_previous_kprobe(kcb);
+ else
+ reset_current_kprobe();
+
+ break;
+ case KPROBE_HIT_ACTIVE:
+ case KPROBE_HIT_SSDONE:
+ /*
+ * We increment the nmissed count for accounting;
+ * we can also use the npre/npostfault counts to
+ * account for these specific fault cases.
+ */
+ kprobes_inc_nmissed_count(cur);
+
+ /*
+ * We come here because instructions in the pre/post
+ * handler caused the page fault; this could happen
+ * if the handler tries to access user space via
+ * copy_from_user(), get_user(), etc. Let the
+ * user-specified handler try to fix it first.
+ */
+ if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+ return 1;
+
+ /*
+ * In case the user-specified fault handler returned
+ * zero, try to fix up.
+ */
+ if (fixup_exception(regs))
+ return 1;
+ }
+ return 0;
+}
+
+bool __kprobes
+kprobe_breakpoint_handler(struct pt_regs *regs)
+{
+ struct kprobe *p, *cur_kprobe;
+ struct kprobe_ctlblk *kcb;
+ unsigned long addr = instruction_pointer(regs);
+
+ kcb = get_kprobe_ctlblk();
+ cur_kprobe = kprobe_running();
+
+ p = get_kprobe((kprobe_opcode_t *) addr);
+
+ if (p) {
+ if (cur_kprobe) {
+ if (reenter_kprobe(p, regs, kcb))
+ return true;
+ } else {
+ /* Probe hit */
+ set_current_kprobe(p);
+ kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+ /*
+ * If we have no pre-handler or it returned 0, we
+ * continue with normal processing. If we have a
+ * pre-handler and it returned non-zero, it will have
+ * modified the execution path and there is no need for
+ * single-stepping. Just reset the current kprobe and exit.
+ *
+ * pre_handler can hit a breakpoint and can step thru
+ * before return.
+ */
+ if (!p->pre_handler || !p->pre_handler(p, regs))
+ setup_singlestep(p, regs, kcb, 0);
+ else
+ reset_current_kprobe();
+ }
+ return true;
+ }
+
+ /*
+ * The breakpoint instruction was removed right
+ * after we hit it. Another cpu has removed
+ * either a probepoint or a debugger breakpoint
+ * at this address. In either case, no further
+ * handling of this interrupt is appropriate.
+ * Return back to original instruction, and continue.
+ */
+ return false;
+}
+
+bool __kprobes
+kprobe_single_step_handler(struct pt_regs *regs)
+{
+ struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+ if ((kcb->ss_ctx.ss_pending)
+ && (kcb->ss_ctx.match_addr == instruction_pointer(regs))) {
+ clear_ss_context(kcb); /* clear pending ss */
+
+ kprobes_restore_local_irqflag(kcb, regs);
+
+ post_kprobe_handler(kcb, regs);
+ return true;
+ }
+ return false;
+}
+
+/*
+ * Provide a blacklist of symbols identifying ranges which cannot be kprobed.
+ * This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
+ */
+int __init arch_populate_kprobe_blacklist(void)
+{
+ int ret;
+
+ ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+ (unsigned long)__irqentry_text_end);
+ return ret;
+}
+
+void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+ return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+ struct pt_regs *regs)
+{
+ ri->ret_addr = (kprobe_opcode_t *)regs->ra;
+ ri->fp = NULL;
+ regs->ra = (unsigned long) &kretprobe_trampoline;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+ return 0;
+}
+
+int __init arch_init_kprobes(void)
+{
+ return 0;
+}
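
arch_prepare_kretprobe() above redirects ra into kretprobe_trampoline, so return probes fire when the probed function returns. A minimal kretprobe sketch against this code; the symbol is again just an example:

	#include <linux/kprobes.h>
	#include <linux/module.h>

	static int my_ret(struct kretprobe_instance *ri, struct pt_regs *regs)
	{
		/* on RISC-V the return value is in a0 */
		pr_info("returned %ld\n", (long)regs->a0);
		return 0;
	}

	static struct kretprobe rp = {
		.handler = my_ret,
		.kp.symbol_name = "do_sys_open",	/* example target */
		.maxactive = 16,
	};

	static int __init krp_init(void)
	{
		return register_kretprobe(&rp);
	}

	static void __exit krp_exit(void)
	{
		unregister_kretprobe(&rp);
	}

	module_init(krp_init);
	module_exit(krp_exit);
	MODULE_LICENSE("GPL");
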
diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/kprobes_trampoline.S
new file mode 100644
index 000000000000..6e85d021e2a2
--- /dev/null
+++ b/arch/riscv/kernel/probes/kprobes_trampoline.S
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Author: Patrick Stählin <me@packi.ch>
+ */
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+
+ .text
+ .altmacro
+
+ .macro save_all_base_regs
+ REG_S x1, PT_RA(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+ REG_S x5, PT_T0(sp)
+ REG_S x6, PT_T1(sp)
+ REG_S x7, PT_T2(sp)
+ REG_S x8, PT_S0(sp)
+ REG_S x9, PT_S1(sp)
+ REG_S x10, PT_A0(sp)
+ REG_S x11, PT_A1(sp)
+ REG_S x12, PT_A2(sp)
+ REG_S x13, PT_A3(sp)
+ REG_S x14, PT_A4(sp)
+ REG_S x15, PT_A5(sp)
+ REG_S x16, PT_A6(sp)
+ REG_S x17, PT_A7(sp)
+ REG_S x18, PT_S2(sp)
+ REG_S x19, PT_S3(sp)
+ REG_S x20, PT_S4(sp)
+ REG_S x21, PT_S5(sp)
+ REG_S x22, PT_S6(sp)
+ REG_S x23, PT_S7(sp)
+ REG_S x24, PT_S8(sp)
+ REG_S x25, PT_S9(sp)
+ REG_S x26, PT_S10(sp)
+ REG_S x27, PT_S11(sp)
+ REG_S x28, PT_T3(sp)
+ REG_S x29, PT_T4(sp)
+ REG_S x30, PT_T5(sp)
+ REG_S x31, PT_T6(sp)
+ .endm
+
+ .macro restore_all_base_regs
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+ REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+ REG_L x9, PT_S1(sp)
+ REG_L x10, PT_A0(sp)
+ REG_L x11, PT_A1(sp)
+ REG_L x12, PT_A2(sp)
+ REG_L x13, PT_A3(sp)
+ REG_L x14, PT_A4(sp)
+ REG_L x15, PT_A5(sp)
+ REG_L x16, PT_A6(sp)
+ REG_L x17, PT_A7(sp)
+ REG_L x18, PT_S2(sp)
+ REG_L x19, PT_S3(sp)
+ REG_L x20, PT_S4(sp)
+ REG_L x21, PT_S5(sp)
+ REG_L x22, PT_S6(sp)
+ REG_L x23, PT_S7(sp)
+ REG_L x24, PT_S8(sp)
+ REG_L x25, PT_S9(sp)
+ REG_L x26, PT_S10(sp)
+ REG_L x27, PT_S11(sp)
+ REG_L x28, PT_T3(sp)
+ REG_L x29, PT_T4(sp)
+ REG_L x30, PT_T5(sp)
+ REG_L x31, PT_T6(sp)
+ .endm
+
+ENTRY(kretprobe_trampoline)
+ addi sp, sp, -(PT_SIZE_ON_STACK)
+ save_all_base_regs
+
+ move a0, sp /* pt_regs */
+
+ call trampoline_probe_handler
+
+ /* use the result as the return-address */
+ move ra, a0
+
+ restore_all_base_regs
+ addi sp, sp, PT_SIZE_ON_STACK
+
+ ret
+ENDPROC(kretprobe_trampoline)
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
new file mode 100644
index 000000000000..2519ce26377d
--- /dev/null
+++ b/arch/riscv/kernel/probes/simulate-insn.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+
+#include "decode-insn.h"
+#include "simulate-insn.h"
+
+static inline bool rv_insn_reg_get_val(struct pt_regs *regs, u32 index,
+ unsigned long *ptr)
+{
+ if (index == 0)
+ *ptr = 0;
+ else if (index <= 31)
+ *ptr = *((unsigned long *)regs + index);
+ else
+ return false;
+
+ return true;
+}
+
+static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index,
+ unsigned long val)
+{
+ if (index == 0)
+ return false;
+ else if (index <= 31)
+ *((unsigned long *)regs + index) = val;
+ else
+ return false;
+
+ return true;
+}
+
+bool __kprobes simulate_jal(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * 31 30 21 20 19 12 11 7 6 0
+ * imm[20] | imm[10:1] | imm[11] | imm[19:12] | rd | opcode
+ * 1 10 1 8 5 JAL/J
+ */
+ bool ret;
+ u32 imm;
+ u32 index = (opcode >> 7) & 0x1f;
+
+ ret = rv_insn_reg_set_val(regs, index, addr + 4);
+ if (!ret)
+ return ret;
+
+ imm = ((opcode >> 21) & 0x3ff) << 1;
+ imm |= ((opcode >> 20) & 0x1) << 11;
+ imm |= ((opcode >> 12) & 0xff) << 12;
+ imm |= ((opcode >> 31) & 0x1) << 20;
+
+ instruction_pointer_set(regs, addr + sign_extend32((imm), 20));
+
+ return ret;
+}
+
+bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+ /*
+ * 31 20 19 15 14 12 11 7 6 0
+ * offset[11:0] | rs1 | 000 | rd | opcode
+ * 12 5 3 5 JALR/JR
+ */
+ bool ret;
+ unsigned long base_addr;
+ u32 imm = (opcode >> 20) & 0xfff;
+ u32 rd_index = (opcode >> 7) & 0x1f;
+ u32 rs1_index = (opcode >> 15) & 0x1f;
+
+ ret = rv_insn_reg_set_val(regs, rd_index, addr + 4);
+ if (!ret)
+ return ret;
+
+ ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr);
+ if (!ret)
+ return ret;
+
+ instruction_pointer_set(regs, (base_addr + sign_extend32((imm), 11)) & ~1);
+
+ return ret;
+}
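
To make the field extraction in simulate_jal() concrete, here is a stand-alone worked example: 0x008000ef encodes `jal ra, +8`, and running the same shifts as above over it yields an offset of 8.

	#include <stdint.h>
	#include <stdio.h>

	/* same immediate extraction as simulate_jal() above, in userspace form */
	static int32_t jal_offset(uint32_t insn)
	{
		uint32_t imm = ((insn >> 21) & 0x3ff) << 1;	/* imm[10:1] */

		imm |= ((insn >> 20) & 0x1) << 11;		/* imm[11]    */
		imm |= ((insn >> 12) & 0xff) << 12;		/* imm[19:12] */
		imm |= ((insn >> 31) & 0x1) << 20;		/* imm[20]    */

		return (int32_t)(imm << 11) >> 11;		/* sign-extend bit 20 */
	}

	int main(void)
	{
		printf("%d\n", jal_offset(0x008000efU));	/* prints 8 */
		return 0;
	}
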
diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
new file mode 100644
index 000000000000..cb6ff7dccb92
--- /dev/null
+++ b/arch/riscv/kernel/probes/simulate-insn.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _RISCV_KERNEL_PROBES_SIMULATE_INSN_H
+#define _RISCV_KERNEL_PROBES_SIMULATE_INSN_H
+
+#define __RISCV_INSN_FUNCS(name, mask, val) \
+static __always_inline bool riscv_insn_is_##name(probe_opcode_t code) \
+{ \
+ BUILD_BUG_ON(~(mask) & (val)); \
+ return (code & (mask)) == (val); \
+} \
+bool simulate_##name(u32 opcode, unsigned long addr, \
+ struct pt_regs *regs)
+
+#define RISCV_INSN_REJECTED(name, code) \
+ do { \
+ if (riscv_insn_is_##name(code)) { \
+ return INSN_REJECTED; \
+ } \
+ } while (0)
+
+__RISCV_INSN_FUNCS(system, 0x7f, 0x73);
+__RISCV_INSN_FUNCS(fence, 0x7f, 0x0f);
+
+#define RISCV_INSN_SET_SIMULATE(name, code) \
+ do { \
+ if (riscv_insn_is_##name(code)) { \
+ api->handler = simulate_##name; \
+ return INSN_GOOD_NO_SLOT; \
+ } \
+ } while (0)
+
+__RISCV_INSN_FUNCS(c_j, 0xe003, 0xa001);
+__RISCV_INSN_FUNCS(c_jr, 0xf007, 0x8002);
+__RISCV_INSN_FUNCS(c_jal, 0xe003, 0x2001);
+__RISCV_INSN_FUNCS(c_jalr, 0xf007, 0x9002);
+__RISCV_INSN_FUNCS(c_beqz, 0xe003, 0xc001);
+__RISCV_INSN_FUNCS(c_bnez, 0xe003, 0xe001);
+__RISCV_INSN_FUNCS(c_ebreak, 0xffff, 0x9002);
+
+__RISCV_INSN_FUNCS(auipc, 0x7f, 0x17);
+__RISCV_INSN_FUNCS(branch, 0x7f, 0x63);
+
+__RISCV_INSN_FUNCS(jal, 0x7f, 0x6f);
+__RISCV_INSN_FUNCS(jalr, 0x707f, 0x67);
+
+#endif /* _RISCV_KERNEL_PROBES_SIMULATE_INSN_H */
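
Each __RISCV_INSN_FUNCS(name, mask, val) line above generates a (mask, value) matcher: an instruction is `name` iff (insn & mask) == val. For example, jal pins only the 7 opcode bits (0x7f/0x6f), while jalr also pins funct3 (0x707f/0x67). A self-contained restatement:

	#include <stdbool.h>
	#include <stdint.h>

	static bool insn_is_jal(uint32_t insn)
	{
		return (insn & 0x7f) == 0x6f;		/* opcode == 1101111 */
	}

	static bool insn_is_jalr(uint32_t insn)
	{
		return (insn & 0x707f) == 0x67;		/* opcode == 1100111, funct3 == 000 */
	}
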
diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c
new file mode 100644
index 000000000000..7a057b5f0adc
--- /dev/null
+++ b/arch/riscv/kernel/probes/uprobes.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/highmem.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+
+#include "decode-insn.h"
+
+#define UPROBE_TRAP_NR UINT_MAX
+
+bool is_swbp_insn(uprobe_opcode_t *insn)
+{
+#ifdef CONFIG_RISCV_ISA_C
+ return (*insn & 0xffff) == UPROBE_SWBP_INSN;
+#else
+ return *insn == UPROBE_SWBP_INSN;
+#endif
+}
+
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+ return instruction_pointer(regs);
+}
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ unsigned long addr)
+{
+ probe_opcode_t opcode;
+
+ opcode = *(probe_opcode_t *)(&auprobe->insn[0]);
+
+ auprobe->insn_size = GET_INSN_LENGTH(opcode);
+
+ switch (riscv_probe_decode_insn(&opcode, &auprobe->api)) {
+ case INSN_REJECTED:
+ return -EINVAL;
+
+ case INSN_GOOD_NO_SLOT:
+ auprobe->simulate = true;
+ break;
+
+ case INSN_GOOD:
+ auprobe->simulate = false;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ utask->autask.saved_cause = current->thread.bad_cause;
+ current->thread.bad_cause = UPROBE_TRAP_NR;
+
+ instruction_pointer_set(regs, utask->xol_vaddr);
+
+ regs->status &= ~SR_SPIE;
+
+ return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ WARN_ON_ONCE(current->thread.bad_cause != UPROBE_TRAP_NR);
+
+ instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size);
+
+ regs->status |= SR_SPIE;
+
+ return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+ if (t->thread.bad_cause != UPROBE_TRAP_NR)
+ return true;
+
+ return false;
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ probe_opcode_t insn;
+ unsigned long addr;
+
+ if (!auprobe->simulate)
+ return false;
+
+ insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+ addr = instruction_pointer(regs);
+
+ if (auprobe->api.handler)
+ auprobe->api.handler(insn, addr, regs);
+
+ return true;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+ struct uprobe_task *utask = current->utask;
+
+ /*
+ * Task has received a fatal signal, so reset back to the probed
+ * address.
+ */
+ instruction_pointer_set(regs, utask->vaddr);
+
+ regs->status &= ~SR_SPIE;
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+ struct pt_regs *regs)
+{
+ if (ctx == RP_CHECK_CHAIN_CALL)
+ return regs->sp <= ret->stack;
+ else
+ return regs->sp < ret->stack;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+ struct pt_regs *regs)
+{
+ unsigned long ra;
+
+ ra = regs->ra;
+
+ regs->ra = trampoline_vaddr;
+
+ return ra;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self,
+ unsigned long val, void *data)
+{
+ return NOTIFY_DONE;
+}
+
+bool uprobe_breakpoint_handler(struct pt_regs *regs)
+{
+ if (uprobe_pre_sstep_notifier(regs))
+ return true;
+
+ return false;
+}
+
+bool uprobe_single_step_handler(struct pt_regs *regs)
+{
+ if (uprobe_post_sstep_notifier(regs))
+ return true;
+
+ return false;
+}
+
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+ void *src, unsigned long len)
+{
+ /* Initialize the slot */
+ void *kaddr = kmap_atomic(page);
+ void *dst = kaddr + (vaddr & ~PAGE_MASK);
+
+ memcpy(dst, src, len);
+
+ /* Add an ebreak after the opcode to simulate a single step */
+ if (vaddr) {
+ dst += GET_INSN_LENGTH(*(probe_opcode_t *)src);
+ *(uprobe_opcode_t *)dst = __BUG_INSN_32;
+ }
+
+ kunmap_atomic(kaddr);
+
+ /*
+ * We probably need flush_icache_user_page() but it needs a vma.
+ * This should work for most architectures by default. If an
+ * architecture needs to do something different it can define
+ * its own version of the function.
+ */
+ flush_dcache_page(page);
+}
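
Both the analyze and copy_ixol paths above lean on GET_INSN_LENGTH() to distinguish 16-bit compressed instructions from 32-bit ones; on RISC-V a standard 32-bit encoding has its two low bits set to 11. A hedged sketch of that check (the kernel macro lives in the arch's asm headers):

	#include <stdint.h>

	/* sketch of the GET_INSN_LENGTH() logic used by the probe code above */
	static unsigned long insn_length(uint32_t insn)
	{
		return (insn & 0x3) == 0x3 ? 4 : 2;	/* low bits 11 => 32-bit insn */
	}
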
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index dd5f985b1f40..6f728e731bed 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -18,13 +18,14 @@
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/csr.h>
+#include <asm/stacktrace.h>
#include <asm/string.h>
#include <asm/switch_to.h>
#include <asm/thread_info.h>
register unsigned long gp_in_global __asm__("gp");
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
@@ -39,11 +40,16 @@ void arch_cpu_idle(void)
raw_local_irq_enable();
}
-void show_regs(struct pt_regs *regs)
+void __show_regs(struct pt_regs *regs)
{
show_regs_print_info(KERN_DEFAULT);
- pr_cont("epc: " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
+ if (!user_mode(regs)) {
+ pr_cont("epc : %pS\n", (void *)regs->epc);
+ pr_cont(" ra : %pS\n", (void *)regs->ra);
+ }
+
+ pr_cont("epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
regs->epc, regs->ra, regs->sp);
pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
regs->gp, regs->tp, regs->t0);
@@ -69,6 +75,12 @@ void show_regs(struct pt_regs *regs)
pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
regs->status, regs->badaddr, regs->cause);
}
+void show_regs(struct pt_regs *regs)
+{
+ __show_regs(regs);
+ if (!user_mode(regs))
+ dump_backtrace(regs, NULL, KERN_DEFAULT);
+}
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
@@ -112,7 +124,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
struct pt_regs *childregs = task_pt_regs(p);
/* p->thread holds context to be restored by __switch_to() */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gp = gp_in_global;
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 2d6395f5ad54..1a85305720e8 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -114,6 +114,105 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
return &riscv_user_native_view;
}
+struct pt_regs_offset {
+ const char *name;
+ int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+ REG_OFFSET_NAME(epc),
+ REG_OFFSET_NAME(ra),
+ REG_OFFSET_NAME(sp),
+ REG_OFFSET_NAME(gp),
+ REG_OFFSET_NAME(tp),
+ REG_OFFSET_NAME(t0),
+ REG_OFFSET_NAME(t1),
+ REG_OFFSET_NAME(t2),
+ REG_OFFSET_NAME(s0),
+ REG_OFFSET_NAME(s1),
+ REG_OFFSET_NAME(a0),
+ REG_OFFSET_NAME(a1),
+ REG_OFFSET_NAME(a2),
+ REG_OFFSET_NAME(a3),
+ REG_OFFSET_NAME(a4),
+ REG_OFFSET_NAME(a5),
+ REG_OFFSET_NAME(a6),
+ REG_OFFSET_NAME(a7),
+ REG_OFFSET_NAME(s2),
+ REG_OFFSET_NAME(s3),
+ REG_OFFSET_NAME(s4),
+ REG_OFFSET_NAME(s5),
+ REG_OFFSET_NAME(s6),
+ REG_OFFSET_NAME(s7),
+ REG_OFFSET_NAME(s8),
+ REG_OFFSET_NAME(s9),
+ REG_OFFSET_NAME(s10),
+ REG_OFFSET_NAME(s11),
+ REG_OFFSET_NAME(t3),
+ REG_OFFSET_NAME(t4),
+ REG_OFFSET_NAME(t5),
+ REG_OFFSET_NAME(t6),
+ REG_OFFSET_NAME(status),
+ REG_OFFSET_NAME(badaddr),
+ REG_OFFSET_NAME(cause),
+ REG_OFFSET_NAME(orig_a0),
+ REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name: the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL.
+ */
+int regs_query_register_offset(const char *name)
+{
+ const struct pt_regs_offset *roff;
+
+ for (roff = regoffset_table; roff->name != NULL; roff++)
+ if (!strcmp(roff->name, name))
+ return roff->offset;
+ return -EINVAL;
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @addr: address which is checked.
+ *
+ * regs_within_kernel_stack() checks @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+ return (addr & ~(THREAD_SIZE - 1)) ==
+ (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs: pt_regs which contains kernel stack pointer.
+ * @n: stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the @n-th entry of the kernel stack
+ * which is specified by @regs. If the @n-th entry is NOT in the kernel
+ * stack, this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+ unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+ addr += n;
+ if (regs_within_kernel_stack(regs, (unsigned long)addr))
+ return *addr;
+ else
+ return 0;
+}
+
void ptrace_disable(struct task_struct *child)
{
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
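
These helpers let generic code, notably kprobe events with fetch arguments such as %a0, resolve a register by name and then read it out of pt_regs without arch-specific switch statements. The read side pairs with the table roughly like this sketch (my_regs_get_register is a hedged stand-in for the usual regs_get_register() companion helper):

	#include <linux/ptrace.h>

	static unsigned long my_regs_get_register(struct pt_regs *regs,
						  unsigned int offset)
	{
		if (offset > sizeof(struct pt_regs) - sizeof(unsigned long))
			return 0;	/* reject offsets outside pt_regs */

		return *(unsigned long *)((unsigned long)regs + offset);
	}

	/* e.g.: my_regs_get_register(regs, regs_query_register_offset("a0")) */
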
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index 226ccce0f9e0..f4a7db3d309e 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -351,7 +351,7 @@ static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
* sbi_set_timer() - Program the timer for next timer event.
* @stime_value: The value after which next timer event should fire.
*
- * Return: None
+ * Return: None.
*/
void sbi_set_timer(uint64_t stime_value)
{
@@ -362,11 +362,11 @@ void sbi_set_timer(uint64_t stime_value)
* sbi_send_ipi() - Send an IPI to any hart.
* @hart_mask: A cpu mask containing all the target harts.
*
- * Return: None
+ * Return: 0 on success, appropriate linux error code otherwise.
*/
-void sbi_send_ipi(const unsigned long *hart_mask)
+int sbi_send_ipi(const unsigned long *hart_mask)
{
- __sbi_send_ipi(hart_mask);
+ return __sbi_send_ipi(hart_mask);
}
EXPORT_SYMBOL(sbi_send_ipi);
@@ -374,12 +374,12 @@ EXPORT_SYMBOL(sbi_send_ipi);
* sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts.
* @hart_mask: A cpu mask containing all the target harts.
*
- * Return: None
+ * Return: 0 on success, appropriate linux error code otherwise.
*/
-void sbi_remote_fence_i(const unsigned long *hart_mask)
+int sbi_remote_fence_i(const unsigned long *hart_mask)
{
- __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
- hart_mask, 0, 0, 0, 0);
+ return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
+ hart_mask, 0, 0, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_fence_i);
@@ -390,14 +390,14 @@ EXPORT_SYMBOL(sbi_remote_fence_i);
* @start: Start of the virtual address
* @size: Total size of the virtual address range.
*
- * Return: None
+ * Return: 0 on success, appropriate linux error code otherwise.
*/
-void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma(const unsigned long *hart_mask,
unsigned long start,
unsigned long size)
{
- __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
- hart_mask, start, size, 0, 0);
+ return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+ hart_mask, start, size, 0, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma);
@@ -410,15 +410,15 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma);
* @size: Total size of the virtual address range.
* @asid: The value of address space identifier (ASID).
*
- * Return: None
+ * Return: 0 on success, appropriate linux error code otherwise.
*/
-void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
unsigned long start,
unsigned long size,
unsigned long asid)
{
- __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
- hart_mask, start, size, asid, 0);
+ return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
+ hart_mask, start, size, asid, 0);
}
EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
@@ -560,7 +560,7 @@ static struct riscv_ipi_ops sbi_ipi_ops = {
.ipi_inject = sbi_send_cpumask_ipi
};
-int __init sbi_init(void)
+void __init sbi_init(void)
{
int ret;
@@ -600,6 +600,4 @@ int __init sbi_init(void)
}
riscv_set_ipi_ops(&sbi_ipi_ops);
-
- return 0;
}
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index c7c0655dd45b..e85bacff1b50 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -216,8 +216,15 @@ static void __init init_resources(void)
static void __init parse_dtb(void)
{
/* Early scan of device tree from init memory */
- if (early_init_dt_scan(dtb_early_va))
+ if (early_init_dt_scan(dtb_early_va)) {
+ const char *name = of_flat_dt_get_machine_name();
+
+ if (name) {
+ pr_info("Machine model: %s\n", name);
+ dump_stack_set_arch_desc("%s (DT)", name);
+ }
return;
+ }
pr_err("No DTB passed to the kernel\n");
#ifdef CONFIG_CMDLINE_FORCE
@@ -252,9 +259,9 @@ void __init setup_arch(char **cmdline_p)
else
pr_err("No DTB found in kernel mappings\n");
#endif
+ misc_mem_init();
- if (IS_ENABLED(CONFIG_RISCV_SBI))
- sbi_init();
+ sbi_init();
if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
protect_kernel_text_data();
@@ -275,13 +282,19 @@ void __init setup_arch(char **cmdline_p)
static int __init topology_init(void)
{
- int i;
+ int i, ret;
+
+ for_each_online_node(i)
+ register_one_node(i);
for_each_possible_cpu(i) {
struct cpu *cpu = &per_cpu(cpu_devices, i);
cpu->hotpluggable = cpu_has_hotplug(i);
- register_cpu(cpu, i);
+ ret = register_cpu(cpu, i);
+ if (unlikely(ret))
+ pr_warn("Warning: %s: register_cpu %d failed (%d)\n",
+ __func__, i, ret);
}
return 0;
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 469aef8ed922..65942b3748b4 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -309,6 +309,9 @@ static void do_signal(struct pt_regs *regs)
asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
unsigned long thread_info_flags)
{
+ if (thread_info_flags & _TIF_UPROBE)
+ uprobe_notify_resume(regs);
+
/* Handle pending signal delivery */
if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
do_signal(regs);
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 96167d55ed98..5e276c25646f 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -27,6 +27,7 @@
#include <asm/cpu_ops.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
+#include <asm/numa.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/sbi.h>
@@ -45,13 +46,18 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
{
int cpuid;
int ret;
+ unsigned int curr_cpuid;
+
+ curr_cpuid = smp_processor_id();
+ numa_store_cpu_info(curr_cpuid);
+ numa_add_cpu(curr_cpuid);
/* This covers non-smp usecase mandated by "nosmp" option */
if (max_cpus == 0)
return;
for_each_possible_cpu(cpuid) {
- if (cpuid == smp_processor_id())
+ if (cpuid == curr_cpuid)
continue;
if (cpu_ops[cpuid]->cpu_prepare) {
ret = cpu_ops[cpuid]->cpu_prepare(cpuid);
@@ -59,6 +65,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
continue;
}
set_cpu_present(cpuid, true);
+ numa_store_cpu_info(cpuid);
}
}
@@ -79,6 +86,7 @@ void __init setup_smp(void)
if (hart == cpuid_to_hartid_map(0)) {
BUG_ON(found_boot_cpu);
found_boot_cpu = 1;
+ early_map_cpu_to_node(0, of_node_to_nid(dn));
continue;
}
if (cpuid >= NR_CPUS) {
@@ -88,6 +96,7 @@ void __init setup_smp(void)
}
cpuid_to_hartid_map(cpuid) = hart;
+ early_map_cpu_to_node(cpuid, of_node_to_nid(dn));
cpuid++;
}
@@ -153,6 +162,7 @@ asmlinkage __visible void smp_callin(void)
current->active_mm = mm;
notify_cpu_starting(curr_cpuid);
+ numa_add_cpu(curr_cpuid);
update_siblings_masks(curr_cpuid);
set_cpu_online(curr_cpuid, 1);
diff --git a/arch/riscv/kernel/soc.c b/arch/riscv/kernel/soc.c
index c7b0a73e382e..a0516172a33c 100644
--- a/arch/riscv/kernel/soc.c
+++ b/arch/riscv/kernel/soc.c
@@ -26,30 +26,3 @@ void __init soc_early_init(void)
}
}
}
-
-static bool soc_builtin_dtb_match(unsigned long vendor_id,
- unsigned long arch_id, unsigned long imp_id,
- const struct soc_builtin_dtb *entry)
-{
- return entry->vendor_id == vendor_id &&
- entry->arch_id == arch_id &&
- entry->imp_id == imp_id;
-}
-
-void * __init soc_lookup_builtin_dtb(void)
-{
- unsigned long vendor_id, arch_id, imp_id;
- const struct soc_builtin_dtb *s;
-
- __asm__ ("csrr %0, mvendorid" : "=r"(vendor_id));
- __asm__ ("csrr %0, marchid" : "=r"(arch_id));
- __asm__ ("csrr %0, mimpid" : "=r"(imp_id));
-
- for (s = (void *)&__soc_builtin_dtb_table_start;
- (void *)s < (void *)&__soc_builtin_dtb_table_end; s++) {
- if (soc_builtin_dtb_match(vendor_id, arch_id, imp_id, s))
- return s->dtb_func();
- }
-
- return NULL;
-}
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index df5d2da7c40b..3f893c9d9d85 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -53,9 +53,15 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
/* Unwind stack frame */
frame = (struct stackframe *)fp - 1;
sp = fp;
- fp = frame->fp;
- pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
- (unsigned long *)(fp - 8));
+ if (regs && (regs->epc == pc) && (frame->fp & 0x7)) {
+ fp = frame->ra;
+ pc = regs->ra;
+ } else {
+ fp = frame->fp;
+ pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+ (unsigned long *)(fp - 8));
+ }
+
}
}
@@ -100,10 +106,16 @@ static bool print_trace_address(void *arg, unsigned long pc)
return true;
}
+void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
+ const char *loglvl)
+{
+ pr_cont("%sCall Trace:\n", loglvl);
+ walk_stackframe(task, regs, print_trace_address, (void *)loglvl);
+}
+
void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
{
- pr_cont("Call Trace:\n");
- walk_stackframe(task, NULL, print_trace_address, (void *)loglvl);
+ dump_backtrace(NULL, task, loglvl);
}
static bool save_wchan(void *arg, unsigned long pc)
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index ad14f4466d92..3ed2c23601a0 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -12,10 +12,12 @@
#include <linux/signal.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
+#include <linux/kprobes.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/irq.h>
+#include <asm/bug.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
#include <asm/csr.h>
@@ -66,7 +68,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
tsk->comm, task_pid_nr(tsk), signo, code, addr);
print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
pr_cont("\n");
- show_regs(regs);
+ __show_regs(regs);
}
force_sig_fault(signo, code, (void __user *)addr);
@@ -75,6 +77,8 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
static void do_trap_error(struct pt_regs *regs, int signo, int code,
unsigned long addr, const char *str)
{
+ current->thread.bad_cause = regs->cause;
+
if (user_mode(regs)) {
do_trap(regs, signo, code, addr);
} else {
@@ -145,6 +149,22 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
asmlinkage __visible void do_trap_break(struct pt_regs *regs)
{
+#ifdef CONFIG_KPROBES
+ if (kprobe_single_step_handler(regs))
+ return;
+
+ if (kprobe_breakpoint_handler(regs))
+ return;
+#endif
+#ifdef CONFIG_UPROBES
+ if (uprobe_single_step_handler(regs))
+ return;
+
+ if (uprobe_breakpoint_handler(regs))
+ return;
+#endif
+ current->thread.bad_cause = regs->cause;
+
if (user_mode(regs))
force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->epc);
#ifdef CONFIG_KGDB
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 0cfd6da784f8..71a315e73cbe 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -32,9 +32,10 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
# Disable -pg to prevent insert call site
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
-# Disable gcov profiling for VDSO code
+# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
KCOV_INSTRUMENT := n
+KASAN_SANITIZE := n
# Force dependency
$(obj)/vdso.o: $(obj)/vdso.so
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index ac6171e9c19e..25d5c9664e57 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -5,3 +5,5 @@ lib-y += memset.o
lib-y += memmove.o
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/riscv/lib/error-inject.c b/arch/riscv/lib/error-inject.c
new file mode 100644
index 000000000000..d667ade2bc41
--- /dev/null
+++ b/arch/riscv/lib/error-inject.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ instruction_pointer_set(regs, regs->ra);
+}
+NOKPROBE_SYMBOL(override_function_with_return);
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index c0185e556ca5..7ebaef10ea1b 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -2,7 +2,8 @@
CFLAGS_init.o := -mcmodel=medany
ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_init.o = -pg
+CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
endif
KCOV_INSTRUMENT_init.o := n
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 613ec81a8979..68aa312fc352 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -2,13 +2,273 @@
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2017 SiFive
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
*/
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/static_key.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
#include <asm/mmu_context.h>
+#ifdef CONFIG_MMU
+
+static DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
+
+static unsigned long asid_bits;
+static unsigned long num_asids;
+static unsigned long asid_mask;
+
+static atomic_long_t current_version;
+
+static DEFINE_RAW_SPINLOCK(context_lock);
+static cpumask_t context_tlb_flush_pending;
+static unsigned long *context_asid_map;
+
+static DEFINE_PER_CPU(atomic_long_t, active_context);
+static DEFINE_PER_CPU(unsigned long, reserved_context);
+
+static bool check_update_reserved_context(unsigned long cntx,
+ unsigned long newcntx)
+{
+ int cpu;
+ bool hit = false;
+
+ /*
+ * Iterate over the set of reserved CONTEXTs looking for a match.
+ * If we find one, then we can update our mm to use the new CONTEXT
+ * (i.e. the same CONTEXT in the current_version) but we can't
+ * exit the loop early, since we need to ensure that all copies
+ * of the old CONTEXT are updated to reflect the mm. Failure to do
+ * so could result in us missing the reserved CONTEXT in a future
+ * version.
+ */
+ for_each_possible_cpu(cpu) {
+ if (per_cpu(reserved_context, cpu) == cntx) {
+ hit = true;
+ per_cpu(reserved_context, cpu) = newcntx;
+ }
+ }
+
+ return hit;
+}
+
+static void __flush_context(void)
+{
+ int i;
+ unsigned long cntx;
+
+ /* Must be called with context_lock held */
+ lockdep_assert_held(&context_lock);
+
+ /* Update the list of reserved ASIDs and the ASID bitmap. */
+ bitmap_clear(context_asid_map, 0, num_asids);
+
+ /* Mark already active ASIDs as used */
+ for_each_possible_cpu(i) {
+ cntx = atomic_long_xchg_relaxed(&per_cpu(active_context, i), 0);
+ /*
+ * If this CPU has already been through a rollover, but
+ * hasn't run another task in the meantime, we must preserve
+ * its reserved CONTEXT, as this is the only trace we have of
+ * the process it is still running.
+ */
+ if (cntx == 0)
+ cntx = per_cpu(reserved_context, i);
+
+ __set_bit(cntx & asid_mask, context_asid_map);
+ per_cpu(reserved_context, i) = cntx;
+ }
+
+ /* Mark ASID #0 as used because it is used at boot-time */
+ __set_bit(0, context_asid_map);
+
+ /* Queue a TLB invalidation for each CPU on next context-switch */
+ cpumask_setall(&context_tlb_flush_pending);
+}
+
+static unsigned long __new_context(struct mm_struct *mm)
+{
+ static u32 cur_idx = 1;
+ unsigned long cntx = atomic_long_read(&mm->context.id);
+ unsigned long asid, ver = atomic_long_read(&current_version);
+
+ /* Must be called with context_lock held */
+ lockdep_assert_held(&context_lock);
+
+ if (cntx != 0) {
+ unsigned long newcntx = ver | (cntx & asid_mask);
+
+ /*
+ * If our current CONTEXT was active during a rollover, we
+ * can continue to use it and this was just a false alarm.
+ */
+ if (check_update_reserved_context(cntx, newcntx))
+ return newcntx;
+
+ /*
+ * We had a valid CONTEXT in a previous life, so try to
+ * re-use it if possible.
+ */
+ if (!__test_and_set_bit(cntx & asid_mask, context_asid_map))
+ return newcntx;
+ }
+
+ /*
+ * Allocate a free ASID. If we can't find one then increment
+ * current_version and flush all ASIDs.
+ */
+ asid = find_next_zero_bit(context_asid_map, num_asids, cur_idx);
+ if (asid != num_asids)
+ goto set_asid;
+
+ /* We're out of ASIDs, so increment current_version */
+ ver = atomic_long_add_return_relaxed(num_asids, &current_version);
+
+ /* Flush everything */
+ __flush_context();
+
+ /* We have more ASIDs than CPUs, so this will always succeed */
+ asid = find_next_zero_bit(context_asid_map, num_asids, 1);
+
+set_asid:
+ __set_bit(asid, context_asid_map);
+ cur_idx = asid;
+ return asid | ver;
+}
+
+static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
+{
+ unsigned long flags;
+ bool need_flush_tlb = false;
+ unsigned long cntx, old_active_cntx;
+
+ cntx = atomic_long_read(&mm->context.id);
+
+ /*
+ * If our active_context is non-zero and the context matches the
+ * current_version, then we update the active_context entry with a
+ * relaxed cmpxchg.
+ *
+ * Following is how we handle racing with a concurrent rollover:
+ *
+ * - We get a zero back from the cmpxchg and end up waiting on the
+ * lock. Taking the lock synchronises with the rollover and so
+ * we are forced to see the updated version.
+ *
+ * - We get a valid context back from the cmpxchg, in which case we
+ * continue using the old ASID, because __flush_context() would have
+ * marked the ASID of active_context as used; at the next context
+ * switch we will allocate a new context.
+ */
+ old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu));
+ if (old_active_cntx &&
+ ((cntx & ~asid_mask) == atomic_long_read(&current_version)) &&
+ atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu),
+ old_active_cntx, cntx))
+ goto switch_mm_fast;
+
+ raw_spin_lock_irqsave(&context_lock, flags);
+
+ /* Check that our ASID belongs to the current_version. */
+ cntx = atomic_long_read(&mm->context.id);
+ if ((cntx & ~asid_mask) != atomic_long_read(&current_version)) {
+ cntx = __new_context(mm);
+ atomic_long_set(&mm->context.id, cntx);
+ }
+
+ if (cpumask_test_and_clear_cpu(cpu, &context_tlb_flush_pending))
+ need_flush_tlb = true;
+
+ atomic_long_set(&per_cpu(active_context, cpu), cntx);
+
+ raw_spin_unlock_irqrestore(&context_lock, flags);
+
+switch_mm_fast:
+ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
+ ((cntx & asid_mask) << SATP_ASID_SHIFT) |
+ SATP_MODE);
+
+ if (need_flush_tlb)
+ local_flush_tlb_all();
+}
+
+static void set_mm_noasid(struct mm_struct *mm)
+{
+ /* Switch the page table and blindly nuke entire local TLB */
+ csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE);
+ local_flush_tlb_all();
+}
+
+static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+{
+ if (static_branch_unlikely(&use_asid_allocator))
+ set_mm_asid(mm, cpu);
+ else
+ set_mm_noasid(mm);
+}
+
+static int asids_init(void)
+{
+ unsigned long old;
+
+ /* Figure out the number of ASID bits in HW */
+ old = csr_read(CSR_SATP);
+ asid_bits = old | (SATP_ASID_MASK << SATP_ASID_SHIFT);
+ csr_write(CSR_SATP, asid_bits);
+ asid_bits = (csr_read(CSR_SATP) >> SATP_ASID_SHIFT) & SATP_ASID_MASK;
+ asid_bits = fls_long(asid_bits);
+ csr_write(CSR_SATP, old);
+
+ /*
+ * In the process of determining the number of ASID bits (above)
+ * we polluted the TLB of the current HART, so flush it to remove
+ * the unwanted entries.
+ */
+ local_flush_tlb_all();
+
+ /* Pre-compute ASID details */
+ num_asids = 1 << asid_bits;
+ asid_mask = num_asids - 1;
+
+ /*
+ * Use the ASID allocator only if the number of HW ASIDs is
+ * at least twice the number of possible CPUs.
+ */
+ if (num_asids > (2 * num_possible_cpus())) {
+ atomic_long_set(&current_version, num_asids);
+
+ context_asid_map = kcalloc(BITS_TO_LONGS(num_asids),
+ sizeof(*context_asid_map), GFP_KERNEL);
+ if (!context_asid_map)
+ panic("Failed to allocate bitmap for %lu ASIDs\n",
+ num_asids);
+
+ __set_bit(0, context_asid_map);
+
+ static_branch_enable(&use_asid_allocator);
+
+ pr_info("ASID allocator using %lu bits (%lu entries)\n",
+ asid_bits, num_asids);
+ } else {
+ pr_info("ASID allocator disabled\n");
+ }
+
+ return 0;
+}
+early_initcall(asids_init);
+#else
+static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+{
+ /* Nothing to do here when there is no MMU */
+}
+#endif
+
/*
* When necessary, performs a deferred icache flush for the given MM context,
* on the local CPU. RISC-V has no direct mechanism for instruction cache
@@ -58,10 +318,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
-#ifdef CONFIG_MMU
- csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
- local_flush_tlb_all();
-#endif
+ set_mm(next, cpu);
flush_icache_deferred(next);
}
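
The context id managed above is a single unsigned long packing a rollover version in the high bits and a hardware ASID in the low asid_bits. A small stand-alone sketch of that packing; the 9-bit width is only an example, the real width is probed from satp at boot:

	#include <stdio.h>

	int main(void)
	{
		unsigned long asid_bits = 9;			/* example width only */
		unsigned long num_asids = 1UL << asid_bits;
		unsigned long asid_mask = num_asids - 1;

		unsigned long ver  = 3 * num_asids;		/* third rollover */
		unsigned long cntx = ver | 0x42;		/* as set by __new_context() */

		printf("asid=%#lx version=%#lx\n",
		       cntx & asid_mask,			/* 0x42 -> written to satp */
		       cntx & ~asid_mask);			/* compared with current_version */
		return 0;
	}
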
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 3c8b9e433c67..8f17519208c7 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -13,14 +13,30 @@
#include <linux/perf_event.h>
#include <linux/signal.h>
#include <linux/uaccess.h>
+#include <linux/kprobes.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
#include "../kernel/head.h"
+static void die_kernel_fault(const char *msg, unsigned long addr,
+ struct pt_regs *regs)
+{
+ bust_spinlocks(1);
+
+ pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
+ addr);
+
+ bust_spinlocks(0);
+ die(regs, "Oops");
+ do_exit(SIGKILL);
+}
+
static inline void no_context(struct pt_regs *regs, unsigned long addr)
{
+ const char *msg;
+
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs))
return;
@@ -29,12 +45,8 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr)
* Oops. The kernel tried to access some bad page. We'll have to
* terminate things with extreme prejudice.
*/
- bust_spinlocks(1);
- pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
- (addr < PAGE_SIZE) ? "NULL pointer dereference" :
- "paging request", addr);
- die(regs, "Oops");
- do_exit(SIGKILL);
+ msg = (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request";
+ die_kernel_fault(msg, addr, regs);
}
static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
@@ -202,6 +214,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
tsk = current;
mm = tsk->mm;
+ if (kprobe_page_fault(regs, cause))
+ return;
+
/*
* Fault-in kernel-space virtual memory on-demand.
* The 'reference' page table is init_mm.pgd.
@@ -225,6 +240,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
* in an atomic region, then we must not take the fault.
*/
if (unlikely(faulthandler_disabled() || !mm)) {
+ tsk->thread.bad_cause = cause;
no_context(regs, addr);
return;
}
@@ -232,6 +248,11 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
if (user_mode(regs))
flags |= FAULT_FLAG_USER;
+ if (!user_mode(regs) && addr < TASK_SIZE &&
+ unlikely(!(regs->status & SR_SUM)))
+ die_kernel_fault("access to user memory without uaccess routines",
+ addr, regs);
+
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
if (cause == EXC_STORE_PAGE_FAULT)
@@ -242,16 +263,19 @@ retry:
mmap_read_lock(mm);
vma = find_vma(mm, addr);
if (unlikely(!vma)) {
+ tsk->thread.bad_cause = cause;
bad_area(regs, mm, code, addr);
return;
}
if (likely(vma->vm_start <= addr))
goto good_area;
if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+ tsk->thread.bad_cause = cause;
bad_area(regs, mm, code, addr);
return;
}
if (unlikely(expand_stack(vma, addr))) {
+ tsk->thread.bad_cause = cause;
bad_area(regs, mm, code, addr);
return;
}
@@ -264,6 +288,7 @@ good_area:
code = SEGV_ACCERR;
if (unlikely(access_error(cause, vma))) {
+ tsk->thread.bad_cause = cause;
bad_area(regs, mm, code, addr);
return;
}
@@ -297,6 +322,7 @@ good_area:
mmap_read_unlock(mm);
if (unlikely(fault & VM_FAULT_ERROR)) {
+ tsk->thread.bad_cause = cause;
mm_fault_error(regs, addr, fault);
return;
}
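
The new SR_SUM check catches kernel code dereferencing user addresses directly: legitimate accesses go through uaccess routines that set the SUM bit (permit Supervisor access to User Memory) for the duration of the access. A sketch in the spirit of the arch's uaccess helpers; the my_ names are illustrative:

	/* bracket a user access with the SUM bit, as the uaccess routines do */
	static inline void my_enable_user_access(void)
	{
		asm volatile ("csrs sstatus, %0" : : "r" (SR_SUM) : "memory");
	}

	static inline void my_disable_user_access(void)
	{
		asm volatile ("csrc sstatus, %0" : : "r" (SR_SUM) : "memory");
	}
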
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index f9f9568d689e..067583ab1bd7 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -21,6 +21,7 @@
#include <asm/soc.h>
#include <asm/io.h>
#include <asm/ptdump.h>
+#include <asm/numa.h>
#include "../kernel/head.h"
@@ -105,85 +106,19 @@ void __init mem_init(void)
print_vm_layout();
}
-#ifdef CONFIG_BLK_DEV_INITRD
-static void __init setup_initrd(void)
-{
- phys_addr_t start;
- unsigned long size;
-
- /* Ignore the virtul address computed during device tree parsing */
- initrd_start = initrd_end = 0;
-
- if (!phys_initrd_size)
- return;
- /*
- * Round the memory region to page boundaries as per free_initrd_mem()
- * This allows us to detect whether the pages overlapping the initrd
- * are in use, but more importantly, reserves the entire set of pages
- * as we don't want these pages allocated for other purposes.
- */
- start = round_down(phys_initrd_start, PAGE_SIZE);
- size = phys_initrd_size + (phys_initrd_start - start);
- size = round_up(size, PAGE_SIZE);
-
- if (!memblock_is_region_memory(start, size)) {
- pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
- (u64)start, size);
- goto disable;
- }
-
- if (memblock_is_region_reserved(start, size)) {
- pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
- (u64)start, size);
- goto disable;
- }
-
- memblock_reserve(start, size);
- /* Now convert initrd to virtual addresses */
- initrd_start = (unsigned long)__va(phys_initrd_start);
- initrd_end = initrd_start + phys_initrd_size;
- initrd_below_start_ok = 1;
-
- pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
- (void *)(initrd_start), size);
- return;
-disable:
- pr_cont(" - disabling initrd\n");
- initrd_start = 0;
- initrd_end = 0;
-}
-#endif /* CONFIG_BLK_DEV_INITRD */
-
void __init setup_bootmem(void)
{
- phys_addr_t mem_start = 0;
- phys_addr_t start, dram_end, end = 0;
phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start);
+ phys_addr_t dram_end = memblock_end_of_DRAM();
phys_addr_t max_mapped_addr = __pa(~(ulong)0);
- u64 i;
- /* Find the memory region containing the kernel */
- for_each_mem_range(i, &start, &end) {
- phys_addr_t size = end - start;
- if (!mem_start)
- mem_start = start;
- if (start <= vmlinux_start && vmlinux_end <= end)
- BUG_ON(size == 0);
- }
-
- /*
- * The maximal physical memory size is -PAGE_OFFSET.
- * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed
- * as it is unusable by kernel.
- */
+ /* The maximal physical memory size is -PAGE_OFFSET. */
memblock_enforce_memory_limit(-PAGE_OFFSET);
/* Reserve from the start of the kernel to the end of the kernel */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
- dram_end = memblock_end_of_DRAM();
-
/*
* memblock allocator is not aware of the fact that last 4K bytes of
* the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -198,20 +133,19 @@ void __init setup_bootmem(void)
dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
-#ifdef CONFIG_BLK_DEV_INITRD
- setup_initrd();
-#endif /* CONFIG_BLK_DEV_INITRD */
-
+ reserve_initrd_mem();
/*
- * Avoid using early_init_fdt_reserve_self() since __pa() does
+ * If the DTB is built in, there is no need to reserve its memblock.
+ * Otherwise, do reserve it, but avoid using
+ * early_init_fdt_reserve_self() since __pa() does
* not work for DTB pointers that are fixmap addresses
*/
- memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+ if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
+ memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
early_init_fdt_scan_reserved_mem();
dma_contiguous_reserve(dma32_phys_limit);
memblock_allow_resize();
- memblock_dump_all();
}
#ifdef CONFIG_MMU
@@ -226,8 +160,6 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
-#define MAX_EARLY_MAPPING_SIZE SZ_128M
-
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
@@ -302,13 +234,7 @@ static void __init create_pte_mapping(pte_t *ptep,
pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
-
-#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE
-#define NUM_EARLY_PMDS 1UL
-#else
-#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)
-#endif
-pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
+pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
@@ -330,11 +256,9 @@ static pmd_t *get_pmd_virt_late(phys_addr_t pa)
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
- uintptr_t pmd_num;
+ BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT);
- pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
- BUG_ON(pmd_num >= NUM_EARLY_PMDS);
- return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
+ return (uintptr_t)early_pmd;
}
static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
@@ -452,7 +376,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
uintptr_t va, pa, end_va;
uintptr_t load_pa = (uintptr_t)(&_start);
uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
- uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
+ uintptr_t map_size;
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t fix_bmap_spmd, fix_bmap_epmd;
#endif
@@ -464,12 +388,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
* Enforce boot alignment requirements of RV32 and
* RV64 by only allowing PMD or PGD mappings.
*/
- BUG_ON(map_size == PAGE_SIZE);
+ map_size = PMD_SIZE;
/* Sanity check alignment and size */
BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
BUG_ON((load_pa % map_size) != 0);
- BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
pt_ops.alloc_pte = alloc_pte_early;
pt_ops.get_pte_virt = get_pte_virt_early;
@@ -511,6 +434,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
/* Setup early PMD for DTB */
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
(uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
+#ifndef CONFIG_BUILTIN_DTB
/* Create two consecutive PMD mappings for FDT early scan */
pa = dtb_pa & ~(PMD_SIZE - 1);
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
@@ -518,7 +442,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else /* CONFIG_BUILTIN_DTB */
+ dtb_early_va = __va(dtb_pa);
+#endif /* CONFIG_BUILTIN_DTB */
#else
+#ifndef CONFIG_BUILTIN_DTB
/* Create two consecutive PGD mappings for FDT early scan */
pa = dtb_pa & ~(PGDIR_SIZE - 1);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
@@ -526,6 +454,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
+#else /* CONFIG_BUILTIN_DTB */
+ dtb_early_va = __va(dtb_pa);
+#endif /* CONFIG_BUILTIN_DTB */
#endif
dtb_early_pa = dtb_pa;
@@ -616,15 +547,7 @@ static void __init setup_vm_final(void)
#else
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
-#ifdef CONFIG_BUILTIN_DTB
- dtb_early_va = soc_lookup_builtin_dtb();
- if (!dtb_early_va) {
- /* Fallback to first available DTS */
- dtb_early_va = (void *) __dtb_start;
- }
-#else
dtb_early_va = (void *)dtb_pa;
-#endif
dtb_early_pa = dtb_pa;
}
@@ -665,9 +588,15 @@ void mark_rodata_ro(void)
void __init paging_init(void)
{
setup_vm_final();
- sparse_init();
setup_zero_page();
+}
+
+void __init misc_mem_init(void)
+{
+ arch_numa_init();
+ sparse_init();
zone_sizes_init();
+ memblock_dump_all();
}
#ifdef CONFIG_SPARSEMEM_VMEMMAP
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index a8a2ffd9114a..3fc18f469efb 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -9,6 +9,19 @@
#include <linux/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+
+static __init void *early_alloc(size_t size, int node)
+{
+ void *ptr = memblock_alloc_try_nid(size, size,
+ __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+
+ if (!ptr)
+ panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d from=%llx\n",
+ __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
+
+ return ptr;
+}
extern pgd_t early_pg_dir[PTRS_PER_PGD];
asmlinkage void __init kasan_early_init(void)
@@ -47,40 +60,133 @@ asmlinkage void __init kasan_early_init(void)
local_flush_tlb_all();
}
-static void __init populate(void *start, void *end)
+static void kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
+{
+ phys_addr_t phys_addr;
+ pte_t *ptep, *base_pte;
+
+ if (pmd_none(*pmd))
+ base_pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
+ else
+ base_pte = (pte_t *)pmd_page_vaddr(*pmd);
+
+ ptep = base_pte + pte_index(vaddr);
+
+ do {
+ if (pte_none(*ptep)) {
+ phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ }
+ } while (ptep++, vaddr += PAGE_SIZE, vaddr != end);
+
+ set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
+}
+
+static void kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
+{
+ phys_addr_t phys_addr;
+ pmd_t *pmdp, *base_pmd;
+ unsigned long next;
+
+ base_pmd = (pmd_t *)pgd_page_vaddr(*pgd);
+ if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+ base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+
+ pmdp = base_pmd + pmd_index(vaddr);
+
+ do {
+ next = pmd_addr_end(vaddr, end);
+
+ if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) {
+ phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE);
+ if (phys_addr) {
+ set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+
+ kasan_populate_pte(pmdp, vaddr, next);
+ } while (pmdp++, vaddr = next, vaddr != end);
+
+ /*
+ * Wait for the whole PGD to be populated before setting the PGD in
+ * the page table; otherwise, if we set the PGD before populating it
+ * entirely, memblock could allocate a page at a physical address
+ * whose shadow is not populated yet, and we'd take a page fault.
+ */
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+}
+
+static void kasan_populate_pgd(unsigned long vaddr, unsigned long end)
+{
+ phys_addr_t phys_addr;
+ pgd_t *pgdp = pgd_offset_k(vaddr);
+ unsigned long next;
+
+ do {
+ next = pgd_addr_end(vaddr, end);
+
+ /*
+ * pgdp can't be none since kasan_early_init() initialized the whole
+ * KASAN shadow region with kasan_early_shadow_pmd: if this is still
+ * the case, we can try to allocate a hugepage as a replacement.
+ */
+ if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
+ IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
+ phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+ if (phys_addr) {
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ continue;
+ }
+ }
+
+ kasan_populate_pmd(pgdp, vaddr, next);
+ } while (pgdp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_populate(void *start, void *end)
{
- unsigned long i, offset;
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
- unsigned long n_pages = (vend - vaddr) / PAGE_SIZE;
- unsigned long n_ptes =
- ((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE;
- unsigned long n_pmds =
- ((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD;
-
- pte_t *pte =
- memblock_alloc(n_ptes * PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
- pmd_t *pmd =
- memblock_alloc(n_pmds * PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
- pgd_t *pgd = pgd_offset_k(vaddr);
-
- for (i = 0; i < n_pages; i++) {
- phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
- set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
- }
-
- for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE)
- set_pmd(&pmd[i],
- pfn_pmd(PFN_DOWN(__pa(&pte[offset])),
- __pgprot(_PAGE_TABLE)));
- for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD)
- set_pgd(&pgd[i],
- pfn_pgd(PFN_DOWN(__pa(&pmd[offset])),
- __pgprot(_PAGE_TABLE)));
+ kasan_populate_pgd(vaddr, vend);
local_flush_tlb_all();
- memset(start, 0, end - start);
+ memset(start, KASAN_SHADOW_INIT, end - start);
+}
+
+void __init kasan_shallow_populate(void *start, void *end)
+{
+ unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+ unsigned long vend = PAGE_ALIGN((unsigned long)end);
+ unsigned long pfn;
+ int index;
+ void *p;
+ pud_t *pud_dir, *pud_k;
+ pgd_t *pgd_dir, *pgd_k;
+ p4d_t *p4d_dir, *p4d_k;
+
+ while (vaddr < vend) {
+ index = pgd_index(vaddr);
+ pfn = csr_read(CSR_SATP) & SATP_PPN;
+ pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
+ pgd_k = init_mm.pgd + index;
+ pgd_dir = pgd_offset_k(vaddr);
+ set_pgd(pgd_dir, *pgd_k);
+
+ p4d_dir = p4d_offset(pgd_dir, vaddr);
+ p4d_k = p4d_offset(pgd_k, vaddr);
+
+ vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
+ pud_dir = pud_offset(p4d_dir, vaddr);
+ pud_k = pud_offset(p4d_k, vaddr);
+
+ if (pud_present(*pud_dir)) {
+ p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+ pud_populate(&init_mm, pud_dir, p);
+ }
+ vaddr += PAGE_SIZE;
+ }
}
void __init kasan_init(void)
@@ -90,7 +196,15 @@ void __init kasan_init(void)
kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
(void *)kasan_mem_to_shadow((void *)
- VMALLOC_END));
+ VMEMMAP_END));
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ kasan_shallow_populate(
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+ else
+ kasan_populate_early_shadow(
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
for_each_mem_range(i, &_start, &_end) {
void *start = (void *)__va(_start);
@@ -99,7 +213,7 @@ void __init kasan_init(void)
if (start >= end)
break;
- populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
+ kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
};
for (i = 0; i < PTRS_PER_PTE; i++)
@@ -108,6 +222,6 @@ void __init kasan_init(void)
__pgprot(_PAGE_PRESENT | _PAGE_READ |
_PAGE_ACCESSED)));
- memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+ memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
init_task.kasan_depth = 0;
}
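
All of the populate paths above operate on shadow addresses produced by kasan_mem_to_shadow(), and switching the memset() calls from 0 to KASAN_SHADOW_INIT keeps them in step with the mode's initial shadow value (0 for generic KASAN, which is what RISC-V supports here). For reference, the generic shadow translation is a single shift plus an offset:

/* Generic KASAN: one shadow byte tracks 8 bytes of memory. */
static inline void *kasan_mem_to_shadow(const void *addr)
{
	return (void *)((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
		+ KASAN_SHADOW_OFFSET;
}
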
diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h
new file mode 100644
index 000000000000..603783766d0a
--- /dev/null
+++ b/arch/s390/include/asm/irq_work.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_IRQ_WORK_H
+#define _ASM_S390_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+ return true;
+}
+
+void arch_irq_work_raise(void);
+
+#endif /* _ASM_S390_IRQ_WORK_H */
diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index d1297d6bbdcf..6b187cd72251 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h
@@ -135,7 +135,7 @@ static inline void pmd_populate(struct mm_struct *mm,
#define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
#define pmd_pgtable(pmd) \
- (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
+ ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
/*
* page table entry allocation/free routines.
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 794746a32806..29c7ecd5ad1d 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1219,8 +1219,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
-#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+#define p4d_deref(pud) ((unsigned long)__va(p4d_val(pud) & _REGION_ENTRY_ORIGIN))
+#define pgd_deref(pgd) ((unsigned long)__va(pgd_val(pgd) & _REGION_ENTRY_ORIGIN))
static inline unsigned long pmd_deref(pmd_t pmd)
{
@@ -1229,12 +1229,12 @@ static inline unsigned long pmd_deref(pmd_t pmd)
origin_mask = _SEGMENT_ENTRY_ORIGIN;
if (pmd_large(pmd))
origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
- return pmd_val(pmd) & origin_mask;
+ return (unsigned long)__va(pmd_val(pmd) & origin_mask);
}
static inline unsigned long pmd_pfn(pmd_t pmd)
{
- return pmd_deref(pmd) >> PAGE_SHIFT;
+ return __pa(pmd_deref(pmd)) >> PAGE_SHIFT;
}
static inline unsigned long pud_deref(pud_t pud)
@@ -1244,12 +1244,12 @@ static inline unsigned long pud_deref(pud_t pud)
origin_mask = _REGION_ENTRY_ORIGIN;
if (pud_large(pud))
origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
- return pud_val(pud) & origin_mask;
+ return (unsigned long)__va(pud_val(pud) & origin_mask);
}
static inline unsigned long pud_pfn(pud_t pud)
{
- return pud_deref(pud) >> PAGE_SHIFT;
+ return __pa(pud_deref(pud)) >> PAGE_SHIFT;
}
/*
@@ -1329,7 +1329,7 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
}
#define gup_fast_permitted gup_fast_permitted
-#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+#define pfn_pte(pfn, pgprot) mk_pte_phys(((pfn) << PAGE_SHIFT), (pgprot))
#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
#define pte_page(x) pfn_to_page(pte_pfn(x))
@@ -1636,7 +1636,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
}
#define pmdp_collapse_flush pmdp_collapse_flush
-#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define pfn_pmd(pfn, pgprot) mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
static inline int pmd_trans_huge(pmd_t pmd)
diff --git a/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h
new file mode 100644
index 000000000000..3d8284b95f87
--- /dev/null
+++ b/arch/s390/include/uapi/asm/perf_cpum_cf_diag.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2021
+ * Interface implementation for communication with the CPU Measurement
+ * counter facility device driver.
+ *
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * Define for ioctl() commands to communicate with the CPU Measurement
+ * counter facility device driver.
+ */
+
+#ifndef _PERF_CPUM_CF_DIAG_H
+#define _PERF_CPUM_CF_DIAG_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define S390_HWCTR_DEVICE "hwctr"
+#define S390_HWCTR_START_VERSION 1
+
+struct s390_ctrset_start { /* Set CPUs to operate on */
+ __u64 version; /* Version of interface */
+ __u64 data_bytes; /* # of bytes required */
+ __u64 cpumask_len; /* Length of CPU mask in bytes */
+ __u64 *cpumask; /* Pointer to CPU mask */
+ __u64 counter_sets; /* Bit mask of counter sets to get */
+};
+
+struct s390_ctrset_setdata { /* Counter set data */
+ __u32 set; /* Counter set number */
+ __u32 no_cnts; /* # of counters stored in cv[] */
+ __u64 cv[0]; /* Counter values (variable length) */
+};
+
+struct s390_ctrset_cpudata { /* Counter set data per CPU */
+ __u32 cpu_nr; /* CPU number */
+ __u32 no_sets; /* # of counters sets in data[] */
+ struct s390_ctrset_setdata data[0];
+};
+
+struct s390_ctrset_read { /* Structure to get all ctr sets */
+ __u64 no_cpus; /* Total # of CPUs data taken from */
+ struct s390_ctrset_cpudata data[0];
+};
+
+#define S390_HWCTR_MAGIC 'C' /* Random magic # for ioctls */
+#define S390_HWCTR_START _IOWR(S390_HWCTR_MAGIC, 1, struct s390_ctrset_start)
+#define S390_HWCTR_STOP _IO(S390_HWCTR_MAGIC, 2)
+#define S390_HWCTR_READ _IOWR(S390_HWCTR_MAGIC, 3, struct s390_ctrset_read)
+#endif
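
For context, a minimal user-space client of the new /dev/hwctr device would drive the START, READ, STOP cycle roughly as sketched below. This is an illustration only, assuming the definitions above: the counter-set bit value (0x02 for the basic set) is an assumption, error handling is simplified, and the caller needs CAP_SYS_ADMIN.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/perf_cpum_cf_diag.h>

int main(void)
{
	struct s390_ctrset_read *buf = NULL;
	struct s390_ctrset_start start;
	__u64 cpumask = 1;		/* read CPU 0 only */
	int fd;

	fd = open("/dev/" S390_HWCTR_DEVICE, O_RDWR);
	if (fd < 0)
		return 1;
	memset(&start, 0, sizeof(start));
	start.version = S390_HWCTR_START_VERSION;
	start.counter_sets = 0x02;	/* assumed bit for the basic set */
	start.cpumask_len = sizeof(cpumask);
	start.cpumask = &cpumask;
	if (ioctl(fd, S390_HWCTR_START, &start) == 0) {
		/* The kernel wrote the worst-case size into data_bytes */
		buf = malloc(start.data_bytes);
		if (buf && ioctl(fd, S390_HWCTR_READ, buf) == 0)
			printf("counter data from %llu CPUs\n",
			       (unsigned long long)buf->no_cpus);
		ioctl(fd, S390_HWCTR_STOP);
	}
	free(buf);
	close(fd);
	return 0;
}
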
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index b5c86fb70d63..db4877bbb9aa 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -2,7 +2,7 @@
/*
* Performance event support for s390x - CPU-measurement Counter Sets
*
- * Copyright IBM Corp. 2019
+ * Copyright IBM Corp. 2019, 2021
* Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
* Thomas Richer <tmricht@linux.ibm.com>
*/
@@ -17,6 +17,8 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/processor.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
#include <asm/ctl_reg.h>
#include <asm/irq.h>
@@ -24,15 +26,20 @@
#include <asm/timex.h>
#include <asm/debug.h>
-#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
+#include <asm/perf_cpum_cf_diag.h>
+#define CF_DIAG_CTRSET_DEF 0xfeef /* Counter set header mark */
+#define CF_DIAG_MIN_INTERVAL 60 /* Minimum counter set read */
+ /* interval in seconds */
+static unsigned long cf_diag_interval = CF_DIAG_MIN_INTERVAL;
static unsigned int cf_diag_cpu_speed;
static debug_info_t *cf_diag_dbg;
-struct cf_diag_csd { /* Counter set data per CPU */
+struct cf_diag_csd { /* Counter set data per CPU */
size_t used; /* Bytes used in data/start */
unsigned char start[PAGE_SIZE]; /* Counter set at event start */
unsigned char data[PAGE_SIZE]; /* Counter set at event delete */
+ unsigned int sets; /* # Counter set saved in data */
};
static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd);
@@ -178,18 +185,35 @@ static void cf_diag_disable(struct pmu *pmu)
/* Number of perf events counting hardware events */
static atomic_t cf_diag_events = ATOMIC_INIT(0);
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(cf_diag_reserve_mutex);
/* Release the PMU if event is the last perf event */
static void cf_diag_perf_event_destroy(struct perf_event *event)
{
debug_sprintf_event(cf_diag_dbg, 5,
"%s event %p cpu %d cf_diag_events %d\n",
- __func__, event, event->cpu,
+ __func__, event, smp_processor_id(),
atomic_read(&cf_diag_events));
if (atomic_dec_return(&cf_diag_events) == 0)
__kernel_cpumcf_end();
}
+static int get_authctrsets(void)
+{
+ struct cpu_cf_events *cpuhw;
+ unsigned long auth = 0;
+ enum cpumf_ctr_set i;
+
+ cpuhw = &get_cpu_var(cpu_cf_events);
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
+ auth |= cpumf_ctr_ctl[i];
+ }
+ put_cpu_var(cpu_cf_events);
+ return auth;
+}
+
/* Setup the event. Test for authorized counter sets and only include counter
* sets which are authorized at the time of the setup. Including unauthorized
* counter sets results in a specification exception (and panic).
@@ -197,15 +221,12 @@ static void cf_diag_perf_event_destroy(struct perf_event *event)
static int __hw_perf_event_init(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
- struct cpu_cf_events *cpuhw;
- enum cpumf_ctr_set i;
int err = 0;
debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__,
event, event->cpu);
event->hw.config = attr->config;
- event->hw.config_base = 0;
/* Add all authorized counter sets to config_base. The
* the hardware init function is either called per-cpu or just once
@@ -215,11 +236,7 @@ static int __hw_perf_event_init(struct perf_event *event)
* Checking the authorization on any CPU is fine as the hardware
* applies the same authorization settings to all CPUs.
*/
- cpuhw = &get_cpu_var(cpu_cf_events);
- for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
- if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
- event->hw.config_base |= cpumf_ctr_ctl[i];
- put_cpu_var(cpu_cf_events);
+ event->hw.config_base = get_authctrsets();
/* No authorized counter sets, nothing to count/sample */
if (!event->hw.config_base) {
@@ -237,6 +254,25 @@ out:
return err;
}
+/* Return a non-negative value if the CPU-measurement counter facility
+ * could be reserved, and a negative error code otherwise.
+ */
+static int cf_diag_perf_event_inuse(void)
+{
+ int err = 0;
+
+ if (!atomic_inc_not_zero(&cf_diag_events)) {
+ mutex_lock(&cf_diag_reserve_mutex);
+ if (atomic_read(&cf_diag_events) == 0 &&
+ __kernel_cpumcf_begin())
+ err = -EBUSY;
+ else
+ err = atomic_inc_return(&cf_diag_events);
+ mutex_unlock(&cf_diag_reserve_mutex);
+ }
+ return err;
+}
+
static int cf_diag_event_init(struct perf_event *event)
{
struct perf_event_attr *attr = &event->attr;
@@ -264,13 +300,9 @@ static int cf_diag_event_init(struct perf_event *event)
}
/* Initialize for using the CPU-measurement counter facility */
- if (atomic_inc_return(&cf_diag_events) == 1) {
- if (__kernel_cpumcf_begin()) {
- atomic_dec(&cf_diag_events);
- err = -EBUSY;
- goto out;
- }
- }
+ err = cf_diag_perf_event_inuse();
+ if (err < 0)
+ goto out;
event->destroy = cf_diag_perf_event_destroy;
err = __hw_perf_event_init(event);
@@ -599,6 +631,8 @@ static void cf_diag_del(struct perf_event *event, int flags)
cpuhw->flags &= ~PMU_F_IN_USE;
}
+/* Default counter set events and format attribute groups */
+
CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
static struct attribute *cf_diag_events_attr[] = {
@@ -663,6 +697,452 @@ static void cf_diag_get_cpu_speed(void)
}
}
+/* Code to create device and file I/O operations */
+static atomic_t ctrset_opencnt = ATOMIC_INIT(0); /* Excl. access */
+
+static int cf_diag_open(struct inode *inode, struct file *file)
+{
+ int err = 0;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+ if (atomic_xchg(&ctrset_opencnt, 1))
+ return -EBUSY;
+
+ /* Avoid concurrent access with perf_event_open() system call */
+ mutex_lock(&cf_diag_reserve_mutex);
+ if (atomic_read(&cf_diag_events) || __kernel_cpumcf_begin())
+ err = -EBUSY;
+ mutex_unlock(&cf_diag_reserve_mutex);
+ if (err) {
+ atomic_set(&ctrset_opencnt, 0);
+ return err;
+ }
+ file->private_data = NULL;
+ debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__);
+ /* nonseekable_open() never fails */
+ return nonseekable_open(inode, file);
+}
+
+/* Variables for ioctl() interface support */
+static DEFINE_MUTEX(cf_diag_ctrset_mutex);
+static struct cf_diag_ctrset {
+ unsigned long ctrset; /* Bit mask of counter set to read */
+ cpumask_t mask; /* CPU mask to read from */
+ time64_t lastread; /* Epoch counter set last read */
+} cf_diag_ctrset;
+
+static void cf_diag_ctrset_clear(void)
+{
+ cpumask_clear(&cf_diag_ctrset.mask);
+ cf_diag_ctrset.ctrset = 0;
+}
+
+static void cf_diag_release_cpu(void *p)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+ debug_sprintf_event(cf_diag_dbg, 3, "%s cpu %d\n", __func__,
+ smp_processor_id());
+ lcctl(0); /* Reset counter sets */
+ cpuhw->state = 0; /* Save state in CPU hardware state */
+}
+
+/* The release function is also called when the application terminates
+ * without issuing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
+ * Since only one application is allowed to open the device, simply stop
+ * all CPU counter sets.
+ */
+static int cf_diag_release(struct inode *inode, struct file *file)
+{
+ on_each_cpu(cf_diag_release_cpu, NULL, 1);
+ cf_diag_ctrset_clear();
+ atomic_set(&ctrset_opencnt, 0);
+ __kernel_cpumcf_end();
+ debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__);
+ return 0;
+}
+
+struct cf_diag_call_on_cpu_parm { /* Parm struct for smp_call_on_cpu */
+ unsigned int sets; /* Counter set bit mask */
+ atomic_t cpus_ack; /* # CPUs successfully executed func */
+};
+
+static int cf_diag_all_copy(unsigned long arg, cpumask_t *mask)
+{
+ struct s390_ctrset_read __user *ctrset_read;
+ unsigned int cpu, cpus, rc;
+ void __user *uptr;
+
+ ctrset_read = (struct s390_ctrset_read __user *)arg;
+ uptr = ctrset_read->data;
+ for_each_cpu(cpu, mask) {
+ struct cf_diag_csd *csd = per_cpu_ptr(&cf_diag_csd, cpu);
+ struct s390_ctrset_cpudata __user *ctrset_cpudata;
+
+ ctrset_cpudata = uptr;
+ debug_sprintf_event(cf_diag_dbg, 5, "%s cpu %d used %zd\n",
+ __func__, cpu, csd->used);
+ rc = put_user(cpu, &ctrset_cpudata->cpu_nr);
+ rc |= put_user(csd->sets, &ctrset_cpudata->no_sets);
+ rc |= copy_to_user(ctrset_cpudata->data, csd->data, csd->used);
+ if (rc)
+ return -EFAULT;
+ uptr += sizeof(struct s390_ctrset_cpudata) + csd->used;
+ cond_resched();
+ }
+ cpus = cpumask_weight(mask);
+ if (put_user(cpus, &ctrset_read->no_cpus))
+ return -EFAULT;
+ debug_sprintf_event(cf_diag_dbg, 5, "%s copied %ld\n",
+ __func__, uptr - (void __user *)ctrset_read->data);
+ return 0;
+}
+
+static size_t cf_diag_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
+ int ctrset_size, size_t room)
+{
+ size_t need = 0;
+ int rc = -1;
+
+ need = sizeof(*p) + sizeof(u64) * ctrset_size;
+ debug_sprintf_event(cf_diag_dbg, 5,
+ "%s room %zd need %zd set %#x set_size %d\n",
+ __func__, room, need, ctrset, ctrset_size);
+ if (need <= room) {
+ p->set = cpumf_ctr_ctl[ctrset];
+ p->no_cnts = ctrset_size;
+ rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
+ if (rc == 3) /* Nothing stored */
+ need = 0;
+ }
+ debug_sprintf_event(cf_diag_dbg, 5, "%s need %zd rc %d\n", __func__,
+ need, rc);
+ return need;
+}
+
+/* Read all counter sets. Since the perf_event_open() system call with
+ * event cpum_cf_diag/.../ is blocked when this interface is active, reuse
+ * the perf_event_open() data buffer to store the counter sets.
+ */
+static void cf_diag_cpu_read(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd);
+ struct cf_diag_call_on_cpu_parm *p = parm;
+ int set, set_size;
+ size_t space;
+
+ debug_sprintf_event(cf_diag_dbg, 5,
+ "%s new %#x flags %#x state %#llx\n",
+ __func__, p->sets, cpuhw->flags,
+ cpuhw->state);
+ /* No data saved yet */
+ csd->used = 0;
+ csd->sets = 0;
+ memset(csd->data, 0, sizeof(csd->data));
+
+ /* Scan the counter sets */
+ for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
+ struct s390_ctrset_setdata *sp = (void *)csd->data + csd->used;
+
+ if (!(p->sets & cpumf_ctr_ctl[set]))
+ continue; /* Counter set not in list */
+ set_size = cf_diag_ctrset_size(set, &cpuhw->info);
+ space = sizeof(csd->data) - csd->used;
+ space = cf_diag_cpuset_read(sp, set, set_size, space);
+ if (space) {
+ csd->used += space;
+ csd->sets += 1;
+ }
+ debug_sprintf_event(cf_diag_dbg, 5, "%s sp %px space %zd\n",
+ __func__, sp, space);
+ }
+ debug_sprintf_event(cf_diag_dbg, 5, "%s sets %d used %zd\n", __func__,
+ csd->sets, csd->used);
+}
+
+static int cf_diag_all_read(unsigned long arg)
+{
+ struct cf_diag_call_on_cpu_parm p;
+ cpumask_var_t mask;
+ time64_t now;
+ int rc = 0;
+
+ debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ now = ktime_get_seconds();
+ if (cf_diag_ctrset.lastread + cf_diag_interval > now) {
+ debug_sprintf_event(cf_diag_dbg, 5, "%s now %lld "
+ " lastread %lld\n", __func__, now,
+ cf_diag_ctrset.lastread);
+ rc = -EAGAIN;
+ goto out;
+ } else {
+ cf_diag_ctrset.lastread = now;
+ }
+ p.sets = cf_diag_ctrset.ctrset;
+ cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+ on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1);
+ rc = cf_diag_all_copy(arg, mask);
+out:
+ free_cpumask_var(mask);
+ debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc);
+ return rc;
+}
+
+/* Stop all counter sets via ioctl interface */
+static void cf_diag_ioctl_off(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cf_diag_call_on_cpu_parm *p = parm;
+ int rc;
+
+ debug_sprintf_event(cf_diag_dbg, 5,
+ "%s new %#x flags %#x state %#llx\n",
+ __func__, p->sets, cpuhw->flags,
+ cpuhw->state);
+
+ ctr_set_multiple_disable(&cpuhw->state, p->sets);
+ ctr_set_multiple_stop(&cpuhw->state, p->sets);
+ rc = lcctl(cpuhw->state); /* Stop counter sets */
+ if (!cpuhw->state)
+ cpuhw->flags &= ~PMU_F_IN_USE;
+ debug_sprintf_event(cf_diag_dbg, 5,
+ "%s rc %d flags %#x state %#llx\n", __func__,
+ rc, cpuhw->flags, cpuhw->state);
+}
+
+/* Start counter sets on particular CPU */
+static void cf_diag_ioctl_on(void *parm)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cf_diag_call_on_cpu_parm *p = parm;
+ int rc;
+
+ debug_sprintf_event(cf_diag_dbg, 5,
+ "%s new %#x flags %#x state %#llx\n",
+ __func__, p->sets, cpuhw->flags,
+ cpuhw->state);
+
+ if (!(cpuhw->flags & PMU_F_IN_USE))
+ cpuhw->state = 0;
+ cpuhw->flags |= PMU_F_IN_USE;
+ rc = lcctl(cpuhw->state); /* Reset unused counter sets */
+ ctr_set_multiple_enable(&cpuhw->state, p->sets);
+ ctr_set_multiple_start(&cpuhw->state, p->sets);
+ rc |= lcctl(cpuhw->state); /* Start counter sets */
+ if (!rc)
+ atomic_inc(&p->cpus_ack);
+ debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d state %#llx\n",
+ __func__, rc, cpuhw->state);
+}
+
+static int cf_diag_all_stop(void)
+{
+ struct cf_diag_call_on_cpu_parm p = {
+ .sets = cf_diag_ctrset.ctrset,
+ };
+ cpumask_var_t mask;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+ on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1);
+ free_cpumask_var(mask);
+ return 0;
+}
+
+static int cf_diag_all_start(void)
+{
+ struct cf_diag_call_on_cpu_parm p = {
+ .sets = cf_diag_ctrset.ctrset,
+ .cpus_ack = ATOMIC_INIT(0),
+ };
+ cpumask_var_t mask;
+ int rc = 0;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+ cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+ on_each_cpu_mask(mask, cf_diag_ioctl_on, &p, 1);
+ if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
+ on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1);
+ rc = -EIO;
+ }
+ free_cpumask_var(mask);
+ return rc;
+}
+
+/* Return the maximum required space for all possible CPUs in case one
+ * CPU is onlined during the START, READ, STOP cycle.
+ * To find out the size of the counter sets, any one CPU will do; they
+ * all have the same counter sets.
+ */
+static size_t cf_diag_needspace(unsigned int sets)
+{
+ struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ size_t bytes = 0;
+ int i;
+
+ for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+ if (!(sets & cpumf_ctr_ctl[i]))
+ continue;
+ bytes += cf_diag_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
+ sizeof(((struct s390_ctrset_setdata *)0)->set) +
+ sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
+ }
+ bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
+ (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
+ sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
+ debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__,
+ bytes);
+ return bytes;
+}
+
+static long cf_diag_ioctl_read(unsigned long arg)
+{
+ struct s390_ctrset_read read;
+ int ret = 0;
+
+ debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+ if (copy_from_user(&read, (char __user *)arg, sizeof(read)))
+ return -EFAULT;
+ ret = cf_diag_all_read(arg);
+ debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret);
+ return ret;
+}
+
+static long cf_diag_ioctl_stop(void)
+{
+ int ret;
+
+ debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+ ret = cf_diag_all_stop();
+ cf_diag_ctrset_clear();
+ debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret);
+ return ret;
+}
+
+static long cf_diag_ioctl_start(unsigned long arg)
+{
+ struct s390_ctrset_start __user *ustart;
+ struct s390_ctrset_start start;
+ void __user *umask;
+ unsigned int len;
+ int ret = 0;
+ size_t need;
+
+ if (cf_diag_ctrset.ctrset)
+ return -EBUSY;
+ ustart = (struct s390_ctrset_start __user *)arg;
+ if (copy_from_user(&start, ustart, sizeof(start)))
+ return -EFAULT;
+ if (start.version != S390_HWCTR_START_VERSION)
+ return -EINVAL;
+ if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
+ cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
+ return -EINVAL; /* Invalid counter set */
+ if (!start.counter_sets)
+ return -EINVAL; /* No counter set at all? */
+ cpumask_clear(&cf_diag_ctrset.mask);
+ len = min_t(u64, start.cpumask_len, cpumask_size());
+ umask = (void __user *)start.cpumask;
+ if (copy_from_user(&cf_diag_ctrset.mask, umask, len))
+ return -EFAULT;
+ if (cpumask_empty(&cf_diag_ctrset.mask))
+ return -EINVAL;
+ need = cf_diag_needspace(start.counter_sets);
+ if (put_user(need, &ustart->data_bytes))
+ ret = -EFAULT;
+ if (ret)
+ goto out;
+ cf_diag_ctrset.ctrset = start.counter_sets;
+ ret = cf_diag_all_start();
+out:
+ if (ret)
+ cf_diag_ctrset_clear();
+ debug_sprintf_event(cf_diag_dbg, 2, "%s sets %#lx need %ld ret %d\n",
+ __func__, cf_diag_ctrset.ctrset, need, ret);
+ return ret;
+}
+
+static long cf_diag_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret;
+
+ debug_sprintf_event(cf_diag_dbg, 2, "%s cmd %#x arg %lx\n", __func__,
+ cmd, arg);
+ get_online_cpus();
+ mutex_lock(&cf_diag_ctrset_mutex);
+ switch (cmd) {
+ case S390_HWCTR_START:
+ ret = cf_diag_ioctl_start(arg);
+ break;
+ case S390_HWCTR_STOP:
+ ret = cf_diag_ioctl_stop();
+ break;
+ case S390_HWCTR_READ:
+ ret = cf_diag_ioctl_read(arg);
+ break;
+ default:
+ ret = -ENOTTY;
+ break;
+ }
+ mutex_unlock(&cf_diag_ctrset_mutex);
+ put_online_cpus();
+ debug_sprintf_event(cf_diag_dbg, 2, "%s ret %d\n", __func__, ret);
+ return ret;
+}
+
+static const struct file_operations cf_diag_fops = {
+ .owner = THIS_MODULE,
+ .open = cf_diag_open,
+ .release = cf_diag_release,
+ .unlocked_ioctl = cf_diag_ioctl,
+ .compat_ioctl = cf_diag_ioctl,
+ .llseek = no_llseek
+};
+
+static struct miscdevice cf_diag_dev = {
+ .name = S390_HWCTR_DEVICE,
+ .minor = MISC_DYNAMIC_MINOR,
+ .fops = &cf_diag_fops,
+};
+
+static int cf_diag_online_cpu(unsigned int cpu)
+{
+ struct cf_diag_call_on_cpu_parm p;
+
+ mutex_lock(&cf_diag_ctrset_mutex);
+ if (!cf_diag_ctrset.ctrset)
+ goto out;
+ p.sets = cf_diag_ctrset.ctrset;
+ cf_diag_ioctl_on(&p);
+out:
+ mutex_unlock(&cf_diag_ctrset_mutex);
+ return 0;
+}
+
+static int cf_diag_offline_cpu(unsigned int cpu)
+{
+ struct cf_diag_call_on_cpu_parm p;
+
+ mutex_lock(&cf_diag_ctrset_mutex);
+ if (!cf_diag_ctrset.ctrset)
+ goto out;
+ p.sets = cf_diag_ctrset.ctrset;
+ cf_diag_ioctl_off(&p);
+out:
+ mutex_unlock(&cf_diag_ctrset_mutex);
+ return 0;
+}
+
/* Initialize the counter set PMU to generate complete counter set data as
* event raw data. This relies on the CPU Measurement Counter Facility device
* already being loaded and initialized.
@@ -685,21 +1165,43 @@ static int __init cf_diag_init(void)
return -ENOMEM;
}
+ rc = misc_register(&cf_diag_dev);
+ if (rc) {
+ pr_err("Registration of /dev/" S390_HWCTR_DEVICE
+ "failed rc=%d\n", rc);
+ goto out;
+ }
+
/* Setup s390dbf facility */
cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
if (!cf_diag_dbg) {
pr_err("Registration of s390dbf(cpum_cf_diag) failed\n");
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out_dbf;
}
debug_register_view(cf_diag_dbg, &debug_sprintf_view);
rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
if (rc) {
- debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
- debug_unregister(cf_diag_dbg);
pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
rc);
+ goto out_perf;
}
+ rc = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_S390_CFD_ONLINE,
+ "perf/s390/cfd:online",
+ cf_diag_online_cpu, cf_diag_offline_cpu);
+ if (!rc)
+ goto out;
+
+ pr_err("Registration of CPUHP_AP_PERF_S390_CFD_ONLINE failed rc=%i\n",
+ rc);
+ perf_pmu_unregister(&cf_diag);
+out_perf:
+ debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
+ debug_unregister(cf_diag_dbg);
+out_dbf:
+ misc_deregister(&cf_diag_dev);
+out:
return rc;
}
-arch_initcall(cf_diag_init);
+device_initcall(cf_diag_init);
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 367bd000f6d1..e20bed1ed34a 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -130,7 +130,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
frame->sf.gprs[9] = (unsigned long)frame;
/* Store access registers to kernel stack of new process. */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index e299892440b6..58c8afa3da65 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -30,6 +30,7 @@
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/irqflags.h>
+#include <linux/irq_work.h>
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/sched/hotplug.h>
@@ -62,6 +63,7 @@ enum {
ec_call_function_single,
ec_stop_cpu,
ec_mcck_pending,
+ ec_irq_work,
};
enum {
@@ -434,10 +436,12 @@ void notrace smp_yield_cpu(int cpu)
*/
void notrace smp_emergency_stop(void)
{
- cpumask_t cpumask;
+ static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+ static cpumask_t cpumask;
u64 end;
int cpu;
+ arch_spin_lock(&lock);
cpumask_copy(&cpumask, cpu_online_mask);
cpumask_clear_cpu(smp_processor_id(), &cpumask);
@@ -458,6 +462,7 @@ void notrace smp_emergency_stop(void)
break;
cpu_relax();
}
+ arch_spin_unlock(&lock);
}
NOKPROBE_SYMBOL(smp_emergency_stop);
@@ -505,6 +510,8 @@ static void smp_handle_ext_call(void)
generic_smp_call_function_single_interrupt();
if (test_bit(ec_mcck_pending, &bits))
__s390_handle_mcck();
+ if (test_bit(ec_irq_work, &bits))
+ irq_work_run();
}
static void do_ext_call_interrupt(struct ext_code ext_code,
@@ -537,6 +544,13 @@ void smp_send_reschedule(int cpu)
pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
}
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+ pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+}
+#endif
+
/*
* parameter area for the set/clear control bit callbacks
*/
@@ -775,11 +789,13 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
{
struct sclp_core_entry *core;
- cpumask_t avail;
+ static cpumask_t avail;
bool configured;
u16 core_id;
int nr, i;
+ get_online_cpus();
+ mutex_lock(&smp_cpu_state_mutex);
nr = 0;
cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
/*
@@ -800,6 +816,8 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
configured = i < info->configured;
nr += smp_add_core(&info->core[i], &avail, configured, early);
}
+ mutex_unlock(&smp_cpu_state_mutex);
+ put_online_cpus();
return nr;
}
@@ -847,9 +865,7 @@ void __init smp_detect_cpus(void)
pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
/* Add CPUs present at boot */
- get_online_cpus();
__smp_rescan_cpus(info, true);
- put_online_cpus();
memblock_free_early((unsigned long)info, sizeof(*info));
}
@@ -1178,11 +1194,7 @@ int __ref smp_rescan_cpus(void)
if (!info)
return -ENOMEM;
smp_get_core_info(info, 0);
- get_online_cpus();
- mutex_lock(&smp_cpu_state_mutex);
nr = __smp_rescan_cpus(info, false);
- mutex_unlock(&smp_cpu_state_mutex);
- put_online_cpus();
kfree(info);
if (nr)
topology_schedule_update();
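
With the new ec_irq_work external-call bit and arch_irq_work_raise() above, the generic irq_work machinery gains a real IPI path on s390: arch_irq_work_has_interrupt() now returns true, so queued work runs from the external-call interrupt instead of waiting for the next timer tick. A minimal sketch of the generic API this enables (illustrative names, not part of the patch):

#include <linux/init.h>
#include <linux/irq_work.h>
#include <linux/printk.h>
#include <linux/smp.h>

static void demo_irq_work_fn(struct irq_work *work)
{
	/* Runs in interrupt context, here from smp_handle_ext_call() */
	pr_info("irq_work ran on CPU %d\n", smp_processor_id());
}

static struct irq_work demo_work;

static int __init demo_init(void)
{
	init_irq_work(&demo_work, demo_irq_work_fn);
	irq_work_queue(&demo_work);	/* ends up in arch_irq_work_raise() */
	return 0;
}
device_initcall(demo_init);
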
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index ca47141a5be9..e7ce447651b9 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -62,16 +62,16 @@ static struct mask_info drawer_info;
struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
-static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
+static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
{
- cpumask_t mask;
+ static cpumask_t mask;
cpumask_copy(&mask, cpumask_of(cpu));
switch (topology_mode) {
case TOPOLOGY_MODE_HW:
while (info) {
if (cpumask_test_cpu(cpu, &info->mask)) {
- mask = info->mask;
+ cpumask_copy(&mask, &info->mask);
break;
}
info = info->next;
@@ -89,23 +89,24 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
break;
}
cpumask_and(&mask, &mask, cpu_online_mask);
- return mask;
+ cpumask_copy(dst, &mask);
}
-static cpumask_t cpu_thread_map(unsigned int cpu)
+static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
{
- cpumask_t mask;
+ static cpumask_t mask;
int i;
cpumask_copy(&mask, cpumask_of(cpu));
if (topology_mode != TOPOLOGY_MODE_HW)
- return mask;
+ goto out;
cpu -= cpu % (smp_cpu_mtid + 1);
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_present(cpu + i))
cpumask_set_cpu(cpu + i, &mask);
cpumask_and(&mask, &mask, cpu_online_mask);
- return mask;
+out:
+ cpumask_copy(dst, &mask);
}
#define TOPOLOGY_CORE_BITS 64
@@ -250,10 +251,10 @@ void update_cpu_masks(void)
for_each_possible_cpu(cpu) {
topo = &cpu_topology[cpu];
- topo->thread_mask = cpu_thread_map(cpu);
- topo->core_mask = cpu_group_map(&socket_info, cpu);
- topo->book_mask = cpu_group_map(&book_info, cpu);
- topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
+ cpu_thread_map(&topo->thread_mask, cpu);
+ cpu_group_map(&topo->core_mask, &socket_info, cpu);
+ cpu_group_map(&topo->book_mask, &book_info, cpu);
+ cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
topo->booted_cores = 0;
if (topology_mode != TOPOLOGY_MODE_HW) {
id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
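
The topology.c conversion above exists because returning cpumask_t by value puts a full NR_CPUS-bit mask on the stack twice per call, which adds up quickly with a large CONFIG_NR_CPUS; hence the static masks (presumably safe under the existing topology serialization) and the caller-provided destination. The pattern in miniature, as a generic sketch rather than s390 code:

/* Before: the whole mask travels across the stack twice */
static cpumask_t get_mask_by_value(unsigned int cpu)
{
	cpumask_t mask;				/* NR_CPUS bits on the stack */

	cpumask_copy(&mask, cpumask_of(cpu));
	return mask;				/* copied out again */
}

/* After: the caller supplies storage; nothing large on the stack */
static void get_mask_by_ptr(cpumask_t *dst, unsigned int cpu)
{
	cpumask_copy(dst, cpumask_of(cpu));
}
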
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 73a163065b95..0e76b2127dc6 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -297,6 +297,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
return -EINVAL;
+ VM_BUG_ON(!mhp_range_allowed(start, size, true));
rc = vmem_add_mapping(start, size);
if (rc)
return rc;
diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c
index 4e87c819ddea..781965f7210e 100644
--- a/arch/s390/mm/pgalloc.c
+++ b/arch/s390/mm/pgalloc.c
@@ -58,7 +58,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
if (!page)
return NULL;
arch_set_page_dat(page, 2);
- return (unsigned long *) page_to_phys(page);
+ return (unsigned long *) page_to_virt(page);
}
void crst_table_free(struct mm_struct *mm, unsigned long *table)
@@ -161,7 +161,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
page = alloc_page(GFP_KERNEL);
if (page) {
- table = (u64 *)page_to_phys(page);
+ table = (u64 *)page_to_virt(page);
memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
}
@@ -194,7 +194,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
mask = atomic_read(&page->_refcount) >> 24;
mask = (mask | (mask >> 4)) & 3;
if (mask != 3) {
- table = (unsigned long *) page_to_phys(page);
+ table = (unsigned long *) page_to_virt(page);
bit = mask & 1; /* =1 -> second 2K */
if (bit)
table += PTRS_PER_PTE;
@@ -217,7 +217,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
}
arch_set_page_dat(page, 0);
/* Initialize page table */
- table = (unsigned long *) page_to_phys(page);
+ table = (unsigned long *) page_to_virt(page);
if (mm_alloc_pgste(mm)) {
/* Return 4K page table with PGSTEs */
atomic_xor_bits(&page->_refcount, 3 << 24);
@@ -239,10 +239,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
struct page *page;
unsigned int bit, mask;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ page = virt_to_page(table);
if (!mm_alloc_pgste(mm)) {
/* Free 2K page table fragment of a 4K page */
- bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+ bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.lock);
mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
mask >>= 24;
@@ -269,14 +269,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
unsigned int bit, mask;
mm = tlb->mm;
- page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ page = virt_to_page(table);
if (mm_alloc_pgste(mm)) {
gmap_unlink(mm, table, vmaddr);
- table = (unsigned long *) (__pa(table) | 3);
+ table = (unsigned long *) ((unsigned long)table | 3);
tlb_remove_table(tlb, table);
return;
}
- bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+ bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
spin_lock_bh(&mm->context.lock);
mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
mask >>= 24;
@@ -285,7 +285,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
else
list_del(&page->lru);
spin_unlock_bh(&mm->context.lock);
- table = (unsigned long *) (__pa(table) | (1U << bit));
+ table = (unsigned long *) ((unsigned long) table | (1U << bit));
tlb_remove_table(tlb, table);
}
@@ -293,7 +293,7 @@ void __tlb_remove_table(void *_table)
{
unsigned int mask = (unsigned long) _table & 3;
void *table = (void *)((unsigned long) _table ^ mask);
- struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+ struct page *page = virt_to_page(table);
switch (mask) {
case 0: /* pmd, pud, or p4d */
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 01f3a5f58e64..96897fab89dc 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -4,6 +4,7 @@
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
+#include <linux/memory_hotplug.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/mm.h>
@@ -26,14 +27,14 @@ static void __ref *vmem_alloc_pages(unsigned int order)
if (slab_is_available())
return (void *)__get_free_pages(GFP_KERNEL, order);
- return (void *) memblock_phys_alloc(size, size);
+ return memblock_alloc(size, size);
}
static void vmem_free_pages(unsigned long addr, int order)
{
/* We don't expect boot memory to be removed ever. */
if (!slab_is_available() ||
- WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+ WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
return;
free_pages(addr, order);
}
@@ -56,7 +57,7 @@ pte_t __ref *vmem_pte_alloc(void)
if (slab_is_available())
pte = (pte_t *) page_table_alloc(&init_mm);
else
- pte = (pte_t *) memblock_phys_alloc(size, size);
+ pte = (pte_t *) memblock_alloc(size, size);
if (!pte)
return NULL;
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
@@ -84,7 +85,7 @@ static void vmemmap_flush_unused_sub_pmd(void)
{
if (!unused_sub_pmd_start)
return;
- memset(__va(unused_sub_pmd_start), PAGE_UNUSED,
+ memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
unused_sub_pmd_start = 0;
}
@@ -97,7 +98,7 @@ static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
* getting removed (just in case the memmap never gets initialized,
* e.g., because the memory block never gets onlined).
*/
- memset(__va(start), 0, sizeof(struct page));
+ memset((void *)start, 0, sizeof(struct page));
}
static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
@@ -118,7 +119,7 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
{
- void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+ unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
vmemmap_flush_unused_sub_pmd();
@@ -127,7 +128,7 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
/* Mark the unused parts of the new memmap page PAGE_UNUSED. */
if (!IS_ALIGNED(start, PMD_SIZE))
- memset(page, PAGE_UNUSED, start - __pa(page));
+ memset((void *)page, PAGE_UNUSED, start - page);
/*
* We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
* consecutive sections. Remember for the last added PMD the last
@@ -140,11 +141,11 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
/* Returns true if the PMD is completely unused and can be freed. */
static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
{
- void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+ unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
vmemmap_flush_unused_sub_pmd();
- memset(__va(start), PAGE_UNUSED, end - start);
- return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+ memset((void *)start, PAGE_UNUSED, end - start);
+ return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
}
/* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
@@ -165,7 +166,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
if (pte_none(*pte))
continue;
if (!direct)
- vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+ vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
pte_clear(&init_mm, addr, pte);
} else if (pte_none(*pte)) {
if (!direct) {
@@ -175,7 +176,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
goto out;
pte_val(*pte) = __pa(new_page) | prot;
} else {
- pte_val(*pte) = addr | prot;
+ pte_val(*pte) = __pa(addr) | prot;
}
} else {
continue;
@@ -200,7 +201,7 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start)
if (!pte_none(*pte))
return;
}
- vmem_pte_free(__va(pmd_deref(*pmd)));
+ vmem_pte_free((unsigned long *) pmd_deref(*pmd));
pmd_clear(pmd);
}
@@ -241,7 +242,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
IS_ALIGNED(next, PMD_SIZE) &&
MACHINE_HAS_EDAT1 && addr && direct &&
!debug_pagealloc_enabled()) {
- pmd_val(*pmd) = addr | prot;
+ pmd_val(*pmd) = __pa(addr) | prot;
pages++;
continue;
} else if (!direct && MACHINE_HAS_EDAT1) {
@@ -337,7 +338,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
IS_ALIGNED(next, PUD_SIZE) &&
MACHINE_HAS_EDAT2 && addr && direct &&
!debug_pagealloc_enabled()) {
- pud_val(*pud) = addr | prot;
+ pud_val(*pud) = __pa(addr) | prot;
pages++;
continue;
}
@@ -532,11 +533,22 @@ void vmem_remove_mapping(unsigned long start, unsigned long size)
mutex_unlock(&vmem_mutex);
}
+struct range arch_get_mappable_range(void)
+{
+ struct range mhp_range;
+
+ mhp_range.start = 0;
+ mhp_range.end = VMEM_MAX_PHYS - 1;
+ return mhp_range;
+}
+
int vmem_add_mapping(unsigned long start, unsigned long size)
{
+ struct range range = arch_get_mappable_range();
int ret;
- if (start + size > VMEM_MAX_PHYS ||
+ if (start < range.start ||
+ start + size > range.end + 1 ||
start + size < start)
return -ERANGE;
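
arch_get_mappable_range() lets common memory-hotplug code query the architecture's addressable window ([0, VMEM_MAX_PHYS - 1] here) instead of every caller comparing against VMEM_MAX_PHYS, and vmem_add_mapping() now validates against that same range. A sketch of the resulting check, with the wrap-around guard spelled out (illustrative helper, not part of the patch):

/* Illustration of the validation vmem_add_mapping() performs above. */
static bool vmem_range_ok(unsigned long start, unsigned long size)
{
	struct range r = arch_get_mappable_range();

	if (start < r.start)			/* below the mappable window */
		return false;
	if (start + size < start)		/* start + size wrapped around */
		return false;
	return start + size <= r.end + 1;	/* r.end is inclusive */
}
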
diff --git a/arch/s390/tools/opcodes.txt b/arch/s390/tools/opcodes.txt
index 46d8ed96cf06..0e207c46e8da 100644
--- a/arch/s390/tools/opcodes.txt
+++ b/arch/s390/tools/opcodes.txt
@@ -597,7 +597,7 @@ b9b3 cu42 RRE_RR
b9bd trtre RRF_U0RR
b9be srstu RRE_RR
b9bf trte RRF_U0RR
-b9c0 selhhhr RRF_RURR
+b9c0 selfhr RRF_RURR
b9c8 ahhhr RRF_R0RR2
b9c9 shhhr RRF_R0RR2
b9ca alhhhr RRF_R0RR2
diff --git a/arch/sh/boards/mach-landisk/gio.c b/arch/sh/boards/mach-landisk/gio.c
index 1c0da99dfc60..ff2200fec29a 100644
--- a/arch/sh/boards/mach-landisk/gio.c
+++ b/arch/sh/boards/mach-landisk/gio.c
@@ -27,11 +27,10 @@ static int openCnt;
static int gio_open(struct inode *inode, struct file *filp)
{
- int minor;
+ int minor = iminor(inode);
int ret = -ENOENT;
preempt_disable();
- minor = MINOR(inode->i_rdev);
if (minor < DEVCOUNT) {
if (openCnt > 0) {
ret = -EALREADY;
@@ -46,9 +45,8 @@ static int gio_open(struct inode *inode, struct file *filp)
static int gio_close(struct inode *inode, struct file *filp)
{
- int minor;
+ int minor = iminor(inode);
- minor = MINOR(inode->i_rdev);
if (minor < DEVCOUNT) {
openCnt--;
}
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index 80a5d1c66a51..1aa508eb0823 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -114,7 +114,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
p->thread.pc = (unsigned long) ret_from_kernel_thread;
childregs->regs[4] = arg;
diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig
index 148f44b33890..12a4fb0bd52a 100644
--- a/arch/sparc/configs/sparc64_defconfig
+++ b/arch/sparc/configs/sparc64_defconfig
@@ -93,7 +93,7 @@ CONFIG_NETDEVICES=y
CONFIG_NET_ETHERNET=y
CONFIG_MII=m
CONFIG_SUNLANCE=m
-CONFIG_HAPPYMEAL=m
+CONFIG_HAPPYMEAL=y
CONFIG_SUNGEM=m
CONFIG_SUNVNET=m
CONFIG_LDMVSW=m
@@ -234,9 +234,7 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRC16=m
CONFIG_LIBCRC32C=m
CONFIG_VCC=m
-CONFIG_ATA=y
CONFIG_PATA_CMD64X=y
-CONFIG_HAPPYMEAL=y
CONFIG_IP_PNP=y
CONFIG_IP_PNP_DHCP=y
CONFIG_DEVTMPFS=y
diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h
index 7e078bc73ef5..8fb09eec8c3e 100644
--- a/arch/sparc/include/asm/elf_64.h
+++ b/arch/sparc/include/asm/elf_64.h
@@ -8,7 +8,6 @@
#include <asm/ptrace.h>
#include <asm/processor.h>
-#include <asm/extable_64.h>
#include <asm/spitfire.h>
#include <asm/adi.h>
diff --git a/arch/sparc/include/asm/extable_64.h b/arch/sparc/include/asm/extable.h
index 5a0171907b7e..554a9dc376fc 100644
--- a/arch/sparc/include/asm/extable_64.h
+++ b/arch/sparc/include/asm/extable.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_EXTABLE64_H
-#define __ASM_EXTABLE64_H
+#ifndef __ASM_EXTABLE_H
+#define __ASM_EXTABLE_H
/*
* The exception table consists of pairs of addresses: the first is the
* address of an instruction that is allowed to fault, and the second is
diff --git a/arch/sparc/include/asm/processor_32.h b/arch/sparc/include/asm/processor_32.h
index 3c4bc2189092..b6242f7771e9 100644
--- a/arch/sparc/include/asm/processor_32.h
+++ b/arch/sparc/include/asm/processor_32.h
@@ -50,16 +50,12 @@ struct thread_struct {
unsigned long fsr;
unsigned long fpqdepth;
struct fpq fpqueue[16];
- unsigned long flags;
mm_segment_t current_ds;
};
-#define SPARC_FLAG_KTHREAD 0x1 /* task is a kernel thread */
-#define SPARC_FLAG_UNALIGNED 0x2 /* is allowed to do unaligned accesses */
-
#define INIT_THREAD { \
- .flags = SPARC_FLAG_KTHREAD, \
.current_ds = KERNEL_DS, \
+ .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \
}
/* Do necessary setup to start up a newly executed thread. */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 42cd4cd3892e..8047a9caab2f 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -118,6 +118,7 @@ struct thread_info {
.task = &tsk, \
.current_ds = ASI_P, \
.preempt_count = INIT_PREEMPT_COUNT, \
+ .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \
}
/* how to get the thread information struct from C */
diff --git a/arch/sparc/include/asm/uaccess.h b/arch/sparc/include/asm/uaccess.h
index dd85bc2c2cad..390094200fc4 100644
--- a/arch/sparc/include/asm/uaccess.h
+++ b/arch/sparc/include/asm/uaccess.h
@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef ___ASM_SPARC_UACCESS_H
#define ___ASM_SPARC_UACCESS_H
+
+#include <asm/extable.h>
+
#if defined(__sparc__) && defined(__arch64__)
#include <asm/uaccess_64.h>
#else
diff --git a/arch/sparc/include/asm/uaccess_32.h b/arch/sparc/include/asm/uaccess_32.h
index 0a2d3ebc4bb8..4a12346bb69c 100644
--- a/arch/sparc/include/asm/uaccess_32.h
+++ b/arch/sparc/include/asm/uaccess_32.h
@@ -13,9 +13,6 @@
#include <asm/processor.h>
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
-
/* Sparc is not segmented; however, we need to be able to fool access_ok()
* when doing system calls from kernel mode legitimately.
*
@@ -40,36 +37,6 @@
#define __access_ok(addr, size) (__user_ok((addr) & get_fs().seg, (size)))
#define access_ok(addr, size) __access_ok((unsigned long)(addr), size)
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue. No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path. This means when everything is well,
- * we don't even have to jump over them. Further, they do not intrude
- * on our cache or tlb entries.
- *
- * There is a special way to mark a range of potentially faulting
- * insns (like twenty ldd/std's with no intervening other instructions).
- * You specify the address of the first insn in insn and 0 in fixup, and in the next
- * exception_table_entry you specify the last potentially faulting insn + 1
- * and, in fixup, the routine which should handle the fault.
- * That fixup code will get
- * (faulting_insn_address - first_insn_in_the_range_address)/4
- * in %g2 (i.e. the index of the faulting instruction in the range).
- */
-
-struct exception_table_entry
-{
- unsigned long insn, fixup;
-};
-
-/* Returns 0 if exception not found and fixup otherwise. */
-unsigned long search_extables_range(unsigned long addr, unsigned long *g2);
-
/* Uh, these should become the main single-value transfer routines..
* They automatically use the right size if we just have the right
* pointer type..
@@ -252,12 +219,7 @@ static inline unsigned long __clear_user(void __user *addr, unsigned long size)
unsigned long ret;
__asm__ __volatile__ (
- ".section __ex_table,#alloc\n\t"
- ".align 4\n\t"
- ".word 1f,3\n\t"
- ".previous\n\t"
"mov %2, %%o1\n"
- "1:\n\t"
"call __bzero\n\t"
" mov %1, %%o0\n\t"
"mov %%o0, %0\n"
diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h
index 698cf69f74e9..30eb4c6414d1 100644
--- a/arch/sparc/include/asm/uaccess_64.h
+++ b/arch/sparc/include/asm/uaccess_64.h
@@ -10,7 +10,6 @@
#include <linux/string.h>
#include <asm/asi.h>
#include <asm/spitfire.h>
-#include <asm/extable_64.h>
#include <asm/processor.h>
diff --git a/arch/sparc/kernel/head_32.S b/arch/sparc/kernel/head_32.S
index be30c8d4cc73..6044b82b9767 100644
--- a/arch/sparc/kernel/head_32.S
+++ b/arch/sparc/kernel/head_32.S
@@ -515,7 +515,7 @@ continue_boot:
/* I want a kernel stack NOW! */
set init_thread_union, %g1
- set (THREAD_SIZE - STACKFRAME_SZ), %g2
+ set (THREAD_SIZE - STACKFRAME_SZ - TRACEREG_SZ), %g2
add %g1, %g2, %sp
mov 0, %fp /* And for good luck */
diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S
index c5ff2472b3d9..72a5bdc833ea 100644
--- a/arch/sparc/kernel/head_64.S
+++ b/arch/sparc/kernel/head_64.S
@@ -706,7 +706,7 @@ tlb_fixup_done:
wr %g0, ASI_P, %asi
mov 1, %g1
sllx %g1, THREAD_SHIFT, %g1
- sub %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+ sub %g1, (STACKFRAME_SZ + STACK_BIAS + TRACEREG_SZ), %g1
add %g6, %g1, %sp
/* Set per-cpu pointer initially to zero, this makes
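Correspondingly, the boot code in head_32.S and head_64.S reserves TRACEREG_SZ extra bytes below the stack top so the first stack frame cannot clobber that pt_regs area. The sparc32 layout in C terms, a sketch assuming the asm-offsets constants used in the hunks above:

    /* stack top:  [ pt_regs (TRACEREG_SZ) ][ frame (STACKFRAME_SZ) ] ... */
    static inline unsigned long initial_sp(unsigned long stack_base)
    {
            return stack_base + THREAD_SIZE - STACKFRAME_SZ - TRACEREG_SZ;
    }

sparc64 additionally subtracts STACK_BIAS, as the head_64.S hunk shows.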
diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
index 7649b14d69f8..3b9794978e5b 100644
--- a/arch/sparc/kernel/process_32.c
+++ b/arch/sparc/kernel/process_32.c
@@ -216,16 +216,6 @@ void flush_thread(void)
clear_thread_flag(TIF_USEDFPU);
#endif
}
-
- /* This task is no longer a kernel thread. */
- if (current->thread.flags & SPARC_FLAG_KTHREAD) {
- current->thread.flags &= ~SPARC_FLAG_KTHREAD;
-
- /* We must fixup kregs as well. */
- /* XXX This was not fixed for ti for a while, worked. Unused? */
- current->thread.kregs = (struct pt_regs *)
- (task_stack_page(current) + (THREAD_SIZE - TRACEREG_SZ));
- }
}
static inline struct sparc_stackf __user *
@@ -309,11 +299,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
ti->ksp = (unsigned long) new_stack;
p->thread.kregs = childregs;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
extern int nwindows;
unsigned long psr;
memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
- p->thread.flags |= SPARC_FLAG_KTHREAD;
p->thread.current_ds = KERNEL_DS;
ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8);
childregs->u_regs[UREG_G1] = sp; /* function */
@@ -325,7 +314,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
}
memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
childregs->u_regs[UREG_FP] = sp;
- p->thread.flags &= ~SPARC_FLAG_KTHREAD;
p->thread.current_ds = USER_DS;
ti->kpc = (((unsigned long) ret_from_fork) - 0x8);
ti->kpsr = current->thread.fork_kpsr | PSR_PIL;
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 6f8c7822fc06..7afd0a859a78 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -597,7 +597,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
sizeof(struct sparc_stackf));
t->fpsaved[0] = 0;
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(child_trap_frame, 0, child_stack_sz);
__thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] =
(current_pt_regs()->tstate + 1) & TSTATE_CWP;
diff --git a/arch/sparc/kernel/setup_32.c b/arch/sparc/kernel/setup_32.c
index eea43a1aef1b..c8e0dd99f370 100644
--- a/arch/sparc/kernel/setup_32.c
+++ b/arch/sparc/kernel/setup_32.c
@@ -266,7 +266,6 @@ static __init void leon_patch(void)
}
struct tt_entry *sparc_ttable;
-static struct pt_regs fake_swapper_regs;
/* Called from head_32.S - before we have setup anything
* in the kernel. Be very careful with what you do here.
@@ -363,8 +362,6 @@ void __init setup_arch(char **cmdline_p)
(*(linux_dbvec->teach_debugger))();
}
- init_task.thread.kregs = &fake_swapper_regs;
-
/* Run-time patch instructions to match the cpu model */
per_cpu_patch();
diff --git a/arch/sparc/kernel/setup_64.c b/arch/sparc/kernel/setup_64.c
index d87244197d5c..48abee4eee29 100644
--- a/arch/sparc/kernel/setup_64.c
+++ b/arch/sparc/kernel/setup_64.c
@@ -165,8 +165,6 @@ extern int root_mountflags;
char reboot_command[COMMAND_LINE_SIZE];
-static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
-
static void __init per_cpu_patch(void)
{
struct cpuid_patch_entry *p;
@@ -661,8 +659,6 @@ void __init setup_arch(char **cmdline_p)
rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
#endif
- task_thread_info(&init_task)->kregs = &fake_swapper_regs;
-
#ifdef CONFIG_IP_PNP
if (!ic_set_manually) {
phandle chosen = prom_finddevice("/chosen");
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index d92e5eaa4c1d..a850dccd78ea 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -275,14 +275,13 @@ bool is_no_fault_exception(struct pt_regs *regs)
asi = (regs->tstate >> 24); /* saved %asi */
else
asi = (insn >> 5); /* immediate asi */
- if ((asi & 0xf2) == ASI_PNF) {
- if (insn & 0x1000000) { /* op3[5:4]=3 */
- handle_ldf_stq(insn, regs);
- return true;
- } else if (insn & 0x200000) { /* op3[2], stores */
+ if ((asi & 0xf6) == ASI_PNF) {
+ if (insn & 0x200000) /* op3[2], stores */
return false;
- }
- handle_ld_nf(insn, regs);
+ if (insn & 0x1000000) /* op3[5:4]=3 (fp) */
+ handle_ldf_stq(insn, regs);
+ else
+ handle_ld_nf(insn, regs);
return true;
}
}
diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c
index 83db94c0b431..ef5c5207c9ff 100644
--- a/arch/sparc/kernel/unaligned_32.c
+++ b/arch/sparc/kernel/unaligned_32.c
@@ -16,6 +16,7 @@
#include <linux/uaccess.h>
#include <linux/smp.h>
#include <linux/perf_event.h>
+#include <linux/extable.h>
#include <asm/setup.h>
@@ -213,10 +214,10 @@ static inline int ok_for_kernel(unsigned int insn)
static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
{
- unsigned long g2 = regs->u_regs [UREG_G2];
- unsigned long fixup = search_extables_range(regs->pc, &g2);
+ const struct exception_table_entry *entry;
- if (!fixup) {
+ entry = search_exception_tables(regs->pc);
+ if (!entry) {
unsigned long address = compute_effective_address(regs, insn);
if(address < PAGE_SIZE) {
printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference in mna handler");
@@ -232,9 +233,8 @@ static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
die_if_kernel("Oops", regs);
/* Not reached */
}
- regs->pc = fixup;
+ regs->pc = entry->fixup;
regs->npc = regs->pc + 4;
- regs->u_regs [UREG_G2] = g2;
}
asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
@@ -274,103 +274,9 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
}
}
-static inline int ok_for_user(struct pt_regs *regs, unsigned int insn,
- enum direction dir)
-{
- unsigned int reg;
- int size = ((insn >> 19) & 3) == 3 ? 8 : 4;
-
- if ((regs->pc | regs->npc) & 3)
- return 0;
-
- /* Must access_ok() in all the necessary places. */
-#define WINREG_ADDR(regnum) \
- ((void __user *)(((unsigned long *)regs->u_regs[UREG_FP])+(regnum)))
-
- reg = (insn >> 25) & 0x1f;
- if (reg >= 16) {
- if (!access_ok(WINREG_ADDR(reg - 16), size))
- return -EFAULT;
- }
- reg = (insn >> 14) & 0x1f;
- if (reg >= 16) {
- if (!access_ok(WINREG_ADDR(reg - 16), size))
- return -EFAULT;
- }
- if (!(insn & 0x2000)) {
- reg = (insn & 0x1f);
- if (reg >= 16) {
- if (!access_ok(WINREG_ADDR(reg - 16), size))
- return -EFAULT;
- }
- }
-#undef WINREG_ADDR
- return 0;
-}
-
-static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
+asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
{
send_sig_fault(SIGBUS, BUS_ADRALN,
(void __user *)safe_compute_effective_address(regs, insn),
0, current);
}
-
-asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
-{
- enum direction dir;
-
- if(!(current->thread.flags & SPARC_FLAG_UNALIGNED) ||
- (((insn >> 30) & 3) != 3))
- goto kill_user;
- dir = decode_direction(insn);
- if(!ok_for_user(regs, insn, dir)) {
- goto kill_user;
- } else {
- int err, size = decode_access_size(insn);
- unsigned long addr;
-
- if(floating_point_load_or_store_p(insn)) {
- printk("User FPU load/store unaligned unsupported.\n");
- goto kill_user;
- }
-
- addr = compute_effective_address(regs, insn);
- perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
- switch(dir) {
- case load:
- err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
- regs),
- size, (unsigned long *) addr,
- decode_signedness(insn));
- break;
-
- case store:
- err = do_int_store(((insn>>25)&0x1f), size,
- (unsigned long *) addr, regs);
- break;
-
- case both:
- /*
- * This was supported in 2.4. However, we question
- * the value of SWAP instruction across word boundaries.
- */
- printk("Unaligned SWAP unsupported.\n");
- err = -EFAULT;
- break;
-
- default:
- unaligned_panic("Impossible user unaligned trap.");
- goto out;
- }
- if (err)
- goto kill_user;
- else
- advance(regs);
- goto out;
- }
-
-kill_user:
- user_mna_trap_fault(regs, insn);
-out:
- ;
-}
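With the sparc-special range entries gone, kernel_mna_trap_fault() above uses the generic search_exception_tables() and no longer threads %g2 through the fixup. The resulting pattern, sketched:

    #include <linux/extable.h>

    /* Sketch: redirect a faulting kernel access to its fixup handler. */
    static bool fixup_kernel_fault(struct pt_regs *regs)
    {
            const struct exception_table_entry *entry =
                    search_exception_tables(regs->pc);

            if (!entry)
                    return false;
            regs->pc  = entry->fixup;
            regs->npc = regs->pc + 4;    /* keep the sparc PC/nPC pair sane */
            return true;
    }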
diff --git a/arch/sparc/lib/checksum_32.S b/arch/sparc/lib/checksum_32.S
index 7488d130faf7..781e39b3c009 100644
--- a/arch/sparc/lib/checksum_32.S
+++ b/arch/sparc/lib/checksum_32.S
@@ -155,13 +155,6 @@ cpout: retl ! get outta here
.text; \
.align 4
-#define EXT(start,end) \
- .section __ex_table,ALLOC; \
- .align 4; \
- .word start, 0, end, cc_fault; \
- .text; \
- .align 4
-
/* This aligned version typically executes in 8.5 superscalar cycles; this
* is the best I can do. I say 8.5 because the final add will pair with
* the next ldd in the main unrolled loop. Thus the pipe is always full.
@@ -169,20 +162,20 @@ cpout: retl ! get outta here
* please check the fixup code below as well.
*/
#define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \
- ldd [src + off + 0x00], t0; \
- ldd [src + off + 0x08], t2; \
+ EX(ldd [src + off + 0x00], t0); \
+ EX(ldd [src + off + 0x08], t2); \
addxcc t0, sum, sum; \
- ldd [src + off + 0x10], t4; \
+ EX(ldd [src + off + 0x10], t4); \
addxcc t1, sum, sum; \
- ldd [src + off + 0x18], t6; \
+ EX(ldd [src + off + 0x18], t6); \
addxcc t2, sum, sum; \
- std t0, [dst + off + 0x00]; \
+ EX(std t0, [dst + off + 0x00]); \
addxcc t3, sum, sum; \
- std t2, [dst + off + 0x08]; \
+ EX(std t2, [dst + off + 0x08]); \
addxcc t4, sum, sum; \
- std t4, [dst + off + 0x10]; \
+ EX(std t4, [dst + off + 0x10]); \
addxcc t5, sum, sum; \
- std t6, [dst + off + 0x18]; \
+ EX(std t6, [dst + off + 0x18]); \
addxcc t6, sum, sum; \
addxcc t7, sum, sum;
@@ -191,39 +184,39 @@ cpout: retl ! get outta here
* Viking MXCC into streaming mode. Ho hum...
*/
#define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7) \
- ldd [src + off + 0x00], t0; \
- ldd [src + off + 0x08], t2; \
- ldd [src + off + 0x10], t4; \
- ldd [src + off + 0x18], t6; \
- st t0, [dst + off + 0x00]; \
+ EX(ldd [src + off + 0x00], t0); \
+ EX(ldd [src + off + 0x08], t2); \
+ EX(ldd [src + off + 0x10], t4); \
+ EX(ldd [src + off + 0x18], t6); \
+ EX(st t0, [dst + off + 0x00]); \
addxcc t0, sum, sum; \
- st t1, [dst + off + 0x04]; \
+ EX(st t1, [dst + off + 0x04]); \
addxcc t1, sum, sum; \
- st t2, [dst + off + 0x08]; \
+ EX(st t2, [dst + off + 0x08]); \
addxcc t2, sum, sum; \
- st t3, [dst + off + 0x0c]; \
+ EX(st t3, [dst + off + 0x0c]); \
addxcc t3, sum, sum; \
- st t4, [dst + off + 0x10]; \
+ EX(st t4, [dst + off + 0x10]); \
addxcc t4, sum, sum; \
- st t5, [dst + off + 0x14]; \
+ EX(st t5, [dst + off + 0x14]); \
addxcc t5, sum, sum; \
- st t6, [dst + off + 0x18]; \
+ EX(st t6, [dst + off + 0x18]); \
addxcc t6, sum, sum; \
- st t7, [dst + off + 0x1c]; \
+ EX(st t7, [dst + off + 0x1c]); \
addxcc t7, sum, sum;
/* Yuck, 6 superscalar cycles... */
#define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \
- ldd [src - off - 0x08], t0; \
- ldd [src - off - 0x00], t2; \
+ EX(ldd [src - off - 0x08], t0); \
+ EX(ldd [src - off - 0x00], t2); \
addxcc t0, sum, sum; \
- st t0, [dst - off - 0x08]; \
+ EX(st t0, [dst - off - 0x08]); \
addxcc t1, sum, sum; \
- st t1, [dst - off - 0x04]; \
+ EX(st t1, [dst - off - 0x04]); \
addxcc t2, sum, sum; \
- st t2, [dst - off - 0x00]; \
+ EX(st t2, [dst - off - 0x00]); \
addxcc t3, sum, sum; \
- st t3, [dst - off + 0x04];
+ EX(st t3, [dst - off + 0x04]);
/* Handle the end cruft code out of band for better cache patterns. */
cc_end_cruft:
@@ -331,7 +324,6 @@ __csum_partial_copy_sparc_generic:
CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
-10: EXT(5b, 10b) ! note for exception handling
sub %g1, 128, %g1 ! detract from length
addx %g0, %g7, %g7 ! add in last carry bit
andcc %g1, 0xffffff80, %g0 ! more to csum?
@@ -356,8 +348,7 @@ cctbl: CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
-12: EXT(cctbl, 12b) ! note for exception table handling
- addx %g0, %g7, %g7
+12: addx %g0, %g7, %g7
andcc %o3, 0xf, %g0 ! check for low bits set
ccte: bne cc_end_cruft ! something left, handle it out of band
andcc %o3, 8, %g0 ! begin checks for that code
@@ -367,7 +358,6 @@ ccdbl: CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o
CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
-11: EXT(ccdbl, 11b) ! note for exception table handling
sub %g1, 128, %g1 ! detract from length
addx %g0, %g7, %g7 ! add in last carry bit
andcc %g1, 0xffffff80, %g0 ! more to csum?
diff --git a/arch/sparc/lib/copy_user.S b/arch/sparc/lib/copy_user.S
index dc72f2b970b7..954572c78539 100644
--- a/arch/sparc/lib/copy_user.S
+++ b/arch/sparc/lib/copy_user.S
@@ -21,98 +21,134 @@
/* Work around cpp -rob */
#define ALLOC #alloc
#define EXECINSTR #execinstr
+
+#define EX_ENTRY(l1, l2) \
+ .section __ex_table,ALLOC; \
+ .align 4; \
+ .word l1, l2; \
+ .text;
+
#define EX(x,y,a,b) \
98: x,y; \
.section .fixup,ALLOC,EXECINSTR; \
.align 4; \
-99: ba fixupretl; \
- a, b, %g3; \
- .section __ex_table,ALLOC; \
- .align 4; \
- .word 98b, 99b; \
- .text; \
- .align 4
+99: retl; \
+ a, b, %o0; \
+ EX_ENTRY(98b, 99b)
#define EX2(x,y,c,d,e,a,b) \
98: x,y; \
.section .fixup,ALLOC,EXECINSTR; \
.align 4; \
99: c, d, e; \
- ba fixupretl; \
- a, b, %g3; \
- .section __ex_table,ALLOC; \
- .align 4; \
- .word 98b, 99b; \
- .text; \
- .align 4
+ retl; \
+ a, b, %o0; \
+ EX_ENTRY(98b, 99b)
#define EXO2(x,y) \
98: x, y; \
- .section __ex_table,ALLOC; \
- .align 4; \
- .word 98b, 97f; \
- .text; \
- .align 4
+ EX_ENTRY(98b, 97f)
-#define EXT(start,end,handler) \
- .section __ex_table,ALLOC; \
- .align 4; \
- .word start, 0, end, handler; \
- .text; \
- .align 4
+#define LD(insn, src, offset, reg, label) \
+98: insn [%src + (offset)], %reg; \
+ .section .fixup,ALLOC,EXECINSTR; \
+99: ba label; \
+ mov offset, %g5; \
+ EX_ENTRY(98b, 99b)
-/* Please do not change following macros unless you change logic used
- * in .fixup at the end of this file as well
- */
+#define ST(insn, dst, offset, reg, label) \
+98: insn %reg, [%dst + (offset)]; \
+ .section .fixup,ALLOC,EXECINSTR; \
+99: ba label; \
+ mov offset, %g5; \
+ EX_ENTRY(98b, 99b)
/* Both these macros have to start with exactly the same insn */
+/* left: g7 + (g1 % 128) - offset */
#define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
- ldd [%src + (offset) + 0x00], %t0; \
- ldd [%src + (offset) + 0x08], %t2; \
- ldd [%src + (offset) + 0x10], %t4; \
- ldd [%src + (offset) + 0x18], %t6; \
- st %t0, [%dst + (offset) + 0x00]; \
- st %t1, [%dst + (offset) + 0x04]; \
- st %t2, [%dst + (offset) + 0x08]; \
- st %t3, [%dst + (offset) + 0x0c]; \
- st %t4, [%dst + (offset) + 0x10]; \
- st %t5, [%dst + (offset) + 0x14]; \
- st %t6, [%dst + (offset) + 0x18]; \
- st %t7, [%dst + (offset) + 0x1c];
-
+ LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \
+ LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \
+ LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \
+ LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \
+ ST(st, dst, offset + 0x00, t0, bigchunk_fault) \
+ ST(st, dst, offset + 0x04, t1, bigchunk_fault) \
+ ST(st, dst, offset + 0x08, t2, bigchunk_fault) \
+ ST(st, dst, offset + 0x0c, t3, bigchunk_fault) \
+ ST(st, dst, offset + 0x10, t4, bigchunk_fault) \
+ ST(st, dst, offset + 0x14, t5, bigchunk_fault) \
+ ST(st, dst, offset + 0x18, t6, bigchunk_fault) \
+ ST(st, dst, offset + 0x1c, t7, bigchunk_fault)
+
+/* left: g7 + (g1 % 128) - offset */
#define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
- ldd [%src + (offset) + 0x00], %t0; \
- ldd [%src + (offset) + 0x08], %t2; \
- ldd [%src + (offset) + 0x10], %t4; \
- ldd [%src + (offset) + 0x18], %t6; \
- std %t0, [%dst + (offset) + 0x00]; \
- std %t2, [%dst + (offset) + 0x08]; \
- std %t4, [%dst + (offset) + 0x10]; \
- std %t6, [%dst + (offset) + 0x18];
+ LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \
+ LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \
+ LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \
+ LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \
+ ST(std, dst, offset + 0x00, t0, bigchunk_fault) \
+ ST(std, dst, offset + 0x08, t2, bigchunk_fault) \
+ ST(std, dst, offset + 0x10, t4, bigchunk_fault) \
+ ST(std, dst, offset + 0x18, t6, bigchunk_fault)
+ .section .fixup,#alloc,#execinstr
+bigchunk_fault:
+ sub %g7, %g5, %o0
+ and %g1, 127, %g1
+ retl
+ add %o0, %g1, %o0
+
+/* left: offset + 16 + (g1 % 16) */
#define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
- ldd [%src - (offset) - 0x10], %t0; \
- ldd [%src - (offset) - 0x08], %t2; \
- st %t0, [%dst - (offset) - 0x10]; \
- st %t1, [%dst - (offset) - 0x0c]; \
- st %t2, [%dst - (offset) - 0x08]; \
- st %t3, [%dst - (offset) - 0x04];
+ LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault) \
+ LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault) \
+ ST(st, dst, -(offset + 0x10), t0, lastchunk_fault) \
+ ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault) \
+ ST(st, dst, -(offset + 0x08), t2, lastchunk_fault) \
+ ST(st, dst, -(offset + 0x04), t3, lastchunk_fault)
-#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
- lduh [%src + (offset) + 0x00], %t0; \
- lduh [%src + (offset) + 0x02], %t1; \
- lduh [%src + (offset) + 0x04], %t2; \
- lduh [%src + (offset) + 0x06], %t3; \
- sth %t0, [%dst + (offset) + 0x00]; \
- sth %t1, [%dst + (offset) + 0x02]; \
- sth %t2, [%dst + (offset) + 0x04]; \
- sth %t3, [%dst + (offset) + 0x06];
+ .section .fixup,#alloc,#execinstr
+lastchunk_fault:
+ and %g1, 15, %g1
+ retl
+ sub %g1, %g5, %o0
+/* left: o3 + (o2 % 16) - offset */
+#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
+ LD(lduh, src, offset + 0x00, t0, halfchunk_fault) \
+ LD(lduh, src, offset + 0x02, t1, halfchunk_fault) \
+ LD(lduh, src, offset + 0x04, t2, halfchunk_fault) \
+ LD(lduh, src, offset + 0x06, t3, halfchunk_fault) \
+ ST(sth, dst, offset + 0x00, t0, halfchunk_fault) \
+ ST(sth, dst, offset + 0x02, t1, halfchunk_fault) \
+ ST(sth, dst, offset + 0x04, t2, halfchunk_fault) \
+ ST(sth, dst, offset + 0x06, t3, halfchunk_fault)
+
+/* left: o3 + (o2 % 16) + offset + 2 */
#define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
- ldub [%src - (offset) - 0x02], %t0; \
- ldub [%src - (offset) - 0x01], %t1; \
- stb %t0, [%dst - (offset) - 0x02]; \
- stb %t1, [%dst - (offset) - 0x01];
+ LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault) \
+ LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault) \
+ ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault) \
+ ST(stb, dst, -(offset + 0x01), t1, halfchunk_fault)
+
+ .section .fixup,#alloc,#execinstr
+halfchunk_fault:
+ and %o2, 15, %o2
+ sub %o3, %g5, %o3
+ retl
+ add %o2, %o3, %o0
+
+/* left: offset + 2 + (o2 % 2) */
+#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1) \
+ LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault) \
+ LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault) \
+ ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault) \
+ ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault)
+
+ .section .fixup,#alloc,#execinstr
+last_shortchunk_fault:
+ and %o2, 1, %o2
+ retl
+ sub %o2, %g5, %o0
.text
.align 4
@@ -182,8 +218,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */
MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-80:
- EXT(5b, 80b, 50f)
subcc %g7, 128, %g7
add %o1, 128, %o1
bne 5b
@@ -201,7 +235,6 @@ __copy_user: /* %o0=dst %o1=src %o2=len */
jmpl %o5 + %lo(copy_user_table_end), %g0
add %o0, %g7, %o0
-copy_user_table:
MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
@@ -210,7 +243,6 @@ copy_user_table:
MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
copy_user_table_end:
- EXT(copy_user_table, copy_user_table_end, 51f)
be copy_user_last7
andcc %g1, 4, %g0
@@ -250,8 +282,6 @@ ldd_std:
MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-81:
- EXT(ldd_std, 81b, 52f)
subcc %g7, 128, %g7
add %o1, 128, %o1
bne ldd_std
@@ -290,8 +320,6 @@ cannot_optimize:
10:
MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
-82:
- EXT(10b, 82b, 53f)
subcc %o3, 0x10, %o3
add %o1, 0x10, %o1
bne 10b
@@ -308,8 +336,6 @@ byte_chunk:
MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
-83:
- EXT(byte_chunk, 83b, 54f)
subcc %o3, 0x10, %o3
add %o1, 0x10, %o1
bne byte_chunk
@@ -325,16 +351,14 @@ short_end:
add %o1, %o3, %o1
jmpl %o5 + %lo(short_table_end), %g0
andcc %o2, 1, %g0
-84:
- MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
- MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
+ MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
+ MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
+ MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3)
+ MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3)
+ MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3)
+ MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3)
+ MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3)
short_table_end:
- EXT(84b, short_table_end, 55f)
be 1f
nop
EX(ldub [%o1], %g2, add %g0, 1)
@@ -363,123 +387,8 @@ short_aligned_end:
.section .fixup,#alloc,#execinstr
.align 4
97:
- mov %o2, %g3
-fixupretl:
retl
- mov %g3, %o0
-
-/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
-50:
-/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
- * happens. This is derived from the amount ldd reads, st stores, etc.
- * x = g2 % 12;
- * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
- * o0 += (g2 / 12) * 32;
- */
- cmp %g2, 12
- add %o0, %g7, %o0
- bcs 1f
- cmp %g2, 24
- bcs 2f
- cmp %g2, 36
- bcs 3f
- nop
- sub %g2, 12, %g2
- sub %g7, 32, %g7
-3: sub %g2, 12, %g2
- sub %g7, 32, %g7
-2: sub %g2, 12, %g2
- sub %g7, 32, %g7
-1: cmp %g2, 4
- bcs,a 60f
- clr %g2
- sub %g2, 4, %g2
- sll %g2, 2, %g2
-60: and %g1, 0x7f, %g3
- sub %o0, %g7, %o0
- add %g3, %g7, %g3
- ba fixupretl
- sub %g3, %g2, %g3
-51:
-/* i = 41 - g2; j = i % 6;
- * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
- * o0 -= (i / 6) * 16 + 16;
- */
- neg %g2
- and %g1, 0xf, %g1
- add %g2, 41, %g2
- add %o0, %g1, %o0
-1: cmp %g2, 6
- bcs,a 2f
- cmp %g2, 4
- add %g1, 16, %g1
- b 1b
- sub %g2, 6, %g2
-2: bcc,a 2f
- mov 16, %g2
- inc %g2
- sll %g2, 2, %g2
-2: add %g1, %g2, %g3
- ba fixupretl
- sub %o0, %g3, %o0
-52:
-/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
- o0 += (g2 / 8) * 32 */
- andn %g2, 7, %g4
- add %o0, %g7, %o0
- andcc %g2, 4, %g0
- and %g2, 3, %g2
- sll %g4, 2, %g4
- sll %g2, 3, %g2
- bne 60b
- sub %g7, %g4, %g7
- ba 60b
- clr %g2
-53:
-/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
- o0 += (g2 & 8) */
- and %g2, 3, %g4
- andcc %g2, 4, %g0
- and %g2, 8, %g2
- sll %g4, 1, %g4
- be 1f
- add %o0, %g2, %o0
- add %g2, %g4, %g2
-1: and %o2, 0xf, %g3
- add %g3, %o3, %g3
- ba fixupretl
- sub %g3, %g2, %g3
-54:
-/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
- o0 += (g2 / 4) * 2 */
- srl %g2, 2, %o4
- and %g2, 1, %o5
- srl %g2, 1, %g2
- add %o4, %o4, %o4
- and %o5, %g2, %o5
- and %o2, 0xf, %o2
- add %o0, %o4, %o0
- sub %o3, %o5, %o3
- sub %o2, %o4, %o2
- ba fixupretl
- add %o2, %o3, %g3
-55:
-/* i = 27 - g2;
- g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
- o0 -= i / 4 * 2 + 1 */
- neg %g2
- and %o2, 1, %o2
- add %g2, 27, %g2
- srl %g2, 2, %o5
- andcc %g2, 3, %g0
- mov 1, %g2
- add %o5, %o5, %o5
- be,a 1f
- clr %g2
-1: add %g2, %o5, %g3
- sub %o0, %g3, %o0
- ba fixupretl
- add %g3, %o2, %g3
+ mov %o2, %o0
.globl __copy_user_end
__copy_user_end:
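The copy_user.S rewrite drops the '%g2 = (broken_insn - first_insn)>>2' bookkeeping entirely: every LD/ST stub records its block offset in %g5 and branches to a small shared routine that derives the bytes left uncopied. The bigchunk_fault arithmetic, sketched in C with the register names kept:

    /* left = (g7 - faulting offset) + (g1 % 128), per bigchunk_fault above. */
    static unsigned long bigchunk_left(unsigned long g7, unsigned long g1,
                                       unsigned long g5)
    {
            return (g7 - g5) + (g1 & 127);
    }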
diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S
index f427f34b8b79..eaff68213fdf 100644
--- a/arch/sparc/lib/memset.S
+++ b/arch/sparc/lib/memset.S
@@ -19,7 +19,7 @@
98: x,y; \
.section .fixup,ALLOC,EXECINSTR; \
.align 4; \
-99: ba 30f; \
+99: retl; \
a, b, %o0; \
.section __ex_table,ALLOC; \
.align 4; \
@@ -27,35 +27,44 @@
.text; \
.align 4
-#define EXT(start,end,handler) \
+#define STORE(source, base, offset, n) \
+98: std source, [base + offset + n]; \
+ .section .fixup,ALLOC,EXECINSTR; \
+ .align 4; \
+99: ba 30f; \
+ sub %o3, n - offset, %o3; \
.section __ex_table,ALLOC; \
.align 4; \
- .word start, 0, end, handler; \
+ .word 98b, 99b; \
.text; \
- .align 4
+ .align 4;
+
+#define STORE_LAST(source, base, offset, n) \
+ EX(std source, [base - offset - n], \
+ add %o1, offset + n);
/* Please don't change these macros, unless you change the logic
* in the .fixup section below as well.
* Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
-#define ZERO_BIG_BLOCK(base, offset, source) \
- std source, [base + offset + 0x00]; \
- std source, [base + offset + 0x08]; \
- std source, [base + offset + 0x10]; \
- std source, [base + offset + 0x18]; \
- std source, [base + offset + 0x20]; \
- std source, [base + offset + 0x28]; \
- std source, [base + offset + 0x30]; \
- std source, [base + offset + 0x38];
+#define ZERO_BIG_BLOCK(base, offset, source) \
+ STORE(source, base, offset, 0x00); \
+ STORE(source, base, offset, 0x08); \
+ STORE(source, base, offset, 0x10); \
+ STORE(source, base, offset, 0x18); \
+ STORE(source, base, offset, 0x20); \
+ STORE(source, base, offset, 0x28); \
+ STORE(source, base, offset, 0x30); \
+ STORE(source, base, offset, 0x38);
#define ZERO_LAST_BLOCKS(base, offset, source) \
- std source, [base - offset - 0x38]; \
- std source, [base - offset - 0x30]; \
- std source, [base - offset - 0x28]; \
- std source, [base - offset - 0x20]; \
- std source, [base - offset - 0x18]; \
- std source, [base - offset - 0x10]; \
- std source, [base - offset - 0x08]; \
- std source, [base - offset - 0x00];
+ STORE_LAST(source, base, offset, 0x38); \
+ STORE_LAST(source, base, offset, 0x30); \
+ STORE_LAST(source, base, offset, 0x28); \
+ STORE_LAST(source, base, offset, 0x20); \
+ STORE_LAST(source, base, offset, 0x18); \
+ STORE_LAST(source, base, offset, 0x10); \
+ STORE_LAST(source, base, offset, 0x08); \
+ STORE_LAST(source, base, offset, 0x00);
.text
.align 4
@@ -68,8 +77,6 @@ __bzero_begin:
.globl memset
EXPORT_SYMBOL(__bzero)
EXPORT_SYMBOL(memset)
- .globl __memset_start, __memset_end
-__memset_start:
memset:
mov %o0, %g1
mov 1, %g4
@@ -122,8 +129,6 @@ __bzero:
ZERO_BIG_BLOCK(%o0, 0x00, %g2)
subcc %o3, 128, %o3
ZERO_BIG_BLOCK(%o0, 0x40, %g2)
-11:
- EXT(10b, 11b, 20f)
bne 10b
add %o0, 128, %o0
@@ -138,11 +143,9 @@ __bzero:
jmp %o4
add %o0, %o2, %o0
-12:
ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
13:
- EXT(12b, 13b, 21f)
be 8f
andcc %o1, 4, %g0
@@ -182,37 +185,13 @@ __bzero:
5:
retl
clr %o0
-__memset_end:
.section .fixup,#alloc,#execinstr
.align 4
-20:
- cmp %g2, 8
- bleu 1f
- and %o1, 0x7f, %o1
- sub %g2, 9, %g2
- add %o3, 64, %o3
-1:
- sll %g2, 3, %g2
- add %o3, %o1, %o0
- b 30f
- sub %o0, %g2, %o0
-21:
- mov 8, %o0
- and %o1, 7, %o1
- sub %o0, %g2, %o0
- sll %o0, 3, %o0
- b 30f
- add %o0, %o1, %o0
30:
-/* %o4 is faulting address, %o5 is %pc where fault occurred */
- save %sp, -104, %sp
- mov %i5, %o0
- mov %i7, %o1
- call lookup_fault
- mov %i4, %o2
- ret
- restore
+ and %o1, 0x7f, %o1
+ retl
+ add %o3, %o1, %o0
.globl __bzero_end
__bzero_end:
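memset.S gets the same treatment: each STORE stub pre-adjusts %o3 for how far into the 64-byte block the fault hit, and the shared '30:' tail folds in the residue of %o1. The returned byte count, sketched:

    /* left = o3 + (o1 % 128): bytes __bzero never reached, per '30:' above. */
    static unsigned long bzero_left(unsigned long o3, unsigned long o1)
    {
            return o3 + (o1 & 0x7f);
    }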
diff --git a/arch/sparc/mm/Makefile b/arch/sparc/mm/Makefile
index 68db1f859b02..871354aa3c00 100644
--- a/arch/sparc/mm/Makefile
+++ b/arch/sparc/mm/Makefile
@@ -8,7 +8,7 @@ ccflags-y := -Werror
obj-$(CONFIG_SPARC64) += ultra.o tlb.o tsb.o
obj-y += fault_$(BITS).o
obj-y += init_$(BITS).o
-obj-$(CONFIG_SPARC32) += extable.o srmmu.o iommu.o io-unit.o
+obj-$(CONFIG_SPARC32) += srmmu.o iommu.o io-unit.o
obj-$(CONFIG_SPARC32) += srmmu_access.o
obj-$(CONFIG_SPARC32) += hypersparc.o viking.o tsunami.o swift.o
obj-$(CONFIG_SPARC32) += leon_mm.o
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
deleted file mode 100644
index 241b40641873..000000000000
--- a/arch/sparc/mm/extable.c
+++ /dev/null
@@ -1,107 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/arch/sparc/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/extable.h>
-#include <linux/uaccess.h>
-
-void sort_extable(struct exception_table_entry *start,
- struct exception_table_entry *finish)
-{
-}
-
-/* Caller knows they are in a range if ret->fixup == 0 */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *base,
- const size_t num,
- unsigned long value)
-{
- int i;
-
- /* Single insn entries are encoded as:
- * word 1: insn address
- * word 2: fixup code address
- *
- * Range entries are encoded as:
- * word 1: first insn address
- * word 2: 0
- * word 3: last insn address + 4 bytes
- * word 4: fixup code address
- *
- * Deleted entries are encoded as:
- * word 1: unused
- * word 2: -1
- *
- * See asm/uaccess.h for more details.
- */
-
- /* 1. Try to find an exact match. */
- for (i = 0; i < num; i++) {
- if (base[i].fixup == 0) {
- /* A range entry, skip both parts. */
- i++;
- continue;
- }
-
- /* A deleted entry; see trim_init_extable */
- if (base[i].fixup == -1)
- continue;
-
- if (base[i].insn == value)
- return &base[i];
- }
-
- /* 2. Try to find a range match. */
- for (i = 0; i < (num - 1); i++) {
- if (base[i].fixup)
- continue;
-
- if (base[i].insn <= value && base[i + 1].insn > value)
- return &base[i];
-
- i++;
- }
-
- return NULL;
-}
-
-#ifdef CONFIG_MODULES
-/* We could memmove them around; easier to mark the trimmed ones. */
-void trim_init_extable(struct module *m)
-{
- unsigned int i;
- bool range;
-
- for (i = 0; i < m->num_exentries; i += range ? 2 : 1) {
- range = m->extable[i].fixup == 0;
-
- if (within_module_init(m->extable[i].insn, m)) {
- m->extable[i].fixup = -1;
- if (range)
- m->extable[i+1].fixup = -1;
- }
- if (range)
- i++;
- }
-}
-#endif /* CONFIG_MODULES */
-
-/* Special extable search, which handles ranges. Returns fixup */
-unsigned long search_extables_range(unsigned long addr, unsigned long *g2)
-{
- const struct exception_table_entry *entry;
-
- entry = search_exception_tables(addr);
- if (!entry)
- return 0;
-
- /* Inside range? Fix g2 and return correct fixup */
- if (!entry->fixup) {
- *g2 = (addr - entry->insn) / 4;
- return (entry + 1)->fixup;
- }
-
- return entry->fixup;
-}
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 40ce087dfecf..de2031c2b2d7 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -23,6 +23,7 @@
#include <linux/interrupt.h>
#include <linux/kdebug.h>
#include <linux/uaccess.h>
+#include <linux/extable.h>
#include <asm/page.h>
#include <asm/openprom.h>
@@ -54,54 +55,6 @@ static void __noreturn unhandled_fault(unsigned long address,
die_if_kernel("Oops", regs);
}
-asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
- unsigned long address)
-{
- struct pt_regs regs;
- unsigned long g2;
- unsigned int insn;
- int i;
-
- i = search_extables_range(ret_pc, &g2);
- switch (i) {
- case 3:
- /* load & store will be handled by fixup */
- return 3;
-
- case 1:
- /* store will be handled by fixup, load will bump out */
- /* for _to_ macros */
- insn = *((unsigned int *) pc);
- if ((insn >> 21) & 1)
- return 1;
- break;
-
- case 2:
- /* load will be handled by fixup, store will bump out */
- /* for _from_ macros */
- insn = *((unsigned int *) pc);
- if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15)
- return 2;
- break;
-
- default:
- break;
- }
-
- memset(&regs, 0, sizeof(regs));
- regs.pc = pc;
- regs.npc = pc + 4;
- __asm__ __volatile__(
- "rd %%psr, %0\n\t"
- "nop\n\t"
- "nop\n\t"
- "nop\n" : "=r" (regs.psr));
- unhandled_fault(address, current, &regs);
-
- /* Not reached */
- return 0;
-}
-
static inline void
show_signal_msg(struct pt_regs *regs, int sig, int code,
unsigned long address, struct task_struct *tsk)
@@ -162,8 +115,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
struct vm_area_struct *vma;
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
- unsigned int fixup;
- unsigned long g2;
int from_user = !(regs->psr & PSR_PS);
int code;
vm_fault_t fault;
@@ -281,30 +232,19 @@ bad_area_nosemaphore:
/* Is this in ex_table? */
no_context:
- g2 = regs->u_regs[UREG_G2];
if (!from_user) {
- fixup = search_extables_range(regs->pc, &g2);
- /* Values below 10 are reserved for other things */
- if (fixup > 10) {
- extern const unsigned int __memset_start[];
- extern const unsigned int __memset_end[];
+ const struct exception_table_entry *entry;
+ entry = search_exception_tables(regs->pc);
#ifdef DEBUG_EXCEPTIONS
- printk("Exception: PC<%08lx> faddr<%08lx>\n",
- regs->pc, address);
- printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n",
- regs->pc, fixup, g2);
+ printk("Exception: PC<%08lx> faddr<%08lx>\n",
+ regs->pc, address);
+ printk("EX_TABLE: insn<%08lx> fixup<%08x>\n",
+ regs->pc, entry->fixup);
#endif
- if ((regs->pc >= (unsigned long)__memset_start &&
- regs->pc < (unsigned long)__memset_end)) {
- regs->u_regs[UREG_I4] = address;
- regs->u_regs[UREG_I5] = regs->pc;
- }
- regs->u_regs[UREG_G2] = g2;
- regs->pc = fixup;
- regs->npc = regs->pc + 4;
- return;
- }
+ regs->pc = entry->fixup;
+ regs->npc = regs->pc + 4;
+ return;
}
unhandled_fault(address, tsk, regs);
diff --git a/arch/sparc/mm/mm_32.h b/arch/sparc/mm/mm_32.h
index ce750a99eea9..ee55f1080634 100644
--- a/arch/sparc/mm/mm_32.h
+++ b/arch/sparc/mm/mm_32.h
@@ -1,7 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* fault_32.c - visible as they are called from assembler */
-asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
- unsigned long address);
asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
unsigned long address);
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 81d508daf67c..c5011064b5dd 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct * p, unsigned long tls)
{
void (*handler)(void);
- int kthread = current->flags & PF_KTHREAD;
+ int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER);
int ret = 0;
p->thread = (struct thread_struct) INIT_THREAD;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cd4b9b1204a8..2792879d398e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -151,6 +151,7 @@ config X86
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if X86_64
select HAVE_ARCH_KASAN_VMALLOC if X86_64
+ select HAVE_ARCH_KFENCE
select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h
new file mode 100644
index 000000000000..97bbb4a9083a
--- /dev/null
+++ b/arch/x86/include/asm/kfence.h
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * x86 KFENCE support.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef _ASM_X86_KFENCE_H
+#define _ASM_X86_KFENCE_H
+
+#include <linux/bug.h>
+#include <linux/kfence.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/set_memory.h>
+#include <asm/tlbflush.h>
+
+/* Force 4K pages for __kfence_pool. */
+static inline bool arch_kfence_init_pool(void)
+{
+ unsigned long addr;
+
+ for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
+ addr += PAGE_SIZE) {
+ unsigned int level;
+
+ if (!lookup_address(addr, &level))
+ return false;
+
+ if (level != PG_LEVEL_4K)
+ set_memory_4k(addr, 1);
+ }
+
+ return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+ unsigned int level;
+ pte_t *pte = lookup_address(addr, &level);
+
+ if (WARN_ON(!pte || level != PG_LEVEL_4K))
+ return false;
+
+ /*
+ * We need to avoid IPIs, as we may get KFENCE allocations or faults
+ * with interrupts disabled. Therefore, the below is best-effort, and
+ * does not flush TLBs on all CPUs. We can tolerate some inaccuracy;
+ * lazy fault handling takes care of faults after the page is PRESENT.
+ */
+
+ if (protect)
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+ else
+ set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+
+ /* Flush this CPU's TLB. */
+ flush_tlb_one_kernel(addr);
+ return true;
+}
+
+#endif /* _ASM_X86_KFENCE_H */
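kfence_protect_page() arms and disarms a guard purely by toggling _PAGE_PRESENT on a 4K PTE, so a stray access faults and can be attributed to the guarded object without any cross-CPU TLB shootdown. The toggle in isolation, a sketch using the same helpers as the header above:

    /* Flip only the PRESENT bit; all other PTE bits are preserved. */
    static inline void set_pte_present(pte_t *pte, bool present)
    {
            pteval_t v = pte_val(*pte);

            set_pte(pte, __pte(present ? (v | _PAGE_PRESENT)
                                       : (v & ~_PAGE_PRESENT)));
    }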
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 355a2ab8fc09..323641097f63 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -93,11 +93,7 @@ KVM_X86_OP(check_intercept)
KVM_X86_OP(handle_exit_irqoff)
KVM_X86_OP_NULL(request_immediate_exit)
KVM_X86_OP(sched_in)
-KVM_X86_OP_NULL(slot_enable_log_dirty)
-KVM_X86_OP_NULL(slot_disable_log_dirty)
-KVM_X86_OP_NULL(flush_log_dirty)
-KVM_X86_OP_NULL(enable_log_dirty_pt_masked)
-KVM_X86_OP_NULL(cpu_dirty_log_size)
+KVM_X86_OP_NULL(update_cpu_dirty_logging)
KVM_X86_OP_NULL(pre_block)
KVM_X86_OP_NULL(post_block)
KVM_X86_OP_NULL(vcpu_blocking)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 84499aad01a4..877a4025d8da 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -89,6 +89,8 @@
KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_APF_READY KVM_ARCH_REQ(28)
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
+#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
+ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -533,10 +535,16 @@ struct kvm_vcpu_hv {
/* Xen HVM per vcpu emulation context */
struct kvm_vcpu_xen {
u64 hypercall_rip;
+ u32 current_runstate;
bool vcpu_info_set;
bool vcpu_time_info_set;
+ bool runstate_set;
struct gfn_to_hva_cache vcpu_info_cache;
struct gfn_to_hva_cache vcpu_time_info_cache;
+ struct gfn_to_hva_cache runstate_cache;
+ u64 last_steal;
+ u64 runstate_entry_time;
+ u64 runstate_times[4];
};
struct kvm_vcpu_arch {
@@ -937,9 +945,6 @@ struct kvm_arch {
unsigned int indirect_shadow_pages;
u8 mmu_valid_gen;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
- /*
- * Hash table of struct kvm_mmu_page.
- */
struct list_head active_mmu_pages;
struct list_head zapped_obsolete_pages;
struct list_head lpage_disallowed_mmu_pages;
@@ -1007,6 +1012,7 @@ struct kvm_arch {
u32 bsp_vcpu_id;
u64 disabled_quirks;
+ int cpu_dirty_logging_count;
enum kvm_irqchip_mode irqchip_mode;
u8 nr_reserved_ioapic_pins;
@@ -1271,30 +1277,11 @@ struct kvm_x86_ops {
void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
/*
- * Arch-specific dirty logging hooks. These hooks are only supposed to
- * be valid if the specific arch has hardware-accelerated dirty logging
- * mechanism. Currently only for PML on VMX.
- *
- * - slot_enable_log_dirty:
- * called when enabling log dirty mode for the slot.
- * - slot_disable_log_dirty:
- * called when disabling log dirty mode for the slot.
- * also called when slot is created with log dirty disabled.
- * - flush_log_dirty:
- * called before reporting dirty_bitmap to userspace.
- * - enable_log_dirty_pt_masked:
- * called when reenabling log dirty for the GFNs in the mask after
- * corresponding bits are cleared in slot->dirty_bitmap.
+ * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer. A zero
+ * value indicates CPU dirty logging is unsupported or disabled.
*/
- void (*slot_enable_log_dirty)(struct kvm *kvm,
- struct kvm_memory_slot *slot);
- void (*slot_disable_log_dirty)(struct kvm *kvm,
- struct kvm_memory_slot *slot);
- void (*flush_log_dirty)(struct kvm *kvm);
- void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- gfn_t offset, unsigned long mask);
- int (*cpu_dirty_log_size)(void);
+ int cpu_dirty_log_size;
+ void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
/* pmu operations of sub-arch */
const struct kvm_pmu_ops *pmu_ops;
@@ -1437,11 +1424,6 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot);
-void kvm_mmu_slot_set_dirty(struct kvm *kvm,
- struct kvm_memory_slot *memslot);
-void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- gfn_t gfn_offset, unsigned long mask);
void kvm_mmu_zap_all(struct kvm *kvm);
void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
@@ -1613,7 +1595,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
void kvm_update_dr7(struct kvm_vcpu *vcpu);
int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
-int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 1a162e559753..7068e4bb057d 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -87,6 +87,18 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
#endif
/*
+ * The maximum amount of extra memory compared to the base size. The
+ * main scaling factor is the size of struct page. At extreme ratios
+ * of base:extra, all the base memory can be filled with page
+ * structures for the extra memory, leaving no space for anything
+ * else.
+ *
+ * 10x seems like a reasonable balance between scaling flexibility and
+ * leaving a practically usable system.
+ */
+#define XEN_EXTRA_MEM_RATIO (10)
+
+/*
* Helper functions to write or read unsigned long values to/from
* memory, when the access may fault.
*/
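XEN_EXTRA_MEM_RATIO caps hotplug/balloon "extra" memory at ten times the base allocation, for the struct-page overhead reason the comment gives. A sketch of how a consumer might apply the clamp, with extra_pages and base_pages as hypothetical names:

    #include <linux/minmax.h>

    /* Never admit more extra memory than the ratio allows. */
    extra_pages = min_t(unsigned long, extra_pages,
                        XEN_EXTRA_MEM_RATIO * base_pages);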
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 145a7ac0c19a..9c214d7085a4 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -161,7 +161,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
#endif
/* Kernel thread ? */
- if (unlikely(p->flags & PF_KTHREAD)) {
+ if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
memset(childregs, 0, sizeof(struct pt_regs));
kthread_frame_init(frame, sp, arg);
return 0;
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 7ac592664c52..a788d5120d4d 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -103,6 +103,15 @@ config KVM_AMD_SEV
Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
with Encrypted State (SEV-ES) on AMD processors.
+config KVM_XEN
+ bool "Support for Xen hypercall interface"
+ depends on KVM
+ help
+	  Provides KVM support for hosting Xen HVM guests and for
+	  passing Xen hypercalls to userspace.
+
+ If in doubt, say "N".
+
config KVM_MMU_AUDIT
bool "Audit KVM MMU"
depends on KVM && TRACEPOINTS
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index aeab168c5711..1b4766fe1de2 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -14,11 +14,12 @@ kvm-y += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
$(KVM)/dirty_ring.o
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
-kvm-y += x86.o emulate.o i8259.o irq.o lapic.o xen.o \
+kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
mmu/spte.o
kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
+kvm-$(CONFIG_KVM_XEN) += xen.o
kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index c8f2592ccc99..6bd2f8b830e4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -408,7 +408,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_mask(CPUID_7_0_EBX,
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
- F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
+ F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 7d2dae92d638..58fa8c029867 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -159,7 +159,7 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
struct kvm_vcpu_hv_synic *synic;
vcpu = get_vcpu_by_vpidx(kvm, vpidx);
- if (!vcpu)
+ if (!vcpu || !to_hv_vcpu(vcpu))
return NULL;
synic = to_hv_synic(vcpu);
return (synic->active) ? synic : NULL;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e507568cd55d..d75524bc8423 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1165,7 +1165,8 @@ static bool spte_wrprot_for_clear_dirty(u64 *sptep)
* - W bit on ad-disabled SPTEs.
* Returns true iff any D or W bits were cleared.
*/
-static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+ struct kvm_memory_slot *slot)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1180,35 +1181,6 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
return flush;
}
-static bool spte_set_dirty(u64 *sptep)
-{
- u64 spte = *sptep;
-
- rmap_printk("spte %p %llx\n", sptep, *sptep);
-
- /*
- * Similar to the !kvm_x86_ops.slot_disable_log_dirty case,
- * do not bother adding back write access to pages marked
- * SPTE_AD_WRPROT_ONLY_MASK.
- */
- spte |= shadow_dirty_mask;
-
- return mmu_spte_update(sptep, spte);
-}
-
-static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
-{
- u64 *sptep;
- struct rmap_iterator iter;
- bool flush = false;
-
- for_each_rmap_spte(rmap_head, &iter, sptep)
- if (spte_ad_enabled(*sptep))
- flush |= spte_set_dirty(sptep);
-
- return flush;
-}
-
/**
* kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
* @kvm: kvm instance
@@ -1248,9 +1220,9 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
*
* Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap.
*/
-void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
- struct kvm_memory_slot *slot,
- gfn_t gfn_offset, unsigned long mask)
+static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ gfn_t gfn_offset, unsigned long mask)
{
struct kvm_rmap_head *rmap_head;
@@ -1260,13 +1232,12 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
while (mask) {
rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
PG_LEVEL_4K, slot);
- __rmap_clear_dirty(kvm, rmap_head);
+ __rmap_clear_dirty(kvm, rmap_head, slot);
/* clear the first set bit */
mask &= mask - 1;
}
}
-EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked);
/**
* kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
@@ -1282,20 +1253,15 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn_offset, unsigned long mask)
{
- if (kvm_x86_ops.enable_log_dirty_pt_masked)
- static_call(kvm_x86_enable_log_dirty_pt_masked)(kvm, slot,
- gfn_offset,
- mask);
+ if (kvm_x86_ops.cpu_dirty_log_size)
+ kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
else
kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
}
int kvm_cpu_dirty_log_size(void)
{
- if (kvm_x86_ops.cpu_dirty_log_size)
- return static_call(kvm_x86_cpu_dirty_log_size)();
-
- return 0;
+ return kvm_x86_ops.cpu_dirty_log_size;
}
bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
@@ -1325,7 +1291,8 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn);
}
-static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+ struct kvm_memory_slot *slot)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -1345,7 +1312,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
struct kvm_memory_slot *slot, gfn_t gfn, int level,
unsigned long data)
{
- return kvm_zap_rmapp(kvm, rmap_head);
+ return kvm_zap_rmapp(kvm, rmap_head, slot);
}
static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
@@ -2499,7 +2466,21 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
return r;
}
-EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
+
+static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
+{
+ gpa_t gpa;
+ int r;
+
+ if (vcpu->arch.mmu->direct_map)
+ return 0;
+
+ gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
+
+ r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+ return r;
+}
static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
@@ -2753,11 +2734,18 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
if (sp->role.level > PG_LEVEL_4K)
return;
+ /*
+ * If addresses are being invalidated, skip prefetching to avoid
+ * accidentally prefetching those addresses.
+ */
+ if (unlikely(vcpu->kvm->mmu_notifier_count))
+ return;
+
__direct_pte_prefetch(vcpu, sp, sptep);
}
-static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn,
- kvm_pfn_t pfn, struct kvm_memory_slot *slot)
+static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+ struct kvm_memory_slot *slot)
{
unsigned long hva;
pte_t *pte;
@@ -2776,19 +2764,36 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn,
*/
hva = __gfn_to_hva_memslot(slot, gfn);
- pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level);
+ pte = lookup_address_in_mm(kvm->mm, hva, &level);
if (unlikely(!pte))
return PG_LEVEL_4K;
return level;
}
+int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, kvm_pfn_t pfn, int max_level)
+{
+ struct kvm_lpage_info *linfo;
+
+ max_level = min(max_level, max_huge_page_level);
+ for ( ; max_level > PG_LEVEL_4K; max_level--) {
+ linfo = lpage_info_slot(gfn, slot, max_level);
+ if (!linfo->disallow_lpage)
+ break;
+ }
+
+ if (max_level == PG_LEVEL_4K)
+ return PG_LEVEL_4K;
+
+ return host_pfn_mapping_level(kvm, gfn, pfn, slot);
+}
+
int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
int max_level, kvm_pfn_t *pfnp,
bool huge_page_disallowed, int *req_level)
{
struct kvm_memory_slot *slot;
- struct kvm_lpage_info *linfo;
kvm_pfn_t pfn = *pfnp;
kvm_pfn_t mask;
int level;
@@ -2805,17 +2810,7 @@ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
if (!slot)
return PG_LEVEL_4K;
- max_level = min(max_level, max_huge_page_level);
- for ( ; max_level > PG_LEVEL_4K; max_level--) {
- linfo = lpage_info_slot(gfn, slot, max_level);
- if (!linfo->disallow_lpage)
- break;
- }
-
- if (max_level == PG_LEVEL_4K)
- return PG_LEVEL_4K;
-
- level = host_pfn_mapping_level(vcpu, gfn, pfn, slot);
+ level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
if (level == PG_LEVEL_4K)
return level;
@@ -3437,7 +3432,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
write_unlock(&vcpu->kvm->mmu_lock);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
u32 access, struct x86_exception *exception)
@@ -3653,8 +3647,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
- gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
- bool *writable)
+ gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
+ bool write, bool *writable)
{
struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
bool async;
@@ -3667,7 +3661,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
}
async = false;
- *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
+ *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async,
+ write, writable, hva);
if (!async)
return false; /* *pfn has correct page already */
@@ -3681,7 +3676,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
return true;
}
- *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
+ *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
+ write, writable, hva);
return false;
}
@@ -3694,6 +3690,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
gfn_t gfn = gpa >> PAGE_SHIFT;
unsigned long mmu_seq;
kvm_pfn_t pfn;
+ hva_t hva;
int r;
if (page_fault_handle_page_track(vcpu, error_code, gfn))
@@ -3712,7 +3709,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+ if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva,
+ write, &map_writable))
return RET_PF_RETRY;
if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
@@ -3725,7 +3723,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
else
write_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
goto out_unlock;
r = make_mmu_pages_available(vcpu);
if (r)
@@ -5003,22 +5001,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
write_unlock(&vcpu->kvm->mmu_lock);
}
-int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
-{
- gpa_t gpa;
- int r;
-
- if (vcpu->arch.mmu->direct_map)
- return 0;
-
- gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
-
- r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-
- return r;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
-
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
void *insn, int insn_len)
{
@@ -5117,7 +5099,6 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
mmu->invlpg(vcpu, gva, root_hpa);
}
}
-EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva);
void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
{
@@ -5157,7 +5138,6 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
* for them.
*/
}
-EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
int tdp_huge_page_level)
@@ -5182,7 +5162,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
EXPORT_SYMBOL_GPL(kvm_configure_mmu);
/* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
+typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+ struct kvm_memory_slot *slot);
/* The caller should hold mmu-lock before calling this function. */
static __always_inline bool
@@ -5196,7 +5177,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
end_gfn, &iterator) {
if (iterator.rmap)
- flush |= fn(kvm, iterator.rmap);
+ flush |= fn(kvm, iterator.rmap, memslot);
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
if (flush && lock_flush_tlb) {
@@ -5230,22 +5211,6 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
}
static __always_inline bool
-slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
- slot_level_handler fn, bool lock_flush_tlb)
-{
- return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K,
- KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
-}
-
-static __always_inline bool
-slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
- slot_level_handler fn, bool lock_flush_tlb)
-{
- return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K + 1,
- KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
-}
-
-static __always_inline bool
slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
slot_level_handler fn, bool lock_flush_tlb)
{
@@ -5485,7 +5450,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
}
static bool slot_rmap_write_protect(struct kvm *kvm,
- struct kvm_rmap_head *rmap_head)
+ struct kvm_rmap_head *rmap_head,
+ struct kvm_memory_slot *slot)
{
return __rmap_write_protect(kvm, rmap_head, false);
}
@@ -5519,7 +5485,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
}
static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
- struct kvm_rmap_head *rmap_head)
+ struct kvm_rmap_head *rmap_head,
+ struct kvm_memory_slot *slot)
{
u64 *sptep;
struct rmap_iterator iter;
@@ -5540,8 +5507,8 @@ restart:
* mapping if the indirect sp has level = 1.
*/
if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
- (kvm_is_zone_device_pfn(pfn) ||
- PageCompound(pfn_to_page(pfn)))) {
+ sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
+ pfn, PG_LEVEL_NUM)) {
pte_list_remove(rmap_head, sptep);
if (kvm_available_flush_tlb_with_range())
@@ -5561,12 +5528,13 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
const struct kvm_memory_slot *memslot)
{
/* FIXME: const-ify all uses of struct kvm_memory_slot. */
+ struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot;
+
write_lock(&kvm->mmu_lock);
- slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
- kvm_mmu_zap_collapsible_spte, true);
+ slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
if (is_tdp_mmu_enabled(kvm))
- kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
+ kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
write_unlock(&kvm->mmu_lock);
}
@@ -5605,40 +5573,6 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
}
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
-
-void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
-{
- bool flush;
-
- write_lock(&kvm->mmu_lock);
- flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
- false);
- if (is_tdp_mmu_enabled(kvm))
- flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
- write_unlock(&kvm->mmu_lock);
-
- if (flush)
- kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
-
-void kvm_mmu_slot_set_dirty(struct kvm *kvm,
- struct kvm_memory_slot *memslot)
-{
- bool flush;
-
- write_lock(&kvm->mmu_lock);
- flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
- if (is_tdp_mmu_enabled(kvm))
- flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
- write_unlock(&kvm->mmu_lock);
-
- if (flush)
- kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
void kvm_mmu_zap_all(struct kvm *kvm)
{
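
The mmu.c hunks above pull the hugepage-level walk out of kvm_mmu_hugepage_adjust() into kvm_mmu_max_mapping_level() so the zap-collapsible paths can reuse it. A minimal userspace sketch of that walk, with lpage_info_slot() replaced by an illustrative stub (none of these helper names are the kernel's):

    #include <stdio.h>

    enum { PG_LEVEL_4K = 1, PG_LEVEL_2M, PG_LEVEL_1G, PG_LEVEL_NUM };

    /* Stand-in for lpage_info_slot(): pretend 1G mappings are disallowed. */
    static int disallow_lpage(unsigned long gfn, int level)
    {
        (void)gfn;
        return level >= PG_LEVEL_1G;
    }

    /* Walk down from the requested level to the first permitted one. */
    static int max_mapping_level(unsigned long gfn, int max_level)
    {
        for ( ; max_level > PG_LEVEL_4K; max_level--)
            if (!disallow_lpage(gfn, max_level))
                break;
        return max_level;   /* the kernel then clamps to the host level */
    }

    int main(void)
    {
        printf("level = %d\n", max_mapping_level(0x1234, PG_LEVEL_1G));
        return 0;
    }

The real helper additionally consults host_pfn_mapping_level(), so the result never exceeds how the host itself maps the page.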
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 9e38d3c5daad..ec4fc28b325a 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -81,12 +81,15 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
{
/*
- * When using the EPT page-modification log, the GPAs in the log
- * would come from L2 rather than L1. Therefore, we need to rely
- * on write protection to record dirty pages. This also bypasses
- * PML, since writes now result in a vmexit.
+ * When using the EPT page-modification log, the GPAs in the CPU dirty
+ * log would come from L2 rather than L1. Therefore, we need to rely
+ * on write protection to record dirty pages, which bypasses PML, since
+ * writes now result in a vmexit. Note, the check on CPU dirty logging
+ * being enabled is mandatory as the bits used to denote WP-only SPTEs
+ * are reserved for NPT w/ PAE (32-bit KVM).
*/
- return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
+ return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
+ kvm_x86_ops.cpu_dirty_log_size;
}
bool is_nx_huge_page_enabled(void);
@@ -138,6 +141,8 @@ enum {
#define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
#define SET_SPTE_SPURIOUS BIT(2)
+int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_memory_slot *slot,
+ gfn_t gfn, kvm_pfn_t pfn, int max_level);
int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
int max_level, kvm_pfn_t *pfnp,
bool huge_page_disallowed, int *req_level);
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index d9f66cc459e8..55d7b473ac44 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -601,6 +601,13 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
if (sp->role.level > PG_LEVEL_4K)
return;
+ /*
+ * If addresses are being invalidated, skip prefetching to avoid
+ * accidentally prefetching those addresses.
+ */
+ if (unlikely(vcpu->kvm->mmu_notifier_count))
+ return;
+
if (sp->role.direct)
return __direct_pte_prefetch(vcpu, sp, sptep);
@@ -790,6 +797,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
struct guest_walker walker;
int r;
kvm_pfn_t pfn;
+ hva_t hva;
unsigned long mmu_seq;
bool map_writable, is_self_change_mapping;
int max_level;
@@ -840,8 +848,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
mmu_seq = vcpu->kvm->mmu_notifier_seq;
smp_rmb();
- if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
- &map_writable))
+ if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
+ write_fault, &map_writable))
return RET_PF_RETRY;
if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
@@ -869,7 +877,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
r = RET_PF_RETRY;
write_lock(&vcpu->kvm->mmu_lock);
- if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+ if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
goto out_unlock;
kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
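
Both page-fault paths above now thread the faulting hva down to the retry check, replacing mmu_notifier_retry() with mmu_notifier_retry_hva(). A sketch of the intended predicate, assuming a simplified bookkeeping struct (not the kernel's struct kvm fields):

    /* Simplified bookkeeping, not the kernel's layout. */
    struct mmu_inv {
        unsigned long count;        /* invalidations in progress */
        unsigned long seq;          /* bumped when one completes */
        unsigned long start, end;   /* union of in-flight ranges */
    };

    /* Retry the fault if the hva lies inside an in-flight range, or if
     * any invalidation completed after 'mmu_seq' was sampled. */
    static int need_retry(const struct mmu_inv *inv,
                          unsigned long mmu_seq, unsigned long hva)
    {
        if (inv->count && hva >= inv->start && hva < inv->end)
            return 1;
        return inv->seq != mmu_seq;
    }

Faults on noslot pfns skip the check entirely, since no hva was resolved for them.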
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 71e100a5670f..c926c6b899a1 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1269,67 +1269,15 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
}
/*
- * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
- * only used for PML, and so will involve setting the dirty bit on each SPTE.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
- */
-static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
- gfn_t start, gfn_t end)
-{
- struct tdp_iter iter;
- u64 new_spte;
- bool spte_set = false;
-
- rcu_read_lock();
-
- tdp_root_for_each_pte(iter, root, start, end) {
- if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
- continue;
-
- if (!is_shadow_present_pte(iter.old_spte) ||
- iter.old_spte & shadow_dirty_mask)
- continue;
-
- new_spte = iter.old_spte | shadow_dirty_mask;
-
- tdp_mmu_set_spte(kvm, &iter, new_spte);
- spte_set = true;
- }
-
- rcu_read_unlock();
- return spte_set;
-}
-
-/*
- * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
- * only used for PML, and so will involve setting the dirty bit on each SPTE.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
- */
-bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- struct kvm_mmu_page *root;
- int root_as_id;
- bool spte_set = false;
-
- for_each_tdp_mmu_root_yield_safe(kvm, root) {
- root_as_id = kvm_mmu_page_as_id(root);
- if (root_as_id != slot->as_id)
- continue;
-
- spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
- slot->base_gfn + slot->npages);
- }
- return spte_set;
-}
-
-/*
* Clear leaf entries which could be replaced by large mappings, for
* GFNs within the slot.
*/
static void zap_collapsible_spte_range(struct kvm *kvm,
struct kvm_mmu_page *root,
- gfn_t start, gfn_t end)
+ struct kvm_memory_slot *slot)
{
+ gfn_t start = slot->base_gfn;
+ gfn_t end = start + slot->npages;
struct tdp_iter iter;
kvm_pfn_t pfn;
bool spte_set = false;
@@ -1348,7 +1296,8 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
pfn = spte_to_pfn(iter.old_spte);
if (kvm_is_reserved_pfn(pfn) ||
- !PageTransCompoundMap(pfn_to_page(pfn)))
+ iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
+ pfn, PG_LEVEL_NUM))
continue;
tdp_mmu_set_spte(kvm, &iter, 0);
@@ -1366,7 +1315,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
* be replaced by large mappings, for GFNs within the slot.
*/
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
- const struct kvm_memory_slot *slot)
+ struct kvm_memory_slot *slot)
{
struct kvm_mmu_page *root;
int root_as_id;
@@ -1376,8 +1325,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
if (root_as_id != slot->as_id)
continue;
- zap_collapsible_spte_range(kvm, root, slot->base_gfn,
- slot->base_gfn + slot->npages);
+ zap_collapsible_spte_range(kvm, root, slot);
}
}
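
With this change the TDP MMU's collapse logic matches the legacy MMU: a leaf SPTE is zapped when kvm_mmu_max_mapping_level() now reports a larger permissible level, rather than inferring huge-page potential from struct page state. Reduced to its essence (illustrative sketch):

    /* Zap the leaf so the next fault can install a huge mapping, but
     * only when a strictly larger level is now permitted at this gfn. */
    static int should_zap_for_collapse(int spte_level, int allowed_level)
    {
        return spte_level < allowed_level;
    }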
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index b4b65e3699b3..3b761c111bff 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -33,9 +33,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
struct kvm_memory_slot *slot,
gfn_t gfn, unsigned long mask,
bool wrprot);
-bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot);
void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
- const struct kvm_memory_slot *slot);
+ struct kvm_memory_slot *slot);
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index cc91738ab445..35891d9a1099 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -51,6 +51,23 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
nested_svm_vmexit(svm);
}
+static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ WARN_ON(!is_guest_mode(vcpu));
+
+ if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
+ !svm->nested.nested_run_pending) {
+ svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
+ svm->vmcb->control.exit_code_hi = 0;
+ svm->vmcb->control.exit_info_1 = fault->error_code;
+ svm->vmcb->control.exit_info_2 = fault->address;
+ nested_svm_vmexit(svm);
+ } else {
+ kvm_inject_page_fault(vcpu, fault);
+ }
+}
+
static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -436,16 +453,33 @@ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
{
int ret;
+ trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
+ vmcb12->save.rip,
+ vmcb12->control.int_ctl,
+ vmcb12->control.event_inj,
+ vmcb12->control.nested_ctl);
+
+ trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
+ vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
+ vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
+ vmcb12->control.intercepts[INTERCEPT_WORD3],
+ vmcb12->control.intercepts[INTERCEPT_WORD4],
+ vmcb12->control.intercepts[INTERCEPT_WORD5]);
+
svm->nested.vmcb12_gpa = vmcb12_gpa;
load_nested_vmcb_control(svm, &vmcb12->control);
- nested_prepare_vmcb_save(svm, vmcb12);
nested_prepare_vmcb_control(svm);
+ nested_prepare_vmcb_save(svm, vmcb12);
ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
nested_npt_enabled(svm));
if (ret)
return ret;
+ if (!npt_enabled)
+ svm->vcpu.arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
+
svm_set_gif(svm, true);
return 0;
@@ -489,18 +523,6 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
goto out;
}
- trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
- vmcb12->save.rip,
- vmcb12->control.int_ctl,
- vmcb12->control.event_inj,
- vmcb12->control.nested_ctl);
-
- trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
- vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
- vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
- vmcb12->control.intercepts[INTERCEPT_WORD3],
- vmcb12->control.intercepts[INTERCEPT_WORD4],
- vmcb12->control.intercepts[INTERCEPT_WORD5]);
/* Clear internal status */
kvm_clear_exception_queue(&svm->vcpu);
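
svm_inject_page_fault_nested() above reflects a #PF into L1 as a synthesized #VMEXIT when L1 intercepts the exception and no nested VMRUN is pending; otherwise the fault goes straight into the guest. A hedged model of the dispatch (types and callbacks are stand-ins, not KVM's):

    struct fault {
        unsigned long address;
        unsigned int error_code;
    };

    /* Reflect to L1 when it intercepts #PF and no VMRUN is pending,
     * else deliver the fault directly to the guest. */
    static void inject_pf_nested(int l1_intercepts_pf, int run_pending,
                                 const struct fault *f,
                                 void (*reflect)(const struct fault *),
                                 void (*inject)(const struct fault *))
    {
        if (l1_intercepts_pf && !run_pending)
            reflect(f);   /* synthesize SVM_EXIT_EXCP_BASE + PF_VECTOR */
        else
            inject(f);
    }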
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index adb3619a3c16..baee91c1e936 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -926,9 +926,6 @@ static __init void svm_set_cpu_caps(void)
if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
boot_cpu_has(X86_FEATURE_AMD_SSBD))
kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
-
- /* Enable INVPCID feature */
- kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
}
static __init int svm_hardware_setup(void)
@@ -1103,12 +1100,12 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
static void svm_check_invpcid(struct vcpu_svm *svm)
{
/*
- * Intercept INVPCID instruction only if shadow page table is
- * enabled. Interception is not required with nested page table
- * enabled.
+ * Intercept INVPCID if shadow paging is enabled to sync/free shadow
+ * roots, or if INVPCID is disabled in the guest to inject #UD.
*/
if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
- if (!npt_enabled)
+ if (!npt_enabled ||
+ !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID))
svm_set_intercept(svm, INTERCEPT_INVPCID);
else
svm_clr_intercept(svm, INTERCEPT_INVPCID);
@@ -1203,6 +1200,7 @@ static void init_vmcb(struct vcpu_svm *svm)
init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
+ svm_set_cr4(&svm->vcpu, 0);
svm_set_efer(&svm->vcpu, 0);
save->dr6 = 0xffff0ff0;
kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);
@@ -2214,15 +2212,20 @@ static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
[SVM_INSTR_VMSAVE] = vmsave_interception,
};
struct vcpu_svm *svm = to_svm(vcpu);
+ int ret;
if (is_guest_mode(vcpu)) {
svm->vmcb->control.exit_code = guest_mode_exit_codes[opcode];
svm->vmcb->control.exit_info_1 = 0;
svm->vmcb->control.exit_info_2 = 0;
- return nested_svm_vmexit(svm);
- } else
- return svm_instr_handlers[opcode](svm);
+ /* Returns '1' or -errno on failure, '0' on success. */
+ ret = nested_svm_vmexit(svm);
+ if (ret)
+ return ret;
+ return 1;
+ }
+ return svm_instr_handlers[opcode](svm);
}
/*
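
The emulate_svm_instr() fix above normalizes return values: nested_svm_vmexit() returns 0 on success, but an exit handler must return 1 to resume the guest (0 requests an exit to userspace, negative is an error). The translation in isolation:

    /* Translate nested_svm_vmexit()'s 0-on-success convention into the
     * exit-handler convention, where 1 resumes the guest. */
    static int handle_nested_instr(int (*do_vmexit)(void))
    {
        int ret = do_vmexit();   /* 0 on success, 1 or -errno on failure */

        return ret ? ret : 1;
    }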
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b2f0b5e9cd63..bcca0b80e0d0 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2167,15 +2167,13 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
/*
- * The PML address never changes, so it is constant in vmcs02.
- * Conceptually we want to copy the PML index from vmcs01 here,
- * and then back to vmcs01 on nested vmexit. But since we flush
- * the log and reset GUEST_PML_INDEX on each vmexit, the PML
- * index is also effectively constant in vmcs02.
+ * PML is emulated for L2, but never enabled in hardware as the MMU
+ * handles A/D emulation. Disabling PML for L2 also avoids having to
+ * deal with filtering out L2 GPAs from the buffer.
*/
if (enable_pml) {
- vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
- vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+ vmcs_write64(PML_ADDRESS, 0);
+ vmcs_write16(GUEST_PML_INDEX, -1);
}
if (cpu_has_vmx_encls_vmexit())
@@ -2210,7 +2208,7 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
{
- u32 exec_control, vmcs12_exec_ctrl;
+ u32 exec_control;
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
@@ -2284,11 +2282,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_VMFUNC);
if (nested_cpu_has(vmcs12,
- CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
- vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
- ~SECONDARY_EXEC_ENABLE_PML;
- exec_control |= vmcs12_exec_ctrl;
- }
+ CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+ exec_control |= vmcs12->secondary_vm_exec_control;
+
+ /* PML is emulated and never enabled in hardware for L2. */
+ exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
/* VMCS shadowing for L2 is emulated for now */
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
@@ -4200,9 +4198,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
- if (!enable_ept)
- vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
@@ -4495,6 +4490,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
vmx_set_virtual_apic_mode(vcpu);
}
+ if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
+ vmx->nested.update_vmcs01_cpu_dirty_logging = false;
+ vmx_update_cpu_dirty_logging(vcpu);
+ }
+
/* Unpin physical memory we referred to in vmcs02 */
if (vmx->nested.apic_access_page) {
kvm_release_page_clean(vmx->nested.apic_access_page);
@@ -5793,7 +5793,10 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
case EXIT_REASON_PREEMPTION_TIMER:
return true;
case EXIT_REASON_PML_FULL:
- /* We emulate PML support to L1. */
+ /*
+ * PML is emulated for an L1 VMM and should never be enabled in
+ * vmcs02; always "handle" PML_FULL by exiting to userspace.
+ */
return true;
case EXIT_REASON_VMFUNC:
/* VM functions are emulated through L2->L0 vmexits. */
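
prepare_vmcs02_early() now merges L1's secondary controls first and strips PML afterwards, guaranteeing PML stays disabled in vmcs02 no matter what L1 requested. In outline (the bit position is illustrative, not quoted from the SDM):

    #define SEC_ENABLE_PML (1u << 17)   /* illustrative bit position */

    /* Merge L0's baseline with L1's requested controls, then strip PML
     * unconditionally: PML is emulated for L1, never run for L2. */
    static unsigned int vmcs02_secondary_controls(unsigned int l0,
                                                  unsigned int l1,
                                                  int l1_has_secondary)
    {
        unsigned int exec_control = l0;

        if (l1_has_secondary)
            exec_control |= l1;

        return exec_control & ~SEC_ENABLE_PML;
    }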
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index d1df618cb7de..9efc1a6b8693 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -298,7 +298,7 @@ int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
if (IS_ERR(event)) {
pr_debug_ratelimited("%s: failed %ld\n",
__func__, PTR_ERR(event));
- return -ENOENT;
+ return PTR_ERR(event);
}
lbr_desc->event = event;
pmu->event_count++;
@@ -320,7 +320,7 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
return false;
- if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
+ if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
goto dummy;
/*
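
The pmu_intel.c change propagates the real PTR_ERR() from perf instead of flattening every failure to -ENOENT, and the caller now treats only negative returns as failure (the old `!ret` test also fired on the 0 success value). The convention, in miniature (illustrative names):

    /* Zero or positive means usable; only a negative errno is failure. */
    static int ensure_lbr_event(int have_event, int (*create)(void))
    {
        if (!have_event && create() < 0)
            return -1;   /* take the dummy-access fallback */
        return 0;
    }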
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index e0a3a9be654b..50810d471462 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -4277,7 +4277,12 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
*/
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
- if (!enable_pml)
+ /*
+ * PML is enabled/disabled when dirty logging of memslots changes, but
+ * it needs to be set here when dirty logging is already active, e.g.
+ * if this vCPU was created after dirty logging was enabled.
+ */
+ if (!vcpu->kvm->arch.cpu_dirty_logging_count)
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
if (cpu_has_vmx_xsaves()) {
@@ -4295,18 +4300,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
}
vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
-
- /*
- * Expose INVPCID if and only if PCID is also exposed to the guest.
- * INVPCID takes a #UD when it's disabled in the VMCS, but a #GP or #PF
- * if CR4.PCIDE=0. Enumerating CPUID.INVPCID=1 would lead to incorrect
- * behavior from the guest perspective (it would expect #GP or #PF).
- */
- if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
- guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
-
vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED);
@@ -5776,24 +5771,6 @@ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
}
-/*
- * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
- * Called before reporting dirty_bitmap to userspace.
- */
-static void kvm_flush_pml_buffers(struct kvm *kvm)
-{
- int i;
- struct kvm_vcpu *vcpu;
- /*
- * We only need to kick vcpu out of guest mode here, as PML buffer
- * is flushed at beginning of all VMEXITs, and it's obvious that only
- * vcpus running in guest are possible to have unflushed GPAs in PML
- * buffer.
- */
- kvm_for_each_vcpu(i, vcpu, kvm)
- kvm_vcpu_kick(vcpu);
-}
-
static void vmx_dump_sel(char *name, uint32_t sel)
{
pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
@@ -5976,9 +5953,10 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
* updated. Another benefit is that in kvm_vm_ioctl_get_dirty_log, before
* querying dirty_bitmap, we only need to kick all vcpus out of guest
* mode, as once a vcpu is in root mode the PML buffer must have been
- * flushed already.
+ * flushed already. Note, PML is never enabled in hardware while
+ * running L2.
*/
- if (enable_pml)
+ if (enable_pml && !is_guest_mode(vcpu))
vmx_flush_pml_buffer(vcpu);
/*
@@ -5995,6 +5973,13 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
if (is_guest_mode(vcpu)) {
/*
+ * PML is never enabled when running L2; bail immediately if a
+ * PML full exit occurs, as something is horribly wrong.
+ */
+ if (exit_reason.basic == EXIT_REASON_PML_FULL)
+ goto unexpected_vmexit;
+
+ /*
* The host physical addresses of some pages of guest memory
* are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
* Page). The CPU may write to these pages via their host
@@ -6851,13 +6836,15 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
kvm_machine_check();
+ if (likely(!vmx->exit_reason.failed_vmentry))
+ vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+
trace_kvm_exit(vmx->exit_reason.full, vcpu, KVM_ISA_VMX);
if (unlikely(vmx->exit_reason.failed_vmentry))
return EXIT_FASTPATH_NONE;
vmx->loaded_vmcs->launched = 1;
- vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
@@ -7330,8 +7317,8 @@ static __init void vmx_set_cpu_caps(void)
/* CPUID 0x7 */
if (kvm_mpx_supported())
kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);
- if (cpu_has_vmx_invpcid())
- kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
+ if (!cpu_has_vmx_invpcid())
+ kvm_cpu_cap_clear(X86_FEATURE_INVPCID);
if (vmx_pt_mode_is_host_guest())
kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
@@ -7509,30 +7496,24 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
shrink_ple_window(vcpu);
}
-static void vmx_slot_enable_log_dirty(struct kvm *kvm,
- struct kvm_memory_slot *slot)
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
{
- if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
- kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
- kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
-}
-
-static void vmx_slot_disable_log_dirty(struct kvm *kvm,
- struct kvm_memory_slot *slot)
-{
- kvm_mmu_slot_set_dirty(kvm, slot);
-}
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
-static void vmx_flush_log_dirty(struct kvm *kvm)
-{
- kvm_flush_pml_buffers(kvm);
-}
+ if (is_guest_mode(vcpu)) {
+ vmx->nested.update_vmcs01_cpu_dirty_logging = true;
+ return;
+ }
-static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
- struct kvm_memory_slot *memslot,
- gfn_t offset, unsigned long mask)
-{
- kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
+ /*
+ * Note, cpu_dirty_logging_count can be changed concurrently with this
+ * code, but in that case another update request will be made and so
+ * the guest will never run with a stale PML value.
+ */
+ if (vcpu->kvm->arch.cpu_dirty_logging_count)
+ secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
+ else
+ secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
}
static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -7642,11 +7623,6 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
return supported & BIT(bit);
}
-static int vmx_cpu_dirty_log_size(void)
-{
- return enable_pml ? PML_ENTITY_NUM : 0;
-}
-
static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = hardware_unsetup,
@@ -7746,10 +7722,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.sched_in = vmx_sched_in,
- .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
- .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
- .flush_log_dirty = vmx_flush_log_dirty,
- .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+ .cpu_dirty_log_size = PML_ENTITY_NUM,
+ .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
.pre_block = vmx_pre_block,
.post_block = vmx_post_block,
@@ -7777,7 +7751,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.msr_filter_changed = vmx_msr_filter_changed,
.complete_emulated_msr = kvm_complete_insn_gp,
- .cpu_dirty_log_size = vmx_cpu_dirty_log_size,
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
};
@@ -7894,13 +7867,8 @@ static __init int hardware_setup(void)
if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
enable_pml = 0;
- if (!enable_pml) {
- vmx_x86_ops.slot_enable_log_dirty = NULL;
- vmx_x86_ops.slot_disable_log_dirty = NULL;
- vmx_x86_ops.flush_log_dirty = NULL;
- vmx_x86_ops.enable_log_dirty_pt_masked = NULL;
- vmx_x86_ops.cpu_dirty_log_size = NULL;
- }
+ if (!enable_pml)
+ vmx_x86_ops.cpu_dirty_log_size = 0;
if (!cpu_has_vmx_preemption_timer())
enable_preemption_timer = false;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 12c53d05a902..89da5e1251f1 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -165,6 +165,7 @@ struct nested_vmx {
bool change_vmcs01_virtual_apic_mode;
bool reload_vmcs01_apic_access_page;
+ bool update_vmcs01_cpu_dirty_logging;
/*
* Enlightened VMCS has been enabled. It does not mean that L1 has to
@@ -393,6 +394,7 @@ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
u32 msr, int type, bool value);
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
static inline u8 vmx_get_rvi(void)
{
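
vmx_update_cpu_dirty_logging() cannot safely rewrite vmcs01's secondary controls while vmcs02 is loaded, so in guest mode it only sets update_vmcs01_cpu_dirty_logging and the toggle is replayed on nested VM-exit (see the nested.c hunk earlier). A condensed model of that deferral (field and bit names are illustrative):

    #define SEC_ENABLE_PML (1u << 17)       /* illustrative */

    struct vcpu_model {
        int in_guest_mode;                  /* vmcs02 currently loaded */
        int update_vmcs01_pending;          /* replay on nested VM-exit */
        unsigned int vmcs01_secondary;      /* cached vmcs01 controls */
    };

    static void update_cpu_dirty_logging(struct vcpu_model *v, int count)
    {
        if (v->in_guest_mode) {
            v->update_vmcs01_pending = 1;   /* defer; vmcs01 not loaded */
            return;
        }

        if (count)
            v->vmcs01_secondary |= SEC_ENABLE_PML;
        else
            v->vmcs01_secondary &= ~SEC_ENABLE_PML;
    }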
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 884e5b3838c7..2a20ce60152e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2957,6 +2957,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
struct kvm_host_map map;
struct kvm_steal_time *st;
+ if (kvm_xen_msr_enabled(vcpu->kvm)) {
+ kvm_xen_runstate_set_running(vcpu);
+ return;
+ }
+
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
@@ -3756,11 +3761,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
r = 1;
break;
+#ifdef CONFIG_KVM_XEN
case KVM_CAP_XEN_HVM:
r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
KVM_XEN_HVM_CONFIG_SHARED_INFO;
+ if (sched_info_on())
+ r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
break;
+#endif
case KVM_CAP_SYNC_REGS:
r = KVM_SYNC_X86_VALID_FIELDS;
break;
@@ -4038,7 +4047,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
if (vcpu->preempted && !vcpu->arch.guest_state_protected)
vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
- kvm_steal_time_set_preempted(vcpu);
+ if (kvm_xen_msr_enabled(vcpu->kvm))
+ kvm_xen_runstate_set_preempted(vcpu);
+ else
+ kvm_steal_time_set_preempted(vcpu);
+
static_call(kvm_x86_vcpu_put)(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
/*
@@ -5013,6 +5026,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
case KVM_GET_SUPPORTED_HV_CPUID:
r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
break;
+#ifdef CONFIG_KVM_XEN
case KVM_XEN_VCPU_GET_ATTR: {
struct kvm_xen_vcpu_attr xva;
@@ -5033,6 +5047,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_xen_vcpu_set_attr(vcpu, &xva);
break;
}
+#endif
default:
r = -EINVAL;
}
@@ -5215,10 +5230,18 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
/*
- * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+ * Flush all CPUs' dirty log buffers to the dirty_bitmap. Called
+ * before reporting dirty_bitmap to userspace. KVM flushes the buffers
+ * on all VM-Exits, thus we only need to kick running vCPUs to force a
+ * VM-Exit.
*/
- static_call_cond(kvm_x86_flush_log_dirty)(kvm);
+ struct kvm_vcpu *vcpu;
+ int i;
+
+ kvm_for_each_vcpu(i, vcpu, kvm)
+ kvm_vcpu_kick(vcpu);
}
int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
@@ -5646,6 +5669,7 @@ set_pit2_out:
kvm->arch.bsp_vcpu_id = arg;
mutex_unlock(&kvm->lock);
break;
+#ifdef CONFIG_KVM_XEN
case KVM_XEN_HVM_CONFIG: {
struct kvm_xen_hvm_config xhc;
r = -EFAULT;
@@ -5674,6 +5698,7 @@ set_pit2_out:
r = kvm_xen_hvm_set_attr(kvm, &xha);
break;
}
+#endif
case KVM_SET_CLOCK: {
struct kvm_clock_data user_ns;
u64 now_ns;
@@ -8032,7 +8057,10 @@ void kvm_arch_exit(void)
kvm_mmu_module_exit();
free_percpu(user_return_msrs);
kmem_cache_destroy(x86_fpu_cache);
+#ifdef CONFIG_KVM_XEN
+ static_key_deferred_flush(&kvm_xen_enabled);
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
+#endif
}
static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
@@ -8980,6 +9008,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_check_async_pf_completion(vcpu);
if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
static_call(kvm_x86_msr_filter_changed)(vcpu);
+
+ if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
+ static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
}
if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -10748,75 +10779,96 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
return 0;
}
+
+static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
+{
+ struct kvm_arch *ka = &kvm->arch;
+
+ if (!kvm_x86_ops.cpu_dirty_log_size)
+ return;
+
+ if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
+ (!enable && --ka->cpu_dirty_logging_count == 0))
+ kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
+
+ WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
+}
+
static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
struct kvm_memory_slot *old,
struct kvm_memory_slot *new,
enum kvm_mr_change change)
{
+ bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
+
/*
- * Nothing to do for RO slots or CREATE/MOVE/DELETE of a slot.
- * See comments below.
+ * Update CPU dirty logging if dirty logging is being toggled. This
+ * applies to all operations.
*/
- if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
- return;
+ if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)
+ kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
/*
- * Dirty logging tracks sptes in 4k granularity, meaning that large
- * sptes have to be split. If live migration is successful, the guest
- * in the source machine will be destroyed and large sptes will be
- * created in the destination. However, if the guest continues to run
- * in the source machine (for example if live migration fails), small
- * sptes will remain around and cause bad performance.
+ * Nothing more to do for RO slots (which can't be dirtied and can't be
+ * made writable) or CREATE/MOVE/DELETE of a slot.
*
- * Scan sptes if dirty logging has been stopped, dropping those
- * which can be collapsed into a single large-page spte. Later
- * page faults will create the large-page sptes.
- *
- * There is no need to do this in any of the following cases:
+ * For a memslot with dirty logging disabled:
* CREATE: No dirty mappings will already exist.
* MOVE/DELETE: The old mappings will already have been cleaned up by
* kvm_arch_flush_shadow_memslot()
+ *
+ * For a memslot with dirty logging enabled:
+ * CREATE: No shadow pages exist, thus nothing to write-protect
+ * and no dirty bits to clear.
+ * MOVE/DELETE: The old mappings will already have been cleaned up by
+ * kvm_arch_flush_shadow_memslot().
*/
- if ((old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
- !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
- kvm_mmu_zap_collapsible_sptes(kvm, new);
+ if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
+ return;
/*
- * Enable or disable dirty logging for the slot.
- *
- * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of the old
- * slot have been zapped so no dirty logging updates are needed for
- * the old slot.
- * For KVM_MR_CREATE and KVM_MR_MOVE, once the new slot is visible
- * any mappings that might be created in it will consume the
- * properties of the new slot and do not need to be updated here.
- *
- * When PML is enabled, the kvm_x86_ops dirty logging hooks are
- * called to enable/disable dirty logging.
- *
- * When disabling dirty logging with PML enabled, the D-bit is set
- * for sptes in the slot in order to prevent unnecessary GPA
- * logging in the PML buffer (and potential PML buffer full VMEXIT).
- * This guarantees leaving PML enabled for the guest's lifetime
- * won't have any additional overhead from PML when the guest is
- * running with dirty logging disabled.
- *
- * When enabling dirty logging, large sptes are write-protected
- * so they can be split on first write. New large sptes cannot
- * be created for this slot until the end of the logging.
- * See the comments in fast_page_fault().
- * For small sptes, nothing is done if the dirty log is in the
- * initial-all-set state. Otherwise, depending on whether pml
- * is enabled the D-bit or the W-bit will be cleared.
+ * READONLY and non-flags changes were filtered out above, and the only
+ * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
+ * logging isn't being toggled on or off.
*/
- if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
- if (kvm_x86_ops.slot_enable_log_dirty) {
- static_call(kvm_x86_slot_enable_log_dirty)(kvm, new);
- } else {
- int level =
- kvm_dirty_log_manual_protect_and_init_set(kvm) ?
- PG_LEVEL_2M : PG_LEVEL_4K;
+ if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)))
+ return;
+
+ if (!log_dirty_pages) {
+ /*
+ * Dirty logging tracks sptes in 4k granularity, meaning that
+ * large sptes have to be split. If live migration succeeds,
+ * the guest in the source machine will be destroyed and large
+ * sptes will be created in the destination. However, if the
+ * guest continues to run in the source machine (for example if
+ * live migration fails), small sptes will remain around and
+ * cause bad performance.
+ *
+ * Scan sptes if dirty logging has been stopped, dropping those
+ * which can be collapsed into a single large-page spte. Later
+ * page faults will create the large-page sptes.
+ */
+ kvm_mmu_zap_collapsible_sptes(kvm, new);
+ } else {
+ /* By default, write-protect everything to log writes. */
+ int level = PG_LEVEL_4K;
+
+ if (kvm_x86_ops.cpu_dirty_log_size) {
+ /*
+ * Clear all dirty bits, unless pages are treated as
+ * dirty from the get-go.
+ */
+ if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
+ kvm_mmu_slot_leaf_clear_dirty(kvm, new);
+
+ /*
+ * Write-protect large pages on write so that dirty
+ * logging happens at 4k granularity. No need to
+ * write-protect small SPTEs since write accesses are
+ * logged by the CPU via dirty bits.
+ */
+ level = PG_LEVEL_2M;
+ } else if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
/*
* If we're with initial-all-set, we don't need
* to write protect any small page because
@@ -10825,10 +10877,9 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
* so that the page split can happen lazily on
* the first write to the huge page.
*/
- kvm_mmu_slot_remove_write_access(kvm, new, level);
+ level = PG_LEVEL_2M;
}
- } else {
- static_call_cond(kvm_x86_slot_disable_log_dirty)(kvm, new);
+ kvm_mmu_slot_remove_write_access(kvm, new, level);
}
}
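
kvm_mmu_update_cpu_dirty_logging() keeps a per-VM count of memslots with dirty logging enabled and broadcasts KVM_REQ_UPDATE_CPU_DIRTY_LOGGING only on the 0<->1 transitions, so enabling logging on additional slots stays cheap. The counting logic on its own (sketch, not kernel code):

    static int cpu_dirty_logging_count;

    /* Only the first enable and the last disable touch hardware state;
     * intermediate toggles merely adjust the count. */
    static void update_cpu_dirty_logging(int enable, void (*broadcast)(void))
    {
        if ((enable && ++cpu_dirty_logging_count == 1) ||
            (!enable && --cpu_dirty_logging_count == 0))
            broadcast();
    }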
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index af8f6562fce4..ae17250e1efe 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -11,9 +11,11 @@
#include "hyperv.h"
#include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
#include "trace.h"
@@ -61,6 +63,132 @@ out:
return ret;
}
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
+{
+ struct kvm_vcpu_xen *vx = &v->arch.xen;
+ u64 now = get_kvmclock_ns(v->kvm);
+ u64 delta_ns = now - vx->runstate_entry_time;
+ u64 run_delay = current->sched_info.run_delay;
+
+ if (unlikely(!vx->runstate_entry_time))
+ vx->current_runstate = RUNSTATE_offline;
+
+ /*
+ * Time waiting for the scheduler isn't "stolen" if the
+ * vCPU wasn't running anyway.
+ */
+ if (vx->current_runstate == RUNSTATE_running) {
+ u64 steal_ns = run_delay - vx->last_steal;
+
+ delta_ns -= steal_ns;
+
+ vx->runstate_times[RUNSTATE_runnable] += steal_ns;
+ }
+ vx->last_steal = run_delay;
+
+ vx->runstate_times[vx->current_runstate] += delta_ns;
+ vx->current_runstate = state;
+ vx->runstate_entry_time = now;
+}
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+{
+ struct kvm_vcpu_xen *vx = &v->arch.xen;
+ uint64_t state_entry_time;
+ unsigned int offset;
+
+ kvm_xen_update_runstate(v, state);
+
+ if (!vx->runstate_set)
+ return;
+
+ BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+ offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+#ifdef CONFIG_X86_64
+ /*
+ * The only difference is alignment of uint64_t in 32-bit.
+ * So the first field 'state' is accessed directly using
+ * offsetof() (where its offset happens to be zero), while the
+ * remaining fields, which are all uint64_t, start at 'offset'
+ * which we tweak here by adding 4.
+ */
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+ offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+ if (v->kvm->arch.xen.long_mode)
+ offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+ /*
+ * First write the updated state_entry_time at the appropriate
+ * location determined by 'offset'.
+ */
+ state_entry_time = vx->runstate_entry_time;
+ state_entry_time |= XEN_RUNSTATE_UPDATE;
+
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+ sizeof(state_entry_time));
+ BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+ sizeof(state_entry_time));
+
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &state_entry_time, offset,
+ sizeof(state_entry_time)))
+ return;
+ smp_wmb();
+
+ /*
+ * Next, write the new runstate. This is in the *same* place
+ * for 32-bit and 64-bit guests, asserted here for paranoia.
+ */
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+ offsetof(struct compat_vcpu_runstate_info, state));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+ sizeof(vx->current_runstate));
+ BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+ sizeof(vx->current_runstate));
+
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &vx->current_runstate,
+ offsetof(struct vcpu_runstate_info, state),
+ sizeof(vx->current_runstate)))
+ return;
+
+ /*
+ * Write the actual runstate times immediately after the
+ * runstate_entry_time.
+ */
+ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+ BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+ offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+ BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+ sizeof(vx->runstate_times));
+
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &vx->runstate_times[0],
+ offset + sizeof(u64),
+ sizeof(vx->runstate_times)))
+ return;
+
+ smp_wmb();
+
+ /*
+ * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
+ * runstate_entry_time field.
+ */
+
+ state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+ if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+ &state_entry_time, offset,
+ sizeof(state_entry_time)))
+ return;
+}
+
int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
u8 rc = 0;
@@ -187,9 +315,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
/* No compat necessary here. */
BUILD_BUG_ON(sizeof(struct vcpu_info) !=
sizeof(struct compat_vcpu_info));
+ BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
+ offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
vcpu->arch.xen.vcpu_info_set = false;
+ r = 0;
break;
}
@@ -206,6 +337,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
if (data->u.gpa == GPA_INVALID) {
vcpu->arch.xen.vcpu_time_info_set = false;
+ r = 0;
break;
}
@@ -219,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
}
break;
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (data->u.gpa == GPA_INVALID) {
+ vcpu->arch.xen.runstate_set = false;
+ r = 0;
+ break;
+ }
+
+ r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ &vcpu->arch.xen.runstate_cache,
+ data->u.gpa,
+ sizeof(struct vcpu_runstate_info));
+ if (!r)
+ vcpu->arch.xen.runstate_set = true;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (data->u.runstate.state > RUNSTATE_offline) {
+ r = -EINVAL;
+ break;
+ }
+
+ kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+ r = 0;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (data->u.runstate.state > RUNSTATE_offline) {
+ r = -EINVAL;
+ break;
+ }
+ if (data->u.runstate.state_entry_time !=
+ (data->u.runstate.time_running +
+ data->u.runstate.time_runnable +
+ data->u.runstate.time_blocked +
+ data->u.runstate.time_offline)) {
+ r = -EINVAL;
+ break;
+ }
+ if (get_kvmclock_ns(vcpu->kvm) <
+ data->u.runstate.state_entry_time) {
+ r = -EINVAL;
+ break;
+ }
+
+ vcpu->arch.xen.current_runstate = data->u.runstate.state;
+ vcpu->arch.xen.runstate_entry_time =
+ data->u.runstate.state_entry_time;
+ vcpu->arch.xen.runstate_times[RUNSTATE_running] =
+ data->u.runstate.time_running;
+ vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
+ data->u.runstate.time_runnable;
+ vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
+ data->u.runstate.time_blocked;
+ vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
+ data->u.runstate.time_offline;
+ vcpu->arch.xen.last_steal = current->sched_info.run_delay;
+ r = 0;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (data->u.runstate.state > RUNSTATE_offline &&
+ data->u.runstate.state != (u64)-1) {
+ r = -EINVAL;
+ break;
+ }
+ /* The adjustment must add up */
+ if (data->u.runstate.state_entry_time !=
+ (data->u.runstate.time_running +
+ data->u.runstate.time_runnable +
+ data->u.runstate.time_blocked +
+ data->u.runstate.time_offline)) {
+ r = -EINVAL;
+ break;
+ }
+
+ if (get_kvmclock_ns(vcpu->kvm) <
+ (vcpu->arch.xen.runstate_entry_time +
+ data->u.runstate.state_entry_time)) {
+ r = -EINVAL;
+ break;
+ }
+
+ vcpu->arch.xen.runstate_entry_time +=
+ data->u.runstate.state_entry_time;
+ vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
+ data->u.runstate.time_running;
+ vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
+ data->u.runstate.time_runnable;
+ vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
+ data->u.runstate.time_blocked;
+ vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
+ data->u.runstate.time_offline;
+
+ if (data->u.runstate.state <= RUNSTATE_offline)
+ kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+ r = 0;
+ break;
+
default:
break;
}
@@ -251,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
r = 0;
break;
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ if (vcpu->arch.xen.runstate_set) {
+ data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
+ r = 0;
+ }
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ data->u.runstate.state = vcpu->arch.xen.current_runstate;
+ r = 0;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+ if (!sched_info_on()) {
+ r = -EOPNOTSUPP;
+ break;
+ }
+ data->u.runstate.state = vcpu->arch.xen.current_runstate;
+ data->u.runstate.state_entry_time =
+ vcpu->arch.xen.runstate_entry_time;
+ data->u.runstate.time_running =
+ vcpu->arch.xen.runstate_times[RUNSTATE_running];
+ data->u.runstate.time_runnable =
+ vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
+ data->u.runstate.time_blocked =
+ vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
+ data->u.runstate.time_offline =
+ vcpu->arch.xen.runstate_times[RUNSTATE_offline];
+ r = 0;
+ break;
+
+ case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+ r = -EINVAL;
+ break;
+
default:
break;
}
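
kvm_xen_update_runstate_guest() publishes the runstate words with a torn-read marker: the XEN_RUNSTATE_UPDATE bit in state_entry_time is set before and cleared after the payload writes, with write barriers between the steps, so a guest reader can detect an in-progress update. A userspace sketch of the protocol (C11 fences stand in for smp_wmb(); the layout is simplified):

    #include <stdatomic.h>
    #include <stdint.h>

    #define XEN_RUNSTATE_UPDATE (1ULL << 63)

    struct runstate {
        int state;
        uint64_t state_entry_time;   /* guest-visible; carries the flag */
        uint64_t time[4];
    };

    /* Set the UPDATE bit, write the payload, clear the bit. A reader
     * that sees the bit clear is guaranteed a consistent snapshot. */
    static void publish_runstate(struct runstate *rs, uint64_t now,
                                 const uint64_t times[4], int state)
    {
        rs->state_entry_time = now | XEN_RUNSTATE_UPDATE;
        atomic_thread_fence(memory_order_release);   /* ~smp_wmb() */

        rs->state = state;
        for (int i = 0; i < 4; i++)
            rs->time[i] = times[i];
        atomic_thread_fence(memory_order_release);   /* ~smp_wmb() */

        rs->state_entry_time = now;   /* clears XEN_RUNSTATE_UPDATE */
    }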
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index b66a921776f4..463a7844a8ca 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -9,6 +9,7 @@
#ifndef __ARCH_X86_KVM_XEN_H__
#define __ARCH_X86_KVM_XEN_H__
+#ifdef CONFIG_KVM_XEN
#include <linux/jump_label_ratelimit.h>
extern struct static_key_false_deferred kvm_xen_enabled;
@@ -18,11 +19,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
-int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
void kvm_xen_destroy_vm(struct kvm *kvm);
+static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
+{
+ return static_branch_unlikely(&kvm_xen_enabled.key) &&
+ kvm->arch.xen_hvm_config.msr;
+}
+
static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
{
return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -38,11 +44,59 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
return 0;
}
+#else
+static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
+{
+ return 1;
+}
+
+static inline void kvm_xen_destroy_vm(struct kvm *kvm)
+{
+}
+
+static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
+{
+ return false;
+}
+
+static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
+{
+ return false;
+}
+
+static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+#endif
+
+int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
-/* 32-bit compatibility definitions, also used natively in 32-bit build */
#include <asm/pvclock-abi.h>
#include <asm/xen/interface.h>
+#include <xen/interface/vcpu.h>
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
+static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
+{
+ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
+}
+
+static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+{
+ /*
+ * If the vCPU wasn't preempted but took a normal exit for
+ * some reason (hypercalls, I/O, etc.), that is accounted as
+ * still RUNSTATE_running, as the VMM is still operating on
+ * behalf of the vCPU. Only if the VMM does actually block
+ * does it need to enter RUNSTATE_blocked.
+ */
+ if (vcpu->preempted)
+ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+}
+
+/* 32-bit compatibility definitions, also used natively in 32-bit build */
struct compat_arch_vcpu_info {
unsigned int cr2;
unsigned int pad[5];
@@ -75,4 +129,10 @@ struct compat_shared_info {
struct compat_arch_shared_info arch;
};
+struct compat_vcpu_runstate_info {
+ int state;
+ uint64_t state_entry_time;
+ uint64_t time[4];
+} __attribute__((packed));
+
#endif /* __ARCH_X86_KVM_XEN_H__ */
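
The packed compat_vcpu_runstate_info above exists because 32-bit x86 aligns uint64_t to 4 bytes, so after the int state field the 64-bit members sit 4 bytes earlier than in the natural 64-bit layout; the kernel's BUILD_BUG_ONs assert exactly that. The same relationship, checked in a standalone program:

    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    struct native_runstate {             /* natural 64-bit alignment */
        int state;
        uint64_t state_entry_time;       /* lands at offset 8 */
        uint64_t time[4];
    };

    struct compat_runstate {             /* 32-bit layout: u64 at +4 */
        int state;
        uint64_t state_entry_time;
        uint64_t time[4];
    } __attribute__((packed));

    int main(void)
    {
        assert(offsetof(struct native_runstate, state_entry_time) ==
               offsetof(struct compat_runstate, state_entry_time) + 4);
        assert(offsetof(struct native_runstate, time) ==
               offsetof(struct compat_runstate, time) + 4);
        return 0;
    }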
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 525197381baa..a73347e2cdfc 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -9,6 +9,7 @@
#include <linux/kdebug.h> /* oops_begin/end, ... */
#include <linux/extable.h> /* search_exception_tables */
#include <linux/memblock.h> /* max_low_pfn */
+#include <linux/kfence.h> /* kfence_handle_page_fault */
#include <linux/kprobes.h> /* NOKPROBE_SYMBOL, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_event.h> /* perf_sw_event */
@@ -680,6 +681,11 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
if (IS_ENABLED(CONFIG_EFI))
efi_crash_gracefully_on_page_fault(address);
+ /* Only not-present faults should be handled by KFENCE. */
+ if (!(error_code & X86_PF_PROT) &&
+ kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs))
+ return;
+
oops:
/*
* Oops. The kernel tried to access some bad page. We'll have to
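
The fault.c hook only forwards not-present faults to KFENCE: KFENCE guards its objects with not-present pages, so a fault with X86_PF_PROT set (the page was present) can never be a KFENCE hit and must be allowed to oops normally. The gate by itself (bit values follow the x86 page-fault error code):

    #define X86_PF_PROT  (1u << 0)   /* set: fault on a present page */
    #define X86_PF_WRITE (1u << 1)

    /* KFENCE guards objects with not-present pages, so a fault with
     * X86_PF_PROT set can never be a KFENCE hit. */
    static int kfence_candidate(unsigned long error_code)
    {
        return !(error_code & X86_PF_PROT);
    }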
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 8f665c352bf0..ca311aaa67b8 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -1164,12 +1164,14 @@ static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
+ kfree(v);
++*pos;
return memtype_get_idx(*pos);
}
static void memtype_seq_stop(struct seq_file *seq, void *v)
{
+ kfree(v);
}
static int memtype_seq_show(struct seq_file *seq, void *v)
@@ -1181,8 +1183,6 @@ static int memtype_seq_show(struct seq_file *seq, void *v)
entry_print->end,
cattr_name(entry_print->type));
- kfree(entry_print);
-
return 0;
}
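
The memtype fix moves the kfree() of the iterator's snapshot from ->show() into ->next() and ->stop(): seq_file may call ->next() or ->stop() without a matching ->show() (for instance when the output buffer overflows and the record is retried), so freeing in ->show() leaked entries. The shape of the fixed iterator, sketched in userspace (memtype_get_idx() assumed to return a fresh allocation):

    #include <stdlib.h>

    void *memtype_get_idx(long pos);   /* assumed: returns a fresh copy */

    static void *seq_next(void *v, long *pos)
    {
        free(v);                 /* kfree() in the kernel */
        ++*pos;
        return memtype_get_idx(*pos);
    }

    static void seq_stop(void *v)
    {
        free(v);                 /* covers entries ->show() never saw */
    }

    static int seq_show(void *v)
    {
        /* print only; ownership stays with next()/stop() */
        (void)v;
        return 0;
    }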
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index b5949e5a83ec..a3cc33091f46 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -416,6 +416,9 @@ void __init xen_vmalloc_p2m_tree(void)
xen_p2m_last_pfn = xen_max_p2m_pfn;
p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
+ if (!p2m_limit && IS_ENABLED(CONFIG_XEN_UNPOPULATED_ALLOC))
+ p2m_limit = xen_start_info->nr_pages * XEN_EXTRA_MEM_RATIO;
+
vm.flags = VM_ALLOC;
vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
PMD_SIZE * PMDS_PER_MID_PAGE);
@@ -652,10 +655,9 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
pte_t *ptep;
unsigned int level;
- if (unlikely(pfn >= xen_p2m_size)) {
- BUG_ON(mfn != INVALID_P2M_ENTRY);
- return true;
- }
+ /* Only invalid entries allowed above the highest p2m covered frame. */
+ if (unlikely(pfn >= xen_p2m_size))
+ return mfn == INVALID_P2M_ENTRY;
/*
* The interface requires atomic updates on p2m elements.
@@ -710,6 +712,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
for (i = 0; i < count; i++) {
unsigned long mfn, pfn;
+ struct gnttab_unmap_grant_ref unmap[2];
+ int rc;
/* Do not add to override if the map failed. */
if (map_ops[i].status != GNTST_okay ||
@@ -727,10 +731,46 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");
- if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
- ret = -ENOMEM;
- goto out;
+ if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
+ continue;
+
+ /*
+ * Signal an error for this slot. This in turn requires
+ * immediate unmapping.
+ */
+ map_ops[i].status = GNTST_general_error;
+ unmap[0].host_addr = map_ops[i].host_addr;
+ unmap[0].handle = map_ops[i].handle;
+ map_ops[i].handle = ~0;
+ if (map_ops[i].flags & GNTMAP_device_map)
+ unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr;
+ else
+ unmap[0].dev_bus_addr = 0;
+
+ if (kmap_ops) {
+ kmap_ops[i].status = GNTST_general_error;
+ unmap[1].host_addr = kmap_ops[i].host_addr;
+ unmap[1].handle = kmap_ops[i].handle;
+ kmap_ops[i].handle = ~0;
+ if (kmap_ops[i].flags & GNTMAP_device_map)
+ unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr;
+ else
+ unmap[1].dev_bus_addr = 0;
}
+
+ /*
+ * Pre-populate both status fields, to be recognizable in
+ * the log message below.
+ */
+ unmap[0].status = 1;
+ unmap[1].status = 1;
+
+ rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+ unmap, 1 + !!kmap_ops);
+ if (rc || unmap[0].status != GNTST_okay ||
+ unmap[1].status != GNTST_okay)
+ pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n",
+ rc, unmap[0].status, unmap[1].status);
}
out:
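
When set_phys_to_machine() fails mid-batch, the new p2m code no longer aborts the whole loop with -ENOMEM; it flags just that slot as GNTST_general_error and synchronously unmaps its host mapping (plus the kernel mapping when kmap_ops is present). The per-slot recovery, schematically (types and the unmap callback are stand-ins for the hypercall):

    struct gref {
        int status;
        unsigned long handle;
    };

    /* Flag the failed slot for the caller and synchronously undo one or
     * two mappings (host plus optional kernel mapping). */
    static void recover_slot(struct gref *map, struct gref *kmap,
                             void (*unmap)(struct gref *refs, int count))
    {
        struct gref unmaps[2];
        int n = 1;

        map->status = -1;           /* stands in for GNTST_general_error */
        unmaps[0] = *map;
        map->handle = ~0UL;

        if (kmap) {
            kmap->status = -1;
            unmaps[1] = *kmap;
            kmap->handle = ~0UL;
            n = 2;
        }

        unmap(unmaps, n);           /* GNTTABOP_unmap_grant_ref */
    }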
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 7eab14d56369..1a3b75652fa4 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -59,18 +59,6 @@ static struct {
} xen_remap_buf __initdata __aligned(PAGE_SIZE);
static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
-/*
- * The maximum amount of extra memory compared to the base size. The
- * main scaling factor is the size of struct page. At extreme ratios
- * of base:extra, all the base memory can be filled with page
- * structures for the extra memory, leaving no space for anything
- * else.
- *
- * 10x seems like a reasonable balance between scaling flexibility and
- * leaving a practically usable system.
- */
-#define EXTRA_MEM_RATIO (10)
-
static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
static void __init xen_parse_512gb(void)
@@ -790,20 +778,13 @@ char * __init xen_memory_setup(void)
extra_pages += max_pages - max_pfn;
/*
- * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
- * factor the base size. On non-highmem systems, the base
- * size is the full initial memory allocation; on highmem it
- * is limited to the max size of lowmem, so that it doesn't
- * get completely filled.
+ * Clamp the amount of extra memory to XEN_EXTRA_MEM_RATIO
+ * times the base size.
*
* Make sure we have no memory above max_pages, as this area
* isn't handled by the p2m management.
- *
- * In principle there could be a problem in lowmem systems if
- * the initial memory is also very large with respect to
- * lowmem, but we won't try to deal with that here.
*/
- extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+ extra_pages = min3(XEN_EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
extra_pages, max_pages - max_pfn);
i = 0;
addr = xen_e820_table.entries[0].addr;
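The clamp being renamed works on plain page counts. A standalone sketch of the arithmetic, with illustrative sizes; XEN_EXTRA_MEM_RATIO is 10, the same value the deleted EXTRA_MEM_RATIO had:

#include <stdio.h>
#include <stdint.h>

#define XEN_EXTRA_MEM_RATIO 10ULL

static uint64_t min2(uint64_t a, uint64_t b) { return a < b ? a : b; }
static uint64_t min3(uint64_t a, uint64_t b, uint64_t c)
{
	return min2(min2(a, b), c);
}

int main(void)
{
	uint64_t max_pfn = 1ULL << 20;     /* 4 GiB base RAM, 4 KiB pages */
	uint64_t maxmem_pfn = 1ULL << 24;  /* stands in for PFN_DOWN(MAXMEM) */
	uint64_t max_pages = 1ULL << 25;   /* hypervisor-imposed maximum */
	uint64_t extra_pages = max_pages - max_pfn;

	extra_pages = min3(XEN_EXTRA_MEM_RATIO * min2(max_pfn, maxmem_pfn),
			   extra_pages, max_pages - max_pfn);
	printf("extra_pages = %llu\n", (unsigned long long)extra_pages);
	return 0;
}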
diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
index 397a7de56377..9534ef515d74 100644
--- a/arch/xtensa/kernel/process.c
+++ b/arch/xtensa/kernel/process.c
@@ -217,7 +217,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
p->thread.sp = (unsigned long)childregs;
- if (!(p->flags & PF_KTHREAD)) {
+ if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
struct pt_regs *regs = current_pt_regs();
unsigned long usp = usp_thread_fn ?
usp_thread_fn : regs->areg[1];
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index b398dde53af9..95586137194e 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -125,6 +125,8 @@
#include <linux/delay.h>
#include <linux/backing-dev.h>
+#include <trace/events/block.h>
+
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-tag.h"
@@ -160,7 +162,7 @@ BFQ_BFQQ_FNS(split_coop);
BFQ_BFQQ_FNS(softrt_update);
#undef BFQ_BFQQ_FNS
-/* Expiration time of sync (0) and async (1) requests, in ns. */
+/* Expiration time of async (0) and sync (1) requests, in ns. */
static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
/* Maximum backwards seek (magic number lifted from CFQ), in KiB. */
@@ -5621,7 +5623,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
spin_unlock_irq(&bfqd->lock);
- blk_mq_sched_request_inserted(rq);
+ trace_block_rq_insert(rq);
spin_lock_irq(&bfqd->lock);
bfqq = bfq_init_rq(rq);
diff --git a/block/blk-core.c b/block/blk-core.c
index 5e752840b41a..fc60ff208497 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -59,6 +59,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert);
DEFINE_IDA(blk_queue_ida);
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index e8327c50d7c9..c176b7af56a7 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -80,6 +80,7 @@ static struct blk_crypto_keyslot {
static struct blk_keyslot_manager blk_crypto_ksm;
static struct workqueue_struct *blk_crypto_wq;
static mempool_t *blk_crypto_bounce_page_pool;
+static struct bio_set crypto_bio_split;
/*
* This is the key we set when evicting a keyslot. This *should* be the all 0's
@@ -224,7 +225,8 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr)
if (num_sectors < bio_sectors(bio)) {
struct bio *split_bio;
- split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL);
+ split_bio = bio_split(bio, num_sectors, GFP_NOIO,
+ &crypto_bio_split);
if (!split_bio) {
bio->bi_status = BLK_STS_RESOURCE;
return false;
@@ -538,9 +540,13 @@ static int blk_crypto_fallback_init(void)
prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE);
- err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+ err = bioset_init(&crypto_bio_split, 64, 0, 0);
if (err)
goto out;
+
+ err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+ if (err)
+ goto fail_free_bioset;
err = -ENOMEM;
blk_crypto_ksm.ksm_ll_ops = blk_crypto_ksm_ll_ops;
@@ -591,6 +597,8 @@ fail_free_wq:
destroy_workqueue(blk_crypto_wq);
fail_free_ksm:
blk_ksm_destroy(&blk_crypto_ksm);
+fail_free_bioset:
+ bioset_exit(&crypto_bio_split);
out:
return err;
}
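The init hunk slots the new bioset in front of the keyslot-manager setup, and the unwind gains a matching label. The general shape of that reverse-order, fall-through cleanup, as a standalone sketch (malloc/free stand in for bioset_init/blk_ksm_init and their teardown):

#include <stdio.h>
#include <stdlib.h>

static int fallback_init(void)
{
	int err = -1;
	char *bioset, *ksm;

	bioset = malloc(64);            /* like bioset_init() */
	if (!bioset)
		goto out;

	ksm = malloc(64);               /* like blk_ksm_init() */
	if (!ksm)
		goto fail_free_bioset;

	/* ... further setup would hang more labels below ... */
	free(ksm);
	free(bioset);
	return 0;

fail_free_bioset:
	free(bioset);                   /* like bioset_exit() */
out:
	return err;
}

int main(void)
{
	printf("fallback_init() = %d\n", fallback_init());
	return 0;
}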
diff --git a/block/blk-map.c b/block/blk-map.c
index 21630dccac62..369e204d14d0 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -150,9 +150,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
bmd->is_our_pages = !map_data;
bmd->is_null_mapped = (map_data && map_data->null_mapped);
- nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
- if (nr_pages > BIO_MAX_PAGES)
- nr_pages = BIO_MAX_PAGES;
+ nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
ret = -ENOMEM;
bio = bio_kmalloc(gfp_mask, nr_pages);
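bio_max_segs() replaces the open-coded clamp; assuming the helper simply caps the segment count at BIO_MAX_PAGES, as the removed lines did, the computation is:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define BIO_MAX_PAGES 256UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

static unsigned long bio_max_segs(unsigned long nr_segs)
{
	return nr_segs < BIO_MAX_PAGES ? nr_segs : BIO_MAX_PAGES;
}

int main(void)
{
	unsigned long offset = 512, len = 2 * 1024 * 1024; /* 2 MiB mapping */
	unsigned long nr_pages =
		bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));

	/* 513 pages are needed, clamped to 256 */
	printf("nr_pages = %lu\n", nr_pages);
	return 0;
}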
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 4de03da9a624..9ebb344e2585 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -292,7 +292,6 @@ static const char *const cmd_flag_name[] = {
#define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name
static const char *const rqf_name[] = {
- RQF_NAME(SORTED),
RQF_NAME(STARTED),
RQF_NAME(SOFTBARRIER),
RQF_NAME(FLUSH_SEQ),
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index deff4e826e23..e1e997af89a0 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -384,14 +384,7 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
-void blk_mq_sched_request_inserted(struct request *rq)
-{
- trace_block_rq_insert(rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
-
static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
- bool has_sched,
struct request *rq)
{
/*
@@ -408,9 +401,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
return true;
- if (has_sched)
- rq->rq_flags |= RQF_SORTED;
-
return false;
}
@@ -424,7 +414,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
- if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+ if (blk_mq_sched_bypass_insert(hctx, rq)) {
/*
* Firstly normal IO request is inserted to scheduler queue or
* sw queue, meantime we add flush request to dispatch queue(
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index 0476360f05f1..5b18ab915c65 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -7,7 +7,6 @@
void blk_mq_sched_assign_ioc(struct request *rq);
-void blk_mq_sched_request_inserted(struct request *rq);
bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs, struct request **merged_request);
bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
diff --git a/block/blk-pm.h b/block/blk-pm.h
index a2283cc9f716..8a5a0d4b357f 100644
--- a/block/blk-pm.h
+++ b/block/blk-pm.h
@@ -21,31 +21,6 @@ static inline void blk_pm_mark_last_busy(struct request *rq)
if (rq->q->dev && !(rq->rq_flags & RQF_PM))
pm_runtime_mark_last_busy(rq->q->dev);
}
-
-static inline void blk_pm_requeue_request(struct request *rq)
-{
- lockdep_assert_held(&rq->q->queue_lock);
-
- if (rq->q->dev && !(rq->rq_flags & RQF_PM))
- rq->q->nr_pending--;
-}
-
-static inline void blk_pm_add_request(struct request_queue *q,
- struct request *rq)
-{
- lockdep_assert_held(&q->queue_lock);
-
- if (q->dev && !(rq->rq_flags & RQF_PM))
- q->nr_pending++;
-}
-
-static inline void blk_pm_put_request(struct request *rq)
-{
- lockdep_assert_held(&rq->q->queue_lock);
-
- if (rq->q->dev && !(rq->rq_flags & RQF_PM))
- --rq->q->nr_pending;
-}
#else
static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
{
@@ -55,19 +30,6 @@ static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
static inline void blk_pm_mark_last_busy(struct request *rq)
{
}
-
-static inline void blk_pm_requeue_request(struct request *rq)
-{
-}
-
-static inline void blk_pm_add_request(struct request_queue *q,
- struct request *rq)
-{
-}
-
-static inline void blk_pm_put_request(struct request *rq)
-{
-}
#endif
#endif /* _BLOCK_BLK_PM_H_ */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 7dd8be314ac6..b4aa2f37fab6 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -504,6 +504,14 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
}
EXPORT_SYMBOL(blk_queue_io_opt);
+static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
+{
+ sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
+ if (sectors < PAGE_SIZE >> SECTOR_SHIFT)
+ sectors = PAGE_SIZE >> SECTOR_SHIFT;
+ return sectors;
+}
+
/**
* blk_stack_limits - adjust queue_limits for stacked devices
* @t: the stacking driver limits (top device)
@@ -630,6 +638,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
ret = -1;
}
+ t->max_sectors = blk_round_down_sectors(t->max_sectors, t->logical_block_size);
+ t->max_hw_sectors = blk_round_down_sectors(t->max_hw_sectors, t->logical_block_size);
+ t->max_dev_sectors = blk_round_down_sectors(t->max_dev_sectors, t->logical_block_size);
+
/* Discard alignment and granularity */
if (b->discard_granularity) {
alignment = queue_limit_discard_alignment(b, start);
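The new helper keeps stacked limits from ending mid logical block. A standalone check of the arithmetic, using a simplified round_down (the kernel macro requires a power-of-two alignment and uses masking instead of division):

#include <stdio.h>

#define SECTOR_SHIFT 9
#define PAGE_SIZE 4096U
#define round_down(x, y) ((x) / (y) * (y))

static unsigned int blk_round_down_sectors(unsigned int sectors,
					   unsigned int lbs)
{
	sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
	if (sectors < PAGE_SIZE >> SECTOR_SHIFT)
		sectors = PAGE_SIZE >> SECTOR_SHIFT;
	return sectors;
}

int main(void)
{
	/* 4 KiB logical blocks: 65535 sectors rounds down to 65528, a
	 * multiple of 8 sectors, so no partial logical block is issued. */
	printf("%u\n", blk_round_down_sectors(65535, 4096));
	return 0;
}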
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index ae39c7f3d83d..0f4f0c8a7825 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -434,10 +434,13 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
if (ret < 0)
return ret;
- if (poll_on)
+ if (poll_on) {
blk_queue_flag_set(QUEUE_FLAG_POLL, q);
- else
+ } else {
+ blk_mq_freeze_queue(q);
blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
+ blk_mq_unfreeze_queue(q);
+ }
return ret;
}
diff --git a/block/bounce.c b/block/bounce.c
index fc55314aa426..87983a35079c 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -214,8 +214,7 @@ static void bounce_end_io_read_isa(struct bio *bio)
__bounce_end_io_read(bio, &isa_page_pool);
}
-static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
- struct bio_set *bs)
+static struct bio *bounce_clone_bio(struct bio *bio_src)
{
struct bvec_iter iter;
struct bio_vec bv;
@@ -242,10 +241,12 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
* asking for trouble and would force extra work on
* __bio_clone_fast() anyways.
*/
-
- bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
- if (!bio)
- return NULL;
+ if (bio_is_passthrough(bio_src))
+ bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL,
+ bio_segments(bio_src));
+ else
+ bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src),
+ &bounce_bio_set);
bio->bi_bdev = bio_src->bi_bdev;
if (bio_flagged(bio_src, BIO_REMAPPED))
bio_set_flag(bio, BIO_REMAPPED);
@@ -269,11 +270,11 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
break;
}
- if (bio_crypt_clone(bio, bio_src, gfp_mask) < 0)
+ if (bio_crypt_clone(bio, bio_src, GFP_NOIO) < 0)
goto err_put;
if (bio_integrity(bio_src) &&
- bio_integrity_clone(bio, bio_src, gfp_mask) < 0)
+ bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0)
goto err_put;
bio_clone_blkg_association(bio, bio_src);
@@ -296,7 +297,6 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
unsigned i = 0;
bool bounce = false;
int sectors = 0;
- bool passthrough = bio_is_passthrough(*bio_orig);
bio_for_each_segment(from, *bio_orig, iter) {
if (i++ < BIO_MAX_PAGES)
@@ -307,14 +307,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
if (!bounce)
return;
- if (!passthrough && sectors < bio_sectors(*bio_orig)) {
+ if (!bio_is_passthrough(*bio_orig) &&
+ sectors < bio_sectors(*bio_orig)) {
bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
bio_chain(bio, *bio_orig);
submit_bio_noacct(*bio_orig);
*bio_orig = bio;
}
- bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
- &bounce_bio_set);
+ bio = bounce_clone_bio(*bio_orig);
/*
* Bvec table can't be updated by bio_for_each_segment_all(),
diff --git a/block/genhd.c b/block/genhd.c
index 36ff45bbaaaf..c55e8f0fced1 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -45,11 +45,10 @@ static void disk_release_events(struct gendisk *disk);
void set_capacity(struct gendisk *disk, sector_t sectors)
{
struct block_device *bdev = disk->part0;
- unsigned long flags;
- spin_lock_irqsave(&bdev->bd_size_lock, flags);
+ spin_lock(&bdev->bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
- spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
+ spin_unlock(&bdev->bd_size_lock);
}
EXPORT_SYMBOL(set_capacity);
@@ -74,7 +73,7 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
return false;
pr_info("%s: detected capacity change from %lld to %lld\n",
- disk->disk_name, size, capacity);
+ disk->disk_name, capacity, size);
/*
* Historically we did not send a uevent for changes to/from an empty
@@ -476,7 +475,7 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)
struct disk_part_iter piter;
struct block_device *part;
- disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+ disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0);
while ((part = disk_part_iter_next(&piter)))
kobject_uevent(bdev_kobj(part), action);
disk_part_iter_exit(&piter);
diff --git a/block/ioctl.c b/block/ioctl.c
index d61d652078f4..ff241e663c01 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -81,20 +81,27 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
}
#endif
-static int blkdev_reread_part(struct block_device *bdev)
+static int blkdev_reread_part(struct block_device *bdev, fmode_t mode)
{
- int ret;
+ struct block_device *tmp;
if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev))
return -EINVAL;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- mutex_lock(&bdev->bd_mutex);
- ret = bdev_disk_changed(bdev, false);
- mutex_unlock(&bdev->bd_mutex);
+ /*
+ * Reopen the device to revalidate the driver state and force a
+ * partition rescan.
+ */
+ mode &= ~FMODE_EXCL;
+ set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
- return ret;
+ tmp = blkdev_get_by_dev(bdev->bd_dev, mode, NULL);
+ if (IS_ERR(tmp))
+ return PTR_ERR(tmp);
+ blkdev_put(tmp, mode);
+ return 0;
}
static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
@@ -498,7 +505,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
return 0;
case BLKRRPART:
- return blkdev_reread_part(bdev);
+ return blkdev_reread_part(bdev, mode);
case BLKTRACESTART:
case BLKTRACESTOP:
case BLKTRACETEARDOWN:
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index c25c41d0d061..33d34d69cade 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -13,6 +13,8 @@
#include <linux/module.h>
#include <linux/sbitmap.h>
+#include <trace/events/block.h>
+
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
@@ -353,19 +355,9 @@ static void kyber_timer_fn(struct timer_list *t)
}
}
-static unsigned int kyber_sched_tags_shift(struct request_queue *q)
-{
- /*
- * All of the hardware queues have the same depth, so we can just grab
- * the shift of the first one.
- */
- return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift;
-}
-
static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
{
struct kyber_queue_data *kqd;
- unsigned int shift;
int ret = -ENOMEM;
int i;
@@ -400,9 +392,6 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
kqd->latency_targets[i] = kyber_latency_targets[i];
}
- shift = kyber_sched_tags_shift(q);
- kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
-
return kqd;
err_buckets:
@@ -458,9 +447,19 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
INIT_LIST_HEAD(&kcq->rq_list[i]);
}
-static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
{
struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
+ struct blk_mq_tags *tags = hctx->sched_tags;
+ unsigned int shift = tags->bitmap_tags->sb.shift;
+
+ kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
+
+ sbitmap_queue_min_shallow_depth(tags->bitmap_tags, kqd->async_depth);
+}
+
+static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
struct kyber_hctx_data *khd;
int i;
@@ -502,8 +501,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
khd->batching = 0;
hctx->sched_data = khd;
- sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags,
- kqd->async_depth);
+ kyber_depth_updated(hctx);
return 0;
@@ -602,7 +600,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
list_move_tail(&rq->queuelist, head);
sbitmap_set_bit(&khd->kcq_map[sched_domain],
rq->mq_ctx->index_hw[hctx->type]);
- blk_mq_sched_request_inserted(rq);
+ trace_block_rq_insert(rq);
spin_unlock(&kcq->lock);
}
}
@@ -1022,6 +1020,7 @@ static struct elevator_type kyber_sched = {
.completed_request = kyber_completed_request,
.dispatch_request = kyber_dispatch_request,
.has_work = kyber_has_work,
+ .depth_updated = kyber_depth_updated,
},
#ifdef CONFIG_BLK_DEBUG_FS
.queue_debugfs_attrs = kyber_queue_debugfs_attrs,
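Moving the computation into a depth_updated callback means async_depth now tracks later queue-depth changes instead of being fixed once at init time. The arithmetic itself is unchanged; KYBER_ASYNC_PERCENT is 75 in this era of kyber-iosched.c (worth verifying against your tree):

#include <stdio.h>

#define KYBER_ASYNC_PERCENT 75U

int main(void)
{
	unsigned int shift, depth;

	for (shift = 4; shift <= 6; shift++) {
		depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
		printf("shift=%u -> async_depth=%u\n", shift, depth);
	}
	return 0;
}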
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index b57470e154c8..f3631a287466 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -18,6 +18,8 @@
#include <linux/rbtree.h>
#include <linux/sbitmap.h>
+#include <trace/events/block.h>
+
#include "blk.h"
#include "blk-mq.h"
#include "blk-mq-debugfs.h"
@@ -496,7 +498,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
if (blk_mq_sched_try_insert_merge(q, rq))
return;
- blk_mq_sched_request_inserted(rq);
+ trace_block_rq_insert(rq);
if (at_head || blk_rq_is_passthrough(rq)) {
if (at_head)
diff --git a/block/partitions/core.c b/block/partitions/core.c
index f3d9ff2cafb6..1a7558917c47 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -88,11 +88,9 @@ static int (*check_part[])(struct parsed_partitions *) = {
static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
{
- unsigned long flags;
-
- spin_lock_irqsave(&bdev->bd_size_lock, flags);
+ spin_lock(&bdev->bd_size_lock);
i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
- spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
+ spin_unlock(&bdev->bd_size_lock);
}
static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 15c9c28d9f53..5809cc198fa7 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -767,7 +767,7 @@ config CRYPTO_POLY1305_X86_64
config CRYPTO_POLY1305_MIPS
tristate "Poly1305 authenticator algorithm (MIPS optimized)"
- depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+ depends on MIPS
select CRYPTO_ARCH_HAVE_LIB_POLY1305
config CRYPTO_MD4
diff --git a/drivers/auxdisplay/cfag12864b.c b/drivers/auxdisplay/cfag12864b.c
index 7eebae7e322c..fd430e6866a1 100644
--- a/drivers/auxdisplay/cfag12864b.c
+++ b/drivers/auxdisplay/cfag12864b.c
@@ -5,7 +5,7 @@
* Description: cfag12864b LCD driver
* Depends: ks0108
*
- * Author: Copyright (C) Miguel Ojeda Sandonis
+ * Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-31
*/
@@ -376,5 +376,5 @@ module_init(cfag12864b_init);
module_exit(cfag12864b_exit);
MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
+MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
MODULE_DESCRIPTION("cfag12864b LCD driver");
diff --git a/drivers/auxdisplay/cfag12864bfb.c b/drivers/auxdisplay/cfag12864bfb.c
index 2002291ab338..d66821adf453 100644
--- a/drivers/auxdisplay/cfag12864bfb.c
+++ b/drivers/auxdisplay/cfag12864bfb.c
@@ -5,7 +5,7 @@
* Description: cfag12864b LCD framebuffer driver
* Depends: cfag12864b
*
- * Author: Copyright (C) Miguel Ojeda Sandonis
+ * Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-31
*/
@@ -171,5 +171,5 @@ module_init(cfag12864bfb_init);
module_exit(cfag12864bfb_exit);
MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
+MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
MODULE_DESCRIPTION("cfag12864b LCD framebuffer driver");
diff --git a/drivers/auxdisplay/ks0108.c b/drivers/auxdisplay/ks0108.c
index abfe3fa9e6f4..03c95ad4216c 100644
--- a/drivers/auxdisplay/ks0108.c
+++ b/drivers/auxdisplay/ks0108.c
@@ -5,7 +5,7 @@
* Description: ks0108 LCD Controller driver
* Depends: parport
*
- * Author: Copyright (C) Miguel Ojeda Sandonis
+ * Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-31
*/
@@ -182,6 +182,6 @@ module_init(ks0108_init);
module_exit(ks0108_exit);
MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
+MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
MODULE_DESCRIPTION("ks0108 LCD Controller driver");
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 324aa03fed3c..ffcbe2bc460e 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -213,4 +213,10 @@ config GENERIC_ARCH_TOPOLOGY
appropriate scaling, sysfs interface for reading capacity values at
runtime.
+config GENERIC_ARCH_NUMA
+ bool
+ help
+ Enable support for generic NUMA implementation. Currently, RISC-V
+ and ARM64 use it.
+
endmenu
diff --git a/drivers/base/Makefile b/drivers/base/Makefile
index 5e7bf9669a81..8b93a7f291ec 100644
--- a/drivers/base/Makefile
+++ b/drivers/base/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_PINCTRL) += pinctrl.o
obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o
obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o
obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o
+obj-$(CONFIG_GENERIC_ARCH_NUMA) += arch_numa.o
obj-y += test/
diff --git a/arch/arm64/mm/numa.c b/drivers/base/arch_numa.c
index a8303bc6b62a..4cc4e117727d 100644
--- a/arch/arm64/mm/numa.c
+++ b/drivers/base/arch_numa.c
@@ -13,7 +13,6 @@
#include <linux/module.h>
#include <linux/of.h>
-#include <asm/acpi.h>
#include <asm/sections.h>
struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
@@ -356,11 +355,12 @@ static int __init numa_register_nodes(void)
/* Check that valid nid is set to memblks */
for_each_mem_region(mblk) {
int mblk_nid = memblock_get_region_node(mblk);
+ phys_addr_t start = mblk->base;
+ phys_addr_t end = mblk->base + mblk->size - 1;
if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
- pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
- mblk_nid, mblk->base,
- mblk->base + mblk->size - 1);
+ pr_warn("Warning: invalid memblk node %d [mem %pap-%pap]\n",
+ mblk_nid, &start, &end);
return -EINVAL;
}
}
@@ -428,14 +428,14 @@ out_free_distance:
static int __init dummy_numa_init(void)
{
phys_addr_t start = memblock_start_of_DRAM();
- phys_addr_t end = memblock_end_of_DRAM();
+ phys_addr_t end = memblock_end_of_DRAM() - 1;
int ret;
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
- pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
+ pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end);
- ret = numa_add_memblk(0, start, end);
+ ret = numa_add_memblk(0, start, end + 1);
if (ret) {
pr_err("NUMA init failed\n");
return ret;
@@ -445,16 +445,36 @@ static int __init dummy_numa_init(void)
return 0;
}
+#ifdef CONFIG_ACPI_NUMA
+static int __init arch_acpi_numa_init(void)
+{
+ int ret;
+
+ ret = acpi_numa_init();
+ if (ret) {
+ pr_info("Failed to initialise from firmware\n");
+ return ret;
+ }
+
+ return srat_disabled() ? -EINVAL : 0;
+}
+#else
+static int __init arch_acpi_numa_init(void)
+{
+ return -EOPNOTSUPP;
+}
+#endif
+
/**
- * arm64_numa_init() - Initialize NUMA
+ * arch_numa_init() - Initialize NUMA
*
* Try each configured NUMA initialization method until one succeeds. The
* last fallback is dummy single node config encompassing whole memory.
*/
-void __init arm64_numa_init(void)
+void __init arch_numa_init(void)
{
if (!numa_off) {
- if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
+ if (!acpi_disabled && !numa_init(arch_acpi_numa_init))
return;
if (acpi_disabled && !numa_init(of_numa_init))
return;
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index eef4ffb6122c..f35298425575 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -35,7 +35,7 @@ static const char *const online_type_to_str[] = {
[MMOP_ONLINE_MOVABLE] = "online_movable",
};
-int memhp_online_type_from_str(const char *str)
+int mhp_online_type_from_str(const char *str)
{
int i;
@@ -253,7 +253,7 @@ static int memory_subsys_offline(struct device *dev)
static ssize_t state_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
- const int online_type = memhp_online_type_from_str(buf);
+ const int online_type = mhp_online_type_from_str(buf);
struct memory_block *mem = to_memory_block(dev);
int ret;
@@ -290,20 +290,20 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
}
/*
- * phys_device is a bad name for this. What I really want
- * is a way to differentiate between memory ranges that
- * are part of physical devices that constitute
- * a complete removable unit or fru.
- * i.e. do these ranges belong to the same physical device,
- * s.t. if I offline all of these sections I can then
- * remove the physical device?
+ * Legacy interface that we cannot remove: s390x exposes the storage increment
+ * covered by a memory block, allowing for identifying which memory blocks
+ * comprise a storage increment. Since a memory block spans complete
+ * storage increments nowadays, this interface is basically unused. Other
+ * architectures have never exposed anything other than 0.
*/
static ssize_t phys_device_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct memory_block *mem = to_memory_block(dev);
+ unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
- return sysfs_emit(buf, "%d\n", mem->phys_device);
+ return sysfs_emit(buf, "%d\n",
+ arch_get_memory_phys_device(start_pfn));
}
#ifdef CONFIG_MEMORY_HOTREMOVE
@@ -387,19 +387,19 @@ static ssize_t auto_online_blocks_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sysfs_emit(buf, "%s\n",
- online_type_to_str[memhp_default_online_type]);
+ online_type_to_str[mhp_default_online_type]);
}
static ssize_t auto_online_blocks_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- const int online_type = memhp_online_type_from_str(buf);
+ const int online_type = mhp_online_type_from_str(buf);
if (online_type < 0)
return -EINVAL;
- memhp_default_online_type = online_type;
+ mhp_default_online_type = online_type;
return count;
}
@@ -488,11 +488,7 @@ static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
#endif
-/*
- * Note that phys_device is optional. It is here to allow for
- * differentiation between which *physical* devices each
- * section belongs to...
- */
+/* See phys_device_show(). */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
return 0;
@@ -574,7 +570,6 @@ int register_memory(struct memory_block *memory)
static int init_memory_block(unsigned long block_id, unsigned long state)
{
struct memory_block *mem;
- unsigned long start_pfn;
int ret = 0;
mem = find_memory_block_by_id(block_id);
@@ -588,8 +583,6 @@ static int init_memory_block(unsigned long block_id, unsigned long state)
mem->start_section_nr = block_id * sections_per_block;
mem->state = state;
- start_pfn = section_nr_to_pfn(mem->start_section_nr);
- mem->phys_device = arch_get_memory_phys_device(start_pfn);
mem->nid = NUMA_NO_NODE;
ret = register_memory(mem);
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index a46a7e30881b..18b82427d0cb 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev)
static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
__releases(&dev->power.lock) __acquires(&dev->power.lock)
{
- int retval, idx;
bool use_links = dev->power.links_count > 0;
+ bool get = false;
+ int retval, idx;
+ bool put;
if (dev->power.irq_safe) {
spin_unlock(&dev->power.lock);
+ } else if (!use_links) {
+ spin_unlock_irq(&dev->power.lock);
} else {
+ get = dev->power.runtime_status == RPM_RESUMING;
+
spin_unlock_irq(&dev->power.lock);
- /*
- * Resume suppliers if necessary.
- *
- * The device's runtime PM status cannot change until this
- * routine returns, so it is safe to read the status outside of
- * the lock.
- */
- if (use_links && dev->power.runtime_status == RPM_RESUMING) {
+ /* Resume suppliers if necessary. */
+ if (get) {
idx = device_links_read_lock();
retval = rpm_get_suppliers(dev);
@@ -355,24 +355,36 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
if (dev->power.irq_safe) {
spin_lock(&dev->power.lock);
- } else {
- /*
- * If the device is suspending and the callback has returned
- * success, drop the usage counters of the suppliers that have
- * been reference counted on its resume.
- *
- * Do that if resume fails too.
- */
- if (use_links
- && ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
- || (dev->power.runtime_status == RPM_RESUMING && retval))) {
- idx = device_links_read_lock();
+ return retval;
+ }
- fail:
- rpm_put_suppliers(dev);
+ spin_lock_irq(&dev->power.lock);
- device_links_read_unlock(idx);
- }
+ if (!use_links)
+ return retval;
+
+ /*
+ * If the device is suspending and the callback has returned success,
+ * drop the usage counters of the suppliers that have been reference
+ * counted on its resume.
+ *
+ * Do that if the resume fails too.
+ */
+ put = dev->power.runtime_status == RPM_SUSPENDING && !retval;
+ if (put)
+ __update_runtime_status(dev, RPM_SUSPENDED);
+ else
+ put = get && retval;
+
+ if (put) {
+ spin_unlock_irq(&dev->power.lock);
+
+ idx = device_links_read_lock();
+
+fail:
+ rpm_put_suppliers(dev);
+
+ device_links_read_unlock(idx);
spin_lock_irq(&dev->power.lock);
}
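The rewrite replaces the nested status checks with two booleans: get is sampled before the callback runs, put is derived afterwards. A small table of the decisions, assuming the runtime PM status does not change across the callback (in the kernel this is guaranteed under dev->power.lock):

#include <stdio.h>

enum status { RPM_SUSPENDING, RPM_RESUMING, RPM_OTHER };

int main(void)
{
	static const char *name[] = { "SUSPENDING", "RESUMING ", "OTHER    " };

	for (int s = RPM_SUSPENDING; s <= RPM_OTHER; s++)
		for (int retval = 0; retval <= 1; retval++) {
			int get = (s == RPM_RESUMING);
			int put = (s == RPM_SUSPENDING && !retval) ||
				  (get && retval);
			printf("status=%s retval=%d -> get=%d put=%d\n",
			       name[s], retval, get, put);
		}
	return 0;
}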
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 578fc034db3f..a370cde3ddd4 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -663,7 +663,7 @@ static inline int is_loop_device(struct file *file)
{
struct inode *i = file->f_mapping->host;
- return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
+ return i && S_ISBLK(i->i_mode) && imajor(i) == LOOP_MAJOR;
}
static int loop_validate_file(struct file *file, struct block_device *bdev)
@@ -1212,6 +1212,9 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
goto out_unlock;
}
+ if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
+ blk_queue_write_cache(lo->lo_queue, false, false);
+
/* freeze request queue during the transition */
blk_mq_freeze_queue(lo->lo_queue);
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 8b9622eb0a21..4ff71b579cfc 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -78,8 +78,7 @@ struct link_dead_args {
#define NBD_RT_HAS_PID_FILE 3
#define NBD_RT_HAS_CONFIG_REF 4
#define NBD_RT_BOUND 5
-#define NBD_RT_DESTROY_ON_DISCONNECT 6
-#define NBD_RT_DISCONNECT_ON_CLOSE 7
+#define NBD_RT_DISCONNECT_ON_CLOSE 6
#define NBD_DESTROY_ON_DISCONNECT 0
#define NBD_DISCONNECT_REQUESTED 1
@@ -1904,12 +1903,21 @@ again:
if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
- set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
- &config->runtime_flags);
- set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
- put_dev = true;
+ /*
+ * We have 1 ref to keep the device around, and then 1
+ * ref for our current operation here, which will be
+ * inherited by the config. If we already have
+ * DESTROY_ON_DISCONNECT set then we know we don't have
+ * that extra ref already held so we don't need the
+ * put_dev.
+ */
+ if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+ &nbd->flags))
+ put_dev = true;
} else {
- clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
+ if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+ &nbd->flags))
+ refcount_inc(&nbd->refs);
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
@@ -2080,15 +2088,13 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
- if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
- &config->runtime_flags))
+ if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+ &nbd->flags))
put_dev = true;
- set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
} else {
- if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
- &config->runtime_flags))
+ if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+ &nbd->flags))
refcount_inc(&nbd->refs);
- clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
}
if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
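Both nbd hunks enforce the same invariant: the extra device reference moves only on actual 0 -> 1 / 1 -> 0 transitions of NBD_DESTROY_ON_DISCONNECT, so repeated configure/reconfigure calls cannot double-put or leak a reference. A C11 sketch of that rule (plain atomics stand in for test_and_set_bit()/test_and_clear_bit(); the real code defers the drop via put_dev and uses refcount_t):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int destroy_on_disconnect; /* 0 or 1 */
static atomic_int refs = 2;              /* creation ref + keep-alive ref */

static void set_destroy_on_disconnect(int enable)
{
	if (enable) {
		if (!atomic_exchange(&destroy_on_disconnect, 1))
			atomic_fetch_sub(&refs, 1); /* 0->1: drop keep-alive */
	} else {
		if (atomic_exchange(&destroy_on_disconnect, 0))
			atomic_fetch_add(&refs, 1); /* 1->0: retake it */
	}
}

int main(void)
{
	set_destroy_on_disconnect(1);
	set_destroy_on_disconnect(1); /* no second drop */
	printf("refs after double enable: %d\n", atomic_load(&refs));
	set_destroy_on_disconnect(0);
	set_destroy_on_disconnect(0); /* no second retake */
	printf("refs after double disable: %d\n", atomic_load(&refs));
	return 0;
}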
diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c
index 63f549889f87..5ac1881396af 100644
--- a/drivers/block/rsxx/core.c
+++ b/drivers/block/rsxx/core.c
@@ -165,15 +165,17 @@ static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
{
struct rsxx_cardinfo *card = file_inode(fp)->i_private;
char *buf;
- ssize_t st;
+ int st;
buf = kzalloc(cnt, GFP_KERNEL);
if (!buf)
return -ENOMEM;
st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1);
- if (!st)
- st = copy_to_user(ubuf, buf, cnt);
+ if (!st) {
+ if (copy_to_user(ubuf, buf, cnt))
+ st = -EFAULT;
+ }
kfree(buf);
if (st)
return st;
diff --git a/drivers/block/rsxx/rsxx_priv.h b/drivers/block/rsxx/rsxx_priv.h
index 4861669e5786..6147977994ff 100644
--- a/drivers/block/rsxx/rsxx_priv.h
+++ b/drivers/block/rsxx/rsxx_priv.h
@@ -11,7 +11,6 @@
#ifndef __RSXX_PRIV_H__
#define __RSXX_PRIV_H__
-#include <linux/version.h>
#include <linux/semaphore.h>
#include <linux/fs.h>
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index da16121140ca..1cdf09ff67b6 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -1326,9 +1326,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
pages[i]->page,
seg[i].nsec << 9,
seg[i].offset) == 0)) {
-
- int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
- bio = bio_alloc(GFP_KERNEL, nr_iovecs);
+ bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i));
if (unlikely(bio == NULL))
goto fail_put_bio;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d7018543842e..a711a2e2a794 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1081,7 +1081,7 @@ static ssize_t mm_stat_show(struct device *dev,
zram->limit_pages << PAGE_SHIFT,
max_used << PAGE_SHIFT,
(u64)atomic64_read(&zram->stats.same_pages),
- pool_stats.pages_compacted,
+ atomic_long_read(&pool_stats.pages_compacted),
(u64)atomic64_read(&zram->stats.huge_pages),
(u64)atomic64_read(&zram->stats.huge_pages_since));
up_read(&zram->init_lock);
diff --git a/drivers/char/hw_random/pseries-rng.c b/drivers/char/hw_random/pseries-rng.c
index 8038a8a9fb58..f4949b689bd5 100644
--- a/drivers/char/hw_random/pseries-rng.c
+++ b/drivers/char/hw_random/pseries-rng.c
@@ -54,10 +54,9 @@ static int pseries_rng_probe(struct vio_dev *dev,
return hwrng_register(&pseries_rng);
}
-static int pseries_rng_remove(struct vio_dev *dev)
+static void pseries_rng_remove(struct vio_dev *dev)
{
hwrng_unregister(&pseries_rng);
- return 0;
}
static const struct vio_device_id pseries_rng_driver_ids[] = {
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index 19e23fcc6bc8..ddaeceb7e109 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -278,8 +278,6 @@ static void tpm_devs_release(struct device *dev)
{
struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
- dump_stack();
-
/* release the master device reference */
put_device(&chip->dev);
}
diff --git a/drivers/char/tpm/tpm_ibmvtpm.c b/drivers/char/tpm/tpm_ibmvtpm.c
index 994385bf37c0..903604769de9 100644
--- a/drivers/char/tpm/tpm_ibmvtpm.c
+++ b/drivers/char/tpm/tpm_ibmvtpm.c
@@ -343,7 +343,7 @@ static int ibmvtpm_crq_send_init_complete(struct ibmvtpm_dev *ibmvtpm)
*
* Return: Always 0.
*/
-static int tpm_ibmvtpm_remove(struct vio_dev *vdev)
+static void tpm_ibmvtpm_remove(struct vio_dev *vdev)
{
struct tpm_chip *chip = dev_get_drvdata(&vdev->dev);
struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
@@ -372,8 +372,6 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev)
kfree(ibmvtpm);
/* For tpm_ibmvtpm_get_desired_dma */
dev_set_drvdata(&vdev->dev, NULL);
-
- return 0;
}
/**
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 431919d5f48a..a2e0395cbe61 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -707,12 +707,22 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
const char *desc = "attempting to generate an interrupt";
u32 cap2;
cap_t cap;
+ int ret;
+ /* TPM 2.0 */
if (chip->flags & TPM_CHIP_FLAG_TPM2)
return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
- else
- return tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc,
- 0);
+
+ /* TPM 1.2 */
+ ret = request_locality(chip, 0);
+ if (ret < 0)
+ return ret;
+
+ ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
+
+ release_locality(chip, 0);
+
+ return ret;
}
/* Register the IRQ and issue a command that will cause an interrupt. If an
@@ -1019,11 +1029,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
init_waitqueue_head(&priv->read_queue);
init_waitqueue_head(&priv->int_queue);
if (irq != -1) {
- /* Before doing irq testing issue a command to the TPM in polling mode
+ /*
+ * Before doing irq testing issue a command to the TPM in polling mode
* to make sure it works. May as well use that command to set the
* proper timeouts for the driver.
*/
- if (tpm_get_timeouts(chip)) {
+
+ rc = request_locality(chip, 0);
+ if (rc < 0)
+ goto out_err;
+
+ rc = tpm_get_timeouts(chip);
+
+ release_locality(chip, 0);
+
+ if (rc) {
dev_err(dev, "Could not get TPM timeouts and durations\n");
rc = -ENODEV;
goto out_err;
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 4b47170dbe2e..a588d56502d4 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -369,6 +369,13 @@ config COMMON_CLK_FIXED_MMIO
help
Support for Memory Mapped IO Fixed clocks
+config COMMON_CLK_K210
+ bool "Clock driver for the Canaan Kendryte K210 SoC"
+ depends on OF && RISCV && SOC_CANAAN
+ default SOC_CANAAN
+ help
+ Support for the Canaan Kendryte K210 RISC-V SoC clocks.
+
source "drivers/clk/actions/Kconfig"
source "drivers/clk/analogbits/Kconfig"
source "drivers/clk/baikal-t1/Kconfig"
diff --git a/drivers/clk/Makefile b/drivers/clk/Makefile
index 71c1fa24b5f0..b22ae4f81e0b 100644
--- a/drivers/clk/Makefile
+++ b/drivers/clk/Makefile
@@ -36,6 +36,7 @@ obj-$(CONFIG_COMMON_CLK_ASPEED) += clk-aspeed.o
obj-$(CONFIG_MACH_ASPEED_G6) += clk-ast2600.o
obj-$(CONFIG_ARCH_HIGHBANK) += clk-highbank.o
obj-$(CONFIG_CLK_HSDK) += clk-hsdk-pll.o
+obj-$(CONFIG_COMMON_CLK_K210) += clk-k210.o
obj-$(CONFIG_COMMON_CLK_LOCHNAGAR) += clk-lochnagar.o
obj-$(CONFIG_COMMON_CLK_MAX77686) += clk-max77686.o
obj-$(CONFIG_COMMON_CLK_MAX9485) += clk-max9485.o
diff --git a/drivers/clk/clk-k210.c b/drivers/clk/clk-k210.c
new file mode 100644
index 000000000000..6c84abf5b2e3
--- /dev/null
+++ b/drivers/clk/clk-k210.c
@@ -0,0 +1,1007 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ */
+#define pr_fmt(fmt) "k210-clk: " fmt
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_clk.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/clk-provider.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <soc/canaan/k210-sysctl.h>
+
+#include <dt-bindings/clock/k210-clk.h>
+
+struct k210_sysclk;
+
+struct k210_clk {
+ int id;
+ struct k210_sysclk *ksc;
+ struct clk_hw hw;
+};
+
+struct k210_clk_cfg {
+ const char *name;
+ u8 gate_reg;
+ u8 gate_bit;
+ u8 div_reg;
+ u8 div_shift;
+ u8 div_width;
+ u8 div_type;
+ u8 mux_reg;
+ u8 mux_bit;
+};
+
+enum k210_clk_div_type {
+ K210_DIV_NONE,
+ K210_DIV_ONE_BASED,
+ K210_DIV_DOUBLE_ONE_BASED,
+ K210_DIV_POWER_OF_TWO,
+};
+
+#define K210_GATE(_reg, _bit) \
+ .gate_reg = (_reg), \
+ .gate_bit = (_bit)
+
+#define K210_DIV(_reg, _shift, _width, _type) \
+ .div_reg = (_reg), \
+ .div_shift = (_shift), \
+ .div_width = (_width), \
+ .div_type = (_type)
+
+#define K210_MUX(_reg, _bit) \
+ .mux_reg = (_reg), \
+ .mux_bit = (_bit)
+
+static struct k210_clk_cfg k210_clk_cfgs[K210_NUM_CLKS] = {
+ /* Gated clocks, no mux, no divider */
+ [K210_CLK_CPU] = {
+ .name = "cpu",
+ K210_GATE(K210_SYSCTL_EN_CENT, 0)
+ },
+ [K210_CLK_DMA] = {
+ .name = "dma",
+ K210_GATE(K210_SYSCTL_EN_PERI, 1)
+ },
+ [K210_CLK_FFT] = {
+ .name = "fft",
+ K210_GATE(K210_SYSCTL_EN_PERI, 4)
+ },
+ [K210_CLK_GPIO] = {
+ .name = "gpio",
+ K210_GATE(K210_SYSCTL_EN_PERI, 5)
+ },
+ [K210_CLK_UART1] = {
+ .name = "uart1",
+ K210_GATE(K210_SYSCTL_EN_PERI, 16)
+ },
+ [K210_CLK_UART2] = {
+ .name = "uart2",
+ K210_GATE(K210_SYSCTL_EN_PERI, 17)
+ },
+ [K210_CLK_UART3] = {
+ .name = "uart3",
+ K210_GATE(K210_SYSCTL_EN_PERI, 18)
+ },
+ [K210_CLK_FPIOA] = {
+ .name = "fpioa",
+ K210_GATE(K210_SYSCTL_EN_PERI, 20)
+ },
+ [K210_CLK_SHA] = {
+ .name = "sha",
+ K210_GATE(K210_SYSCTL_EN_PERI, 26)
+ },
+ [K210_CLK_AES] = {
+ .name = "aes",
+ K210_GATE(K210_SYSCTL_EN_PERI, 19)
+ },
+ [K210_CLK_OTP] = {
+ .name = "otp",
+ K210_GATE(K210_SYSCTL_EN_PERI, 27)
+ },
+ [K210_CLK_RTC] = {
+ .name = "rtc",
+ K210_GATE(K210_SYSCTL_EN_PERI, 29)
+ },
+
+ /* Gated divider clocks */
+ [K210_CLK_SRAM0] = {
+ .name = "sram0",
+ K210_GATE(K210_SYSCTL_EN_CENT, 1),
+ K210_DIV(K210_SYSCTL_THR0, 0, 4, K210_DIV_ONE_BASED)
+ },
+ [K210_CLK_SRAM1] = {
+ .name = "sram1",
+ K210_GATE(K210_SYSCTL_EN_CENT, 2),
+ K210_DIV(K210_SYSCTL_THR0, 4, 4, K210_DIV_ONE_BASED)
+ },
+ [K210_CLK_ROM] = {
+ .name = "rom",
+ K210_GATE(K210_SYSCTL_EN_PERI, 0),
+ K210_DIV(K210_SYSCTL_THR0, 16, 4, K210_DIV_ONE_BASED)
+ },
+ [K210_CLK_DVP] = {
+ .name = "dvp",
+ K210_GATE(K210_SYSCTL_EN_PERI, 3),
+ K210_DIV(K210_SYSCTL_THR0, 12, 4, K210_DIV_ONE_BASED)
+ },
+ [K210_CLK_APB0] = {
+ .name = "apb0",
+ K210_GATE(K210_SYSCTL_EN_CENT, 3),
+ K210_DIV(K210_SYSCTL_SEL0, 3, 3, K210_DIV_ONE_BASED)
+ },
+ [K210_CLK_APB1] = {
+ .name = "apb1",
+ K210_GATE(K210_SYSCTL_EN_CENT, 4),
+ K210_DIV(K210_SYSCTL_SEL0, 6, 3, K210_DIV_ONE_BASED)
+ },
+ [K210_CLK_APB2] = {
+ .name = "apb2",
+ K210_GATE(K210_SYSCTL_EN_CENT, 5),
+ K210_DIV(K210_SYSCTL_SEL0, 9, 3, K210_DIV_ONE_BASED)
+ },
+ [K210_CLK_AI] = {
+ .name = "ai",
+ K210_GATE(K210_SYSCTL_EN_PERI, 2),
+ K210_DIV(K210_SYSCTL_THR0, 8, 4, K210_DIV_ONE_BASED)
+ },
+ [K210_CLK_SPI0] = {
+ .name = "spi0",
+ K210_GATE(K210_SYSCTL_EN_PERI, 6),
+ K210_DIV(K210_SYSCTL_THR1, 0, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_SPI1] = {
+ .name = "spi1",
+ K210_GATE(K210_SYSCTL_EN_PERI, 7),
+ K210_DIV(K210_SYSCTL_THR1, 8, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_SPI2] = {
+ .name = "spi2",
+ K210_GATE(K210_SYSCTL_EN_PERI, 8),
+ K210_DIV(K210_SYSCTL_THR1, 16, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_I2C0] = {
+ .name = "i2c0",
+ K210_GATE(K210_SYSCTL_EN_PERI, 13),
+ K210_DIV(K210_SYSCTL_THR5, 8, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_I2C1] = {
+ .name = "i2c1",
+ K210_GATE(K210_SYSCTL_EN_PERI, 14),
+ K210_DIV(K210_SYSCTL_THR5, 16, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_I2C2] = {
+ .name = "i2c2",
+ K210_GATE(K210_SYSCTL_EN_PERI, 15),
+ K210_DIV(K210_SYSCTL_THR5, 24, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_WDT0] = {
+ .name = "wdt0",
+ K210_GATE(K210_SYSCTL_EN_PERI, 24),
+ K210_DIV(K210_SYSCTL_THR6, 0, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_WDT1] = {
+ .name = "wdt1",
+ K210_GATE(K210_SYSCTL_EN_PERI, 25),
+ K210_DIV(K210_SYSCTL_THR6, 8, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_I2S0] = {
+ .name = "i2s0",
+ K210_GATE(K210_SYSCTL_EN_PERI, 10),
+ K210_DIV(K210_SYSCTL_THR3, 0, 16, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_I2S1] = {
+ .name = "i2s1",
+ K210_GATE(K210_SYSCTL_EN_PERI, 11),
+ K210_DIV(K210_SYSCTL_THR3, 16, 16, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_I2S2] = {
+ .name = "i2s2",
+ K210_GATE(K210_SYSCTL_EN_PERI, 12),
+ K210_DIV(K210_SYSCTL_THR4, 0, 16, K210_DIV_DOUBLE_ONE_BASED)
+ },
+
+ /* Divider clocks, no gate, no mux */
+ [K210_CLK_I2S0_M] = {
+ .name = "i2s0_m",
+ K210_DIV(K210_SYSCTL_THR4, 16, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_I2S1_M] = {
+ .name = "i2s1_m",
+ K210_DIV(K210_SYSCTL_THR4, 24, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+ [K210_CLK_I2S2_M] = {
+ .name = "i2s2_m",
+ K210_DIV(K210_SYSCTL_THR4, 0, 8, K210_DIV_DOUBLE_ONE_BASED)
+ },
+
+ /* Muxed gated divider clocks */
+ [K210_CLK_SPI3] = {
+ .name = "spi3",
+ K210_GATE(K210_SYSCTL_EN_PERI, 9),
+ K210_DIV(K210_SYSCTL_THR1, 24, 8, K210_DIV_DOUBLE_ONE_BASED),
+ K210_MUX(K210_SYSCTL_SEL0, 12)
+ },
+ [K210_CLK_TIMER0] = {
+ .name = "timer0",
+ K210_GATE(K210_SYSCTL_EN_PERI, 21),
+ K210_DIV(K210_SYSCTL_THR2, 0, 8, K210_DIV_DOUBLE_ONE_BASED),
+ K210_MUX(K210_SYSCTL_SEL0, 13)
+ },
+ [K210_CLK_TIMER1] = {
+ .name = "timer1",
+ K210_GATE(K210_SYSCTL_EN_PERI, 22),
+ K210_DIV(K210_SYSCTL_THR2, 8, 8, K210_DIV_DOUBLE_ONE_BASED),
+ K210_MUX(K210_SYSCTL_SEL0, 14)
+ },
+ [K210_CLK_TIMER2] = {
+ .name = "timer2",
+ K210_GATE(K210_SYSCTL_EN_PERI, 23),
+ K210_DIV(K210_SYSCTL_THR2, 16, 8, K210_DIV_DOUBLE_ONE_BASED),
+ K210_MUX(K210_SYSCTL_SEL0, 15)
+ },
+};
+
+/*
+ * PLL control register bits.
+ */
+#define K210_PLL_CLKR GENMASK(3, 0)
+#define K210_PLL_CLKF GENMASK(9, 4)
+#define K210_PLL_CLKOD GENMASK(13, 10)
+#define K210_PLL_BWADJ GENMASK(19, 14)
+#define K210_PLL_RESET (1 << 20)
+#define K210_PLL_PWRD (1 << 21)
+#define K210_PLL_INTFB (1 << 22)
+#define K210_PLL_BYPASS (1 << 23)
+#define K210_PLL_TEST (1 << 24)
+#define K210_PLL_EN (1 << 25)
+#define K210_PLL_SEL GENMASK(27, 26) /* PLL2 only */
+
+/*
+ * PLL lock register bits.
+ */
+#define K210_PLL_LOCK 0
+#define K210_PLL_CLEAR_SLIP 2
+#define K210_PLL_TEST_OUT 3
+
+/*
+ * Clock selector register bits.
+ */
+#define K210_ACLK_SEL BIT(0)
+#define K210_ACLK_DIV GENMASK(2, 1)
+
+/*
+ * PLLs.
+ */
+enum k210_pll_id {
+ K210_PLL0, K210_PLL1, K210_PLL2, K210_PLL_NUM
+};
+
+struct k210_pll {
+ enum k210_pll_id id;
+ struct k210_sysclk *ksc;
+ void __iomem *base;
+ void __iomem *reg;
+ void __iomem *lock;
+ u8 lock_shift;
+ u8 lock_width;
+ struct clk_hw hw;
+};
+#define to_k210_pll(_hw) container_of(_hw, struct k210_pll, hw)
+
+/*
+ * PLLs configuration: by default PLL0 runs at 780 MHz and PLL1 at 299 MHz.
+ * The first 2 SRAM banks depend on ACLK/CPU clock which is by default PLL0
+ * rate divided by 2. Set PLL1 to 390 MHz so that the third SRAM bank has the
+ * same clock as the first 2.
+ */
+struct k210_pll_cfg {
+ u32 reg;
+ u8 lock_shift;
+ u8 lock_width;
+ u32 r;
+ u32 f;
+ u32 od;
+ u32 bwadj;
+};
+
+static struct k210_pll_cfg k210_plls_cfg[] = {
+ { K210_SYSCTL_PLL0, 0, 2, 0, 59, 1, 59 }, /* 780 MHz */
+ { K210_SYSCTL_PLL1, 8, 1, 0, 59, 3, 59 }, /* 390 MHz */
+ { K210_SYSCTL_PLL2, 16, 1, 0, 22, 1, 22 }, /* 299 MHz */
+};
+
+/**
+ * struct k210_sysclk - sysclk driver data
+ * @regs: system controller registers start address
+ * @clk_lock: clock setting spinlock
+ * @plls: SoC PLLs descriptors
+ * @aclk: ACLK clock
+ * @clks: All other clocks
+ */
+struct k210_sysclk {
+ void __iomem *regs;
+ spinlock_t clk_lock;
+ struct k210_pll plls[K210_PLL_NUM];
+ struct clk_hw aclk;
+ struct k210_clk clks[K210_NUM_CLKS];
+};
+
+#define to_k210_sysclk(_hw) container_of(_hw, struct k210_sysclk, aclk)
+
+/*
+ * Set ACLK parent selector: 0 for IN0, 1 for PLL0.
+ */
+static void k210_aclk_set_selector(void __iomem *regs, u8 sel)
+{
+ u32 reg = readl(regs + K210_SYSCTL_SEL0);
+
+ if (sel)
+ reg |= K210_ACLK_SEL;
+ else
+ reg &= ~K210_ACLK_SEL;
+ writel(reg, regs + K210_SYSCTL_SEL0);
+}
+
+static void k210_init_pll(void __iomem *regs, enum k210_pll_id pllid,
+ struct k210_pll *pll)
+{
+ pll->id = pllid;
+ pll->reg = regs + k210_plls_cfg[pllid].reg;
+ pll->lock = regs + K210_SYSCTL_PLL_LOCK;
+ pll->lock_shift = k210_plls_cfg[pllid].lock_shift;
+ pll->lock_width = k210_plls_cfg[pllid].lock_width;
+}
+
+static void k210_pll_wait_for_lock(struct k210_pll *pll)
+{
+ u32 reg, mask = GENMASK(pll->lock_shift + pll->lock_width - 1,
+ pll->lock_shift);
+
+ while (true) {
+ reg = readl(pll->lock);
+ if ((reg & mask) == mask)
+ break;
+
+ reg |= BIT(pll->lock_shift + K210_PLL_CLEAR_SLIP);
+ writel(reg, pll->lock);
+ }
+}
+
+static bool k210_pll_hw_is_enabled(struct k210_pll *pll)
+{
+ u32 reg = readl(pll->reg);
+ u32 mask = K210_PLL_PWRD | K210_PLL_EN;
+
+ if (reg & K210_PLL_RESET)
+ return false;
+
+ return (reg & mask) == mask;
+}
+
+static void k210_pll_enable_hw(void __iomem *regs, struct k210_pll *pll)
+{
+ struct k210_pll_cfg *pll_cfg = &k210_plls_cfg[pll->id];
+ u32 reg;
+
+ if (k210_pll_hw_is_enabled(pll))
+ return;
+
+ /*
+ * For PLL0, we need to re-parent ACLK to IN0 to keep the CPU cores and
+ * SRAM running.
+ */
+ if (pll->id == K210_PLL0)
+ k210_aclk_set_selector(regs, 0);
+
+ /* Set PLL factors */
+ reg = readl(pll->reg);
+ reg &= ~GENMASK(19, 0);
+ reg |= FIELD_PREP(K210_PLL_CLKR, pll_cfg->r);
+ reg |= FIELD_PREP(K210_PLL_CLKF, pll_cfg->f);
+ reg |= FIELD_PREP(K210_PLL_CLKOD, pll_cfg->od);
+ reg |= FIELD_PREP(K210_PLL_BWADJ, pll_cfg->bwadj);
+ reg |= K210_PLL_PWRD;
+ writel(reg, pll->reg);
+
+ /*
+ * Reset the PLL: ensure reset is low before asserting it.
+ * The magic NOPs come from the Kendryte reference SDK.
+ */
+ reg &= ~K210_PLL_RESET;
+ writel(reg, pll->reg);
+ reg |= K210_PLL_RESET;
+ writel(reg, pll->reg);
+ nop();
+ nop();
+ reg &= ~K210_PLL_RESET;
+ writel(reg, pll->reg);
+
+ k210_pll_wait_for_lock(pll);
+
+ reg &= ~K210_PLL_BYPASS;
+ reg |= K210_PLL_EN;
+ writel(reg, pll->reg);
+
+ if (pll->id == K210_PLL0)
+ k210_aclk_set_selector(regs, 1);
+}
+
+static int k210_pll_enable(struct clk_hw *hw)
+{
+ struct k210_pll *pll = to_k210_pll(hw);
+ struct k210_sysclk *ksc = pll->ksc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ksc->clk_lock, flags);
+
+ k210_pll_enable_hw(ksc->regs, pll);
+
+ spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+ return 0;
+}
+
+static void k210_pll_disable(struct clk_hw *hw)
+{
+ struct k210_pll *pll = to_k210_pll(hw);
+ struct k210_sysclk *ksc = pll->ksc;
+ unsigned long flags;
+ u32 reg;
+
+ /*
+ * Bypassing before powering off is important so child clocks do not
+ * stop working. This is especially important for pll0, the indirect
+ * parent of the cpu clock.
+ */
+ spin_lock_irqsave(&ksc->clk_lock, flags);
+ reg = readl(pll->reg);
+ reg |= K210_PLL_BYPASS;
+ writel(reg, pll->reg);
+
+ reg &= ~K210_PLL_PWRD;
+ reg &= ~K210_PLL_EN;
+ writel(reg, pll->reg);
+ spin_unlock_irqrestore(&ksc->clk_lock, flags);
+}
+
+static int k210_pll_is_enabled(struct clk_hw *hw)
+{
+ return k210_pll_hw_is_enabled(to_k210_pll(hw));
+}
+
+static unsigned long k210_pll_get_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct k210_pll *pll = to_k210_pll(hw);
+ u32 reg = readl(pll->reg);
+ u32 r, f, od;
+
+ if (reg & K210_PLL_BYPASS)
+ return parent_rate;
+
+ if (!(reg & K210_PLL_PWRD))
+ return 0;
+
+ r = FIELD_GET(K210_PLL_CLKR, reg) + 1;
+ f = FIELD_GET(K210_PLL_CLKF, reg) + 1;
+ od = FIELD_GET(K210_PLL_CLKOD, reg) + 1;
+
+ return (u64)parent_rate * f / (r * od);
+}
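As a sanity check on the k210_plls_cfg table above, this rate formula (each field is stored minus one) reproduces the commented target rates, assuming the usual 26 MHz IN0 oscillator on K210 boards, which is a board-level assumption rather than part of this driver:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	const struct { const char *n; unsigned r, f, od; } cfg[] = {
		{ "pll0", 0, 59, 1 },	/* expect 780 MHz */
		{ "pll1", 0, 59, 3 },	/* expect 390 MHz */
		{ "pll2", 0, 22, 1 },	/* expect 299 MHz */
	};
	uint64_t in0 = 26000000;

	for (int i = 0; i < 3; i++)
		printf("%s: %llu Hz\n", cfg[i].n,
		       (unsigned long long)(in0 * (cfg[i].f + 1) /
			((uint64_t)(cfg[i].r + 1) * (cfg[i].od + 1))));
	return 0;
}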
+
+static const struct clk_ops k210_pll_ops = {
+ .enable = k210_pll_enable,
+ .disable = k210_pll_disable,
+ .is_enabled = k210_pll_is_enabled,
+ .recalc_rate = k210_pll_get_rate,
+};
+
+static int k210_pll2_set_parent(struct clk_hw *hw, u8 index)
+{
+ struct k210_pll *pll = to_k210_pll(hw);
+ struct k210_sysclk *ksc = pll->ksc;
+ unsigned long flags;
+ u32 reg;
+
+ spin_lock_irqsave(&ksc->clk_lock, flags);
+
+ reg = readl(pll->reg);
+ reg &= ~K210_PLL_SEL;
+ reg |= FIELD_PREP(K210_PLL_SEL, index);
+ writel(reg, pll->reg);
+
+ spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+ return 0;
+}
+
+static u8 k210_pll2_get_parent(struct clk_hw *hw)
+{
+ struct k210_pll *pll = to_k210_pll(hw);
+ u32 reg = readl(pll->reg);
+
+ return FIELD_GET(K210_PLL_SEL, reg);
+}
+
+static const struct clk_ops k210_pll2_ops = {
+ .enable = k210_pll_enable,
+ .disable = k210_pll_disable,
+ .is_enabled = k210_pll_is_enabled,
+ .recalc_rate = k210_pll_get_rate,
+ .set_parent = k210_pll2_set_parent,
+ .get_parent = k210_pll2_get_parent,
+};
+
+static int __init k210_register_pll(struct device_node *np,
+ struct k210_sysclk *ksc,
+ enum k210_pll_id pllid, const char *name,
+ int num_parents, const struct clk_ops *ops)
+{
+ struct k210_pll *pll = &ksc->plls[pllid];
+ struct clk_init_data init = {};
+ const struct clk_parent_data parent_data[] = {
+ { /* .index = 0 for in0 */ },
+ { .hw = &ksc->plls[K210_PLL0].hw },
+ { .hw = &ksc->plls[K210_PLL1].hw },
+ };
+
+ init.name = name;
+ init.parent_data = parent_data;
+ init.num_parents = num_parents;
+ init.ops = ops;
+
+ pll->hw.init = &init;
+ pll->ksc = ksc;
+
+ return of_clk_hw_register(np, &pll->hw);
+}
+
+static int __init k210_register_plls(struct device_node *np,
+ struct k210_sysclk *ksc)
+{
+ int i, ret;
+
+ for (i = 0; i < K210_PLL_NUM; i++)
+ k210_init_pll(ksc->regs, i, &ksc->plls[i]);
+
+ /* PLL0 and PLL1 only have IN0 as parent */
+ ret = k210_register_pll(np, ksc, K210_PLL0, "pll0", 1, &k210_pll_ops);
+ if (ret) {
+ pr_err("%pOFP: register PLL0 failed\n", np);
+ return ret;
+ }
+ ret = k210_register_pll(np, ksc, K210_PLL1, "pll1", 1, &k210_pll_ops);
+ if (ret) {
+ pr_err("%pOFP: register PLL1 failed\n", np);
+ return ret;
+ }
+
+ /* PLL2 has IN0, PLL0 and PLL1 as parents */
+ ret = k210_register_pll(np, ksc, K210_PLL2, "pll2", 3, &k210_pll2_ops);
+ if (ret) {
+ pr_err("%pOFP: register PLL2 failed\n", np);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int k210_aclk_set_parent(struct clk_hw *hw, u8 index)
+{
+ struct k210_sysclk *ksc = to_k210_sysclk(hw);
+ unsigned long flags;
+
+ spin_lock_irqsave(&ksc->clk_lock, flags);
+
+ k210_aclk_set_selector(ksc->regs, index);
+
+ spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+ return 0;
+}
+
+static u8 k210_aclk_get_parent(struct clk_hw *hw)
+{
+ struct k210_sysclk *ksc = to_k210_sysclk(hw);
+ u32 sel;
+
+ sel = readl(ksc->regs + K210_SYSCTL_SEL0) & K210_ACLK_SEL;
+
+ return sel ? 1 : 0;
+}
+
+static unsigned long k210_aclk_get_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct k210_sysclk *ksc = to_k210_sysclk(hw);
+ u32 reg = readl(ksc->regs + K210_SYSCTL_SEL0);
+ unsigned int shift;
+
+ if (!(reg & K210_ACLK_SEL))
+ return parent_rate;
+
+ shift = FIELD_GET(K210_ACLK_DIV, reg);
+
+ return parent_rate / (2UL << shift);
+}
+
+static const struct clk_ops k210_aclk_ops = {
+ .set_parent = k210_aclk_set_parent,
+ .get_parent = k210_aclk_get_parent,
+ .recalc_rate = k210_aclk_get_rate,
+};
+
+/*
+ * ACLK has IN0 and PLL0 as parents.
+ */
+static int __init k210_register_aclk(struct device_node *np,
+ struct k210_sysclk *ksc)
+{
+ struct clk_init_data init = {};
+ const struct clk_parent_data parent_data[] = {
+ { /* .index = 0 for in0 */ },
+ { .hw = &ksc->plls[K210_PLL0].hw },
+ };
+ int ret;
+
+ init.name = "aclk";
+ init.parent_data = parent_data;
+ init.num_parents = 2;
+ init.ops = &k210_aclk_ops;
+ ksc->aclk.init = &init;
+
+ ret = of_clk_hw_register(np, &ksc->aclk);
+ if (ret) {
+ pr_err("%pOFP: register aclk failed\n", np);
+ return ret;
+ }
+
+ return 0;
+}
+
+#define to_k210_clk(_hw) container_of(_hw, struct k210_clk, hw)
+
+static int k210_clk_enable(struct clk_hw *hw)
+{
+ struct k210_clk *kclk = to_k210_clk(hw);
+ struct k210_sysclk *ksc = kclk->ksc;
+ struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+ unsigned long flags;
+ u32 reg;
+
+ if (!cfg->gate_reg)
+ return 0;
+
+ spin_lock_irqsave(&ksc->clk_lock, flags);
+ reg = readl(ksc->regs + cfg->gate_reg);
+ reg |= BIT(cfg->gate_bit);
+ writel(reg, ksc->regs + cfg->gate_reg);
+ spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+ return 0;
+}
+
+static void k210_clk_disable(struct clk_hw *hw)
+{
+ struct k210_clk *kclk = to_k210_clk(hw);
+ struct k210_sysclk *ksc = kclk->ksc;
+ struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+ unsigned long flags;
+ u32 reg;
+
+ if (!cfg->gate_reg)
+ return;
+
+ spin_lock_irqsave(&ksc->clk_lock, flags);
+ reg = readl(ksc->regs + cfg->gate_reg);
+ reg &= ~BIT(cfg->gate_bit);
+ writel(reg, ksc->regs + cfg->gate_reg);
+ spin_unlock_irqrestore(&ksc->clk_lock, flags);
+}
+
+static int k210_clk_set_parent(struct clk_hw *hw, u8 index)
+{
+ struct k210_clk *kclk = to_k210_clk(hw);
+ struct k210_sysclk *ksc = kclk->ksc;
+ struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+ unsigned long flags;
+ u32 reg;
+
+ spin_lock_irqsave(&ksc->clk_lock, flags);
+ reg = readl(ksc->regs + cfg->mux_reg);
+ if (index)
+ reg |= BIT(cfg->mux_bit);
+ else
+ reg &= ~BIT(cfg->mux_bit);
+ writel(reg, ksc->regs + cfg->mux_reg);
+ spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+ return 0;
+}
+
+static u8 k210_clk_get_parent(struct clk_hw *hw)
+{
+ struct k210_clk *kclk = to_k210_clk(hw);
+ struct k210_sysclk *ksc = kclk->ksc;
+ struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+ unsigned long flags;
+ u32 reg, idx;
+
+ spin_lock_irqsave(&ksc->clk_lock, flags);
+ reg = readl(ksc->regs + cfg->mux_reg);
+ idx = (reg & BIT(cfg->mux_bit)) ? 1 : 0;
+ spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+ return idx;
+}
+
+static unsigned long k210_clk_get_rate(struct clk_hw *hw,
+ unsigned long parent_rate)
+{
+ struct k210_clk *kclk = to_k210_clk(hw);
+ struct k210_sysclk *ksc = kclk->ksc;
+ struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+ u32 reg, div_val;
+
+ if (!cfg->div_reg)
+ return parent_rate;
+
+ reg = readl(ksc->regs + cfg->div_reg);
+ div_val = (reg >> cfg->div_shift) & GENMASK(cfg->div_width - 1, 0);
+
+ switch (cfg->div_type) {
+ case K210_DIV_ONE_BASED:
+ return parent_rate / (div_val + 1);
+ case K210_DIV_DOUBLE_ONE_BASED:
+ return parent_rate / ((div_val + 1) * 2);
+ case K210_DIV_POWER_OF_TWO:
+ return parent_rate / (2UL << div_val);
+ case K210_DIV_NONE:
+ default:
+ return 0;
+ }
+}
+
+static const struct clk_ops k210_clk_mux_ops = {
+ .enable = k210_clk_enable,
+ .disable = k210_clk_disable,
+ .set_parent = k210_clk_set_parent,
+ .get_parent = k210_clk_get_parent,
+ .recalc_rate = k210_clk_get_rate,
+};
+
+static const struct clk_ops k210_clk_ops = {
+ .enable = k210_clk_enable,
+ .disable = k210_clk_disable,
+ .recalc_rate = k210_clk_get_rate,
+};
+
+static void __init k210_register_clk(struct device_node *np,
+ struct k210_sysclk *ksc, int id,
+ const struct clk_parent_data *parent_data,
+ int num_parents, unsigned long flags)
+{
+ struct k210_clk *kclk = &ksc->clks[id];
+ struct clk_init_data init = {};
+ int ret;
+
+ init.name = k210_clk_cfgs[id].name;
+ init.flags = flags;
+ init.parent_data = parent_data;
+ init.num_parents = num_parents;
+ if (num_parents > 1)
+ init.ops = &k210_clk_mux_ops;
+ else
+ init.ops = &k210_clk_ops;
+
+ kclk->id = id;
+ kclk->ksc = ksc;
+ kclk->hw.init = &init;
+
+ ret = of_clk_hw_register(np, &kclk->hw);
+ if (ret) {
+ pr_err("%pOFP: register clock %s failed\n",
+ np, k210_clk_cfgs[id].name);
+ kclk->id = -1;
+ }
+}
+
+/*
+ * All muxed clocks have IN0 and PLL0 as parents.
+ */
+static inline void __init k210_register_mux_clk(struct device_node *np,
+ struct k210_sysclk *ksc, int id)
+{
+ const struct clk_parent_data parent_data[2] = {
+ { /* .index = 0 for in0 */ },
+ { .hw = &ksc->plls[K210_PLL0].hw }
+ };
+
+ k210_register_clk(np, ksc, id, parent_data, 2, 0);
+}
+
+static inline void __init k210_register_in0_child(struct device_node *np,
+ struct k210_sysclk *ksc, int id)
+{
+ const struct clk_parent_data parent_data = {
+ /* .index = 0 for in0 */
+ };
+
+ k210_register_clk(np, ksc, id, &parent_data, 1, 0);
+}
+
+static inline void __init k210_register_pll_child(struct device_node *np,
+ struct k210_sysclk *ksc, int id,
+ enum k210_pll_id pllid,
+ unsigned long flags)
+{
+ const struct clk_parent_data parent_data = {
+ .hw = &ksc->plls[pllid].hw,
+ };
+
+ k210_register_clk(np, ksc, id, &parent_data, 1, flags);
+}
+
+static inline void __init k210_register_aclk_child(struct device_node *np,
+ struct k210_sysclk *ksc, int id,
+ unsigned long flags)
+{
+ const struct clk_parent_data parent_data = {
+ .hw = &ksc->aclk,
+ };
+
+ k210_register_clk(np, ksc, id, &parent_data, 1, flags);
+}
+
+static inline void __init k210_register_clk_child(struct device_node *np,
+ struct k210_sysclk *ksc, int id,
+ int parent_id)
+{
+ const struct clk_parent_data parent_data = {
+ .hw = &ksc->clks[parent_id].hw,
+ };
+
+ k210_register_clk(np, ksc, id, &parent_data, 1, 0);
+}
+
+static struct clk_hw *k210_clk_hw_onecell_get(struct of_phandle_args *clkspec,
+ void *data)
+{
+ struct k210_sysclk *ksc = data;
+ unsigned int idx = clkspec->args[0];
+
+ if (idx >= K210_NUM_CLKS)
+ return ERR_PTR(-EINVAL);
+
+ return &ksc->clks[idx].hw;
+}
+
+static void __init k210_clk_init(struct device_node *np)
+{
+ struct device_node *sysctl_np;
+ struct k210_sysclk *ksc;
+ int i, ret;
+
+ ksc = kzalloc(sizeof(*ksc), GFP_KERNEL);
+ if (!ksc)
+ return;
+
+ spin_lock_init(&ksc->clk_lock);
+ sysctl_np = of_get_parent(np);
+ ksc->regs = of_iomap(sysctl_np, 0);
+ of_node_put(sysctl_np);
+ if (!ksc->regs) {
+ pr_err("%pOFP: failed to map registers\n", np);
+ return;
+ }
+
+ ret = k210_register_plls(np, ksc);
+ if (ret)
+ return;
+
+ ret = k210_register_aclk(np, ksc);
+ if (ret)
+ return;
+
+ /*
+ * Critical clocks: there are no consumers of the SRAM clocks,
+ * including the AI clock for the third SRAM bank. The CPU clock
+ * is only referenced by the uarths serial device and so would be
+ * disabled if the serial console is disabled to switch to another
+ * console. Mark all these clocks as critical so that they are never
+ * disabled by the core clock management.
+ */
+ k210_register_aclk_child(np, ksc, K210_CLK_CPU, CLK_IS_CRITICAL);
+ k210_register_aclk_child(np, ksc, K210_CLK_SRAM0, CLK_IS_CRITICAL);
+ k210_register_aclk_child(np, ksc, K210_CLK_SRAM1, CLK_IS_CRITICAL);
+ k210_register_pll_child(np, ksc, K210_CLK_AI, K210_PLL1,
+ CLK_IS_CRITICAL);
+
+ /* Clocks with aclk as source */
+ k210_register_aclk_child(np, ksc, K210_CLK_DMA, 0);
+ k210_register_aclk_child(np, ksc, K210_CLK_FFT, 0);
+ k210_register_aclk_child(np, ksc, K210_CLK_ROM, 0);
+ k210_register_aclk_child(np, ksc, K210_CLK_DVP, 0);
+ k210_register_aclk_child(np, ksc, K210_CLK_APB0, 0);
+ k210_register_aclk_child(np, ksc, K210_CLK_APB1, 0);
+ k210_register_aclk_child(np, ksc, K210_CLK_APB2, 0);
+
+ /* Clocks with PLL0 as source */
+ k210_register_pll_child(np, ksc, K210_CLK_SPI0, K210_PLL0, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_SPI1, K210_PLL0, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_SPI2, K210_PLL0, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_I2C0, K210_PLL0, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_I2C1, K210_PLL0, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_I2C2, K210_PLL0, 0);
+
+ /* Clocks with PLL2 as source */
+ k210_register_pll_child(np, ksc, K210_CLK_I2S0, K210_PLL2, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_I2S1, K210_PLL2, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_I2S2, K210_PLL2, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_I2S0_M, K210_PLL2, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_I2S1_M, K210_PLL2, 0);
+ k210_register_pll_child(np, ksc, K210_CLK_I2S2_M, K210_PLL2, 0);
+
+ /* Clocks with IN0 as source */
+ k210_register_in0_child(np, ksc, K210_CLK_WDT0);
+ k210_register_in0_child(np, ksc, K210_CLK_WDT1);
+ k210_register_in0_child(np, ksc, K210_CLK_RTC);
+
+ /* Clocks with APB0 as source */
+ k210_register_clk_child(np, ksc, K210_CLK_GPIO, K210_CLK_APB0);
+ k210_register_clk_child(np, ksc, K210_CLK_UART1, K210_CLK_APB0);
+ k210_register_clk_child(np, ksc, K210_CLK_UART2, K210_CLK_APB0);
+ k210_register_clk_child(np, ksc, K210_CLK_UART3, K210_CLK_APB0);
+ k210_register_clk_child(np, ksc, K210_CLK_FPIOA, K210_CLK_APB0);
+ k210_register_clk_child(np, ksc, K210_CLK_SHA, K210_CLK_APB0);
+
+ /* Clocks with APB1 as source */
+ k210_register_clk_child(np, ksc, K210_CLK_AES, K210_CLK_APB1);
+ k210_register_clk_child(np, ksc, K210_CLK_OTP, K210_CLK_APB1);
+
+ /* Mux clocks with in0 or pll0 as source */
+ k210_register_mux_clk(np, ksc, K210_CLK_SPI3);
+ k210_register_mux_clk(np, ksc, K210_CLK_TIMER0);
+ k210_register_mux_clk(np, ksc, K210_CLK_TIMER1);
+ k210_register_mux_clk(np, ksc, K210_CLK_TIMER2);
+
+ /* Check for registration errors */
+ for (i = 0; i < K210_NUM_CLKS; i++) {
+ if (ksc->clks[i].id != i)
+ return;
+ }
+
+ ret = of_clk_add_hw_provider(np, k210_clk_hw_onecell_get, ksc);
+ if (ret) {
+ pr_err("%pOFP: add clock provider failed %d\n", np, ret);
+ return;
+ }
+
+ pr_info("%pOFP: CPU running at %lu MHz\n",
+ np, clk_hw_get_rate(&ksc->clks[K210_CLK_CPU].hw) / 1000000);
+}
+
+CLK_OF_DECLARE(k210_clk, "canaan,k210-clk", k210_clk_init);
+
+/*
+ * Enable PLL1 to be able to use the AI SRAM.
+ */
+void __init k210_clk_early_init(void __iomem *regs)
+{
+ struct k210_pll pll1;
+
+ /* Make sure ACLK selector is set to PLL0 */
+ k210_aclk_set_selector(regs, 1);
+
+ /* Startup PLL1 to enable the aisram bank for general memory use */
+ k210_init_pll(regs, K210_PLL1, &pll1);
+ k210_pll_enable_hw(regs, &pll1);
+}
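
The K210 driver above encodes three divider styles in k210_clk_cfgs plus a power-of-two divider for ACLK. A minimal standalone C sketch of what each formula yields, using an illustrative parent rate and field value (not part of the patch):

#include <stdio.h>

int main(void)
{
	unsigned long parent_rate = 26000000UL;	/* 26 MHz, illustrative */
	unsigned long div_val = 1;		/* raw divider register field */

	/* K210_DIV_ONE_BASED: rate = parent / (val + 1) -> 13000000 */
	printf("%lu\n", parent_rate / (div_val + 1));
	/* K210_DIV_DOUBLE_ONE_BASED: parent / ((val + 1) * 2) -> 6500000 */
	printf("%lu\n", parent_rate / ((div_val + 1) * 2));
	/* K210_DIV_POWER_OF_TWO (also ACLK): parent / 2^(val + 1) -> 6500000 */
	printf("%lu\n", parent_rate / (2UL << div_val));
	return 0;
}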
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index 2de5e3672e42..cc8dd3072b8b 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -1042,7 +1042,7 @@ error:
return ret;
}
-static int nx842_remove(struct vio_dev *viodev)
+static void nx842_remove(struct vio_dev *viodev)
{
struct nx842_devdata *old_devdata;
unsigned long flags;
@@ -1063,8 +1063,6 @@ static int nx842_remove(struct vio_dev *viodev)
if (old_devdata)
kfree(old_devdata->counters);
kfree(old_devdata);
-
- return 0;
}
static const struct vio_device_id nx842_vio_driver_ids[] = {
diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c
index 0d2dc5be7f19..1d0e8a1ba160 100644
--- a/drivers/crypto/nx/nx.c
+++ b/drivers/crypto/nx/nx.c
@@ -783,7 +783,7 @@ static int nx_probe(struct vio_dev *viodev, const struct vio_device_id *id)
return nx_register_algs();
}
-static int nx_remove(struct vio_dev *viodev)
+static void nx_remove(struct vio_dev *viodev)
{
dev_dbg(&viodev->dev, "entering nx_remove for UA 0x%x\n",
viodev->unit_address);
@@ -811,8 +811,6 @@ static int nx_remove(struct vio_dev *viodev)
nx_unregister_skcipher(&nx_ecb_aes_alg, NX_FC_AES,
NX_MODE_AES_ECB);
}
-
- return 0;
}
diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index cadbd0a1a1ef..5fa6ae9dbc8b 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -480,7 +480,7 @@ static void dax_free_inode(struct inode *inode)
kfree(dax_dev->host);
dax_dev->host = NULL;
if (inode->i_rdev)
- ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
+ ida_simple_remove(&dax_minor_ida, iminor(inode));
kmem_cache_free(dax_cache, dax_dev);
}
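
For the dax_free_inode() hunk above, iminor() and MINOR(inode->i_rdev) are equivalent; the patch simply prefers the standard accessor over the open-coded form. To the best of my knowledge the helper is defined essentially as follows:

/* From <linux/fs.h>, paraphrased: */
static inline unsigned iminor(const struct inode *inode)
{
	return MINOR(inode->i_rdev);
}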
diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index 5ea09fd01544..c91d05651596 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -113,8 +113,29 @@ MODULE_DEVICE_TABLE(i2c, pca953x_id);
#ifdef CONFIG_GPIO_PCA953X_IRQ
#include <linux/dmi.h>
-#include <linux/gpio.h>
-#include <linux/list.h>
+
+static const struct acpi_gpio_params pca953x_irq_gpios = { 0, 0, true };
+
+static const struct acpi_gpio_mapping pca953x_acpi_irq_gpios[] = {
+ { "irq-gpios", &pca953x_irq_gpios, 1, ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER },
+ { }
+};
+
+static int pca953x_acpi_get_irq(struct device *dev)
+{
+ int ret;
+
+ ret = devm_acpi_dev_add_driver_gpios(dev, pca953x_acpi_irq_gpios);
+ if (ret)
+ dev_warn(dev, "can't add GPIO ACPI mapping\n");
+
+ ret = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(dev), "irq-gpios", 0);
+ if (ret < 0)
+ return ret;
+
+ dev_info(dev, "ACPI interrupt quirk (IRQ %d)\n", ret);
+ return ret;
+}
static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = {
{
@@ -133,59 +154,6 @@ static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = {
},
{}
};
-
-#ifdef CONFIG_ACPI
-static int pca953x_acpi_get_pin(struct acpi_resource *ares, void *data)
-{
- struct acpi_resource_gpio *agpio;
- int *pin = data;
-
- if (acpi_gpio_get_irq_resource(ares, &agpio))
- *pin = agpio->pin_table[0];
- return 1;
-}
-
-static int pca953x_acpi_find_pin(struct device *dev)
-{
- struct acpi_device *adev = ACPI_COMPANION(dev);
- int pin = -ENOENT, ret;
- LIST_HEAD(r);
-
- ret = acpi_dev_get_resources(adev, &r, pca953x_acpi_get_pin, &pin);
- acpi_dev_free_resource_list(&r);
- if (ret < 0)
- return ret;
-
- return pin;
-}
-#else
-static inline int pca953x_acpi_find_pin(struct device *dev) { return -ENXIO; }
-#endif
-
-static int pca953x_acpi_get_irq(struct device *dev)
-{
- int pin, ret;
-
- pin = pca953x_acpi_find_pin(dev);
- if (pin < 0)
- return pin;
-
- dev_info(dev, "Applying ACPI interrupt quirk (GPIO %d)\n", pin);
-
- if (!gpio_is_valid(pin))
- return -EINVAL;
-
- ret = gpio_request(pin, "pca953x interrupt");
- if (ret)
- return ret;
-
- ret = gpio_to_irq(pin);
-
- /* When pin is used as an IRQ, no need to keep it requested */
- gpio_free(pin);
-
- return ret;
-}
#endif
static const struct acpi_device_id pca953x_acpi_ids[] = {
diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index e37a57d0a2f0..1aacd2a5a1fd 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -174,7 +174,7 @@ static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio,
int ret, value;
ret = request_threaded_irq(event->irq, NULL, event->handler,
- event->irqflags, "ACPI:Event", event);
+ event->irqflags | IRQF_ONESHOT, "ACPI:Event", event);
if (ret) {
dev_err(acpi_gpio->chip->parent,
"Failed to setup interrupt handler for %d\n",
@@ -677,6 +677,7 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
if (!lookup->desc) {
const struct acpi_resource_gpio *agpio = &ares->data.gpio;
bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT;
+ struct gpio_desc *desc;
u16 pin_index;
if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint)
@@ -689,8 +690,12 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
if (pin_index >= agpio->pin_table_length)
return 1;
- lookup->desc = acpi_get_gpiod(agpio->resource_source.string_ptr,
+ if (lookup->info.quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER)
+ desc = gpio_to_desc(agpio->pin_table[pin_index]);
+ else
+ desc = acpi_get_gpiod(agpio->resource_source.string_ptr,
agpio->pin_table[pin_index]);
+ lookup->desc = desc;
lookup->info.pin_config = agpio->pin_config;
lookup->info.debounce = agpio->debounce_timeout;
lookup->info.gpioint = gpioint;
@@ -940,8 +945,9 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
}
/**
- * acpi_dev_gpio_irq_get() - Find GpioInt and translate it to Linux IRQ number
+ * acpi_dev_gpio_irq_get_by() - Find GpioInt and translate it to Linux IRQ number
* @adev: pointer to a ACPI device to get IRQ from
+ * @name: optional name of GpioInt resource
* @index: index of GpioInt resource (starting from %0)
*
* If the device has one or more GpioInt resources, this function can be
@@ -951,9 +957,12 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
* The function is idempotent, though each time it runs it will configure GPIO
* pin direction according to the flags in GpioInt resource.
*
+ * The function takes an optional @name parameter. If given, only GpioInt
+ * resources with a matching property name are taken into account.
+ *
* Return: Linux IRQ number (> %0) on success, negative errno on failure.
*/
-int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
+int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index)
{
int idx, i;
unsigned int irq_flags;
@@ -963,7 +972,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
struct acpi_gpio_info info;
struct gpio_desc *desc;
- desc = acpi_get_gpiod_by_index(adev, NULL, i, &info);
+ desc = acpi_get_gpiod_by_index(adev, name, i, &info);
/* Ignore -EPROBE_DEFER, it only matters if idx matches */
if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER)
@@ -1008,7 +1017,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
}
return -ENOENT;
}
-EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get);
+EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get_by);
static acpi_status
acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
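
Since the hunk above renames the exported acpi_dev_gpio_irq_get() to acpi_dev_gpio_irq_get_by(), existing callers presumably keep working through a thin wrapper that passes a NULL name (the header change is not shown in this diff). A sketch of such a wrapper:

/* Hypothetical back-compat wrapper: a NULL name matches any
 * GpioInt resource, preserving the old behaviour.
 */
static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
{
	return acpi_dev_gpio_irq_get_by(adev, NULL, index);
}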
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index adf55db080d8..7ec0822c0505 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -367,22 +367,18 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc)
*
* Looks for device property "gpio-line-names" and if it exists assigns
* GPIO line names for the chip. The memory allocated for the assigned
- * names belong to the underlying software node and should not be released
+ * names belongs to the underlying firmware node and should not be released
* by the caller.
*/
static int devprop_gpiochip_set_names(struct gpio_chip *chip)
{
struct gpio_device *gdev = chip->gpiodev;
- struct device *dev = chip->parent;
+ struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev);
const char **names;
int ret, i;
int count;
- /* GPIO chip may not have a parent device whose properties we inspect. */
- if (!dev)
- return 0;
-
- count = device_property_string_array_count(dev, "gpio-line-names");
+ count = fwnode_property_string_array_count(fwnode, "gpio-line-names");
if (count < 0)
return 0;
@@ -396,7 +392,7 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip)
if (!names)
return -ENOMEM;
- ret = device_property_read_string_array(dev, "gpio-line-names",
+ ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
names, count);
if (ret < 0) {
dev_warn(&gdev->dev, "failed to read GPIO line names\n");
@@ -474,9 +470,13 @@ EXPORT_SYMBOL_GPL(gpiochip_line_is_valid);
static void gpiodevice_release(struct device *dev)
{
- struct gpio_device *gdev = dev_get_drvdata(dev);
+ struct gpio_device *gdev = container_of(dev, struct gpio_device, dev);
+ unsigned long flags;
+ spin_lock_irqsave(&gpio_lock, flags);
list_del(&gdev->list);
+ spin_unlock_irqrestore(&gpio_lock, flags);
+
ida_free(&gpio_ida, gdev->id);
kfree_const(gdev->label);
kfree(gdev->descs);
@@ -605,7 +605,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
goto err_free_ida;
device_initialize(&gdev->dev);
- dev_set_drvdata(&gdev->dev, gdev);
if (gc->parent && gc->parent->driver)
gdev->owner = gc->parent->driver->owner;
else if (gc->owner)
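
The gpiodevice_release() hunk above replaces a drvdata lookup with container_of(), which works even before dev_set_drvdata() is called (that call is deleted in the following hunk) and cannot yield NULL for an embedded member. A freestanding sketch of the idiom, with hypothetical structure names:

#include <stddef.h>

#define my_container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct dev { int id; };
struct gdev { int base; struct dev dev; };	/* dev embedded by value */

static struct gdev *to_gdev(struct dev *d)
{
	/* Recover the enclosing gdev from its embedded member. */
	return my_container_of(d, struct gdev, dev);
}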
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 8155c54392c8..36a741d63ddc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -903,10 +903,11 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
*/
bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
{
+#if defined(CONFIG_AMD_PMC)
if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
if (adev->flags & AMD_IS_APU)
return true;
}
-
+#endif
return false;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 0a25fecf488a..43059ead733b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -357,7 +357,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
while (size) {
uint32_t value;
- value = RREG32_PCIE(*pos >> 2);
+ value = RREG32_PCIE(*pos);
r = put_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -424,7 +424,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
return r;
}
- WREG32_PCIE(*pos >> 2, value);
+ WREG32_PCIE(*pos, value);
result += 4;
buf += 4;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 3c37cf1ae8b7..64beb3399604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -173,8 +173,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
switch (adev->asic_type) {
case CHIP_VEGA20:
case CHIP_ARCTURUS:
- case CHIP_SIENNA_CICHLID:
- case CHIP_NAVY_FLOUNDER:
/* enable runpm if runpm=1 */
if (amdgpu_runtime_pm > 0)
adev->runpm = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index 160fa5f59805..c625c5d8ed89 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -558,7 +558,8 @@ static bool nv_is_headless_sku(struct pci_dev *pdev)
{
if ((pdev->device == 0x731E &&
(pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
- (pdev->device == 0x7340 && pdev->revision == 0xC9))
+ (pdev->device == 0x7340 && pdev->revision == 0xC9) ||
+ (pdev->device == 0x7360 && pdev->revision == 0xC7))
return true;
return false;
}
@@ -634,7 +635,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
- amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+ if (!nv_is_headless_sku(adev->pdev))
+ amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
if (!amdgpu_sriov_vf(adev))
amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
break;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index 5159399f8239..5750818db8f6 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -530,7 +530,7 @@ bool dm_helpers_dp_write_dsc_enable(
{
uint8_t enable_dsc = enable ? 1 : 0;
struct amdgpu_dm_connector *aconnector;
- uint8_t ret;
+ uint8_t ret = 0;
if (!stream)
return false;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
index 45564a776e9b..9f0d03ae3109 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
@@ -1322,7 +1322,7 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu,
CMN2ASIC_MAPPING_WORKLOAD,
profile_mode);
if (workload_type < 0) {
- dev_err(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
+ dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index 90585461a56e..a6211858ead4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -78,6 +78,9 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_smc.bin");
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xC000
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0xE
+#define mmTHM_BACO_CNTL_ARCT 0xA7
+#define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0
+
static int link_width[] = {0, 1, 2, 4, 8, 12, 16};
static int link_speed[] = {25, 50, 80, 160};
@@ -1532,9 +1535,15 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
break;
default:
if (!ras || !ras->supported) {
- data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
- data |= 0x80000000;
- WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+ if (adev->asic_type == CHIP_ARCTURUS) {
+ data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT);
+ data |= 0x80000000;
+ WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT, data);
+ } else {
+ data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
+ data |= 0x80000000;
+ WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+ }
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 0, NULL);
} else {
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
index 093b01159408..7ddbaecb11c2 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
@@ -810,7 +810,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input,
CMN2ASIC_MAPPING_WORKLOAD,
profile_mode);
if (workload_type < 0) {
- dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
+ dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
profile_mode);
return -EINVAL;
}
@@ -1685,9 +1685,9 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en)
uint32_t feature_mask[2];
int ret = 0;
- if (adev->pm.fw_version >= 0x43f1700)
+ if (adev->pm.fw_version >= 0x43f1700 && !en)
ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify,
- en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL);
+ RLC_STATUS_OFF, NULL);
bitmap_zero(feature->enabled, feature->feature_num);
bitmap_zero(feature->supported, feature->feature_num);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
index 5faa509f0dba..5493388fcb10 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
@@ -844,7 +844,7 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u
* TODO: If some cases need a switch to powersave/default power mode,
* we can consider entering WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving.
*/
- dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
+ dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 69da601f1754..e771bd519ee2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -261,6 +261,9 @@ gk104_fifo_pbdma = {
struct nvkm_engine *
gk104_fifo_id_engine(struct nvkm_fifo *base, int engi)
{
+ if (engi == GK104_FIFO_ENGN_SW)
+ return nvkm_device_engine(base->engine.subdev.device, NVKM_ENGINE_SW, 0);
+
return gk104_fifo(base)->engine[engi].engine;
}
diff --git a/drivers/gpu/drm/tilcdc/Makefile b/drivers/gpu/drm/tilcdc/Makefile
index 662bf3a348c9..f5190477de72 100644
--- a/drivers/gpu/drm/tilcdc/Makefile
+++ b/drivers/gpu/drm/tilcdc/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-ifeq (, $(findstring -W,$(EXTRA_CFLAGS)))
+ifeq (, $(findstring -W,$(KCFLAGS)))
ccflags-y += -Werror
endif
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 8c471823a5af..2f776d78e3c1 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -726,7 +726,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
ret = add_memory(nid, PFN_PHYS((start_pfn)),
- (HA_CHUNK << PAGE_SHIFT), MEMHP_MERGE_RESOURCE);
+ (HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE);
if (ret) {
pr_err("hot_add memory failed error is %d\n", ret);
diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c
index d4e0a0f6732a..ba766d24219e 100644
--- a/drivers/i2c/busses/i2c-brcmstb.c
+++ b/drivers/i2c/busses/i2c-brcmstb.c
@@ -316,7 +316,7 @@ static int brcmstb_send_i2c_cmd(struct brcmstb_i2c_dev *dev,
goto cmd_out;
}
- if ((CMD_RD || CMD_WR) &&
+ if ((cmd == CMD_RD || cmd == CMD_WR) &&
bsc_readl(dev, iic_enable) & BSC_IIC_EN_NOACK_MASK) {
rc = -EREMOTEIO;
dev_dbg(dev->device, "controller received NOACK intr for %s\n",
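
The brcmstb fix above is worth a second look: CMD_RD and CMD_WR are distinct enumeration constants, so the old condition `(CMD_RD || CMD_WR)` was a compile-time-true expression and the NOACK check ran for every command type. A standalone illustration (constant values here are made up):

#include <stdio.h>

enum { CMD_RD = 1, CMD_WR = 2 };

int main(void)
{
	int cmd = 0;	/* neither a read nor a write command */

	/* Old (buggy): tests the constants, not cmd -- always true */
	printf("%d\n", (CMD_RD || CMD_WR));			/* prints 1 */
	/* Fixed: compares cmd against each constant */
	printf("%d\n", (cmd == CMD_RD || cmd == CMD_WR));	/* prints 0 */
	return 0;
}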
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 85307cfa7109..5392b82f68a4 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -38,6 +38,8 @@
#define DW_IC_CON_TX_EMPTY_CTRL BIT(8)
#define DW_IC_CON_RX_FIFO_FULL_HLD_CTRL BIT(9)
+#define DW_IC_DATA_CMD_DAT GENMASK(7, 0)
+
/*
* Registers offset
*/
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index d6425ad6e6a3..dd27b9dbe931 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -432,7 +432,7 @@ i2c_dw_read(struct dw_i2c_dev *dev)
regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
/* Ensure length byte is a valid value */
if (flags & I2C_M_RECV_LEN &&
- tmp <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
+ (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
len = i2c_dw_recv_len(dev, tmp);
}
*buf++ = tmp;
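
Per the fix above, DW_IC_DATA_CMD can carry flag bits above the data byte (the new DW_IC_DATA_CMD_DAT mask covers bits 7:0), so an SMBus block-length read must be masked before the `<= I2C_SMBUS_BLOCK_MAX` comparison. An illustration with a made-up register value:

#include <stdio.h>

#define GENMASK(h, l)		(((~0U) << (l)) & (~0U >> (31 - (h))))
#define DW_IC_DATA_CMD_DAT	GENMASK(7, 0)

int main(void)
{
	unsigned int tmp = 0x820;	/* flag bit 11 set, data byte 0x20 */

	/* Unmasked, 0x820 would fail a `<= I2C_SMBUS_BLOCK_MAX` check
	 * even though the actual length byte (0x20) is valid.
	 */
	printf("0x%x\n", tmp & DW_IC_DATA_CMD_DAT);	/* prints 0x20 */
	return 0;
}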
diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c
index 20a9881a0d6c..5ac30d95650c 100644
--- a/drivers/i2c/busses/i2c-exynos5.c
+++ b/drivers/i2c/busses/i2c-exynos5.c
@@ -606,6 +606,7 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop)
u32 i2c_ctl;
u32 int_en = 0;
u32 i2c_auto_conf = 0;
+ u32 i2c_addr = 0;
u32 fifo_ctl;
unsigned long flags;
unsigned short trig_lvl;
@@ -640,7 +641,12 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop)
int_en |= HSI2C_INT_TX_ALMOSTEMPTY_EN;
}
- writel(HSI2C_SLV_ADDR_MAS(i2c->msg->addr), i2c->regs + HSI2C_ADDR);
+ i2c_addr = HSI2C_SLV_ADDR_MAS(i2c->msg->addr);
+
+ if (i2c->op_clock >= I2C_MAX_FAST_MODE_PLUS_FREQ)
+ i2c_addr |= HSI2C_MASTER_ID(MASTER_ID(i2c->adap.nr));
+
+ writel(i2c_addr, i2c->regs + HSI2C_ADDR);
writel(fifo_ctl, i2c->regs + HSI2C_FIFO_CTL);
writel(i2c_ctl, i2c->regs + HSI2C_CTL);
diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index c3f584795911..214b4c913a13 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -375,32 +375,6 @@ static void geni_i2c_tx_msg_cleanup(struct geni_i2c_dev *gi2c,
}
}
-static void geni_i2c_stop_xfer(struct geni_i2c_dev *gi2c)
-{
- int ret;
- u32 geni_status;
- struct i2c_msg *cur;
-
- /* Resume device, as runtime suspend can happen anytime during transfer */
- ret = pm_runtime_get_sync(gi2c->se.dev);
- if (ret < 0) {
- dev_err(gi2c->se.dev, "Failed to resume device: %d\n", ret);
- return;
- }
-
- geni_status = readl_relaxed(gi2c->se.base + SE_GENI_STATUS);
- if (geni_status & M_GENI_CMD_ACTIVE) {
- cur = gi2c->cur;
- geni_i2c_abort_xfer(gi2c);
- if (cur->flags & I2C_M_RD)
- geni_i2c_rx_msg_cleanup(gi2c, cur);
- else
- geni_i2c_tx_msg_cleanup(gi2c, cur);
- }
-
- pm_runtime_put_sync_suspend(gi2c->se.dev);
-}
-
static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
u32 m_param)
{
@@ -676,13 +650,6 @@ static int geni_i2c_remove(struct platform_device *pdev)
return 0;
}
-static void geni_i2c_shutdown(struct platform_device *pdev)
-{
- struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
-
- geni_i2c_stop_xfer(gi2c);
-}
-
static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
{
int ret;
@@ -747,7 +714,6 @@ MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
static struct platform_driver geni_i2c_driver = {
.probe = geni_i2c_probe,
.remove = geni_i2c_remove,
- .shutdown = geni_i2c_shutdown,
.driver = {
.name = "geni_i2c",
.pm = &geni_i2c_pm_ops,
diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c
index 77af4c1a3f38..bb86d84558d9 100644
--- a/drivers/ide/falconide.c
+++ b/drivers/ide/falconide.c
@@ -164,6 +164,7 @@ static int __init falconide_init(struct platform_device *pdev)
if (rc)
goto err_free;
+ platform_set_drvdata(pdev, host);
return 0;
err_free:
ide_host_free(host);
@@ -174,7 +175,7 @@ err:
static int falconide_remove(struct platform_device *pdev)
{
- struct ide_host *host = dev_get_drvdata(&pdev->dev);
+ struct ide_host *host = platform_get_drvdata(pdev);
ide_host_remove(host);
diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index be996dba040c..3d194bb60840 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -3651,6 +3651,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
struct ib_cm_sidr_rep_param *param)
{
struct ib_mad_send_buf *msg;
+ unsigned long flags;
int ret;
lockdep_assert_held(&cm_id_priv->lock);
@@ -3676,12 +3677,12 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
return ret;
}
cm_id_priv->id.state = IB_CM_IDLE;
- spin_lock_irq(&cm.lock);
+ spin_lock_irqsave(&cm.lock, flags);
if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
}
- spin_unlock_irq(&cm.lock);
+ spin_unlock_irqrestore(&cm.lock, flags);
return 0;
}
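
The cm.c change swaps spin_lock_irq() for spin_lock_irqsave() because cm_send_sidr_rep_locked() runs under the cm_id lock (note the lockdep_assert_held() above) and can therefore be reached with interrupts already disabled; an unconditional spin_unlock_irq() would re-enable them behind the caller's back. The general pattern, as a sketch:

/* spin_lock_irq() unconditionally re-enables interrupts on unlock,
 * which is only safe when the caller is known to run with IRQs on.
 * spin_lock_irqsave() records the previous IRQ state in `flags` and
 * restores exactly that state, so it is safe in either context.
 */
static void safe_in_any_context(spinlock_t *lock)
{
	unsigned long flags;

	spin_lock_irqsave(lock, flags);
	/* ... critical section ... */
	spin_unlock_irqrestore(lock, flags);
}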
diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index e47c5949013f..ff047eb024ab 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -91,7 +91,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
}
/**
- * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * _uverbs_alloc() - Quickly allocate memory for use with a bundle
* @bundle: The bundle
* @size: Number of bytes to allocate
* @flags: Allocator flags
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index ebc2a4355fa5..de3c2fc6f361 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2073,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
num_alloc_xa_entries++;
event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
- if (!event_sub)
+ if (!event_sub) {
+ err = -ENOMEM;
goto err;
+ }
list_add_tail(&event_sub->event_list, &sub_list);
uverbs_uobject_get(&ev_file->uobj);
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 374698186662..b103555b1f5d 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -1082,7 +1082,7 @@ end:
return ret ? ret : npages;
}
-/**
+/*
* Parse a series of data segments for page fault handling.
*
* @dev: Pointer to mlx5 IB device
diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig
index 452149066792..06b8dc5093f7 100644
--- a/drivers/infiniband/sw/rxe/Kconfig
+++ b/drivers/infiniband/sw/rxe/Kconfig
@@ -4,6 +4,7 @@ config RDMA_RXE
depends on INET && PCI && INFINIBAND
depends on INFINIBAND_VIRT_DMA
select NET_UDP_TUNNEL
+ select CRYPTO
select CRYPTO_CRC32
help
This driver implements the InfiniBand RDMA transport over
diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index a8ac791a1bb9..17a361b8dbb1 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -547,6 +547,7 @@ int rxe_completer(void *arg)
struct sk_buff *skb = NULL;
struct rxe_pkt_info *pkt = NULL;
enum comp_state state;
+ int ret = 0;
rxe_add_ref(qp);
@@ -554,7 +555,8 @@ int rxe_completer(void *arg)
qp->req.state == QP_STATE_RESET) {
rxe_drain_resp_pkts(qp, qp->valid &&
qp->req.state == QP_STATE_ERROR);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
}
if (qp->comp.timeout) {
@@ -564,8 +566,10 @@ int rxe_completer(void *arg)
qp->comp.timeout_retry = 0;
}
- if (qp->req.need_retry)
- goto exit;
+ if (qp->req.need_retry) {
+ ret = -EAGAIN;
+ goto done;
+ }
state = COMPST_GET_ACK;
@@ -636,8 +640,6 @@ int rxe_completer(void *arg)
break;
case COMPST_DONE:
- if (pkt)
- free_pkt(pkt);
goto done;
case COMPST_EXIT:
@@ -660,7 +662,8 @@ int rxe_completer(void *arg)
qp->qp_timeout_jiffies)
mod_timer(&qp->retrans_timer,
jiffies + qp->qp_timeout_jiffies);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
case COMPST_ERROR_RETRY:
/* we come here if the retry timer fired and we did
@@ -672,18 +675,18 @@ int rxe_completer(void *arg)
*/
/* there is nothing to retry in this case */
- if (!wqe || (wqe->state == wqe_state_posted))
- goto exit;
+ if (!wqe || (wqe->state == wqe_state_posted)) {
+ pr_warn("Retry attempted without a valid wqe\n");
+ ret = -EAGAIN;
+ goto done;
+ }
/* if we've started a retry, don't start another
* retry sequence, unless this is a timeout.
*/
if (qp->comp.started_retry &&
- !qp->comp.timeout_retry) {
- if (pkt)
- free_pkt(pkt);
+ !qp->comp.timeout_retry)
goto done;
- }
if (qp->comp.retry_cnt > 0) {
if (qp->comp.retry_cnt != 7)
@@ -704,8 +707,6 @@ int rxe_completer(void *arg)
qp->comp.started_retry = 1;
rxe_run_task(&qp->req.task, 0);
}
- if (pkt)
- free_pkt(pkt);
goto done;
} else {
@@ -726,8 +727,8 @@ int rxe_completer(void *arg)
mod_timer(&qp->rnr_nak_timer,
jiffies + rnrnak_jiffies(aeth_syn(pkt)
& ~AETH_TYPE_MASK));
- free_pkt(pkt);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
} else {
rxe_counter_inc(rxe,
RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -740,25 +741,15 @@ int rxe_completer(void *arg)
WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
do_complete(qp, wqe);
rxe_qp_error(qp);
- if (pkt)
- free_pkt(pkt);
- goto exit;
+ ret = -EAGAIN;
+ goto done;
}
}
-exit:
- /* we come here if we are done with processing and want the task to
- * exit from the loop calling us
- */
- WARN_ON_ONCE(skb);
- rxe_drop_ref(qp);
- return -EAGAIN;
-
done:
- /* we come here if we have processed a packet we want the task to call
- * us again to see if there is anything else to do
- */
- WARN_ON_ONCE(skb);
+ if (pkt)
+ free_pkt(pkt);
rxe_drop_ref(qp);
- return 0;
+
+ return ret;
}
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 0701bd1ffd1a..01662727dca0 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -407,14 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
return 0;
}
+/* fix up a send packet to match the packets
+ * received from UDP before looping them back
+ */
void rxe_loopback(struct sk_buff *skb)
{
+ struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+
if (skb->protocol == htons(ETH_P_IP))
skb_pull(skb, sizeof(struct iphdr));
else
skb_pull(skb, sizeof(struct ipv6hdr));
- rxe_rcv(skb);
+ if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev)))
+ kfree_skb(skb);
+ else
+ rxe_rcv(skb);
}
struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c
index 45d2f711bce2..7a49e27da23a 100644
--- a/drivers/infiniband/sw/rxe/rxe_recv.c
+++ b/drivers/infiniband/sw/rxe/rxe_recv.c
@@ -237,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
struct rxe_mc_elem *mce;
struct rxe_qp *qp;
union ib_gid dgid;
- struct sk_buff *per_qp_skb;
- struct rxe_pkt_info *per_qp_pkt;
int err;
if (skb->protocol == htons(ETH_P_IP))
@@ -250,10 +248,15 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
/* lookup mcast group corresponding to mgid, takes a ref */
mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
if (!mcg)
- goto err1; /* mcast group not registered */
+ goto drop; /* mcast group not registered */
spin_lock_bh(&mcg->mcg_lock);
+ /* this is an unreliable datagram service so we let a
+ * failure to deliver a multicast packet to a single
+ * QP happen, move on, and try the rest of the QPs
+ * on the list
+ */
list_for_each_entry(mce, &mcg->qp_list, qp_list) {
qp = mce->qp;
@@ -266,39 +269,47 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
if (err)
continue;
- /* for all but the last qp create a new clone of the
- * skb and pass to the qp. If an error occurs in the
- * checks for the last qp in the list we need to
- * free the skb since it hasn't been passed on to
- * rxe_rcv_pkt() which would free it later.
+ /* for all but the last QP create a new clone of the
+ * skb and pass to the QP. Pass the original skb to
+ * the last QP in the list.
*/
if (mce->qp_list.next != &mcg->qp_list) {
- per_qp_skb = skb_clone(skb, GFP_ATOMIC);
- if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
- kfree_skb(per_qp_skb);
+ struct sk_buff *cskb;
+ struct rxe_pkt_info *cpkt;
+
+ cskb = skb_clone(skb, GFP_ATOMIC);
+ if (unlikely(!cskb))
continue;
+
+ if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
+ kfree_skb(cskb);
+ break;
}
+
+ cpkt = SKB_TO_PKT(cskb);
+ cpkt->qp = qp;
+ rxe_add_ref(qp);
+ rxe_rcv_pkt(cpkt, cskb);
} else {
- per_qp_skb = skb;
- /* show we have consumed the skb */
- skb = NULL;
+ pkt->qp = qp;
+ rxe_add_ref(qp);
+ rxe_rcv_pkt(pkt, skb);
+ skb = NULL; /* mark consumed */
}
-
- if (unlikely(!per_qp_skb))
- continue;
-
- per_qp_pkt = SKB_TO_PKT(per_qp_skb);
- per_qp_pkt->qp = qp;
- rxe_add_ref(qp);
- rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
}
spin_unlock_bh(&mcg->mcg_lock);
rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */
-err1:
- /* free skb if not consumed */
+ if (likely(!skb))
+ return;
+
+ /* This only occurs if one of the checks fails on the last
+ * QP in the list above
+ */
+
+drop:
kfree_skb(skb);
ib_device_put(&rxe->ib_dev);
}
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 1c4961e05c12..bb0ee5c9fde7 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -182,6 +182,10 @@ static bool increase_address_space(struct protection_domain *domain,
bool ret = true;
u64 *pte;
+ pte = (void *)get_zeroed_page(gfp);
+ if (!pte)
+ return false;
+
spin_lock_irqsave(&domain->lock, flags);
if (address <= PM_LEVEL_SIZE(domain->iop.mode))
@@ -191,10 +195,6 @@ static bool increase_address_space(struct protection_domain *domain,
if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
goto out;
- pte = (void *)get_zeroed_page(gfp);
- if (!pte)
- goto out;
-
*pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
domain->iop.root = pte;
@@ -208,10 +208,12 @@ static bool increase_address_space(struct protection_domain *domain,
*/
amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
+ pte = NULL;
ret = true;
out:
spin_unlock_irqrestore(&domain->lock, flags);
+ free_page((unsigned long)pte);
return ret;
}
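
The io_pgtable change above is the classic allocate-outside-the-lock pattern: get_zeroed_page() may sleep depending on the gfp flags, so the allocation is hoisted ahead of spin_lock_irqsave(), and setting `pte = NULL` once ownership is transferred lets a single free_page() after unlock (a no-op for NULL) clean up on every early-exit path. In outline, with hypothetical can_grow()/publish() helpers:

u64 *pte = (void *)get_zeroed_page(gfp);
if (!pte)
	return false;

spin_lock_irqsave(&domain->lock, flags);
if (can_grow(domain)) {
	publish(domain, pte);
	pte = NULL;		/* ownership transferred */
}
spin_unlock_irqrestore(&domain->lock, flags);

free_page((unsigned long)pte);	/* frees only if not published */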
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 9ab6ee22c110..af765c813cc8 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -311,6 +311,11 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
domain->ops->flush_iotlb_all(domain);
}
+static bool dev_is_untrusted(struct device *dev)
+{
+ return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+}
+
/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -365,8 +370,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
init_iova_domain(iovad, 1UL << order, base_pfn);
- if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
- DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
+ if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
+ !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) &&
+ attr) {
if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
iommu_dma_entry_dtor))
pr_warn("iova flush queue initialization failed\n");
@@ -508,11 +514,6 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
iova_align(iovad, size), dir, attrs);
}
-static bool dev_is_untrusted(struct device *dev)
-{
- return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
-}
-
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
size_t size, int prot, u64 dma_mask)
{
diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h
index 97dfcffbf495..444c0bec221a 100644
--- a/drivers/iommu/intel/pasid.h
+++ b/drivers/iommu/intel/pasid.h
@@ -30,8 +30,8 @@
#define VCMD_VRSP_IP 0x1
#define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3)
#define VCMD_VRSP_SC_SUCCESS 0
-#define VCMD_VRSP_SC_NO_PASID_AVAIL 1
-#define VCMD_VRSP_SC_INVALID_PASID 1
+#define VCMD_VRSP_SC_NO_PASID_AVAIL 2
+#define VCMD_VRSP_SC_INVALID_PASID 2
#define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff)
#define VCMD_CMD_OPERAND(e) ((e) << 8)
/*
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index 4a3f095a1c26..97eb62f667d2 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -798,10 +798,70 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain,
return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova);
}
+static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
+{
+ struct platform_device *pdev;
+ struct tegra_mc *mc;
+
+ pdev = of_find_device_by_node(np);
+ if (!pdev)
+ return NULL;
+
+ mc = platform_get_drvdata(pdev);
+ if (!mc)
+ return NULL;
+
+ return mc->smmu;
+}
+
+static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
+ struct of_phandle_args *args)
+{
+ const struct iommu_ops *ops = smmu->iommu.ops;
+ int err;
+
+ err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops);
+ if (err < 0) {
+ dev_err(dev, "failed to initialize fwspec: %d\n", err);
+ return err;
+ }
+
+ err = ops->of_xlate(dev, args);
+ if (err < 0) {
+ dev_err(dev, "failed to parse SW group ID: %d\n", err);
+ iommu_fwspec_free(dev);
+ return err;
+ }
+
+ return 0;
+}
+
static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
{
- struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
+ struct device_node *np = dev->of_node;
+ struct tegra_smmu *smmu = NULL;
+ struct of_phandle_args args;
+ unsigned int index = 0;
+ int err;
+
+ while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
+ &args) == 0) {
+ smmu = tegra_smmu_find(args.np);
+ if (smmu) {
+ err = tegra_smmu_configure(smmu, dev, &args);
+ of_node_put(args.np);
+ if (err < 0)
+ return ERR_PTR(err);
+
+ break;
+ }
+
+ of_node_put(args.np);
+ index++;
+ }
+
+ smmu = dev_iommu_priv_get(dev);
if (!smmu)
return ERR_PTR(-ENODEV);
@@ -1028,6 +1088,16 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
if (!smmu)
return ERR_PTR(-ENOMEM);
+ /*
+ * This is a bit of a hack. Ideally we'd want to simply return this
+ * value. However the IOMMU registration process will attempt to add
+ * all devices to the IOMMU when bus_set_iommu() is called. In order
+ * not to rely on global variables to track the IOMMU instance, we
+ * set it here so that it can be looked up from the .probe_device()
+ * callback via the IOMMU device's .drvdata field.
+ */
+ mc->smmu = smmu;
+
size = BITS_TO_LONGS(soc->num_asids) * sizeof(long);
smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL);
diff --git a/drivers/leds/Kconfig b/drivers/leds/Kconfig
index 6c1d8b69a465..b6742b4231bf 100644
--- a/drivers/leds/Kconfig
+++ b/drivers/leds/Kconfig
@@ -934,4 +934,7 @@ source "drivers/leds/flash/Kconfig"
comment "LED Triggers"
source "drivers/leds/trigger/Kconfig"
+comment "LED Blink"
+source "drivers/leds/blink/Kconfig"
+
endif # NEW_LEDS
diff --git a/drivers/leds/Makefile b/drivers/leds/Makefile
index 156c0b4e60d9..2a698df9da57 100644
--- a/drivers/leds/Makefile
+++ b/drivers/leds/Makefile
@@ -108,3 +108,6 @@ obj-$(CONFIG_LEDS_CLASS_FLASH) += flash/
# LED Triggers
obj-$(CONFIG_LEDS_TRIGGERS) += trigger/
+
+# LED Blink
+obj-$(CONFIG_LEDS_BLINK) += blink/
diff --git a/drivers/leds/blink/Kconfig b/drivers/leds/blink/Kconfig
new file mode 100644
index 000000000000..265b53476a80
--- /dev/null
+++ b/drivers/leds/blink/Kconfig
@@ -0,0 +1,20 @@
+menuconfig LEDS_BLINK
+ bool "LED Blink support"
+ depends on LEDS_CLASS
+ help
+ This option enables blink support for the LED class.
+ If unsure, say Y.
+
+if LEDS_BLINK
+
+config LEDS_BLINK_LGM
+ tristate "LED support for Intel LGM SoC series"
+ depends on LEDS_CLASS
+ depends on MFD_SYSCON
+ depends on OF
+ help
+ The Serial Shift Output (SSO) controller performs parallel-to-serial
+ conversion and can drive an external shift register for LED outputs.
+ This enables LED support for the SSO controller.
+
+endif # LEDS_BLINK
diff --git a/drivers/leds/blink/Makefile b/drivers/leds/blink/Makefile
new file mode 100644
index 000000000000..2fa6c7b7b67e
--- /dev/null
+++ b/drivers/leds/blink/Makefile
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_LEDS_BLINK_LGM) += leds-lgm-sso.o
diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c
new file mode 100644
index 000000000000..7d5c9ca007d6
--- /dev/null
+++ b/drivers/leds/blink/leds-lgm-sso.c
@@ -0,0 +1,888 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Lightning Mountain SoC LED Serial Shift Output Controller driver
+ *
+ * Copyright (c) 2020 Intel Corporation.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/sizes.h>
+#include <linux/uaccess.h>
+
+#define SSO_DEV_NAME "lgm-sso"
+
+#define LED_BLINK_H8_0 0x0
+#define LED_BLINK_H8_1 0x4
+#define GET_FREQ_OFFSET(pin, src) (((pin) * 6) + ((src) * 2))
+#define GET_SRC_OFFSET(pin) (((pin) * 6) + 4)
+
+#define DUTY_CYCLE(x) (0x8 + ((x) * 4))
+#define SSO_CON0 0x2B0
+#define SSO_CON0_RZFL BIT(26)
+#define SSO_CON0_BLINK_R BIT(30)
+#define SSO_CON0_SWU BIT(31)
+
+#define SSO_CON1 0x2B4
+#define SSO_CON1_FCDSC GENMASK(21, 20) /* Fixed Divider Shift Clock */
+#define SSO_CON1_FPID GENMASK(24, 23)
+#define SSO_CON1_GPTD GENMASK(26, 25)
+#define SSO_CON1_US GENMASK(31, 30)
+
+#define SSO_CPU 0x2B8
+#define SSO_CON2 0x2C4
+#define SSO_CON3 0x2C8
+
+/* Driver MACRO */
+#define MAX_PIN_NUM_PER_BANK SZ_32
+#define MAX_GROUP_NUM SZ_4
+#define PINS_PER_GROUP SZ_8
+#define FPID_FREQ_RANK_MAX SZ_4
+#define SSO_LED_MAX_NUM SZ_32
+#define MAX_FREQ_RANK 10
+#define DEF_GPTC_CLK_RATE 200000000
+#define SSO_DEF_BRIGHTNESS LED_HALF
+#define DATA_CLK_EDGE 0 /* 0-rising, 1-falling */
+
+static const u32 freq_div_tbl[] = {4000, 2000, 1000, 800};
+static const int freq_tbl[] = {2, 4, 8, 10, 50000, 100000, 200000, 250000};
+static const int shift_clk_freq_tbl[] = {25000000, 12500000, 6250000, 3125000};
+
+/*
+ * Update Source to update the SOUTs
+ * SW - Software has to update the SWU bit
+ * GPTC - General Purpose timer is used as clock source
+ * FPID - Divided FSC clock (FPID) is used as clock source
+ */
+enum {
+ US_SW = 0,
+ US_GPTC = 1,
+ US_FPID = 2
+};
+
+enum {
+ MAX_FPID_FREQ_RANK = 5, /* 1 to 4 */
+ MAX_GPTC_FREQ_RANK = 9, /* 5 to 8 */
+ MAX_GPTC_HS_FREQ_RANK = 10, /* 9 to 10 */
+};
+
+enum {
+ LED_GRP0_PIN_MAX = 24,
+ LED_GRP1_PIN_MAX = 29,
+ LED_GRP2_PIN_MAX = 32,
+};
+
+enum {
+ LED_GRP0_0_23,
+ LED_GRP1_24_28,
+ LED_GRP2_29_31,
+ LED_GROUP_MAX,
+};
+
+enum {
+ CLK_SRC_FPID = 0,
+ CLK_SRC_GPTC = 1,
+ CLK_SRC_GPTC_HS = 2,
+};
+
+struct sso_led_priv;
+
+struct sso_led_desc {
+ const char *name;
+ const char *default_trigger;
+ unsigned int brightness;
+ unsigned int blink_rate;
+ unsigned int retain_state_suspended:1;
+ unsigned int retain_state_shutdown:1;
+ unsigned int panic_indicator:1;
+ unsigned int hw_blink:1;
+ unsigned int hw_trig:1;
+ unsigned int blinking:1;
+ int freq_idx;
+ u32 pin;
+};
+
+struct sso_led {
+ struct list_head list;
+ struct led_classdev cdev;
+ struct gpio_desc *gpiod;
+ struct sso_led_desc desc;
+ struct sso_led_priv *priv;
+};
+
+struct sso_gpio {
+ struct gpio_chip chip;
+ int shift_clk_freq;
+ int edge;
+ int freq;
+ u32 pins;
+ u32 alloc_bitmap;
+};
+
+struct sso_led_priv {
+ struct regmap *mmap;
+ struct device *dev;
+ struct platform_device *pdev;
+ struct clk *gclk;
+ struct clk *fpid_clk;
+ u32 fpid_clkrate;
+ u32 gptc_clkrate;
+ u32 freq[MAX_FREQ_RANK];
+ struct list_head led_list;
+ struct sso_gpio gpio;
+};
+
+static int sso_get_blink_rate_idx(struct sso_led_priv *priv, u32 rate)
+{
+ int i;
+
+ for (i = 0; i < MAX_FREQ_RANK; i++) {
+ if (rate <= priv->freq[i])
+ return i;
+ }
+
+ return -1;
+}
+
+static unsigned int sso_led_pin_to_group(u32 pin)
+{
+ if (pin < LED_GRP0_PIN_MAX)
+ return LED_GRP0_0_23;
+ else if (pin < LED_GRP1_PIN_MAX)
+ return LED_GRP1_24_28;
+ else
+ return LED_GRP2_29_31;
+}
+
+static u32 sso_led_get_freq_src(int freq_idx)
+{
+ if (freq_idx < MAX_FPID_FREQ_RANK)
+ return CLK_SRC_FPID;
+ else if (freq_idx < MAX_GPTC_FREQ_RANK)
+ return CLK_SRC_GPTC;
+ else
+ return CLK_SRC_GPTC_HS;
+}
+
+static u32 sso_led_pin_blink_off(u32 pin, unsigned int group)
+{
+ if (group == LED_GRP2_29_31)
+ return pin - LED_GRP1_PIN_MAX;
+ else if (group == LED_GRP1_24_28)
+ return pin - LED_GRP0_PIN_MAX;
+ else /* led 0 - 23 in led 32 location */
+ return SSO_LED_MAX_NUM - LED_GRP1_PIN_MAX;
+}
+
+static struct sso_led
+*cdev_to_sso_led_data(struct led_classdev *led_cdev)
+{
+ return container_of(led_cdev, struct sso_led, cdev);
+}
+
+static void sso_led_freq_set(struct sso_led_priv *priv, u32 pin, int freq_idx)
+{
+ u32 reg, off, freq_src, val_freq;
+ u32 low, high, val;
+ unsigned int group;
+
+ if (!freq_idx)
+ return;
+
+ group = sso_led_pin_to_group(pin);
+ freq_src = sso_led_get_freq_src(freq_idx);
+ off = sso_led_pin_blink_off(pin, group);
+
+ if (group == LED_GRP0_0_23)
+ return;
+ else if (group == LED_GRP1_24_28)
+ reg = LED_BLINK_H8_0;
+ else
+ reg = LED_BLINK_H8_1;
+
+ if (freq_src == CLK_SRC_FPID)
+ val_freq = freq_idx - 1;
+ else if (freq_src == CLK_SRC_GPTC)
+ val_freq = freq_idx - MAX_FPID_FREQ_RANK;
+
+ /* set blink rate idx */
+ if (freq_src != CLK_SRC_GPTC_HS) {
+ low = GET_FREQ_OFFSET(off, freq_src);
+ high = low + 2;
+ val = val_freq << high;
+ regmap_update_bits(priv->mmap, reg, GENMASK(high, low), val);
+ }
+
+ /* select clock source */
+ low = GET_SRC_OFFSET(off);
+ high = low + 2;
+ val = freq_src << high;
+ regmap_update_bits(priv->mmap, reg, GENMASK(high, low), val);
+}
+
+static void sso_led_brightness_set(struct led_classdev *led_cdev,
+ enum led_brightness brightness)
+{
+ struct sso_led_priv *priv;
+ struct sso_led_desc *desc;
+ struct sso_led *led;
+ int val;
+
+ led = cdev_to_sso_led_data(led_cdev);
+ priv = led->priv;
+ desc = &led->desc;
+
+ desc->brightness = brightness;
+ regmap_write(priv->mmap, DUTY_CYCLE(desc->pin), brightness);
+
+ if (brightness == LED_OFF)
+ val = 0;
+ else
+ val = 1;
+
+ /* HW blink off */
+ if (desc->hw_blink && !val && desc->blinking) {
+ desc->blinking = 0;
+ regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin), 0);
+ } else if (desc->hw_blink && val && !desc->blinking) {
+ desc->blinking = 1;
+ regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin),
+ 1 << desc->pin);
+ }
+
+ if (!desc->hw_trig && led->gpiod)
+ gpiod_set_value(led->gpiod, val);
+}
+
+static enum led_brightness sso_led_brightness_get(struct led_classdev *led_cdev)
+{
+ struct sso_led *led = cdev_to_sso_led_data(led_cdev);
+
+ return (enum led_brightness)led->desc.brightness;
+}
+
+static int
+delay_to_freq_idx(struct sso_led *led, unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ struct sso_led_priv *priv = led->priv;
+ unsigned long delay;
+ int freq_idx;
+ u32 freq;
+
+ if (!*delay_on && !*delay_off) {
+ *delay_on = *delay_off = (1000 / priv->freq[0]) / 2;
+ return 0;
+ }
+
+ delay = *delay_on + *delay_off;
+ freq = 1000 / delay;
+
+ freq_idx = sso_get_blink_rate_idx(priv, freq);
+ if (freq_idx == -1)
+ freq_idx = MAX_FREQ_RANK - 1;
+
+ delay = 1000 / priv->freq[freq_idx];
+ *delay_on = *delay_off = delay / 2;
+
+ if (!*delay_on)
+ *delay_on = *delay_off = 1;
+
+ return freq_idx;
+}
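
A worked example of the delay_to_freq_idx() arithmetic above, with illustrative numbers: 250 ms on / 250 ms off gives a 500 ms period, i.e. 1000/500 = 2 Hz, which sso_get_blink_rate_idx() maps to the first table entry whose frequency is at least 2 Hz; the granted delays are then recomputed from that table frequency. As a standalone sketch:

#include <stdio.h>

int main(void)
{
	unsigned long delay_on = 250, delay_off = 250;	/* ms, requested */
	unsigned long period = delay_on + delay_off;	/* 500 ms */
	unsigned long freq = 1000 / period;		/* 2 Hz */

	/* Assuming the matched table entry is also 2 Hz, the granted
	 * delays come back as (1000 / 2) / 2 = 250 ms each.
	 */
	printf("req %lu Hz -> %lu ms on/off\n", freq, (1000 / freq) / 2);
	return 0;
}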
+
+static int
+sso_led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on,
+ unsigned long *delay_off)
+{
+ struct sso_led_priv *priv;
+ struct sso_led *led;
+ int freq_idx;
+
+ led = cdev_to_sso_led_data(led_cdev);
+ priv = led->priv;
+ freq_idx = delay_to_freq_idx(led, delay_on, delay_off);
+
+ sso_led_freq_set(priv, led->desc.pin, freq_idx);
+ regmap_update_bits(priv->mmap, SSO_CON2, BIT(led->desc.pin),
+ 1 << led->desc.pin);
+ led->desc.freq_idx = freq_idx;
+ led->desc.blink_rate = priv->freq[freq_idx];
+ led->desc.blinking = 1;
+
+ return 1;
+}
+
+static void sso_led_hw_cfg(struct sso_led_priv *priv, struct sso_led *led)
+{
+ struct sso_led_desc *desc = &led->desc;
+
+ /* set freq */
+ if (desc->hw_blink) {
+ sso_led_freq_set(priv, desc->pin, desc->freq_idx);
+ regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin),
+ 1 << desc->pin);
+ }
+
+ if (desc->hw_trig)
+ regmap_update_bits(priv->mmap, SSO_CON3, BIT(desc->pin),
+ 1 << desc->pin);
+
+ /* set brightness */
+ regmap_write(priv->mmap, DUTY_CYCLE(desc->pin), desc->brightness);
+
+ /* enable output */
+ if (!desc->hw_trig && desc->brightness)
+ gpiod_set_value(led->gpiod, 1);
+}
+
+static int sso_create_led(struct sso_led_priv *priv, struct sso_led *led,
+ struct fwnode_handle *child)
+{
+ struct sso_led_desc *desc = &led->desc;
+ struct led_init_data init_data;
+ int err;
+
+ init_data.fwnode = child;
+ init_data.devicename = SSO_DEV_NAME;
+ init_data.default_label = ":";
+
+ led->cdev.default_trigger = desc->default_trigger;
+ led->cdev.brightness_set = sso_led_brightness_set;
+ led->cdev.brightness_get = sso_led_brightness_get;
+ led->cdev.brightness = desc->brightness;
+ led->cdev.max_brightness = LED_FULL;
+
+ if (desc->retain_state_shutdown)
+ led->cdev.flags |= LED_RETAIN_AT_SHUTDOWN;
+ if (desc->retain_state_suspended)
+ led->cdev.flags |= LED_CORE_SUSPENDRESUME;
+ if (desc->panic_indicator)
+ led->cdev.flags |= LED_PANIC_INDICATOR;
+
+ if (desc->hw_blink)
+ led->cdev.blink_set = sso_led_blink_set;
+
+ sso_led_hw_cfg(priv, led);
+
+ err = devm_led_classdev_register_ext(priv->dev, &led->cdev, &init_data);
+ if (err)
+ return err;
+
+ list_add(&led->list, &priv->led_list);
+
+ return 0;
+}
+
+static void sso_init_freq(struct sso_led_priv *priv)
+{
+ int i;
+
+ priv->freq[0] = 0;
+ for (i = 1; i < MAX_FREQ_RANK; i++) {
+ if (i < MAX_FPID_FREQ_RANK) {
+ priv->freq[i] = priv->fpid_clkrate / freq_div_tbl[i - 1];
+ } else if (i < MAX_GPTC_FREQ_RANK) {
+ priv->freq[i] = priv->gptc_clkrate /
+ freq_div_tbl[i - MAX_FPID_FREQ_RANK];
+ } else if (i < MAX_GPTC_HS_FREQ_RANK) {
+ priv->freq[i] = priv->gptc_clkrate;
+ }
+ }
+}
+
+static int sso_gpio_request(struct gpio_chip *chip, unsigned int offset)
+{
+ struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+ if (priv->gpio.alloc_bitmap & BIT(offset))
+ return -EINVAL;
+
+ priv->gpio.alloc_bitmap |= BIT(offset);
+ regmap_write(priv->mmap, DUTY_CYCLE(offset), 0xFF);
+
+ return 0;
+}
+
+static void sso_gpio_free(struct gpio_chip *chip, unsigned int offset)
+{
+ struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+ priv->gpio.alloc_bitmap &= ~BIT(offset);
+ regmap_write(priv->mmap, DUTY_CYCLE(offset), 0x0);
+}
+
+static int sso_gpio_get_dir(struct gpio_chip *chip, unsigned int offset)
+{
+ return GPIOF_DIR_OUT;
+}
+
+static int
+sso_gpio_dir_out(struct gpio_chip *chip, unsigned int offset, int value)
+{
+ struct sso_led_priv *priv = gpiochip_get_data(chip);
+ bool bit = !!value;
+
+ regmap_update_bits(priv->mmap, SSO_CPU, BIT(offset), bit << offset);
+ if (!priv->gpio.freq)
+ regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_SWU,
+ SSO_CON0_SWU);
+
+ return 0;
+}
+
+static int sso_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+ struct sso_led_priv *priv = gpiochip_get_data(chip);
+ u32 reg_val;
+
+ regmap_read(priv->mmap, SSO_CPU, &reg_val);
+
+ return !!(reg_val & BIT(offset));
+}
+
+static void sso_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+{
+ struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+ regmap_update_bits(priv->mmap, SSO_CPU, BIT(offset), value << offset);
+ if (!priv->gpio.freq)
+ regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_SWU,
+ SSO_CON0_SWU);
+}
+
+static int sso_gpio_gc_init(struct device *dev, struct sso_led_priv *priv)
+{
+ struct gpio_chip *gc = &priv->gpio.chip;
+
+ gc->request = sso_gpio_request;
+ gc->free = sso_gpio_free;
+ gc->get_direction = sso_gpio_get_dir;
+ gc->direction_output = sso_gpio_dir_out;
+ gc->get = sso_gpio_get;
+ gc->set = sso_gpio_set;
+
+ gc->label = "lgm-sso";
+ gc->base = -1;
+ /* To exclude pins from control, use "gpio-reserved-ranges" */
+ gc->ngpio = priv->gpio.pins;
+ gc->parent = dev;
+ gc->owner = THIS_MODULE;
+ gc->of_node = dev->of_node;
+
+ return devm_gpiochip_add_data(dev, gc, priv);
+}
+
+static int sso_gpio_get_freq_idx(int freq)
+{
+ int idx;
+
+ for (idx = 0; idx < ARRAY_SIZE(freq_tbl); idx++) {
+ if (freq <= freq_tbl[idx])
+ return idx;
+ }
+
+ return -1;
+}
+
+static void sso_register_shift_clk(struct sso_led_priv *priv)
+{
+ int idx, size = ARRAY_SIZE(shift_clk_freq_tbl);
+ u32 val = 0;
+
+ for (idx = 0; idx < size; idx++) {
+ if (shift_clk_freq_tbl[idx] <= priv->gpio.shift_clk_freq) {
+ val = idx;
+ break;
+ }
+ }
+
+ if (idx == size)
+ dev_warn(priv->dev, "%s: Invalid freq %d\n",
+ __func__, priv->gpio.shift_clk_freq);
+
+ regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_FCDSC,
+ FIELD_PREP(SSO_CON1_FCDSC, val));
+}
+
+static int sso_gpio_freq_set(struct sso_led_priv *priv)
+{
+ int freq_idx;
+ u32 val;
+
+ freq_idx = sso_gpio_get_freq_idx(priv->gpio.freq);
+ if (freq_idx == -1)
+ freq_idx = ARRAY_SIZE(freq_tbl) - 1;
+
+ val = freq_idx % FPID_FREQ_RANK_MAX;
+
+ if (!priv->gpio.freq) {
+ regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R, 0);
+ regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+ FIELD_PREP(SSO_CON1_US, US_SW));
+ } else if (freq_idx < FPID_FREQ_RANK_MAX) {
+ regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R,
+ SSO_CON0_BLINK_R);
+ regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+ FIELD_PREP(SSO_CON1_US, US_FPID));
+ regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_FPID,
+ FIELD_PREP(SSO_CON1_FPID, val));
+ } else {
+ regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R,
+ SSO_CON0_BLINK_R);
+ regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+ FIELD_PREP(SSO_CON1_US, US_GPTC));
+ regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_GPTD,
+ FIELD_PREP(SSO_CON1_GPTD, val));
+ }
+
+ return 0;
+}
+
+static int sso_gpio_hw_init(struct sso_led_priv *priv)
+{
+ u32 activate;
+ int i, err;
+
+ /* Clear all duty cycles */
+ for (i = 0; i < priv->gpio.pins; i++) {
+ err = regmap_write(priv->mmap, DUTY_CYCLE(i), 0);
+ if (err)
+ return err;
+ }
+
+ /* 4 groups for a total of 32 pins */
+ for (i = 1; i <= MAX_GROUP_NUM; i++) {
+ activate = !!(i * PINS_PER_GROUP <= priv->gpio.pins ||
+ priv->gpio.pins > (i - 1) * PINS_PER_GROUP);
+ err = regmap_update_bits(priv->mmap, SSO_CON1, BIT(i - 1),
+ activate << (i - 1));
+ if (err)
+ return err;
+ }
+
+ /* No HW-controlled pins by default */
+ err = regmap_write(priv->mmap, SSO_CON3, 0);
+ if (err)
+ return err;
+
+ /* No blinking on any pin by default */
+ err = regmap_write(priv->mmap, SSO_CON2, 0);
+ if (err)
+ return err;
+
+ /* Output 0 by default */
+ err = regmap_write(priv->mmap, SSO_CPU, 0);
+ if (err)
+ return err;
+
+ /* update edge */
+ err = regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_RZFL,
+ FIELD_PREP(SSO_CON0_RZFL, priv->gpio.edge));
+ if (err)
+ return err;
+
+ /* Set GPIO update rate */
+ sso_gpio_freq_set(priv);
+
+ /* Register shift clock */
+ sso_register_shift_clk(priv);
+
+ return 0;
+}
+
+static void sso_led_shutdown(struct sso_led *led)
+{
+ struct sso_led_priv *priv = led->priv;
+
+ /* unregister led */
+ devm_led_classdev_unregister(priv->dev, &led->cdev);
+
+ /* clear HW control bit */
+ if (led->desc.hw_trig)
+ regmap_update_bits(priv->mmap, SSO_CON3, BIT(led->desc.pin), 0);
+
+ if (led->gpiod)
+ devm_gpiod_put(priv->dev, led->gpiod);
+
+ led->priv = NULL;
+}
+
+static int
+__sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled)
+{
+ struct fwnode_handle *fwnode_child;
+ struct device *dev = priv->dev;
+ struct sso_led_desc *desc;
+ struct sso_led *led;
+ struct list_head *p;
+ const char *tmp;
+ u32 prop;
+ int ret;
+
+ fwnode_for_each_child_node(fw_ssoled, fwnode_child) {
+ led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL);
+ if (!led)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&led->list);
+ led->priv = priv;
+ desc = &led->desc;
+
+ led->gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL,
+ fwnode_child,
+ GPIOD_ASIS, NULL);
+ if (IS_ERR(led->gpiod)) {
+ dev_err(dev, "led: get gpio fail!\n");
+ goto __dt_err;
+ }
+
+ fwnode_property_read_string(fwnode_child,
+ "linux,default-trigger",
+ &desc->default_trigger);
+
+ if (fwnode_property_present(fwnode_child,
+ "retain-state-suspended"))
+ desc->retain_state_suspended = 1;
+
+ if (fwnode_property_present(fwnode_child,
+ "retain-state-shutdown"))
+ desc->retain_state_shutdown = 1;
+
+ if (fwnode_property_present(fwnode_child, "panic-indicator"))
+ desc->panic_indicator = 1;
+
+ ret = fwnode_property_read_u32(fwnode_child, "reg", &prop);
+ if (ret != 0 || prop >= SSO_LED_MAX_NUM) {
+ dev_err(dev, "invalid LED pin:%u\n", prop);
+ goto __dt_err;
+ }
+ desc->pin = prop;
+
+ if (fwnode_property_present(fwnode_child, "intel,sso-hw-blink"))
+ desc->hw_blink = 1;
+
+ desc->hw_trig = fwnode_property_read_bool(fwnode_child,
+ "intel,sso-hw-trigger");
+ if (desc->hw_trig) {
+ desc->default_trigger = NULL;
+ desc->retain_state_shutdown = 0;
+ desc->retain_state_suspended = 0;
+ desc->panic_indicator = 0;
+ desc->hw_blink = 0;
+ }
+
+ if (fwnode_property_read_u32(fwnode_child,
+ "intel,sso-blink-rate-hz", &prop)) {
+ /* default first freq rate */
+ desc->freq_idx = 0;
+ desc->blink_rate = priv->freq[desc->freq_idx];
+ } else {
+ desc->freq_idx = sso_get_blink_rate_idx(priv, prop);
+ if (desc->freq_idx == -1)
+ desc->freq_idx = MAX_FREQ_RANK - 1;
+
+ desc->blink_rate = priv->freq[desc->freq_idx];
+ }
+
+ if (!fwnode_property_read_string(fwnode_child, "default-state", &tmp)) {
+ if (!strcmp(tmp, "on"))
+ desc->brightness = LED_FULL;
+ }
+
+ if (sso_create_led(priv, led, fwnode_child))
+ goto __dt_err;
+ }
+ fwnode_handle_put(fw_ssoled);
+
+ return 0;
+__dt_err:
+ fwnode_handle_put(fw_ssoled);
+ /* unregister leds */
+ list_for_each(p, &priv->led_list) {
+ led = list_entry(p, struct sso_led, list);
+ sso_led_shutdown(led);
+ }
+
+ return -EINVAL;
+}
+
+static int sso_led_dt_parse(struct sso_led_priv *priv)
+{
+ struct fwnode_handle *fwnode = dev_fwnode(priv->dev);
+ struct fwnode_handle *fw_ssoled;
+ struct device *dev = priv->dev;
+ int count;
+ int ret;
+
+ count = device_get_child_node_count(dev);
+ if (!count)
+ return 0;
+
+ fw_ssoled = fwnode_get_named_child_node(fwnode, "ssoled");
+ if (fw_ssoled) {
+ ret = __sso_led_dt_parse(priv, fw_ssoled);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int sso_probe_gpios(struct sso_led_priv *priv)
+{
+ struct device *dev = priv->dev;
+ int ret;
+
+ if (device_property_read_u32(dev, "ngpios", &priv->gpio.pins))
+ priv->gpio.pins = MAX_PIN_NUM_PER_BANK;
+
+ if (priv->gpio.pins > MAX_PIN_NUM_PER_BANK)
+ return -EINVAL;
+
+ if (device_property_read_u32(dev, "intel,sso-update-rate-hz",
+ &priv->gpio.freq))
+ priv->gpio.freq = 0;
+
+ priv->gpio.edge = DATA_CLK_EDGE;
+ priv->gpio.shift_clk_freq = -1;
+
+ ret = sso_gpio_hw_init(priv);
+ if (ret)
+ return ret;
+
+ return sso_gpio_gc_init(dev, priv);
+}
+
+static void sso_clk_disable(void *data)
+{
+ struct sso_led_priv *priv = data;
+
+ clk_disable_unprepare(priv->fpid_clk);
+ clk_disable_unprepare(priv->gclk);
+}
+
+static int intel_sso_led_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct sso_led_priv *priv;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->pdev = pdev;
+ priv->dev = dev;
+
+ /* gate clock */
+ priv->gclk = devm_clk_get(dev, "sso");
+ if (IS_ERR(priv->gclk)) {
+ dev_err(dev, "get sso gate clock failed!\n");
+ return PTR_ERR(priv->gclk);
+ }
+
+ ret = clk_prepare_enable(priv->gclk);
+ if (ret) {
+ dev_err(dev, "Failed to prepate/enable sso gate clock!\n");
+ return ret;
+ }
+
+ priv->fpid_clk = devm_clk_get(dev, "fpid");
+ if (IS_ERR(priv->fpid_clk)) {
+ dev_err(dev, "Failed to get fpid clock!\n");
+ return PTR_ERR(priv->fpid_clk);
+ }
+
+ ret = clk_prepare_enable(priv->fpid_clk);
+ if (ret) {
+ dev_err(dev, "Failed to prepare/enable fpid clock!\n");
+ return ret;
+ }
+ priv->fpid_clkrate = clk_get_rate(priv->fpid_clk);
+
+ ret = devm_add_action_or_reset(dev, sso_clk_disable, priv);
+ if (ret) {
+ dev_err(dev, "Failed to devm_add_action_or_reset, %d\n", ret);
+ return ret;
+ }
+
+ priv->mmap = syscon_node_to_regmap(dev->of_node);
+ if (IS_ERR(priv->mmap)) {
+ dev_err(dev, "Failed to map iomem!\n");
+ return PTR_ERR(priv->mmap);
+ }
+
+ ret = sso_probe_gpios(priv);
+ if (ret) {
+ regmap_exit(priv->mmap);
+ return ret;
+ }
+
+ INIT_LIST_HEAD(&priv->led_list);
+
+ platform_set_drvdata(pdev, priv);
+ sso_init_freq(priv);
+
+ priv->gptc_clkrate = DEF_GPTC_CLK_RATE;
+
+ ret = sso_led_dt_parse(priv);
+ if (ret) {
+ regmap_exit(priv->mmap);
+ return ret;
+ }
+ dev_info(priv->dev, "sso LED init success!\n");
+
+ return 0;
+}
+
+static int intel_sso_led_remove(struct platform_device *pdev)
+{
+ struct sso_led_priv *priv;
+ struct list_head *pos, *n;
+ struct sso_led *led;
+
+ priv = platform_get_drvdata(pdev);
+
+ list_for_each_safe(pos, n, &priv->led_list) {
+ list_del(pos);
+ led = list_entry(pos, struct sso_led, list);
+ sso_led_shutdown(led);
+ }
+
+ clk_disable_unprepare(priv->fpid_clk);
+ clk_disable_unprepare(priv->gclk);
+ regmap_exit(priv->mmap);
+
+ return 0;
+}
+
+static const struct of_device_id of_sso_led_match[] = {
+ { .compatible = "intel,lgm-ssoled" },
+ {}
+};
+
+MODULE_DEVICE_TABLE(of, of_sso_led_match);
+
+static struct platform_driver intel_sso_led_driver = {
+ .probe = intel_sso_led_probe,
+ .remove = intel_sso_led_remove,
+ .driver = {
+ .name = "lgm-ssoled",
+ .of_match_table = of_match_ptr(of_sso_led_match),
+ },
+};
+
+module_platform_driver(intel_sso_led_driver);
+
+MODULE_DESCRIPTION("Intel SSO LED/GPIO driver");
+MODULE_LICENSE("GPL v2");
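To make the child-node parsing in __sso_led_dt_parse() above concrete, here is a hypothetical ssoled fragment using only properties the driver reads; node names, phandles and values are invented, and the devicetree binding shipped with this series remains authoritative:

	ssoled {
		led@0 {
			reg = <0>;	/* LED pin, must be < SSO_LED_MAX_NUM */
			linux,default-trigger = "heartbeat";
			default-state = "on";
			intel,sso-hw-blink;
			intel,sso-blink-rate-hz = <2>;
			/* the NULL con_id passed to
			 * devm_fwnode_get_gpiod_from_child() implies a
			 * plain "gpios" property
			 */
			gpios = <&sso_gpio 0 0>;
		};
	};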
diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c
index 131ca83f5fb3..2e495ff67856 100644
--- a/drivers/leds/led-class.c
+++ b/drivers/leds/led-class.c
@@ -145,8 +145,7 @@ static void led_remove_brightness_hw_changed(struct led_classdev *led_cdev)
device_remove_file(led_cdev->dev, &dev_attr_brightness_hw_changed);
}
-void led_classdev_notify_brightness_hw_changed(struct led_classdev *led_cdev,
- enum led_brightness brightness)
+void led_classdev_notify_brightness_hw_changed(struct led_classdev *led_cdev, unsigned int brightness)
{
if (WARN_ON(!led_cdev->brightness_hw_changed_kn))
return;
diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c
index c4e780bdb385..8eb8054ef9c6 100644
--- a/drivers/leds/led-core.c
+++ b/drivers/leds/led-core.c
@@ -39,8 +39,7 @@ const char * const led_colors[LED_COLOR_ID_MAX] = {
};
EXPORT_SYMBOL_GPL(led_colors);
-static int __led_set_brightness(struct led_classdev *led_cdev,
- enum led_brightness value)
+static int __led_set_brightness(struct led_classdev *led_cdev, unsigned int value)
{
if (!led_cdev->brightness_set)
return -ENOTSUPP;
@@ -50,8 +49,7 @@ static int __led_set_brightness(struct led_classdev *led_cdev,
return 0;
}
-static int __led_set_brightness_blocking(struct led_classdev *led_cdev,
- enum led_brightness value)
+static int __led_set_brightness_blocking(struct led_classdev *led_cdev, unsigned int value)
{
if (!led_cdev->brightness_set_blocking)
return -ENOTSUPP;
@@ -240,8 +238,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev)
}
EXPORT_SYMBOL_GPL(led_stop_software_blink);
-void led_set_brightness(struct led_classdev *led_cdev,
- enum led_brightness brightness)
+void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness)
{
/*
* If software blink is active, delay brightness setting
@@ -253,7 +250,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
* work queue task to avoid problems in case we are called
* from hard irq context.
*/
- if (brightness == LED_OFF) {
+ if (!brightness) {
set_bit(LED_BLINK_DISABLE, &led_cdev->work_flags);
schedule_work(&led_cdev->set_brightness_work);
} else {
@@ -268,8 +265,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
}
EXPORT_SYMBOL_GPL(led_set_brightness);
-void led_set_brightness_nopm(struct led_classdev *led_cdev,
- enum led_brightness value)
+void led_set_brightness_nopm(struct led_classdev *led_cdev, unsigned int value)
{
/* Use brightness_set op if available, it is guaranteed not to sleep */
if (!__led_set_brightness(led_cdev, value))
@@ -281,8 +277,7 @@ void led_set_brightness_nopm(struct led_classdev *led_cdev,
}
EXPORT_SYMBOL_GPL(led_set_brightness_nopm);
-void led_set_brightness_nosleep(struct led_classdev *led_cdev,
- enum led_brightness value)
+void led_set_brightness_nosleep(struct led_classdev *led_cdev, unsigned int value)
{
led_cdev->brightness = min(value, led_cdev->max_brightness);
@@ -293,8 +288,7 @@ void led_set_brightness_nosleep(struct led_classdev *led_cdev,
}
EXPORT_SYMBOL_GPL(led_set_brightness_nosleep);
-int led_set_brightness_sync(struct led_classdev *led_cdev,
- enum led_brightness value)
+int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value)
{
if (led_cdev->blink_delay_on || led_cdev->blink_delay_off)
return -EBUSY;
diff --git a/drivers/leds/leds-apu.c b/drivers/leds/leds-apu.c
index 7fd557aceff6..c409b80c236d 100644
--- a/drivers/leds/leds-apu.c
+++ b/drivers/leds/leds-apu.c
@@ -83,6 +83,7 @@ static const struct apu_led_profile apu1_led_profile[] = {
};
static const struct dmi_system_id apu_led_dmi_table[] __initconst = {
+ /* PC Engines APU with factory bios "SageBios_PCEngines_APU-45" */
{
.ident = "apu",
.matches = {
@@ -90,6 +91,14 @@ static const struct dmi_system_id apu_led_dmi_table[] __initconst = {
DMI_MATCH(DMI_PRODUCT_NAME, "APU")
}
},
+ /* PC Engines APU with "Mainline" bios >= 4.6.8 */
+ {
+ .ident = "apu",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "PC Engines"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "apu1")
+ }
+ },
{}
};
MODULE_DEVICE_TABLE(dmi, apu_led_dmi_table);
@@ -173,7 +182,7 @@ static int __init apu_led_init(void)
int err;
if (!(dmi_match(DMI_SYS_VENDOR, "PC Engines") &&
- dmi_match(DMI_PRODUCT_NAME, "APU"))) {
+ (dmi_match(DMI_PRODUCT_NAME, "APU") || dmi_match(DMI_PRODUCT_NAME, "apu1")))) {
pr_err("No PC Engines APUv1 board detected. For APUv2,3 support, enable CONFIG_PCENGINES_APU2\n");
return -ENODEV;
}
diff --git a/drivers/leds/leds-blinkm.c b/drivers/leds/leds-blinkm.c
index e11fe1788242..b4e1fdff4186 100644
--- a/drivers/leds/leds-blinkm.c
+++ b/drivers/leds/leds-blinkm.c
@@ -192,13 +192,13 @@ static int store_color_common(struct device *dev, const char *buf, int color)
return 0;
}
-static ssize_t show_red(struct device *dev, struct device_attribute *attr,
+static ssize_t red_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return show_color_common(dev, buf, RED);
}
-static ssize_t store_red(struct device *dev, struct device_attribute *attr,
+static ssize_t red_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
int ret;
@@ -209,15 +209,15 @@ static ssize_t store_red(struct device *dev, struct device_attribute *attr,
return count;
}
-static DEVICE_ATTR(red, S_IRUGO | S_IWUSR, show_red, store_red);
+static DEVICE_ATTR_RW(red);
-static ssize_t show_green(struct device *dev, struct device_attribute *attr,
+static ssize_t green_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return show_color_common(dev, buf, GREEN);
}
-static ssize_t store_green(struct device *dev, struct device_attribute *attr,
+static ssize_t green_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -229,15 +229,15 @@ static ssize_t store_green(struct device *dev, struct device_attribute *attr,
return count;
}
-static DEVICE_ATTR(green, S_IRUGO | S_IWUSR, show_green, store_green);
+static DEVICE_ATTR_RW(green);
-static ssize_t show_blue(struct device *dev, struct device_attribute *attr,
+static ssize_t blue_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return show_color_common(dev, buf, BLUE);
}
-static ssize_t store_blue(struct device *dev, struct device_attribute *attr,
+static ssize_t blue_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
int ret;
@@ -248,16 +248,16 @@ static ssize_t store_blue(struct device *dev, struct device_attribute *attr,
return count;
}
-static DEVICE_ATTR(blue, S_IRUGO | S_IWUSR, show_blue, store_blue);
+static DEVICE_ATTR_RW(blue);
-static ssize_t show_test(struct device *dev, struct device_attribute *attr,
+static ssize_t test_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
return scnprintf(buf, PAGE_SIZE,
"#Write into test to start test sequence!#\n");
}
-static ssize_t store_test(struct device *dev, struct device_attribute *attr,
+static ssize_t test_store(struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
{
@@ -273,7 +273,7 @@ static ssize_t store_test(struct device *dev, struct device_attribute *attr,
return count;
}
-static DEVICE_ATTR(test, S_IRUGO | S_IWUSR, show_test, store_test);
+static DEVICE_ATTR_RW(test);
/* TODO: HSB, fade, timeadj, script ... */
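The sysfs renames in this file (and in the leds-lm3530/lm355x/lm3642/max8997/netxbig/ss4200/wm831x hunks below) exist because DEVICE_ATTR_RW() and DEVICE_ATTR_WO() derive their callback names from the attribute name. A simplified sketch of the naming contract, modeled on <linux/device.h> (details elided):

	/* DEVICE_ATTR_RW(red) is roughly equivalent to: */
	struct device_attribute dev_attr_red =
		__ATTR(red, 0644, red_show, red_store);

so each show/store pair merely needs renaming to <attr>_show()/<attr>_store() before the macro can replace the open-coded DEVICE_ATTR() lines.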
diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 93f5b1b60fde..b5d5e22d2d1e 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -96,7 +96,8 @@ static int create_gpio_led(const struct gpio_led *template,
} else {
state = (template->default_state == LEDS_GPIO_DEFSTATE_ON);
}
- led_dat->cdev.brightness = state ? LED_FULL : LED_OFF;
+ led_dat->cdev.brightness = state;
+ led_dat->cdev.max_brightness = 1;
if (!template->retain_state_suspended)
led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
if (template->panic_indicator)
diff --git a/drivers/leds/leds-lm3530.c b/drivers/leds/leds-lm3530.c
index 2f8362f6bf75..2db455efd4b1 100644
--- a/drivers/leds/leds-lm3530.c
+++ b/drivers/leds/leds-lm3530.c
@@ -346,8 +346,8 @@ static void lm3530_brightness_set(struct led_classdev *led_cdev,
}
}
-static ssize_t lm3530_mode_get(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t mode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct lm3530_data *drvdata;
@@ -365,8 +365,8 @@ static ssize_t lm3530_mode_get(struct device *dev,
return len;
}
-static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute
- *attr, const char *buf, size_t size)
+static ssize_t mode_store(struct device *dev, struct device_attribute
+ *attr, const char *buf, size_t size)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct lm3530_data *drvdata;
@@ -397,7 +397,7 @@ static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute
return sizeof(drvdata->mode);
}
-static DEVICE_ATTR(mode, 0644, lm3530_mode_get, lm3530_mode_set);
+static DEVICE_ATTR_RW(mode);
static struct attribute *lm3530_attrs[] = {
&dev_attr_mode.attr,
diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c
index 9dd205870525..43d5970d96aa 100644
--- a/drivers/leds/leds-lm3533.c
+++ b/drivers/leds/leds-lm3533.c
@@ -608,7 +608,7 @@ static struct attribute *lm3533_led_attributes[] = {
static umode_t lm3533_led_attr_is_visible(struct kobject *kobj,
struct attribute *attr, int n)
{
- struct device *dev = container_of(kobj, struct device, kobj);
+ struct device *dev = kobj_to_dev(kobj);
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct lm3533_led *led = to_lm3533_led(led_cdev);
umode_t mode = attr->mode;
diff --git a/drivers/leds/leds-lm355x.c b/drivers/leds/leds-lm355x.c
index 1505521249b5..2d3e11845ba5 100644
--- a/drivers/leds/leds-lm355x.c
+++ b/drivers/leds/leds-lm355x.c
@@ -349,9 +349,9 @@ static int lm355x_indicator_brightness_set(struct led_classdev *cdev,
}
/* indicator pattern only for lm3556 */
-static ssize_t lm3556_indicator_pattern_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t size)
+static ssize_t pattern_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
{
ssize_t ret;
struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -381,7 +381,7 @@ out:
return ret;
}
-static DEVICE_ATTR(pattern, S_IWUSR, NULL, lm3556_indicator_pattern_store);
+static DEVICE_ATTR_WO(pattern);
static struct attribute *lm355x_indicator_attrs[] = {
&dev_attr_pattern.attr,
diff --git a/drivers/leds/leds-lm3642.c b/drivers/leds/leds-lm3642.c
index 62c14872caf7..8007b82985a8 100644
--- a/drivers/leds/leds-lm3642.c
+++ b/drivers/leds/leds-lm3642.c
@@ -165,9 +165,9 @@ static int lm3642_control(struct lm3642_chip_data *chip,
/* torch */
/* torch pin config for lm3642 */
-static ssize_t lm3642_torch_pin_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t size)
+static ssize_t torch_pin_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
{
ssize_t ret;
struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -193,7 +193,7 @@ static ssize_t lm3642_torch_pin_store(struct device *dev,
return size;
}
-static DEVICE_ATTR(torch_pin, S_IWUSR, NULL, lm3642_torch_pin_store);
+static DEVICE_ATTR_WO(torch_pin);
static int lm3642_torch_brightness_set(struct led_classdev *cdev,
enum led_brightness brightness)
@@ -212,9 +212,9 @@ static int lm3642_torch_brightness_set(struct led_classdev *cdev,
/* flash */
/* strobe pin config for lm3642 */
-static ssize_t lm3642_strobe_pin_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t size)
+static ssize_t strobe_pin_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
{
ssize_t ret;
struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -240,7 +240,7 @@ static ssize_t lm3642_strobe_pin_store(struct device *dev,
return size;
}
-static DEVICE_ATTR(strobe_pin, S_IWUSR, NULL, lm3642_strobe_pin_store);
+static DEVICE_ATTR_WO(strobe_pin);
static int lm3642_strobe_brightness_set(struct led_classdev *cdev,
enum led_brightness brightness)
diff --git a/drivers/leds/leds-lp50xx.c b/drivers/leds/leds-lp50xx.c
index f13117eed976..06230614fdc5 100644
--- a/drivers/leds/leds-lp50xx.c
+++ b/drivers/leds/leds-lp50xx.c
@@ -6,10 +6,9 @@
#include <linux/i2c.h>
#include <linux/init.h>
#include <linux/leds.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/slab.h>
@@ -322,7 +321,7 @@ static int lp50xx_brightness_set(struct led_classdev *cdev,
ret = regmap_write(led->priv->regmap, reg_val, brightness);
if (ret) {
- dev_err(&led->priv->client->dev,
+ dev_err(led->priv->dev,
"Cannot write brightness value %d\n", ret);
goto out;
}
@@ -338,7 +337,7 @@ static int lp50xx_brightness_set(struct led_classdev *cdev,
ret = regmap_write(led->priv->regmap, reg_val,
mc_dev->subled_info[i].intensity);
if (ret) {
- dev_err(&led->priv->client->dev,
+ dev_err(led->priv->dev,
"Cannot write intensity value %d\n", ret);
goto out;
}
@@ -360,8 +359,8 @@ static int lp50xx_set_banks(struct lp50xx *priv, u32 led_banks[])
bank_enable_mask |= (1 << led_banks[i]);
}
- led_config_lo = (u8)(bank_enable_mask & 0xff);
- led_config_hi = (u8)(bank_enable_mask >> 8) & 0xff;
+ led_config_lo = bank_enable_mask;
+ led_config_hi = bank_enable_mask >> 8;
ret = regmap_write(priv->regmap, LP50XX_LED_CFG0, led_config_lo);
if (ret)
@@ -382,11 +381,9 @@ static int lp50xx_enable_disable(struct lp50xx *priv, int enable_disable)
{
int ret;
- if (priv->enable_gpio) {
- ret = gpiod_direction_output(priv->enable_gpio, enable_disable);
- if (ret)
- return ret;
- }
+ ret = gpiod_direction_output(priv->enable_gpio, enable_disable);
+ if (ret)
+ return ret;
if (enable_disable)
return regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_EN);
@@ -404,7 +401,7 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
if (num_leds > 1) {
if (num_leds > priv->chip_info->max_modules) {
- dev_err(&priv->client->dev, "reg property is invalid\n");
+ dev_err(priv->dev, "reg property is invalid\n");
return -EINVAL;
}
@@ -412,13 +409,13 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
ret = fwnode_property_read_u32_array(child, "reg", led_banks, num_leds);
if (ret) {
- dev_err(&priv->client->dev, "reg property is missing\n");
+ dev_err(priv->dev, "reg property is missing\n");
return ret;
}
ret = lp50xx_set_banks(priv, led_banks);
if (ret) {
- dev_err(&priv->client->dev, "Cannot setup banked LEDs\n");
+ dev_err(priv->dev, "Cannot setup banked LEDs\n");
return ret;
}
@@ -426,12 +423,12 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
} else {
ret = fwnode_property_read_u32(child, "reg", &led_number);
if (ret) {
- dev_err(&priv->client->dev, "led reg property missing\n");
+ dev_err(priv->dev, "led reg property missing\n");
return ret;
}
if (led_number > priv->chip_info->num_leds) {
- dev_err(&priv->client->dev, "led-sources property is invalid\n");
+ dev_err(priv->dev, "led-sources property is invalid\n");
return -EINVAL;
}
@@ -455,12 +452,9 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
int i = 0;
priv->enable_gpio = devm_gpiod_get_optional(priv->dev, "enable", GPIOD_OUT_LOW);
- if (IS_ERR(priv->enable_gpio)) {
- ret = PTR_ERR(priv->enable_gpio);
- dev_err(&priv->client->dev, "Failed to get enable gpio: %d\n",
- ret);
- return ret;
- }
+ if (IS_ERR(priv->enable_gpio))
+ return dev_err_probe(priv->dev, PTR_ERR(priv->enable_gpio),
+ "Failed to get enable GPIO\n");
priv->regulator = devm_regulator_get(priv->dev, "vled");
if (IS_ERR(priv->regulator))
@@ -470,7 +464,7 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
led = &priv->leds[i];
ret = fwnode_property_count_u32(child, "reg");
if (ret < 0) {
- dev_err(&priv->client->dev, "reg property is invalid\n");
+ dev_err(priv->dev, "reg property is invalid\n");
goto child_out;
}
@@ -510,12 +504,11 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
led_cdev = &led->mc_cdev.led_cdev;
led_cdev->brightness_set_blocking = lp50xx_brightness_set;
- ret = devm_led_classdev_multicolor_register_ext(&priv->client->dev,
+ ret = devm_led_classdev_multicolor_register_ext(priv->dev,
&led->mc_cdev,
&init_data);
if (ret) {
- dev_err(&priv->client->dev, "led register err: %d\n",
- ret);
+ dev_err(priv->dev, "led register err: %d\n", ret);
goto child_out;
}
i++;
@@ -529,8 +522,7 @@ child_out:
return ret;
}
-static int lp50xx_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+static int lp50xx_probe(struct i2c_client *client)
{
struct lp50xx *led;
int count;
@@ -550,7 +542,7 @@ static int lp50xx_probe(struct i2c_client *client,
mutex_init(&led->lock);
led->client = client;
led->dev = &client->dev;
- led->chip_info = &lp50xx_chip_info_tbl[id->driver_data];
+ led->chip_info = device_get_match_data(&client->dev);
i2c_set_clientdata(client, led);
led->regmap = devm_regmap_init_i2c(client,
led->chip_info->lp50xx_regmap_config);
@@ -579,15 +571,14 @@ static int lp50xx_remove(struct i2c_client *client)
ret = lp50xx_enable_disable(led, 0);
if (ret) {
- dev_err(&led->client->dev, "Failed to disable chip\n");
+ dev_err(led->dev, "Failed to disable chip\n");
return ret;
}
if (led->regulator) {
ret = regulator_disable(led->regulator);
if (ret)
- dev_err(&led->client->dev,
- "Failed to disable regulator\n");
+ dev_err(led->dev, "Failed to disable regulator\n");
}
mutex_destroy(&led->lock);
@@ -596,24 +587,24 @@ static int lp50xx_remove(struct i2c_client *client)
}
static const struct i2c_device_id lp50xx_id[] = {
- { "lp5009", LP5009 },
- { "lp5012", LP5012 },
- { "lp5018", LP5018 },
- { "lp5024", LP5024 },
- { "lp5030", LP5030 },
- { "lp5036", LP5036 },
+ { "lp5009", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5009] },
+ { "lp5012", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5012] },
+ { "lp5018", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5018] },
+ { "lp5024", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5024] },
+ { "lp5030", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5030] },
+ { "lp5036", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5036] },
{ }
};
MODULE_DEVICE_TABLE(i2c, lp50xx_id);
static const struct of_device_id of_lp50xx_leds_match[] = {
- { .compatible = "ti,lp5009", .data = (void *)LP5009 },
- { .compatible = "ti,lp5012", .data = (void *)LP5012 },
- { .compatible = "ti,lp5018", .data = (void *)LP5018 },
- { .compatible = "ti,lp5024", .data = (void *)LP5024 },
- { .compatible = "ti,lp5030", .data = (void *)LP5030 },
- { .compatible = "ti,lp5036", .data = (void *)LP5036 },
- {},
+ { .compatible = "ti,lp5009", .data = &lp50xx_chip_info_tbl[LP5009] },
+ { .compatible = "ti,lp5012", .data = &lp50xx_chip_info_tbl[LP5012] },
+ { .compatible = "ti,lp5018", .data = &lp50xx_chip_info_tbl[LP5018] },
+ { .compatible = "ti,lp5024", .data = &lp50xx_chip_info_tbl[LP5024] },
+ { .compatible = "ti,lp5030", .data = &lp50xx_chip_info_tbl[LP5030] },
+ { .compatible = "ti,lp5036", .data = &lp50xx_chip_info_tbl[LP5036] },
+ {}
};
MODULE_DEVICE_TABLE(of, of_lp50xx_leds_match);
@@ -622,7 +613,7 @@ static struct i2c_driver lp50xx_driver = {
.name = "lp50xx",
.of_match_table = of_lp50xx_leds_match,
},
- .probe = lp50xx_probe,
+ .probe_new = lp50xx_probe,
.remove = lp50xx_remove,
.id_table = lp50xx_id,
};
diff --git a/drivers/leds/leds-max8997.c b/drivers/leds/leds-max8997.c
index 512a11d142d0..c0bddb33888d 100644
--- a/drivers/leds/leds-max8997.c
+++ b/drivers/leds/leds-max8997.c
@@ -160,8 +160,8 @@ static void max8997_led_brightness_set(struct led_classdev *led_cdev,
}
}
-static ssize_t max8997_led_show_mode(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t mode_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct max8997_led *led =
@@ -193,9 +193,9 @@ static ssize_t max8997_led_show_mode(struct device *dev,
return ret;
}
-static ssize_t max8997_led_store_mode(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t size)
+static ssize_t mode_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct max8997_led *led =
@@ -222,7 +222,7 @@ static ssize_t max8997_led_store_mode(struct device *dev,
return size;
}
-static DEVICE_ATTR(mode, 0644, max8997_led_show_mode, max8997_led_store_mode);
+static DEVICE_ATTR_RW(mode);
static struct attribute *max8997_attrs[] = {
&dev_attr_mode.attr,
diff --git a/drivers/leds/leds-netxbig.c b/drivers/leds/leds-netxbig.c
index 68fbf0b66fad..77213b79f84d 100644
--- a/drivers/leds/leds-netxbig.c
+++ b/drivers/leds/leds-netxbig.c
@@ -204,9 +204,9 @@ static void netxbig_led_set(struct led_classdev *led_cdev,
spin_unlock_irqrestore(&led_dat->lock, flags);
}
-static ssize_t netxbig_led_sata_store(struct device *dev,
- struct device_attribute *attr,
- const char *buff, size_t count)
+static ssize_t sata_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buff, size_t count)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct netxbig_led_data *led_dat =
@@ -255,8 +255,8 @@ exit_unlock:
return ret;
}
-static ssize_t netxbig_led_sata_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t sata_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct netxbig_led_data *led_dat =
@@ -265,7 +265,7 @@ static ssize_t netxbig_led_sata_show(struct device *dev,
return sprintf(buf, "%d\n", led_dat->sata);
}
-static DEVICE_ATTR(sata, 0644, netxbig_led_sata_show, netxbig_led_sata_store);
+static DEVICE_ATTR_RW(sata);
static struct attribute *netxbig_led_attrs[] = {
&dev_attr_sata.attr,
diff --git a/drivers/leds/leds-ss4200.c b/drivers/leds/leds-ss4200.c
index 245de443fe9c..fcaa34706b6c 100644
--- a/drivers/leds/leds-ss4200.c
+++ b/drivers/leds/leds-ss4200.c
@@ -441,8 +441,8 @@ static void set_power_light_amber_noblink(void)
nasgpio_led_set_brightness(&amber->led_cdev, LED_FULL);
}
-static ssize_t nas_led_blink_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t blink_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct led_classdev *led = dev_get_drvdata(dev);
int blinking = 0;
@@ -451,9 +451,9 @@ static ssize_t nas_led_blink_show(struct device *dev,
return sprintf(buf, "%u\n", blinking);
}
-static ssize_t nas_led_blink_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t size)
+static ssize_t blink_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
{
int ret;
struct led_classdev *led = dev_get_drvdata(dev);
@@ -468,7 +468,7 @@ static ssize_t nas_led_blink_store(struct device *dev,
return size;
}
-static DEVICE_ATTR(blink, 0644, nas_led_blink_show, nas_led_blink_store);
+static DEVICE_ATTR_RW(blink);
static struct attribute *nasgpio_led_attrs[] = {
&dev_attr_blink.attr,
@@ -478,7 +478,6 @@ ATTRIBUTE_GROUPS(nasgpio_led);
static int register_nasgpio_led(int led_nr)
{
- int ret;
struct nasgpio_led *nas_led = &nasgpio_leds[led_nr];
struct led_classdev *led = get_classdev_for_led_nr(led_nr);
@@ -489,11 +488,8 @@ static int register_nasgpio_led(int led_nr)
led->brightness_set = nasgpio_led_set_brightness;
led->blink_set = nasgpio_led_set_blink;
led->groups = nasgpio_led_groups;
- ret = led_classdev_register(&nas_gpio_pci_dev->dev, led);
- if (ret)
- return ret;
- return 0;
+ return led_classdev_register(&nas_gpio_pci_dev->dev, led);
}
static void unregister_nasgpio_led(int led_nr)
diff --git a/drivers/leds/leds-wm831x-status.c b/drivers/leds/leds-wm831x-status.c
index 67f4235cb28a..c48b80574f02 100644
--- a/drivers/leds/leds-wm831x-status.c
+++ b/drivers/leds/leds-wm831x-status.c
@@ -155,8 +155,8 @@ static const char * const led_src_texts[] = {
"soft",
};
-static ssize_t wm831x_status_src_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t src_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct wm831x_status *led = to_wm831x_status(led_cdev);
@@ -178,9 +178,9 @@ static ssize_t wm831x_status_src_show(struct device *dev,
return ret;
}
-static ssize_t wm831x_status_src_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t size)
+static ssize_t src_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t size)
{
struct led_classdev *led_cdev = dev_get_drvdata(dev);
struct wm831x_status *led = to_wm831x_status(led_cdev);
@@ -197,7 +197,7 @@ static ssize_t wm831x_status_src_store(struct device *dev,
return size;
}
-static DEVICE_ATTR(src, 0644, wm831x_status_src_show, wm831x_status_src_store);
+static DEVICE_ATTR_RW(src);
static struct attribute *wm831x_status_attrs[] = {
&dev_attr_src.attr,
diff --git a/drivers/leds/leds.h b/drivers/leds/leds.h
index 2d9eb48bbed9..345062ccabda 100644
--- a/drivers/leds/leds.h
+++ b/drivers/leds/leds.h
@@ -19,10 +19,8 @@ static inline int led_get_brightness(struct led_classdev *led_cdev)
void led_init_core(struct led_classdev *led_cdev);
void led_stop_software_blink(struct led_classdev *led_cdev);
-void led_set_brightness_nopm(struct led_classdev *led_cdev,
- enum led_brightness value);
-void led_set_brightness_nosleep(struct led_classdev *led_cdev,
- enum led_brightness value);
+void led_set_brightness_nopm(struct led_classdev *led_cdev, unsigned int value);
+void led_set_brightness_nosleep(struct led_classdev *led_cdev, unsigned int value);
ssize_t led_trigger_read(struct file *filp, struct kobject *kobj,
struct bin_attribute *attr, char *buf,
loff_t pos, size_t count);
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index fce4cbf9529d..50f3e673729c 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1526,6 +1526,10 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
{
sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
+ if (s >= c->start)
+ s -= c->start;
+ else
+ s = 0;
if (likely(c->sectors_per_block_bits >= 0))
s >>= c->sectors_per_block_bits;
else
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index 4312007d2d34..2d3cda0acacb 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -341,8 +341,8 @@ static void do_region(int op, int op_flags, unsigned region,
num_bvecs = 1;
break;
default:
- num_bvecs = min_t(int, BIO_MAX_PAGES,
- dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
+ num_bvecs = bio_max_segs(dm_sector_div_up(remaining,
+ (PAGE_SIZE >> SECTOR_SHIFT)));
}
bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
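This dm-io hunk, and the dm-log-writes and nvmet hunks below, all depend on the bio_max_segs() helper introduced earlier in this series; its presumed definition (from <linux/bio.h>) is a one-line clamp:

	/* Presumed definition; clamps a segment count to the limit
	 * callers previously open-coded as min(..., BIO_MAX_PAGES).
	 */
	static inline unsigned short bio_max_segs(unsigned int nr_segs)
	{
		return min(nr_segs, BIO_MAX_PAGES);
	}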
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index e3d35c6c9f71..57882654ffee 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -264,15 +264,14 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
size_t entrylen, void *data, size_t datalen,
sector_t sector)
{
- int num_pages, bio_pages, pg_datalen, pg_sectorlen, i;
+ int bio_pages, pg_datalen, pg_sectorlen, i;
struct page *page;
struct bio *bio;
size_t ret;
void *ptr;
while (datalen) {
- num_pages = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT;
- bio_pages = min(num_pages, BIO_MAX_PAGES);
+ bio_pages = bio_max_segs(DIV_ROUND_UP(datalen, PAGE_SIZE));
atomic_inc(&lc->io_blocks);
@@ -364,7 +363,7 @@ static int log_one_block(struct log_writes_c *lc,
goto out;
atomic_inc(&lc->io_blocks);
- bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
+ bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt));
if (!bio) {
DMERR("Couldn't alloc log bio");
goto error;
@@ -386,7 +385,8 @@ static int log_one_block(struct log_writes_c *lc,
if (ret != block->vecs[i].bv_len) {
atomic_inc(&lc->io_blocks);
submit_bio(bio);
- bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
+ bio = bio_alloc(GFP_KERNEL,
+ bio_max_segs(block->vec_cnt - i));
if (!bio) {
DMERR("Couldn't alloc log bio");
goto error;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index fb41b4f23c48..66f4c6398f67 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -61,19 +61,18 @@ static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
unsigned *offset, struct dm_buffer **buf)
{
- u64 position, block;
+ u64 position, block, rem;
u8 *res;
position = (index + rsb) * v->fec->roots;
- block = position >> v->data_dev_block_bits;
- *offset = (unsigned)(position - (block << v->data_dev_block_bits));
+ block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem);
+ *offset = (unsigned)rem;
- res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
+ res = dm_bufio_read(v->fec->bufio, block, buf);
if (IS_ERR(res)) {
DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
v->data_dev->name, (unsigned long long)rsb,
- (unsigned long long)(v->fec->start + block),
- PTR_ERR(res));
+ (unsigned long long)block, PTR_ERR(res));
*buf = NULL;
}
@@ -155,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
/* read the next block when we run out of parity bytes */
offset += v->fec->roots;
- if (offset >= 1 << v->data_dev_block_bits) {
+ if (offset >= v->fec->roots << SECTOR_SHIFT) {
dm_bufio_release(buf);
par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
@@ -674,7 +673,7 @@ int verity_fec_ctr(struct dm_verity *v)
{
struct dm_verity_fec *f = v->fec;
struct dm_target *ti = v->ti;
- u64 hash_blocks;
+ u64 hash_blocks, fec_blocks;
int ret;
if (!verity_fec_is_enabled(v)) {
@@ -744,15 +743,17 @@ int verity_fec_ctr(struct dm_verity *v)
}
f->bufio = dm_bufio_client_create(f->dev->bdev,
- 1 << v->data_dev_block_bits,
+ f->roots << SECTOR_SHIFT,
1, 0, NULL, NULL);
if (IS_ERR(f->bufio)) {
ti->error = "Cannot initialize FEC bufio client";
return PTR_ERR(f->bufio);
}
- if (dm_bufio_get_device_size(f->bufio) <
- ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) {
+ dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT));
+
+ fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT);
+ if (dm_bufio_get_device_size(f->bufio) < fec_blocks) {
ti->error = "FEC device is too small";
return -E2BIG;
}
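Since the bufio client above now works in blocks of f->roots << SECTOR_SHIFT bytes, parity lookup in fec_read_parity() reduces to one division with remainder. A hypothetical standalone restatement of that arithmetic (names mirror the patch; div64_u64_rem() is what the kernel code uses so the 64-bit division also works on 32-bit hosts):

	static void fec_parity_location(u64 rsb, int index, u32 roots,
					u64 *block, unsigned int *offset)
	{
		u64 position = (index + rsb) * roots;	/* byte position of the parity data */
		u64 block_size = (u64)roots << 9;	/* roots << SECTOR_SHIFT */

		*block = position / block_size;		/* bufio block to read */
		*offset = position % block_size;	/* byte offset inside it */
	}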
diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c
index 2d778d0f011e..c0fe3295c330 100644
--- a/drivers/misc/ibmvmc.c
+++ b/drivers/misc/ibmvmc.c
@@ -2288,15 +2288,13 @@ crq_failed:
return -EPERM;
}
-static int ibmvmc_remove(struct vio_dev *vdev)
+static void ibmvmc_remove(struct vio_dev *vdev)
{
struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev);
dev_info(adapter->dev, "Entering remove for UA 0x%x\n",
vdev->unit_address);
ibmvmc_release_crq_queue(adapter);
-
- return 0;
}
static struct vio_device_id ibmvmc_device_table[] = {
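The ibmvmc hunk above and the ibmveth/ibmvnic hunks below follow a series-wide change to the powerpc vio bus making the remove callback return void, which is why the trailing return 0 statements disappear. A simplified sketch of the assumed post-change driver structure (other fields omitted):

	struct vio_driver {
		const char			*name;
		const struct vio_device_id	*id_table;
		int	(*probe)(struct vio_dev *dev,
				 const struct vio_device_id *id);
		void	(*remove)(struct vio_dev *dev);	/* previously returned int */
		struct device_driver		driver;
	};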
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 646823ddd317..2d73407ee52e 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -20,7 +20,6 @@
#include <linux/slab.h>
#include <linux/scatterlist.h>
#include <linux/sizes.h>
-#include <linux/swiotlb.h>
#include <linux/regulator/consumer.h>
#include <linux/pm_runtime.h>
#include <linux/of.h>
@@ -4582,12 +4581,8 @@ int sdhci_setup_host(struct sdhci_host *host)
mmc->max_segs = SDHCI_MAX_SEGS;
} else if (host->flags & SDHCI_USE_SDMA) {
mmc->max_segs = 1;
- if (swiotlb_max_segment()) {
- unsigned int max_req_size = (1 << IO_TLB_SHIFT) *
- IO_TLB_SEGSIZE;
- mmc->max_req_size = min(mmc->max_req_size,
- max_req_size);
- }
+ mmc->max_req_size = min_t(size_t, mmc->max_req_size,
+ dma_max_mapping_size(mmc_dev(mmc)));
} else { /* PIO */
mmc->max_segs = SDHCI_MAX_SEGS;
}
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index b09bed554f26..bcd31f458d1a 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -94,7 +94,7 @@ config WIREGUARD
select CRYPTO_BLAKE2S_ARM if ARM
select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
- select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+ select CRYPTO_POLY1305_MIPS if MIPS
help
WireGuard is a secure, fast, and easy to use replacement for IPSec
that uses modern cryptography and clever networking tricks. It's
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index c3ec9ceed833..7fea9ae60f13 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -1758,7 +1758,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
return 0;
}
-static int ibmveth_remove(struct vio_dev *dev)
+static void ibmveth_remove(struct vio_dev *dev)
{
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmveth_adapter *adapter = netdev_priv(netdev);
@@ -1771,8 +1771,6 @@ static int ibmveth_remove(struct vio_dev *dev)
free_netdev(netdev);
dev_set_drvdata(&dev->dev, NULL);
-
- return 0;
}
static struct attribute veth_active_attr;
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 161fa95e8768..9c6438d3b3a5 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -78,7 +78,6 @@ MODULE_LICENSE("GPL");
MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
-static int ibmvnic_remove(struct vio_dev *);
static void release_sub_crqs(struct ibmvnic_adapter *, bool);
static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
@@ -5393,7 +5392,7 @@ ibmvnic_init_fail:
return rc;
}
-static int ibmvnic_remove(struct vio_dev *dev)
+static void ibmvnic_remove(struct vio_dev *dev)
{
struct net_device *netdev = dev_get_drvdata(&dev->dev);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -5434,8 +5433,6 @@ static int ibmvnic_remove(struct vio_dev *dev)
device_remove_file(&dev->dev, &dev_attr_failover);
free_netdev(netdev);
dev_set_drvdata(&dev->dev, NULL);
-
- return 0;
}
static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index aff5ac1f002a..39a01c2a3058 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -1343,11 +1343,21 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
return 0;
gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
- if (nr_mops != 0)
+ if (nr_mops != 0) {
ret = gnttab_map_refs(queue->tx_map_ops,
NULL,
queue->pages_to_map,
nr_mops);
+ if (ret) {
+ unsigned int i;
+
+ netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n",
+ nr_mops, ret);
+ for (i = 0; i < nr_mops; ++i)
+ WARN_ON_ONCE(queue->tx_map_ops[i].status ==
+ GNTST_okay);
+ }
+ }
work_done = xenvif_tx_submit(queue);
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index 5dfd806fc2d2..604ab0e5a2ad 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -630,7 +630,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->queue_size = NVMF_DEF_QUEUE_SIZE;
opts->nr_io_queues = num_online_cpus();
opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
- opts->kato = NVME_DEFAULT_KATO;
+ opts->kato = 0;
opts->duplicate_connect = false;
opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
opts->hdr_digest = false;
@@ -893,6 +893,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
opts->nr_write_queues = 0;
opts->nr_poll_queues = 0;
opts->duplicate_connect = true;
+ } else {
+ if (!opts->kato)
+ opts->kato = NVME_DEFAULT_KATO;
}
if (ctrl_loss_tmo < 0) {
opts->max_reconnects = -1;
diff --git a/drivers/nvme/host/hwmon.c b/drivers/nvme/host/hwmon.c
index 8f9e96986780..0a586d712920 100644
--- a/drivers/nvme/host/hwmon.c
+++ b/drivers/nvme/host/hwmon.c
@@ -248,6 +248,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
if (IS_ERR(hwmon)) {
dev_warn(dev, "Failed to instantiate hwmon device\n");
kfree(data);
+ return PTR_ERR(hwmon);
}
ctrl->hwmon_device = hwmon;
return 0;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 7b6632c00ffd..17ab3320d28b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2632,6 +2632,7 @@ static void nvme_reset_work(struct work_struct *work)
* Don't limit the IOMMU merged segment size.
*/
dma_set_max_seg_size(dev->dev, 0xffffffff);
+ dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
mutex_unlock(&dev->shutdown_lock);
@@ -3233,7 +3234,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
{ PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
- .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
+ NVME_QUIRK_NO_NS_DESC_LIST, },
{ PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
{ PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */
@@ -3247,6 +3249,9 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
+ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE(0x1d1d, 0x1f1f), /* LightNVM qemu device */
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */
@@ -3264,6 +3269,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index bc6a774f2124..fe6b8aa90b53 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -313,27 +313,40 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
}
-static void nvmet_id_set_model_number(struct nvme_id_ctrl *id,
- struct nvmet_subsys *subsys)
+static u16 nvmet_set_model_number(struct nvmet_subsys *subsys)
{
- const char *model = NVMET_DEFAULT_CTRL_MODEL;
- struct nvmet_subsys_model *subsys_model;
+ u16 status = 0;
+
+ mutex_lock(&subsys->lock);
+ if (!subsys->model_number) {
+ subsys->model_number =
+ kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
+ if (!subsys->model_number)
+ status = NVME_SC_INTERNAL;
+ }
+ mutex_unlock(&subsys->lock);
- rcu_read_lock();
- subsys_model = rcu_dereference(subsys->model);
- if (subsys_model)
- model = subsys_model->number;
- memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' ');
- rcu_read_unlock();
+ return status;
}
static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ struct nvmet_subsys *subsys = ctrl->subsys;
struct nvme_id_ctrl *id;
u32 cmd_capsule_size;
u16 status = 0;
+ /*
+ * If there is no model number yet, set it now. It will then remain
+ * stable for the lifetime of the subsystem.
+ */
+ if (!subsys->model_number) {
+ status = nvmet_set_model_number(subsys);
+ if (status)
+ goto out;
+ }
+
id = kzalloc(sizeof(*id), GFP_KERNEL);
if (!id) {
status = NVME_SC_INTERNAL;
@@ -347,7 +360,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
memset(id->sn, ' ', sizeof(id->sn));
bin2hex(id->sn, &ctrl->subsys->serial,
min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
- nvmet_id_set_model_number(id, ctrl->subsys);
+ memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number,
+ strlen(subsys->model_number), ' ');
memcpy_and_pad(id->fr, sizeof(id->fr),
UTS_RELEASE, strlen(UTS_RELEASE), ' ');
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 635a7cb45d0b..e5dbd1923b7b 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -1118,16 +1118,12 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
char *page)
{
struct nvmet_subsys *subsys = to_subsys(item);
- struct nvmet_subsys_model *subsys_model;
- char *model = NVMET_DEFAULT_CTRL_MODEL;
int ret;
- rcu_read_lock();
- subsys_model = rcu_dereference(subsys->model);
- if (subsys_model)
- model = subsys_model->number;
- ret = snprintf(page, PAGE_SIZE, "%s\n", model);
- rcu_read_unlock();
+ mutex_lock(&subsys->lock);
+ ret = snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number ?
+ subsys->model_number : NVMET_DEFAULT_CTRL_MODEL);
+ mutex_unlock(&subsys->lock);
return ret;
}
@@ -1138,14 +1134,17 @@ static bool nvmet_is_ascii(const char c)
return c >= 0x20 && c <= 0x7e;
}
-static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
- const char *page, size_t count)
+static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
+ const char *page, size_t count)
{
- struct nvmet_subsys *subsys = to_subsys(item);
- struct nvmet_subsys_model *new_model;
- char *new_model_number;
int pos = 0, len;
+ if (subsys->model_number) {
+ pr_err("Can't set model number. %s is already assigned\n",
+ subsys->model_number);
+ return -EINVAL;
+ }
+
len = strcspn(page, "\n");
if (!len)
return -EINVAL;
@@ -1155,28 +1154,25 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
return -EINVAL;
}
- new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
- if (!new_model_number)
+ subsys->model_number = kmemdup_nul(page, len, GFP_KERNEL);
+ if (!subsys->model_number)
return -ENOMEM;
+ return count;
+}
- new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL);
- if (!new_model) {
- kfree(new_model_number);
- return -ENOMEM;
- }
- memcpy(new_model->number, new_model_number, len);
+static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ ssize_t ret;
down_write(&nvmet_config_sem);
mutex_lock(&subsys->lock);
- new_model = rcu_replace_pointer(subsys->model, new_model,
- mutex_is_locked(&subsys->lock));
+ ret = nvmet_subsys_attr_model_store_locked(subsys, page, count);
mutex_unlock(&subsys->lock);
up_write(&nvmet_config_sem);
- kfree_rcu(new_model, rcuhead);
- kfree(new_model_number);
-
- return count;
+ return ret;
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 67bbf0e3b507..be6fcdaf51a7 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1532,7 +1532,7 @@ static void nvmet_subsys_free(struct kref *ref)
nvmet_passthru_subsys_free(subsys);
kfree(subsys->subsysnqn);
- kfree_rcu(subsys->model, rcuhead);
+ kfree(subsys->model_number);
kfree(subsys);
}
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 3d9a5d3ed9cd..9a8b3726a37c 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -185,7 +185,7 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
}
bip = bio_integrity_alloc(bio, GFP_NOIO,
- min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
+ bio_max_segs(req->metadata_sg_cnt));
if (IS_ERR(bip)) {
pr_err("Unable to allocate bio_integrity_payload\n");
return PTR_ERR(bip);
@@ -225,7 +225,7 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
- int sg_cnt = req->sg_cnt;
+ unsigned int sg_cnt = req->sg_cnt;
struct bio *bio;
struct scatterlist *sg;
struct blk_plug plug;
@@ -262,7 +262,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio = &req->b.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
} else {
- bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+ bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
}
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
@@ -289,7 +289,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
}
}
- bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+ bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
bio->bi_opf = op;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index cdfa537b1c0a..4b84edb49f22 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -208,11 +208,6 @@ struct nvmet_ctrl {
bool pi_support;
};
-struct nvmet_subsys_model {
- struct rcu_head rcuhead;
- char number[];
-};
-
struct nvmet_subsys {
enum nvme_subsys_type type;
@@ -242,7 +237,7 @@ struct nvmet_subsys {
struct config_group namespaces_group;
struct config_group allowed_hosts_group;
- struct nvmet_subsys_model __rcu *model;
+ char *model_number;
#ifdef CONFIG_NVME_TARGET_PASSTHRU
struct nvme_ctrl *passthru_ctrl;
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index f50c7b2bf21c..26c587ccd152 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
u16 status = NVME_SC_SUCCESS;
struct nvme_id_ctrl *id;
- int max_hw_sectors;
+ unsigned int max_hw_sectors;
int page_shift;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -198,7 +198,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
bio = &req->p.inline_bio;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
} else {
- bio = bio_alloc(GFP_KERNEL, min(req->sg_cnt, BIO_MAX_PAGES));
+ bio = bio_alloc(GFP_KERNEL, bio_max_segs(req->sg_cnt));
bio->bi_end_io = bio_put;
}
bio->bi_opf = req_op(rq);
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index cf109d9a1112..e6939103991b 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -1554,7 +1554,7 @@ static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj,
if (off + count > size)
count = size - off;
- s = to_socket(container_of(kobj, struct device, kobj));
+ s = to_socket(kobj_to_dev(kobj));
if (!(s->state & SOCKET_PRESENT))
return -ENODEV;
@@ -1581,7 +1581,7 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
if (error)
return error;
- s = to_socket(container_of(kobj, struct device, kobj));
+ s = to_socket(kobj_to_dev(kobj));
if (off)
return -EINVAL;
diff --git a/drivers/pinctrl/Kconfig b/drivers/pinctrl/Kconfig
index 03c62e1cb395..b7675cce0027 100644
--- a/drivers/pinctrl/Kconfig
+++ b/drivers/pinctrl/Kconfig
@@ -370,6 +370,19 @@ config PINCTRL_MICROCHIP_SGPIO
connect control signals from SFP modules and to act as an
LED controller.
+config PINCTRL_K210
+ bool "Pinctrl driver for the Canaan Kendryte K210 SoC"
+ depends on RISCV && SOC_CANAAN && OF
+ select GENERIC_PINMUX_FUNCTIONS
+ select GENERIC_PINCONF
+ select GPIOLIB
+ select OF_GPIO
+ select REGMAP_MMIO
+ default SOC_CANAAN
+ help
+	  Add support for the Canaan Kendryte K210 RISC-V SoC Field
+ Programmable IO Array (FPIOA) controller.
+
source "drivers/pinctrl/actions/Kconfig"
source "drivers/pinctrl/aspeed/Kconfig"
source "drivers/pinctrl/bcm/Kconfig"
diff --git a/drivers/pinctrl/Makefile b/drivers/pinctrl/Makefile
index efc96f25c8db..8bf459c32a76 100644
--- a/drivers/pinctrl/Makefile
+++ b/drivers/pinctrl/Makefile
@@ -45,6 +45,7 @@ obj-$(CONFIG_PINCTRL_RK805) += pinctrl-rk805.o
obj-$(CONFIG_PINCTRL_OCELOT) += pinctrl-ocelot.o
obj-$(CONFIG_PINCTRL_MICROCHIP_SGPIO) += pinctrl-microchip-sgpio.o
obj-$(CONFIG_PINCTRL_EQUILIBRIUM) += pinctrl-equilibrium.o
+obj-$(CONFIG_PINCTRL_K210) += pinctrl-k210.o
obj-y += actions/
obj-$(CONFIG_ARCH_ASPEED) += aspeed/
diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c
new file mode 100644
index 000000000000..8a733cf77ba0
--- /dev/null
+++ b/drivers/pinctrl/pinctrl-k210.c
@@ -0,0 +1,985 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#include <linux/io.h>
+#include <linux/of_device.h>
+#include <linux/clk.h>
+#include <linux/mfd/syscon.h>
+#include <linux/platform_device.h>
+#include <linux/bitfield.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+
+#include <dt-bindings/pinctrl/k210-fpioa.h>
+
+#include "core.h"
+#include "pinconf.h"
+#include "pinctrl-utils.h"
+
+/*
+ * The K210 only implements 8 drive levels, even though
+ * there is register space for 16
+ */
+#define K210_PC_DRIVE_MASK GENMASK(11, 8)
+#define K210_PC_DRIVE_SHIFT 8
+#define K210_PC_DRIVE_0 (0 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_1 (1 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_2 (2 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_3 (3 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_4 (4 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_5 (5 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_6 (6 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_7 (7 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_MAX 7
+#define K210_PC_MODE_MASK GENMASK(23, 12)
+
+/*
+ * output enabled == PC_OE & (PC_OE_INV ^ FUNCTION_OE)
+ * where FUNCTION_OE is a physical signal from the function.
+ */
+#define K210_PC_OE BIT(12) /* Output Enable */
+#define K210_PC_OE_INV BIT(13) /* INVert Output Enable */
+#define K210_PC_DO_OE BIT(14) /* set Data Out to Output Enable sig */
+#define K210_PC_DO_INV BIT(15) /* INVert final Data Output */
+#define K210_PC_PU BIT(16) /* Pull Up */
+#define K210_PC_PD BIT(17) /* Pull Down */
+/* Strong pull up not implemented on K210 */
+#define K210_PC_SL BIT(19) /* reduce SLew rate */
+/* Same semantics as OE above */
+#define K210_PC_IE BIT(20) /* Input Enable */
+#define K210_PC_IE_INV BIT(21) /* INVert Input Enable */
+#define K210_PC_DI_INV BIT(22) /* INVert Data Input */
+#define K210_PC_ST BIT(23) /* Schmitt Trigger */
+#define K210_PC_DI BIT(31) /* raw Data Input */
+
+#define K210_PC_BIAS_MASK	(K210_PC_PU | K210_PC_PD)
+
+#define K210_PC_MODE_IN (K210_PC_IE | K210_PC_ST)
+#define K210_PC_MODE_OUT (K210_PC_DRIVE_7 | K210_PC_OE)
+#define K210_PC_MODE_I2C (K210_PC_MODE_IN | K210_PC_SL | \
+ K210_PC_OE | K210_PC_PU)
+#define K210_PC_MODE_SCCB (K210_PC_MODE_I2C | \
+ K210_PC_OE_INV | K210_PC_IE_INV)
+#define K210_PC_MODE_SPI (K210_PC_MODE_IN | K210_PC_IE_INV | \
+ K210_PC_MODE_OUT | K210_PC_OE_INV)
+#define K210_PC_MODE_GPIO (K210_PC_MODE_IN | K210_PC_MODE_OUT)
+
+#define K210_PG_FUNC GENMASK(7, 0)
+#define K210_PG_DO BIT(8)
+#define K210_PG_PIN GENMASK(22, 16)
+
+/*
+ * struct k210_fpioa: Kendryte K210 FPIOA memory mapped registers
+ * @pins: 48 32-bits IO pin registers
+ * @tie_en: 256 (one per function) input tie enable bits
+ * @tie_val: 256 (one per function) input tie value bits
+ */
+struct k210_fpioa {
+ u32 pins[48];
+ u32 tie_en[8];
+ u32 tie_val[8];
+};
+
+struct k210_fpioa_data {
+ struct device *dev;
+ struct pinctrl_dev *pctl;
+
+ struct k210_fpioa __iomem *fpioa;
+ struct regmap *sysctl_map;
+ u32 power_offset;
+ struct clk *clk;
+ struct clk *pclk;
+};
+
+#define K210_PIN_NAME(i) ("IO_" #i)
+#define K210_PIN(i) [(i)] = PINCTRL_PIN((i), K210_PIN_NAME(i))
+
+static const struct pinctrl_pin_desc k210_pins[] = {
+ K210_PIN(0), K210_PIN(1), K210_PIN(2),
+ K210_PIN(3), K210_PIN(4), K210_PIN(5),
+ K210_PIN(6), K210_PIN(7), K210_PIN(8),
+ K210_PIN(9), K210_PIN(10), K210_PIN(11),
+ K210_PIN(12), K210_PIN(13), K210_PIN(14),
+ K210_PIN(15), K210_PIN(16), K210_PIN(17),
+ K210_PIN(18), K210_PIN(19), K210_PIN(20),
+ K210_PIN(21), K210_PIN(22), K210_PIN(23),
+ K210_PIN(24), K210_PIN(25), K210_PIN(26),
+ K210_PIN(27), K210_PIN(28), K210_PIN(29),
+ K210_PIN(30), K210_PIN(31), K210_PIN(32),
+ K210_PIN(33), K210_PIN(34), K210_PIN(35),
+ K210_PIN(36), K210_PIN(37), K210_PIN(38),
+ K210_PIN(39), K210_PIN(40), K210_PIN(41),
+ K210_PIN(42), K210_PIN(43), K210_PIN(44),
+ K210_PIN(45), K210_PIN(46), K210_PIN(47)
+};
+
+#define K210_NPINS ARRAY_SIZE(k210_pins)
+
+/*
+ * Pin groups: each of the 48 programmable pins is a group.
+ * To these are added 8 power domain groups which, for the purposes of
+ * the pin subsystem, contain no pins. The power domain groups exist only
+ * to set the power level. Their ids must never be used as pin numbers
+ * (there are no pins 48-55).
+ */
+static const char *const k210_group_names[] = {
+ /* The first 48 groups are for pins, one each */
+ K210_PIN_NAME(0), K210_PIN_NAME(1), K210_PIN_NAME(2),
+ K210_PIN_NAME(3), K210_PIN_NAME(4), K210_PIN_NAME(5),
+ K210_PIN_NAME(6), K210_PIN_NAME(7), K210_PIN_NAME(8),
+ K210_PIN_NAME(9), K210_PIN_NAME(10), K210_PIN_NAME(11),
+ K210_PIN_NAME(12), K210_PIN_NAME(13), K210_PIN_NAME(14),
+ K210_PIN_NAME(15), K210_PIN_NAME(16), K210_PIN_NAME(17),
+ K210_PIN_NAME(18), K210_PIN_NAME(19), K210_PIN_NAME(20),
+ K210_PIN_NAME(21), K210_PIN_NAME(22), K210_PIN_NAME(23),
+ K210_PIN_NAME(24), K210_PIN_NAME(25), K210_PIN_NAME(26),
+ K210_PIN_NAME(27), K210_PIN_NAME(28), K210_PIN_NAME(29),
+ K210_PIN_NAME(30), K210_PIN_NAME(31), K210_PIN_NAME(32),
+ K210_PIN_NAME(33), K210_PIN_NAME(34), K210_PIN_NAME(35),
+ K210_PIN_NAME(36), K210_PIN_NAME(37), K210_PIN_NAME(38),
+ K210_PIN_NAME(39), K210_PIN_NAME(40), K210_PIN_NAME(41),
+ K210_PIN_NAME(42), K210_PIN_NAME(43), K210_PIN_NAME(44),
+ K210_PIN_NAME(45), K210_PIN_NAME(46), K210_PIN_NAME(47),
+ [48] = "A0", [49] = "A1", [50] = "A2",
+ [51] = "B3", [52] = "B4", [53] = "B5",
+ [54] = "C6", [55] = "C7"
+};
+
+#define K210_NGROUPS ARRAY_SIZE(k210_group_names)
+
+enum k210_pinctrl_mode_id {
+ K210_PC_DEFAULT_DISABLED,
+ K210_PC_DEFAULT_IN,
+ K210_PC_DEFAULT_IN_TIE,
+ K210_PC_DEFAULT_OUT,
+ K210_PC_DEFAULT_I2C,
+ K210_PC_DEFAULT_SCCB,
+ K210_PC_DEFAULT_SPI,
+ K210_PC_DEFAULT_GPIO,
+ K210_PC_DEFAULT_INT13,
+};
+
+#define K210_PC_DEFAULT(mode) \
+ [K210_PC_DEFAULT_##mode] = K210_PC_MODE_##mode
+
+static const u32 k210_pinconf_mode_id_to_mode[] = {
+ [K210_PC_DEFAULT_DISABLED] = 0,
+ K210_PC_DEFAULT(IN),
+ [K210_PC_DEFAULT_IN_TIE] = K210_PC_MODE_IN,
+ K210_PC_DEFAULT(OUT),
+ K210_PC_DEFAULT(I2C),
+ K210_PC_DEFAULT(SCCB),
+ K210_PC_DEFAULT(SPI),
+ K210_PC_DEFAULT(GPIO),
+ [K210_PC_DEFAULT_INT13] = K210_PC_MODE_IN | K210_PC_PU,
+};
+
+#undef K210_PC_DEFAULT
+
+/*
+ * Pin functions configuration information.
+ */
+struct k210_pcf_info {
+ char name[15];
+ u8 mode_id;
+};
+
+#define K210_FUNC(id, mode) \
+ [K210_PCF_##id] = { \
+ .name = #id, \
+ .mode_id = K210_PC_DEFAULT_##mode \
+ }
+
+static const struct k210_pcf_info k210_pcf_infos[] = {
+ K210_FUNC(JTAG_TCLK, IN),
+ K210_FUNC(JTAG_TDI, IN),
+ K210_FUNC(JTAG_TMS, IN),
+ K210_FUNC(JTAG_TDO, OUT),
+ K210_FUNC(SPI0_D0, SPI),
+ K210_FUNC(SPI0_D1, SPI),
+ K210_FUNC(SPI0_D2, SPI),
+ K210_FUNC(SPI0_D3, SPI),
+ K210_FUNC(SPI0_D4, SPI),
+ K210_FUNC(SPI0_D5, SPI),
+ K210_FUNC(SPI0_D6, SPI),
+ K210_FUNC(SPI0_D7, SPI),
+ K210_FUNC(SPI0_SS0, OUT),
+ K210_FUNC(SPI0_SS1, OUT),
+ K210_FUNC(SPI0_SS2, OUT),
+ K210_FUNC(SPI0_SS3, OUT),
+ K210_FUNC(SPI0_ARB, IN_TIE),
+ K210_FUNC(SPI0_SCLK, OUT),
+ K210_FUNC(UARTHS_RX, IN),
+ K210_FUNC(UARTHS_TX, OUT),
+ K210_FUNC(RESV6, IN),
+ K210_FUNC(RESV7, IN),
+ K210_FUNC(CLK_SPI1, OUT),
+ K210_FUNC(CLK_I2C1, OUT),
+ K210_FUNC(GPIOHS0, GPIO),
+ K210_FUNC(GPIOHS1, GPIO),
+ K210_FUNC(GPIOHS2, GPIO),
+ K210_FUNC(GPIOHS3, GPIO),
+ K210_FUNC(GPIOHS4, GPIO),
+ K210_FUNC(GPIOHS5, GPIO),
+ K210_FUNC(GPIOHS6, GPIO),
+ K210_FUNC(GPIOHS7, GPIO),
+ K210_FUNC(GPIOHS8, GPIO),
+ K210_FUNC(GPIOHS9, GPIO),
+ K210_FUNC(GPIOHS10, GPIO),
+ K210_FUNC(GPIOHS11, GPIO),
+ K210_FUNC(GPIOHS12, GPIO),
+ K210_FUNC(GPIOHS13, GPIO),
+ K210_FUNC(GPIOHS14, GPIO),
+ K210_FUNC(GPIOHS15, GPIO),
+ K210_FUNC(GPIOHS16, GPIO),
+ K210_FUNC(GPIOHS17, GPIO),
+ K210_FUNC(GPIOHS18, GPIO),
+ K210_FUNC(GPIOHS19, GPIO),
+ K210_FUNC(GPIOHS20, GPIO),
+ K210_FUNC(GPIOHS21, GPIO),
+ K210_FUNC(GPIOHS22, GPIO),
+ K210_FUNC(GPIOHS23, GPIO),
+ K210_FUNC(GPIOHS24, GPIO),
+ K210_FUNC(GPIOHS25, GPIO),
+ K210_FUNC(GPIOHS26, GPIO),
+ K210_FUNC(GPIOHS27, GPIO),
+ K210_FUNC(GPIOHS28, GPIO),
+ K210_FUNC(GPIOHS29, GPIO),
+ K210_FUNC(GPIOHS30, GPIO),
+ K210_FUNC(GPIOHS31, GPIO),
+ K210_FUNC(GPIO0, GPIO),
+ K210_FUNC(GPIO1, GPIO),
+ K210_FUNC(GPIO2, GPIO),
+ K210_FUNC(GPIO3, GPIO),
+ K210_FUNC(GPIO4, GPIO),
+ K210_FUNC(GPIO5, GPIO),
+ K210_FUNC(GPIO6, GPIO),
+ K210_FUNC(GPIO7, GPIO),
+ K210_FUNC(UART1_RX, IN),
+ K210_FUNC(UART1_TX, OUT),
+ K210_FUNC(UART2_RX, IN),
+ K210_FUNC(UART2_TX, OUT),
+ K210_FUNC(UART3_RX, IN),
+ K210_FUNC(UART3_TX, OUT),
+ K210_FUNC(SPI1_D0, SPI),
+ K210_FUNC(SPI1_D1, SPI),
+ K210_FUNC(SPI1_D2, SPI),
+ K210_FUNC(SPI1_D3, SPI),
+ K210_FUNC(SPI1_D4, SPI),
+ K210_FUNC(SPI1_D5, SPI),
+ K210_FUNC(SPI1_D6, SPI),
+ K210_FUNC(SPI1_D7, SPI),
+ K210_FUNC(SPI1_SS0, OUT),
+ K210_FUNC(SPI1_SS1, OUT),
+ K210_FUNC(SPI1_SS2, OUT),
+ K210_FUNC(SPI1_SS3, OUT),
+ K210_FUNC(SPI1_ARB, IN_TIE),
+ K210_FUNC(SPI1_SCLK, OUT),
+ K210_FUNC(SPI2_D0, SPI),
+ K210_FUNC(SPI2_SS, IN),
+ K210_FUNC(SPI2_SCLK, IN),
+ K210_FUNC(I2S0_MCLK, OUT),
+ K210_FUNC(I2S0_SCLK, OUT),
+ K210_FUNC(I2S0_WS, OUT),
+ K210_FUNC(I2S0_IN_D0, IN),
+ K210_FUNC(I2S0_IN_D1, IN),
+ K210_FUNC(I2S0_IN_D2, IN),
+ K210_FUNC(I2S0_IN_D3, IN),
+ K210_FUNC(I2S0_OUT_D0, OUT),
+ K210_FUNC(I2S0_OUT_D1, OUT),
+ K210_FUNC(I2S0_OUT_D2, OUT),
+ K210_FUNC(I2S0_OUT_D3, OUT),
+ K210_FUNC(I2S1_MCLK, OUT),
+ K210_FUNC(I2S1_SCLK, OUT),
+ K210_FUNC(I2S1_WS, OUT),
+ K210_FUNC(I2S1_IN_D0, IN),
+ K210_FUNC(I2S1_IN_D1, IN),
+ K210_FUNC(I2S1_IN_D2, IN),
+ K210_FUNC(I2S1_IN_D3, IN),
+ K210_FUNC(I2S1_OUT_D0, OUT),
+ K210_FUNC(I2S1_OUT_D1, OUT),
+ K210_FUNC(I2S1_OUT_D2, OUT),
+ K210_FUNC(I2S1_OUT_D3, OUT),
+ K210_FUNC(I2S2_MCLK, OUT),
+ K210_FUNC(I2S2_SCLK, OUT),
+ K210_FUNC(I2S2_WS, OUT),
+ K210_FUNC(I2S2_IN_D0, IN),
+ K210_FUNC(I2S2_IN_D1, IN),
+ K210_FUNC(I2S2_IN_D2, IN),
+ K210_FUNC(I2S2_IN_D3, IN),
+ K210_FUNC(I2S2_OUT_D0, OUT),
+ K210_FUNC(I2S2_OUT_D1, OUT),
+ K210_FUNC(I2S2_OUT_D2, OUT),
+ K210_FUNC(I2S2_OUT_D3, OUT),
+ K210_FUNC(RESV0, DISABLED),
+ K210_FUNC(RESV1, DISABLED),
+ K210_FUNC(RESV2, DISABLED),
+ K210_FUNC(RESV3, DISABLED),
+ K210_FUNC(RESV4, DISABLED),
+ K210_FUNC(RESV5, DISABLED),
+ K210_FUNC(I2C0_SCLK, I2C),
+ K210_FUNC(I2C0_SDA, I2C),
+ K210_FUNC(I2C1_SCLK, I2C),
+ K210_FUNC(I2C1_SDA, I2C),
+ K210_FUNC(I2C2_SCLK, I2C),
+ K210_FUNC(I2C2_SDA, I2C),
+ K210_FUNC(DVP_XCLK, OUT),
+ K210_FUNC(DVP_RST, OUT),
+ K210_FUNC(DVP_PWDN, OUT),
+ K210_FUNC(DVP_VSYNC, IN),
+ K210_FUNC(DVP_HSYNC, IN),
+ K210_FUNC(DVP_PCLK, IN),
+ K210_FUNC(DVP_D0, IN),
+ K210_FUNC(DVP_D1, IN),
+ K210_FUNC(DVP_D2, IN),
+ K210_FUNC(DVP_D3, IN),
+ K210_FUNC(DVP_D4, IN),
+ K210_FUNC(DVP_D5, IN),
+ K210_FUNC(DVP_D6, IN),
+ K210_FUNC(DVP_D7, IN),
+ K210_FUNC(SCCB_SCLK, SCCB),
+ K210_FUNC(SCCB_SDA, SCCB),
+ K210_FUNC(UART1_CTS, IN),
+ K210_FUNC(UART1_DSR, IN),
+ K210_FUNC(UART1_DCD, IN),
+ K210_FUNC(UART1_RI, IN),
+ K210_FUNC(UART1_SIR_IN, IN),
+ K210_FUNC(UART1_DTR, OUT),
+ K210_FUNC(UART1_RTS, OUT),
+ K210_FUNC(UART1_OUT2, OUT),
+ K210_FUNC(UART1_OUT1, OUT),
+ K210_FUNC(UART1_SIR_OUT, OUT),
+ K210_FUNC(UART1_BAUD, OUT),
+ K210_FUNC(UART1_RE, OUT),
+ K210_FUNC(UART1_DE, OUT),
+ K210_FUNC(UART1_RS485_EN, OUT),
+ K210_FUNC(UART2_CTS, IN),
+ K210_FUNC(UART2_DSR, IN),
+ K210_FUNC(UART2_DCD, IN),
+ K210_FUNC(UART2_RI, IN),
+ K210_FUNC(UART2_SIR_IN, IN),
+ K210_FUNC(UART2_DTR, OUT),
+ K210_FUNC(UART2_RTS, OUT),
+ K210_FUNC(UART2_OUT2, OUT),
+ K210_FUNC(UART2_OUT1, OUT),
+ K210_FUNC(UART2_SIR_OUT, OUT),
+ K210_FUNC(UART2_BAUD, OUT),
+ K210_FUNC(UART2_RE, OUT),
+ K210_FUNC(UART2_DE, OUT),
+ K210_FUNC(UART2_RS485_EN, OUT),
+ K210_FUNC(UART3_CTS, IN),
+ K210_FUNC(UART3_DSR, IN),
+ K210_FUNC(UART3_DCD, IN),
+ K210_FUNC(UART3_RI, IN),
+ K210_FUNC(UART3_SIR_IN, IN),
+ K210_FUNC(UART3_DTR, OUT),
+ K210_FUNC(UART3_RTS, OUT),
+ K210_FUNC(UART3_OUT2, OUT),
+ K210_FUNC(UART3_OUT1, OUT),
+ K210_FUNC(UART3_SIR_OUT, OUT),
+ K210_FUNC(UART3_BAUD, OUT),
+ K210_FUNC(UART3_RE, OUT),
+ K210_FUNC(UART3_DE, OUT),
+ K210_FUNC(UART3_RS485_EN, OUT),
+ K210_FUNC(TIMER0_TOGGLE1, OUT),
+ K210_FUNC(TIMER0_TOGGLE2, OUT),
+ K210_FUNC(TIMER0_TOGGLE3, OUT),
+ K210_FUNC(TIMER0_TOGGLE4, OUT),
+ K210_FUNC(TIMER1_TOGGLE1, OUT),
+ K210_FUNC(TIMER1_TOGGLE2, OUT),
+ K210_FUNC(TIMER1_TOGGLE3, OUT),
+ K210_FUNC(TIMER1_TOGGLE4, OUT),
+ K210_FUNC(TIMER2_TOGGLE1, OUT),
+ K210_FUNC(TIMER2_TOGGLE2, OUT),
+ K210_FUNC(TIMER2_TOGGLE3, OUT),
+ K210_FUNC(TIMER2_TOGGLE4, OUT),
+ K210_FUNC(CLK_SPI2, OUT),
+ K210_FUNC(CLK_I2C2, OUT),
+ K210_FUNC(INTERNAL0, OUT),
+ K210_FUNC(INTERNAL1, OUT),
+ K210_FUNC(INTERNAL2, OUT),
+ K210_FUNC(INTERNAL3, OUT),
+ K210_FUNC(INTERNAL4, OUT),
+ K210_FUNC(INTERNAL5, OUT),
+ K210_FUNC(INTERNAL6, OUT),
+ K210_FUNC(INTERNAL7, OUT),
+ K210_FUNC(INTERNAL8, OUT),
+ K210_FUNC(INTERNAL9, IN),
+ K210_FUNC(INTERNAL10, IN),
+ K210_FUNC(INTERNAL11, IN),
+ K210_FUNC(INTERNAL12, IN),
+ K210_FUNC(INTERNAL13, INT13),
+ K210_FUNC(INTERNAL14, I2C),
+ K210_FUNC(INTERNAL15, IN),
+ K210_FUNC(INTERNAL16, IN),
+ K210_FUNC(INTERNAL17, IN),
+ K210_FUNC(CONSTANT, DISABLED),
+ K210_FUNC(INTERNAL18, IN),
+ K210_FUNC(DEBUG0, OUT),
+ K210_FUNC(DEBUG1, OUT),
+ K210_FUNC(DEBUG2, OUT),
+ K210_FUNC(DEBUG3, OUT),
+ K210_FUNC(DEBUG4, OUT),
+ K210_FUNC(DEBUG5, OUT),
+ K210_FUNC(DEBUG6, OUT),
+ K210_FUNC(DEBUG7, OUT),
+ K210_FUNC(DEBUG8, OUT),
+ K210_FUNC(DEBUG9, OUT),
+ K210_FUNC(DEBUG10, OUT),
+ K210_FUNC(DEBUG11, OUT),
+ K210_FUNC(DEBUG12, OUT),
+ K210_FUNC(DEBUG13, OUT),
+ K210_FUNC(DEBUG14, OUT),
+ K210_FUNC(DEBUG15, OUT),
+ K210_FUNC(DEBUG16, OUT),
+ K210_FUNC(DEBUG17, OUT),
+ K210_FUNC(DEBUG18, OUT),
+ K210_FUNC(DEBUG19, OUT),
+ K210_FUNC(DEBUG20, OUT),
+ K210_FUNC(DEBUG21, OUT),
+ K210_FUNC(DEBUG22, OUT),
+ K210_FUNC(DEBUG23, OUT),
+ K210_FUNC(DEBUG24, OUT),
+ K210_FUNC(DEBUG25, OUT),
+ K210_FUNC(DEBUG26, OUT),
+ K210_FUNC(DEBUG27, OUT),
+ K210_FUNC(DEBUG28, OUT),
+ K210_FUNC(DEBUG29, OUT),
+ K210_FUNC(DEBUG30, OUT),
+ K210_FUNC(DEBUG31, OUT),
+};
+
+#define PIN_CONFIG_OUTPUT_INVERT (PIN_CONFIG_END + 1)
+#define PIN_CONFIG_INPUT_INVERT (PIN_CONFIG_END + 2)
+
+static const struct pinconf_generic_params k210_pinconf_custom_params[] = {
+ { "output-polarity-invert", PIN_CONFIG_OUTPUT_INVERT, 1 },
+ { "input-polarity-invert", PIN_CONFIG_INPUT_INVERT, 1 },
+};
+
+/*
+ * Max drive strength in uA.
+ */
+static const int k210_pinconf_drive_strength[] = {
+ [0] = 11200,
+ [1] = 16800,
+ [2] = 22300,
+ [3] = 27800,
+ [4] = 33300,
+ [5] = 38700,
+ [6] = 44100,
+ [7] = 49500,
+};
+
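+/*
+ * Pick the highest drive level whose maximum current does not exceed the
+ * requested limit, or fail if even the weakest level is too strong.
+ */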
+static int k210_pinconf_get_drive(unsigned int max_strength_ua)
+{
+ int i;
+
+	for (i = K210_PC_DRIVE_MAX; i >= 0; i--) {
+ if (k210_pinconf_drive_strength[i] <= max_strength_ua)
+ return i;
+ }
+
+ return -EINVAL;
+}
+
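+/* Select a pin's function and apply that function's default mode bits in one write. */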
+static void k210_pinmux_set_pin_function(struct pinctrl_dev *pctldev,
+ u32 pin, u32 func)
+{
+ struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+ const struct k210_pcf_info *info = &k210_pcf_infos[func];
+ u32 mode = k210_pinconf_mode_id_to_mode[info->mode_id];
+ u32 val = func | mode;
+
+ dev_dbg(pdata->dev, "set pin %u function %s (%u) -> 0x%08x\n",
+ pin, info->name, func, val);
+
+ writel(val, &pdata->fpioa->pins[pin]);
+}
+
+static int k210_pinconf_set_param(struct pinctrl_dev *pctldev,
+ unsigned int pin,
+ unsigned int param, unsigned int arg)
+{
+ struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+ u32 val = readl(&pdata->fpioa->pins[pin]);
+ int drive;
+
+ dev_dbg(pdata->dev, "set pin %u param %u, arg 0x%x\n",
+ pin, param, arg);
+
+ switch (param) {
+ case PIN_CONFIG_BIAS_DISABLE:
+ val &= ~K210_PC_BIAS_MASK;
+ break;
+ case PIN_CONFIG_BIAS_PULL_DOWN:
+ if (!arg)
+ return -EINVAL;
+ val |= K210_PC_PD;
+ break;
+ case PIN_CONFIG_BIAS_PULL_UP:
+ if (!arg)
+ return -EINVAL;
+		val |= K210_PC_PU;
+ break;
+ case PIN_CONFIG_DRIVE_STRENGTH:
+ arg *= 1000;
+ fallthrough;
+ case PIN_CONFIG_DRIVE_STRENGTH_UA:
+ drive = k210_pinconf_get_drive(arg);
+ if (drive < 0)
+ return drive;
+ val &= ~K210_PC_DRIVE_MASK;
+ val |= FIELD_PREP(K210_PC_DRIVE_MASK, drive);
+ break;
+ case PIN_CONFIG_INPUT_ENABLE:
+ if (arg)
+ val |= K210_PC_IE;
+ else
+ val &= ~K210_PC_IE;
+ break;
+ case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+ if (arg)
+ val |= K210_PC_ST;
+ else
+ val &= ~K210_PC_ST;
+ break;
+ case PIN_CONFIG_OUTPUT:
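+		/* Mux in the CONSTANT function, then force the pin to output mode. */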
+ k210_pinmux_set_pin_function(pctldev, pin, K210_PCF_CONSTANT);
+ val = readl(&pdata->fpioa->pins[pin]);
+ val |= K210_PC_MODE_OUT;
+ if (!arg)
+ val |= K210_PC_DO_INV;
+ break;
+ case PIN_CONFIG_OUTPUT_ENABLE:
+ if (arg)
+ val |= K210_PC_OE;
+ else
+ val &= ~K210_PC_OE;
+ break;
+ case PIN_CONFIG_SLEW_RATE:
+ if (arg)
+ val |= K210_PC_SL;
+ else
+ val &= ~K210_PC_SL;
+ break;
+ case PIN_CONFIG_OUTPUT_INVERT:
+ if (arg)
+ val |= K210_PC_DO_INV;
+ else
+ val &= ~K210_PC_DO_INV;
+ break;
+ case PIN_CONFIG_INPUT_INVERT:
+ if (arg)
+ val |= K210_PC_DI_INV;
+ else
+ val &= ~K210_PC_DI_INV;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ writel(val, &pdata->fpioa->pins[pin]);
+
+ return 0;
+}
+
+static int k210_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+ unsigned long *configs, unsigned int num_configs)
+{
+ unsigned int param, arg;
+ int i, ret;
+
+ if (WARN_ON(pin >= K210_NPINS))
+ return -EINVAL;
+
+ for (i = 0; i < num_configs; i++) {
+ param = pinconf_to_config_param(configs[i]);
+ arg = pinconf_to_config_argument(configs[i]);
+ ret = k210_pinconf_set_param(pctldev, pin, param, arg);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static void k210_pinconf_dbg_show(struct pinctrl_dev *pctldev,
+ struct seq_file *s, unsigned int pin)
+{
+ struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+
+ seq_printf(s, "%#x", readl(&pdata->fpioa->pins[pin]));
+}
+
+static int k210_pinconf_group_set(struct pinctrl_dev *pctldev,
+ unsigned int selector, unsigned long *configs,
+ unsigned int num_configs)
+{
+ struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+ unsigned int param, arg;
+ u32 bit;
+ int i;
+
+	/* Pins should be configured with pinmux, not groups */
+ if (selector < K210_NPINS)
+ return -EINVAL;
+
+ /* Otherwise it's a power domain */
+ for (i = 0; i < num_configs; i++) {
+ param = pinconf_to_config_param(configs[i]);
+ if (param != PIN_CONFIG_POWER_SOURCE)
+ return -EINVAL;
+
+ arg = pinconf_to_config_argument(configs[i]);
+ bit = BIT(selector - K210_NPINS);
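+		/* A set bit selects the 1.8V supply for the domain; cleared means 3.3V. */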
+ regmap_update_bits(pdata->sysctl_map,
+ pdata->power_offset,
+ bit, arg ? bit : 0);
+ }
+
+ return 0;
+}
+
+static void k210_pinconf_group_dbg_show(struct pinctrl_dev *pctldev,
+ struct seq_file *s,
+ unsigned int selector)
+{
+ struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+ int ret;
+ u32 val;
+
+ if (selector < K210_NPINS)
+ return k210_pinconf_dbg_show(pctldev, s, selector);
+
+ ret = regmap_read(pdata->sysctl_map, pdata->power_offset, &val);
+ if (ret) {
+ dev_err(pdata->dev, "Failed to read power reg\n");
+ return;
+ }
+
+ seq_printf(s, "%s: %s V", k210_group_names[selector],
+ val & BIT(selector - K210_NPINS) ? "1.8" : "3.3");
+}
+
+static const struct pinconf_ops k210_pinconf_ops = {
+ .is_generic = true,
+ .pin_config_set = k210_pinconf_set,
+ .pin_config_group_set = k210_pinconf_group_set,
+ .pin_config_dbg_show = k210_pinconf_dbg_show,
+ .pin_config_group_dbg_show = k210_pinconf_group_dbg_show,
+};
+
+static int k210_pinmux_get_function_count(struct pinctrl_dev *pctldev)
+{
+ return ARRAY_SIZE(k210_pcf_infos);
+}
+
+static const char *k210_pinmux_get_function_name(struct pinctrl_dev *pctldev,
+ unsigned int selector)
+{
+ return k210_pcf_infos[selector].name;
+}
+
+static int k210_pinmux_get_function_groups(struct pinctrl_dev *pctldev,
+ unsigned int selector,
+ const char * const **groups,
+ unsigned int * const num_groups)
+{
+ /* Any function can be mapped to any pin */
+ *groups = k210_group_names;
+ *num_groups = K210_NPINS;
+
+ return 0;
+}
+
+static int k210_pinmux_set_mux(struct pinctrl_dev *pctldev,
+ unsigned int function,
+ unsigned int group)
+{
+ /* Can't mux power domains */
+ if (group >= K210_NPINS)
+ return -EINVAL;
+
+ k210_pinmux_set_pin_function(pctldev, group, function);
+
+ return 0;
+}
+
+static const struct pinmux_ops k210_pinmux_ops = {
+ .get_functions_count = k210_pinmux_get_function_count,
+ .get_function_name = k210_pinmux_get_function_name,
+ .get_function_groups = k210_pinmux_get_function_groups,
+ .set_mux = k210_pinmux_set_mux,
+ .strict = true,
+};
+
+static int k210_pinctrl_get_groups_count(struct pinctrl_dev *pctldev)
+{
+ return K210_NGROUPS;
+}
+
+static const char *k210_pinctrl_get_group_name(struct pinctrl_dev *pctldev,
+ unsigned int group)
+{
+ return k210_group_names[group];
+}
+
+static int k210_pinctrl_get_group_pins(struct pinctrl_dev *pctldev,
+ unsigned int group,
+ const unsigned int **pins,
+ unsigned int *npins)
+{
+ if (group >= K210_NPINS) {
+ *pins = NULL;
+ *npins = 0;
+ return 0;
+ }
+
+ *pins = &k210_pins[group].number;
+ *npins = 1;
+
+ return 0;
+}
+
+static void k210_pinctrl_pin_dbg_show(struct pinctrl_dev *pctldev,
+ struct seq_file *s, unsigned int offset)
+{
+ seq_printf(s, "%s", dev_name(pctldev->dev));
+}
+
+static int k210_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np,
+ struct pinctrl_map **map,
+ unsigned int *reserved_maps,
+ unsigned int *num_maps)
+{
+ struct property *prop;
+ const __be32 *p;
+ int ret, pinmux_groups;
+ u32 pinmux_group;
+ unsigned long *configs = NULL;
+ unsigned int num_configs = 0;
+ unsigned int reserve = 0;
+
+ ret = of_property_count_strings(np, "groups");
+	if (ret > 0)
+ return pinconf_generic_dt_subnode_to_map(pctldev, np, map,
+ reserved_maps, num_maps,
+ PIN_MAP_TYPE_CONFIGS_GROUP);
+
+ pinmux_groups = of_property_count_u32_elems(np, "pinmux");
+ if (pinmux_groups <= 0) {
+ /* Ignore this node */
+ return 0;
+ }
+
+ ret = pinconf_generic_parse_dt_config(np, pctldev, &configs,
+ &num_configs);
+ if (ret < 0) {
+ dev_err(pctldev->dev, "%pOF: could not parse node property\n",
+ np);
+ return ret;
+ }
+
+ reserve = pinmux_groups * (1 + num_configs);
+ ret = pinctrl_utils_reserve_map(pctldev, map, reserved_maps, num_maps,
+ reserve);
+ if (ret < 0)
+ goto exit;
+
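+	/* Each "pinmux" cell packs a pin number (K210_PG_PIN) and a function (K210_PG_FUNC). */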
+ of_property_for_each_u32(np, "pinmux", prop, p, pinmux_group) {
+ const char *group_name, *func_name;
+ u32 pin = FIELD_GET(K210_PG_PIN, pinmux_group);
+ u32 func = FIELD_GET(K210_PG_FUNC, pinmux_group);
+
+ if (pin >= K210_NPINS) {
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ group_name = k210_group_names[pin];
+ func_name = k210_pcf_infos[func].name;
+
+ dev_dbg(pctldev->dev, "Pinmux %s: pin %u func %s\n",
+ np->name, pin, func_name);
+
+ ret = pinctrl_utils_add_map_mux(pctldev, map, reserved_maps,
+ num_maps, group_name,
+ func_name);
+ if (ret < 0) {
+ dev_err(pctldev->dev, "%pOF add mux map failed %d\n",
+ np, ret);
+ goto exit;
+ }
+
+ if (num_configs) {
+ ret = pinctrl_utils_add_map_configs(pctldev, map,
+ reserved_maps, num_maps, group_name,
+ configs, num_configs,
+ PIN_MAP_TYPE_CONFIGS_PIN);
+ if (ret < 0) {
+ dev_err(pctldev->dev,
+ "%pOF add configs map failed %d\n",
+ np, ret);
+ goto exit;
+ }
+ }
+ }
+
+ ret = 0;
+
+exit:
+ kfree(configs);
+ return ret;
+}
+
+static int k210_pinctrl_dt_node_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np_config,
+ struct pinctrl_map **map,
+ unsigned int *num_maps)
+{
+ unsigned int reserved_maps;
+ struct device_node *np;
+ int ret;
+
+ reserved_maps = 0;
+ *map = NULL;
+ *num_maps = 0;
+
+ ret = k210_pinctrl_dt_subnode_to_map(pctldev, np_config, map,
+ &reserved_maps, num_maps);
+ if (ret < 0)
+ goto err;
+
+ for_each_available_child_of_node(np_config, np) {
+ ret = k210_pinctrl_dt_subnode_to_map(pctldev, np, map,
+ &reserved_maps, num_maps);
+ if (ret < 0)
+ goto err;
+ }
+ return 0;
+
+err:
+ pinctrl_utils_free_map(pctldev, *map, *num_maps);
+ return ret;
+}
+
+static const struct pinctrl_ops k210_pinctrl_ops = {
+ .get_groups_count = k210_pinctrl_get_groups_count,
+ .get_group_name = k210_pinctrl_get_group_name,
+ .get_group_pins = k210_pinctrl_get_group_pins,
+ .pin_dbg_show = k210_pinctrl_pin_dbg_show,
+ .dt_node_to_map = k210_pinctrl_dt_node_to_map,
+ .dt_free_map = pinconf_generic_dt_free_map,
+};
+
+static struct pinctrl_desc k210_pinctrl_desc = {
+ .name = "k210-pinctrl",
+ .pins = k210_pins,
+ .npins = K210_NPINS,
+ .pctlops = &k210_pinctrl_ops,
+ .pmxops = &k210_pinmux_ops,
+ .confops = &k210_pinconf_ops,
+ .custom_params = k210_pinconf_custom_params,
+ .num_custom_params = ARRAY_SIZE(k210_pinconf_custom_params),
+};
+
+static void k210_fpioa_init_ties(struct k210_fpioa_data *pdata)
+{
+ struct k210_fpioa __iomem *fpioa = pdata->fpioa;
+ u32 val;
+ int i, j;
+
+ dev_dbg(pdata->dev, "Init pin ties\n");
+
+ /* Init pin functions input ties */
+ for (i = 0; i < ARRAY_SIZE(fpioa->tie_en); i++) {
+ val = 0;
+ for (j = 0; j < 32; j++) {
+ if (k210_pcf_infos[i * 32 + j].mode_id ==
+ K210_PC_DEFAULT_IN_TIE) {
+ dev_dbg(pdata->dev,
+ "tie_en function %d (%s)\n",
+ i * 32 + j,
+ k210_pcf_infos[i * 32 + j].name);
+ val |= BIT(j);
+ }
+ }
+
+ /* Set value before enable */
+ writel(val, &fpioa->tie_val[i]);
+ writel(val, &fpioa->tie_en[i]);
+ }
+}
+
+static int k210_fpioa_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+ struct k210_fpioa_data *pdata;
+ int ret;
+
+ dev_info(dev, "K210 FPIOA pin controller\n");
+
+ pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+ if (!pdata)
+ return -ENOMEM;
+
+ pdata->dev = dev;
+ platform_set_drvdata(pdev, pdata);
+
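+	/* Map the FPIOA register block (pins, tie_en, tie_val). */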
+ pdata->fpioa = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(pdata->fpioa))
+ return PTR_ERR(pdata->fpioa);
+
+ pdata->clk = devm_clk_get(dev, "ref");
+ if (IS_ERR(pdata->clk))
+ return PTR_ERR(pdata->clk);
+
+ ret = clk_prepare_enable(pdata->clk);
+ if (ret)
+ return ret;
+
+	pdata->pclk = devm_clk_get_optional(dev, "pclk");
+	if (IS_ERR(pdata->pclk))
+		return PTR_ERR(pdata->pclk);
+	ret = clk_prepare_enable(pdata->pclk);
+	if (ret)
+		return ret;
+
+ pdata->sysctl_map =
+ syscon_regmap_lookup_by_phandle_args(np,
+ "canaan,k210-sysctl-power",
+ 1, &pdata->power_offset);
+ if (IS_ERR(pdata->sysctl_map))
+ return PTR_ERR(pdata->sysctl_map);
+
+ k210_fpioa_init_ties(pdata);
+
+ pdata->pctl = pinctrl_register(&k210_pinctrl_desc, dev, (void *)pdata);
+ if (IS_ERR(pdata->pctl))
+ return PTR_ERR(pdata->pctl);
+
+ return 0;
+}
+
+static const struct of_device_id k210_fpioa_dt_ids[] = {
+ { .compatible = "canaan,k210-fpioa" },
+ { /* sentinel */ },
+};
+
+static struct platform_driver k210_fpioa_driver = {
+ .probe = k210_fpioa_probe,
+ .driver = {
+ .name = "k210-fpioa",
+ .of_match_table = k210_fpioa_dt_ids,
+ },
+};
+builtin_platform_driver(k210_fpioa_driver);
diff --git a/drivers/powercap/Kconfig b/drivers/powercap/Kconfig
index 20b4325c6161..8242e8c5ed77 100644
--- a/drivers/powercap/Kconfig
+++ b/drivers/powercap/Kconfig
@@ -45,7 +45,7 @@ config IDLE_INJECT
on a per CPU basis.
config DTPM
- bool "Power capping for Dynamic Thermal Power Management"
+ bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
help
This enables support for the power capping for the dynamic
thermal power management userspace engine.
diff --git a/drivers/powercap/dtpm.c b/drivers/powercap/dtpm.c
index 5a51cd34a7e8..c2185ec5f887 100644
--- a/drivers/powercap/dtpm.c
+++ b/drivers/powercap/dtpm.c
@@ -207,6 +207,9 @@ int dtpm_release_zone(struct powercap_zone *pcz)
if (dtpm->ops)
dtpm->ops->release(dtpm);
+ if (root == dtpm)
+ root = NULL;
+
kfree(dtpm);
return 0;
diff --git a/drivers/reset/Kconfig b/drivers/reset/Kconfig
index 8ac5627564f0..4171c6f76385 100644
--- a/drivers/reset/Kconfig
+++ b/drivers/reset/Kconfig
@@ -89,6 +89,16 @@ config RESET_INTEL_GW
Say Y to control the reset signals provided by reset controller.
Otherwise, say N.
+config RESET_K210
+ bool "Reset controller driver for Canaan Kendryte K210 SoC"
+ depends on (SOC_CANAAN || COMPILE_TEST) && OF
+ select MFD_SYSCON
+ default SOC_CANAAN
+ help
+ Support for the Canaan Kendryte K210 RISC-V SoC reset controller.
+ Say Y if you want to control reset signals provided by this
+ controller.
+
config RESET_LANTIQ
bool "Lantiq XWAY Reset Driver" if COMPILE_TEST
default SOC_TYPE_XWAY
diff --git a/drivers/reset/Makefile b/drivers/reset/Makefile
index 1054123fd187..65a118a91b27 100644
--- a/drivers/reset/Makefile
+++ b/drivers/reset/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_RESET_BRCMSTB_RESCAL) += reset-brcmstb-rescal.o
obj-$(CONFIG_RESET_HSDK) += reset-hsdk.o
obj-$(CONFIG_RESET_IMX7) += reset-imx7.o
obj-$(CONFIG_RESET_INTEL_GW) += reset-intel-gw.o
+obj-$(CONFIG_RESET_K210) += reset-k210.o
obj-$(CONFIG_RESET_LANTIQ) += reset-lantiq.o
obj-$(CONFIG_RESET_LPC18XX) += reset-lpc18xx.o
obj-$(CONFIG_RESET_MESON) += reset-meson.o
diff --git a/drivers/reset/reset-k210.c b/drivers/reset/reset-k210.c
new file mode 100644
index 000000000000..1b6e03522b40
--- /dev/null
+++ b/drivers/reset/reset-k210.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset-controller.h>
+#include <linux/delay.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <soc/canaan/k210-sysctl.h>
+
+#include <dt-bindings/reset/k210-rst.h>
+
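+/* Bits of the K210_SYSCTL_PERI_RESET register that correspond to implemented reset lines. */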
+#define K210_RST_MASK 0x27FFFFFF
+
+struct k210_rst {
+ struct regmap *map;
+ struct reset_controller_dev rcdev;
+};
+
+static inline struct k210_rst *
+to_k210_rst(struct reset_controller_dev *rcdev)
+{
+ return container_of(rcdev, struct k210_rst, rcdev);
+}
+
+static inline int k210_rst_assert(struct reset_controller_dev *rcdev,
+ unsigned long id)
+{
+ struct k210_rst *ksr = to_k210_rst(rcdev);
+
+	return regmap_update_bits(ksr->map, K210_SYSCTL_PERI_RESET, BIT(id), BIT(id));
+}
+
+static inline int k210_rst_deassert(struct reset_controller_dev *rcdev,
+ unsigned long id)
+{
+ struct k210_rst *ksr = to_k210_rst(rcdev);
+
+ return regmap_update_bits(ksr->map, K210_SYSCTL_PERI_RESET, BIT(id), 0);
+}
+
+static int k210_rst_reset(struct reset_controller_dev *rcdev,
+ unsigned long id)
+{
+ int ret;
+
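+	/* Pulse the reset line: assert, hold for 10us, then deassert. */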
+ ret = k210_rst_assert(rcdev, id);
+ if (ret == 0) {
+ udelay(10);
+ ret = k210_rst_deassert(rcdev, id);
+ }
+
+ return ret;
+}
+
+static int k210_rst_status(struct reset_controller_dev *rcdev,
+ unsigned long id)
+{
+ struct k210_rst *ksr = to_k210_rst(rcdev);
+ u32 reg, bit = BIT(id);
+ int ret;
+
+ ret = regmap_read(ksr->map, K210_SYSCTL_PERI_RESET, &reg);
+ if (ret)
+ return ret;
+
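+	/* A set bit means the line is currently held in reset. */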
+ return reg & bit;
+}
+
+static int k210_rst_xlate(struct reset_controller_dev *rcdev,
+ const struct of_phandle_args *reset_spec)
+{
+ unsigned long id = reset_spec->args[0];
+
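+	/* Reject ids whose reset bit is not implemented. */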
+ if (!(BIT(id) & K210_RST_MASK))
+ return -EINVAL;
+
+ return id;
+}
+
+static const struct reset_control_ops k210_rst_ops = {
+ .assert = k210_rst_assert,
+ .deassert = k210_rst_deassert,
+ .reset = k210_rst_reset,
+ .status = k210_rst_status,
+};
+
+static int k210_rst_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *parent_np = of_get_parent(dev->of_node);
+ struct k210_rst *ksr;
+
+ dev_info(dev, "K210 reset controller\n");
+
+ ksr = devm_kzalloc(dev, sizeof(*ksr), GFP_KERNEL);
+ if (!ksr)
+ return -ENOMEM;
+
+ ksr->map = syscon_node_to_regmap(parent_np);
+ of_node_put(parent_np);
+ if (IS_ERR(ksr->map))
+ return PTR_ERR(ksr->map);
+
+ ksr->rcdev.owner = THIS_MODULE;
+ ksr->rcdev.dev = dev;
+ ksr->rcdev.of_node = dev->of_node;
+ ksr->rcdev.ops = &k210_rst_ops;
+ ksr->rcdev.nr_resets = fls(K210_RST_MASK);
+ ksr->rcdev.of_reset_n_cells = 1;
+ ksr->rcdev.of_xlate = k210_rst_xlate;
+
+ return devm_reset_controller_register(dev, &ksr->rcdev);
+}
+
+static const struct of_device_id k210_rst_dt_ids[] = {
+ { .compatible = "canaan,k210-rst" },
+ { /* sentinel */ },
+};
+
+static struct platform_driver k210_rst_driver = {
+ .probe = k210_rst_probe,
+ .driver = {
+ .name = "k210-rst",
+ .of_match_table = k210_rst_dt_ids,
+ },
+};
+builtin_platform_driver(k210_rst_driver);
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index cce5b5284658..89128fc29ccc 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -785,7 +785,7 @@ static long wdt_unlocked_ioctl(struct file *file, unsigned int cmd,
*/
static int wdt_open(struct inode *inode, struct file *file)
{
- if (MINOR(inode->i_rdev) == WATCHDOG_MINOR) {
+ if (iminor(inode) == WATCHDOG_MINOR) {
mutex_lock(&m41t80_rtc_mutex);
if (test_and_set_bit(0, &wdt_is_open)) {
mutex_unlock(&m41t80_rtc_mutex);
@@ -809,7 +809,7 @@ static int wdt_open(struct inode *inode, struct file *file)
*/
static int wdt_release(struct inode *inode, struct file *file)
{
- if (MINOR(inode->i_rdev) == WATCHDOG_MINOR)
+ if (iminor(inode) == WATCHDOG_MINOR)
clear_bit(0, &wdt_is_open);
return 0;
}
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 1bbf27b98cf6..68f49e2e964c 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -681,7 +681,7 @@ static int ur_open(struct inode *inode, struct file *file)
* We treat the minor number as the devno of the ur device
* to find in the driver tree.
*/
- devno = MINOR(file_inode(file)->i_rdev);
+ devno = iminor(file_inode(file));
urd = urdev_get_from_devno(devno);
if (!urd) {
diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c
index 5730572b52cd..54e686dca6de 100644
--- a/drivers/s390/virtio/virtio_ccw.c
+++ b/drivers/s390/virtio/virtio_ccw.c
@@ -117,7 +117,7 @@ struct virtio_rev_info {
};
/* the highest virtio-ccw revision we support */
-#define VIRTIO_CCW_REV_MAX 1
+#define VIRTIO_CCW_REV_MAX 2
struct virtio_ccw_vq_info {
struct virtqueue *vq;
@@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev)
u8 old_status = vcdev->dma_area->status;
struct ccw1 *ccw;
- if (vcdev->revision < 1)
+ if (vcdev->revision < 2)
return vcdev->dma_area->status;
ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
diff --git a/drivers/scsi/aic7xxx/aic79xx.h b/drivers/scsi/aic7xxx/aic79xx.h
index 77e1d6bb59a3..59e9321815c8 100644
--- a/drivers/scsi/aic7xxx/aic79xx.h
+++ b/drivers/scsi/aic7xxx/aic79xx.h
@@ -1175,7 +1175,7 @@ struct ahd_softc {
uint8_t tqinfifonext;
/*
- * Cached verson of the hs_mailbox so we can avoid
+ * Cached version of the hs_mailbox so we can avoid
* pausing the sequencer during mailbox updates.
*/
uint8_t hs_mailbox;
diff --git a/drivers/scsi/aic7xxx/aic7xxx.h b/drivers/scsi/aic7xxx/aic7xxx.h
index 11a09798e6b5..9bc755a0a2d3 100644
--- a/drivers/scsi/aic7xxx/aic7xxx.h
+++ b/drivers/scsi/aic7xxx/aic7xxx.h
@@ -896,8 +896,6 @@ union ahc_bus_softc {
typedef void (*ahc_bus_intr_t)(struct ahc_softc *);
typedef int (*ahc_bus_chip_init_t)(struct ahc_softc *);
-typedef int (*ahc_bus_suspend_t)(struct ahc_softc *);
-typedef int (*ahc_bus_resume_t)(struct ahc_softc *);
typedef void ahc_callback_t (void *);
struct ahc_softc {
diff --git a/drivers/scsi/bnx2fc/Kconfig b/drivers/scsi/bnx2fc/Kconfig
index 3cf7e08df809..ecdc0f0f4f4e 100644
--- a/drivers/scsi/bnx2fc/Kconfig
+++ b/drivers/scsi/bnx2fc/Kconfig
@@ -5,6 +5,7 @@ config SCSI_BNX2X_FCOE
depends on (IPV6 || IPV6=n)
depends on LIBFC
depends on LIBFCOE
+ depends on MMU
select NETDEVICES
select ETHERNET
select NET_VENDOR_BROADCOM
diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c
index fdd446765311..1e6d8f62ea3c 100644
--- a/drivers/scsi/bnx2i/bnx2i_iscsi.c
+++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c
@@ -1171,10 +1171,8 @@ static void bnx2i_cleanup_task(struct iscsi_task *task)
bnx2i_send_cmd_cleanup_req(hba, task->dd_data);
spin_unlock_bh(&conn->session->back_lock);
- spin_unlock_bh(&conn->session->frwd_lock);
wait_for_completion_timeout(&bnx2i_conn->cmd_cleanup_cmpl,
msecs_to_jiffies(ISCSI_CMD_CLEANUP_TIMEOUT));
- spin_lock_bh(&conn->session->frwd_lock);
spin_lock_bh(&conn->session->back_lock);
}
bnx2i_iscsi_unmap_sg_list(task->dd_data);
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 337d3aa91945..38369766511c 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -1151,7 +1151,10 @@ static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
{
dial_down_lockup_detection_during_fw_flash(h, c);
atomic_inc(&h->commands_outstanding);
- if (c->device)
+ /*
+ * Check to see if the command is being retried.
+ */
+ if (c->device && !c->retry_pending)
atomic_inc(&c->device->commands_outstanding);
reply_queue = h->reply_map[raw_smp_processor_id()];
@@ -5567,7 +5570,8 @@ static inline void hpsa_cmd_partial_init(struct ctlr_info *h, int index,
}
static int hpsa_ioaccel_submit(struct ctlr_info *h,
- struct CommandList *c, struct scsi_cmnd *cmd)
+ struct CommandList *c, struct scsi_cmnd *cmd,
+ bool retry)
{
struct hpsa_scsi_dev_t *dev = cmd->device->hostdata;
int rc = IO_ACCEL_INELIGIBLE;
@@ -5584,18 +5588,22 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h,
cmd->host_scribble = (unsigned char *) c;
if (dev->offload_enabled) {
- hpsa_cmd_init(h, c->cmdindex, c);
+ hpsa_cmd_init(h, c->cmdindex, c); /* Zeroes out all fields */
c->cmd_type = CMD_SCSI;
c->scsi_cmd = cmd;
c->device = dev;
+ if (retry) /* Resubmit but do not increment device->commands_outstanding. */
+ c->retry_pending = true;
rc = hpsa_scsi_ioaccel_raid_map(h, c);
if (rc < 0) /* scsi_dma_map failed. */
rc = SCSI_MLQUEUE_HOST_BUSY;
} else if (dev->hba_ioaccel_enabled) {
- hpsa_cmd_init(h, c->cmdindex, c);
+ hpsa_cmd_init(h, c->cmdindex, c); /* Zeroes out all fields */
c->cmd_type = CMD_SCSI;
c->scsi_cmd = cmd;
c->device = dev;
+ if (retry) /* Resubmit but do not increment device->commands_outstanding. */
+ c->retry_pending = true;
rc = hpsa_scsi_ioaccel_direct_map(h, c);
if (rc < 0) /* scsi_dma_map failed. */
rc = SCSI_MLQUEUE_HOST_BUSY;
@@ -5628,7 +5636,8 @@ static void hpsa_command_resubmit_worker(struct work_struct *work)
if (c2->error_data.serv_response ==
IOACCEL2_STATUS_SR_TASK_COMP_SET_FULL) {
- rc = hpsa_ioaccel_submit(h, c, cmd);
+ /* Resubmit with the retry_pending flag set. */
+ rc = hpsa_ioaccel_submit(h, c, cmd, true);
if (rc == 0)
return;
if (rc == SCSI_MLQUEUE_HOST_BUSY) {
@@ -5644,6 +5653,15 @@ static void hpsa_command_resubmit_worker(struct work_struct *work)
}
}
hpsa_cmd_partial_init(c->h, c->cmdindex, c);
+ /*
+	 * Here we have not come in through queue_command, so we
+	 * can set the retry_pending flag to true for a driver initiated
+	 * retry attempt (i.e. not a SML retry).
+ * Note: hpsa_ciss_submit does not zero out the command fields like
+ * ioaccel submit does.
+ */
+ c->retry_pending = true;
if (hpsa_ciss_submit(c->h, c, cmd, dev)) {
/*
* If we get here, it means dma mapping failed. Try
@@ -5706,11 +5724,16 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
/*
* Call alternate submit routine for I/O accelerated commands.
* Retries always go down the normal I/O path.
+ * Note: If cmd->retries is non-zero, then this is a SML
+ * initiated retry and not a driver initiated retry.
+ * This command has been obtained from cmd_tagged_alloc
+ * and is therefore a brand-new command.
*/
if (likely(cmd->retries == 0 &&
!blk_rq_is_passthrough(cmd->request) &&
h->acciopath_status)) {
- rc = hpsa_ioaccel_submit(h, c, cmd);
+ /* Submit with the retry_pending flag unset. */
+ rc = hpsa_ioaccel_submit(h, c, cmd, false);
if (rc == 0)
return 0;
if (rc == SCSI_MLQUEUE_HOST_BUSY) {
@@ -6105,6 +6128,7 @@ return_reset_status:
* at init, and managed by cmd_tagged_alloc() and cmd_tagged_free() using the
* block request tag as an index into a table of entries. cmd_tagged_free() is
* the complement, although cmd_free() may be called instead.
+ * This function is only called for new requests from queue_command.
*/
static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h,
struct scsi_cmnd *scmd)
@@ -6139,8 +6163,14 @@ static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h,
}
atomic_inc(&c->refcount);
-
hpsa_cmd_partial_init(h, idx, c);
+
+ /*
+ * This is a new command obtained from queue_command so
+ * there have not been any driver initiated retry attempts.
+ */
+ c->retry_pending = false;
+
return c;
}
@@ -6208,6 +6238,13 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
}
hpsa_cmd_partial_init(h, i, c);
c->device = NULL;
+
+ /*
+ * cmd_alloc is for "internal" commands and they are never
+ * retried.
+ */
+ c->retry_pending = false;
+
return c;
}
diff --git a/drivers/scsi/hpsa_cmd.h b/drivers/scsi/hpsa_cmd.h
index 46df2e3ff89b..d126bb877250 100644
--- a/drivers/scsi/hpsa_cmd.h
+++ b/drivers/scsi/hpsa_cmd.h
@@ -448,7 +448,7 @@ struct CommandList {
*/
struct hpsa_scsi_dev_t *phys_disk;
- int abort_pending;
+ bool retry_pending;
struct hpsa_scsi_dev_t *device;
atomic_t refcount; /* Must be last to avoid memset in hpsa_cmd_init() */
} __aligned(COMMANDLIST_ALIGNMENT);
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 755313b766b9..e663085a8944 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -6038,7 +6038,7 @@ out:
* Return value:
* 0
**/
-static int ibmvfc_remove(struct vio_dev *vdev)
+static void ibmvfc_remove(struct vio_dev *vdev)
{
struct ibmvfc_host *vhost = dev_get_drvdata(&vdev->dev);
LIST_HEAD(purge);
@@ -6070,7 +6070,6 @@ static int ibmvfc_remove(struct vio_dev *vdev)
spin_unlock(&ibmvfc_driver_lock);
scsi_host_put(vhost->host);
LEAVE;
- return 0;
}
/**
diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c
index 29fcc44be2d5..77fafb1bc173 100644
--- a/drivers/scsi/ibmvscsi/ibmvscsi.c
+++ b/drivers/scsi/ibmvscsi/ibmvscsi.c
@@ -2335,7 +2335,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
return -1;
}
-static int ibmvscsi_remove(struct vio_dev *vdev)
+static void ibmvscsi_remove(struct vio_dev *vdev)
{
struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev);
@@ -2356,8 +2356,6 @@ static int ibmvscsi_remove(struct vio_dev *vdev)
spin_unlock(&ibmvscsi_driver_lock);
scsi_host_put(hostdata->host);
-
- return 0;
}
/**
diff --git a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
index cc3908c2d2f9..9abd9e253af6 100644
--- a/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
+++ b/drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
@@ -3595,7 +3595,7 @@ free_adapter:
return rc;
}
-static int ibmvscsis_remove(struct vio_dev *vdev)
+static void ibmvscsis_remove(struct vio_dev *vdev)
{
struct scsi_info *vscsi = dev_get_drvdata(&vdev->dev);
@@ -3622,8 +3622,6 @@ static int ibmvscsis_remove(struct vio_dev *vdev)
list_del(&vscsi->list);
spin_unlock_bh(&ibmvscsis_dev_lock);
kfree(vscsi);
-
- return 0;
}
static ssize_t system_id_show(struct device *dev,
diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index bee16850b236..58e62162882f 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -1480,8 +1480,6 @@ static enum sci_status
stp_request_pio_await_h2d_completion_tc_event(struct isci_request *ireq,
u32 completion_code)
{
- enum sci_status status = SCI_SUCCESS;
-
switch (SCU_GET_COMPLETION_TL_STATUS(completion_code)) {
case SCU_MAKE_COMPLETION_STATUS(SCU_TASK_DONE_GOOD):
ireq->scu_status = SCU_TASK_DONE_GOOD;
@@ -1500,7 +1498,7 @@ stp_request_pio_await_h2d_completion_tc_event(struct isci_request *ireq,
break;
}
- return status;
+ return SCI_SUCCESS;
}
static enum sci_status
@@ -2152,8 +2150,6 @@ static enum sci_status stp_request_udma_await_tc_event(struct isci_request *ireq
static enum sci_status atapi_raw_completion(struct isci_request *ireq, u32 completion_code,
enum sci_base_request_states next)
{
- enum sci_status status = SCI_SUCCESS;
-
switch (SCU_GET_COMPLETION_TL_STATUS(completion_code)) {
case SCU_MAKE_COMPLETION_STATUS(SCU_TASK_DONE_GOOD):
ireq->scu_status = SCU_TASK_DONE_GOOD;
@@ -2172,7 +2168,7 @@ static enum sci_status atapi_raw_completion(struct isci_request *ireq, u32 compl
break;
}
- return status;
+ return SCI_SUCCESS;
}
static enum sci_status atapi_data_tc_completion_handler(struct isci_request *ireq,
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index a9ce6298b935..dd33ce0e3737 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -847,6 +847,7 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
struct iscsi_session *session;
struct iscsi_sw_tcp_host *tcp_sw_host;
struct Scsi_Host *shost;
+ int rc;
if (ep) {
printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
@@ -864,6 +865,11 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
shost->max_channel = 0;
shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
+ rc = iscsi_host_get_max_scsi_cmds(shost, cmds_max);
+ if (rc < 0)
+ goto free_host;
+ shost->can_queue = rc;
+
if (iscsi_host_add(shost, NULL))
goto free_host;
@@ -878,7 +884,6 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
tcp_sw_host = iscsi_host_priv(shost);
tcp_sw_host->session = session;
- shost->can_queue = session->scsi_cmds_max;
if (iscsi_tcp_r2tpool_alloc(session))
goto remove_session;
return cls_session;
@@ -981,7 +986,7 @@ static struct scsi_host_template iscsi_sw_tcp_sht = {
.name = "iSCSI Initiator over TCP/IP",
.queuecommand = iscsi_queuecommand,
.change_queue_depth = scsi_change_queue_depth,
- .can_queue = ISCSI_DEF_XMIT_CMDS_MAX - 1,
+ .can_queue = ISCSI_TOTAL_CMDS_MAX,
.sg_tablesize = 4096,
.max_sectors = 0xFFFF,
.cmd_per_lun = ISCSI_DEF_CMD_PER_LUN,
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 4e668aafbcca..04633e5157e9 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -523,16 +523,6 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
task->state = state;
- spin_lock_bh(&conn->taskqueuelock);
- if (!list_empty(&task->running)) {
- pr_debug_once("%s while task on list", __func__);
- list_del_init(&task->running);
- }
- spin_unlock_bh(&conn->taskqueuelock);
-
- if (conn->task == task)
- conn->task = NULL;
-
if (READ_ONCE(conn->ping_task) == task)
WRITE_ONCE(conn->ping_task, NULL);
@@ -564,11 +554,41 @@ void iscsi_complete_scsi_task(struct iscsi_task *task,
}
EXPORT_SYMBOL_GPL(iscsi_complete_scsi_task);
+/*
+ * Must be called with both the back_lock and frwd_lock held.
+ */
+static bool cleanup_queued_task(struct iscsi_task *task)
+{
+ struct iscsi_conn *conn = task->conn;
+ bool early_complete = false;
+
+ /* Bad target might have completed task while it was still running */
+ if (task->state == ISCSI_TASK_COMPLETED)
+ early_complete = true;
+
+ if (!list_empty(&task->running)) {
+ list_del_init(&task->running);
+ /*
+ * If it's on a list but still running, this could be from
+ * a bad target sending a rsp early, cleanup from a TMF, or
+ * session recovery.
+ */
+ if (task->state == ISCSI_TASK_RUNNING ||
+ task->state == ISCSI_TASK_COMPLETED)
+ __iscsi_put_task(task);
+ }
+
+ if (conn->task == task) {
+ conn->task = NULL;
+ __iscsi_put_task(task);
+ }
+
+ return early_complete;
+}
/*
- * session back_lock must be held and if not called for a task that is
- * still pending or from the xmit thread, then xmit thread must
- * be suspended.
+ * The session frwd_lock must be held. If this is not called for a task that
+ * is still pending, or from the xmit thread, the xmit thread must be
+ * suspended.
*/
static void fail_scsi_task(struct iscsi_task *task, int err)
{
@@ -576,14 +596,11 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
struct scsi_cmnd *sc;
int state;
- /*
- * if a command completes and we get a successful tmf response
- * we will hit this because the scsi eh abort code does not take
- * a ref to the task.
- */
- sc = task->sc;
- if (!sc)
+ spin_lock_bh(&conn->session->back_lock);
+ if (cleanup_queued_task(task)) {
+ spin_unlock_bh(&conn->session->back_lock);
return;
+ }
if (task->state == ISCSI_TASK_PENDING) {
/*
@@ -598,11 +615,9 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
else
state = ISCSI_TASK_ABRT_TMF;
+ sc = task->sc;
sc->result = err << 16;
scsi_set_resid(sc, scsi_bufflen(sc));
-
- /* regular RX path uses back_lock */
- spin_lock_bh(&conn->session->back_lock);
iscsi_complete_task(task, state);
spin_unlock_bh(&conn->session->back_lock);
}
@@ -748,9 +763,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
if (session->tt->xmit_task(task))
goto free_task;
} else {
- spin_lock_bh(&conn->taskqueuelock);
list_add_tail(&task->running, &conn->mgmtqueue);
- spin_unlock_bh(&conn->taskqueuelock);
iscsi_conn_queue_work(conn);
}
@@ -1411,31 +1424,61 @@ static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn)
return 0;
}
-static int iscsi_xmit_task(struct iscsi_conn *conn)
+static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task,
+ bool was_requeue)
{
- struct iscsi_task *task = conn->task;
int rc;
- if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx))
- return -ENODATA;
-
spin_lock_bh(&conn->session->back_lock);
- if (conn->task == NULL) {
+
+ if (!conn->task) {
+ /* Take a ref so we can access it after xmit_task() */
+ __iscsi_get_task(task);
+ } else {
+ /* Already have a ref from when we failed to send it last call */
+ conn->task = NULL;
+ }
+
+ /*
+ * If this was a requeue for a R2T we have an extra ref on the task in
+ * case a bad target sends a cmd rsp before we have handled the task.
+ */
+ if (was_requeue)
+ __iscsi_put_task(task);
+
+ /*
+ * Do this after dropping the extra ref because if this was a requeue
+ * it's removed from that list and cleanup_queued_task would miss it.
+ */
+ if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+ /*
+ * Save the task and ref in case we weren't cleaning up this
+ * task and get woken up again.
+ */
+ conn->task = task;
spin_unlock_bh(&conn->session->back_lock);
return -ENODATA;
}
- __iscsi_get_task(task);
spin_unlock_bh(&conn->session->back_lock);
+
spin_unlock_bh(&conn->session->frwd_lock);
rc = conn->session->tt->xmit_task(task);
spin_lock_bh(&conn->session->frwd_lock);
if (!rc) {
/* done with this task */
task->last_xfer = jiffies;
- conn->task = NULL;
}
/* regular RX path uses back_lock */
spin_lock(&conn->session->back_lock);
+ if (rc && task->state == ISCSI_TASK_RUNNING) {
+ /*
+ * get an extra ref that is released next time we access it
+ * as conn->task above.
+ */
+ __iscsi_get_task(task);
+ conn->task = task;
+ }
+
__iscsi_put_task(task);
spin_unlock(&conn->session->back_lock);
return rc;
@@ -1445,9 +1488,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn)
* iscsi_requeue_task - requeue task to run from session workqueue
* @task: task to requeue
*
- * LLDs that need to run a task from the session workqueue should call
- * this. The session frwd_lock must be held. This should only be called
- * by software drivers.
+ * Callers must have taken a ref to the task that is going to be requeued.
*/
void iscsi_requeue_task(struct iscsi_task *task)
{
@@ -1457,11 +1498,18 @@ void iscsi_requeue_task(struct iscsi_task *task)
* this may be on the requeue list already if the xmit_task callout
* is handling the r2ts while we are adding new ones
*/
- spin_lock_bh(&conn->taskqueuelock);
- if (list_empty(&task->running))
+ spin_lock_bh(&conn->session->frwd_lock);
+ if (list_empty(&task->running)) {
list_add_tail(&task->running, &conn->requeue);
- spin_unlock_bh(&conn->taskqueuelock);
+ } else {
+ /*
+ * Don't need the extra ref since it's already requeued and
+ * has a ref.
+ */
+ iscsi_put_task(task);
+ }
iscsi_conn_queue_work(conn);
+ spin_unlock_bh(&conn->session->frwd_lock);
}
EXPORT_SYMBOL_GPL(iscsi_requeue_task);
@@ -1487,7 +1535,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
}
if (conn->task) {
- rc = iscsi_xmit_task(conn);
+ rc = iscsi_xmit_task(conn, conn->task, false);
if (rc)
goto done;
}
@@ -1497,54 +1545,41 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
* only have one nop-out as a ping from us and targets should not
* overflow us with nop-ins
*/
- spin_lock_bh(&conn->taskqueuelock);
check_mgmt:
while (!list_empty(&conn->mgmtqueue)) {
- conn->task = list_entry(conn->mgmtqueue.next,
- struct iscsi_task, running);
- list_del_init(&conn->task->running);
- spin_unlock_bh(&conn->taskqueuelock);
- if (iscsi_prep_mgmt_task(conn, conn->task)) {
+ task = list_entry(conn->mgmtqueue.next, struct iscsi_task,
+ running);
+ list_del_init(&task->running);
+ if (iscsi_prep_mgmt_task(conn, task)) {
/* regular RX path uses back_lock */
spin_lock_bh(&conn->session->back_lock);
- __iscsi_put_task(conn->task);
+ __iscsi_put_task(task);
spin_unlock_bh(&conn->session->back_lock);
- conn->task = NULL;
- spin_lock_bh(&conn->taskqueuelock);
continue;
}
- rc = iscsi_xmit_task(conn);
+ rc = iscsi_xmit_task(conn, task, false);
if (rc)
goto done;
- spin_lock_bh(&conn->taskqueuelock);
}
/* process pending command queue */
while (!list_empty(&conn->cmdqueue)) {
- conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
- running);
- list_del_init(&conn->task->running);
- spin_unlock_bh(&conn->taskqueuelock);
+ task = list_entry(conn->cmdqueue.next, struct iscsi_task,
+ running);
+ list_del_init(&task->running);
if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
- fail_scsi_task(conn->task, DID_IMM_RETRY);
- spin_lock_bh(&conn->taskqueuelock);
+ fail_scsi_task(task, DID_IMM_RETRY);
continue;
}
- rc = iscsi_prep_scsi_cmd_pdu(conn->task);
+ rc = iscsi_prep_scsi_cmd_pdu(task);
if (rc) {
- if (rc == -ENOMEM || rc == -EACCES) {
- spin_lock_bh(&conn->taskqueuelock);
- list_add_tail(&conn->task->running,
- &conn->cmdqueue);
- conn->task = NULL;
- spin_unlock_bh(&conn->taskqueuelock);
- goto done;
- } else
- fail_scsi_task(conn->task, DID_ABORT);
- spin_lock_bh(&conn->taskqueuelock);
+ if (rc == -ENOMEM || rc == -EACCES)
+ fail_scsi_task(task, DID_IMM_RETRY);
+ else
+ fail_scsi_task(task, DID_ABORT);
continue;
}
- rc = iscsi_xmit_task(conn);
+ rc = iscsi_xmit_task(conn, task, false);
if (rc)
goto done;
/*
@@ -1552,7 +1587,6 @@ check_mgmt:
* we need to check the mgmt queue for nops that need to
 * be sent to avoid starvation
*/
- spin_lock_bh(&conn->taskqueuelock);
if (!list_empty(&conn->mgmtqueue))
goto check_mgmt;
}
@@ -1566,21 +1600,17 @@ check_mgmt:
task = list_entry(conn->requeue.next, struct iscsi_task,
running);
+
if (iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_DATA_OUT))
break;
- conn->task = task;
- list_del_init(&conn->task->running);
- conn->task->state = ISCSI_TASK_RUNNING;
- spin_unlock_bh(&conn->taskqueuelock);
- rc = iscsi_xmit_task(conn);
+ list_del_init(&task->running);
+ rc = iscsi_xmit_task(conn, task, true);
if (rc)
goto done;
- spin_lock_bh(&conn->taskqueuelock);
if (!list_empty(&conn->mgmtqueue))
goto check_mgmt;
}
- spin_unlock_bh(&conn->taskqueuelock);
spin_unlock_bh(&conn->session->frwd_lock);
return -ENODATA;
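A summary of the dispatch order the rewritten loop implements, with all queue handling now serialized by session->frwd_lock instead of the removed conn->taskqueuelock (a sketch of the control flow, not a literal excerpt):

    /*
     * iscsi_data_xmit() after this change:
     *   1. finish conn->task if a transmit was already in progress
     *   2. drain conn->mgmtqueue (nops, tmfs)
     *   3. send one conn->cmdqueue entry, then recheck the mgmt queue
     *      (the check_mgmt label) so pings are not starved by a long
     *      command backlog
     *   4. send conn->requeue entries (R2T data-outs), again rechecking
     *      the mgmt queue between tasks
     */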
@@ -1746,9 +1776,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
goto prepd_reject;
}
} else {
- spin_lock_bh(&conn->taskqueuelock);
list_add_tail(&task->running, &conn->cmdqueue);
- spin_unlock_bh(&conn->taskqueuelock);
iscsi_conn_queue_work(conn);
}
@@ -1855,27 +1883,39 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,
}
/*
- * Fail commands. session lock held and recv side suspended and xmit
- * thread flushed
+ * Fail commands. The session frwd_lock is held and the xmit thread flushed.
*/
static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error)
{
+ struct iscsi_session *session = conn->session;
struct iscsi_task *task;
int i;
- for (i = 0; i < conn->session->cmds_max; i++) {
- task = conn->session->cmds[i];
+ spin_lock_bh(&session->back_lock);
+ for (i = 0; i < session->cmds_max; i++) {
+ task = session->cmds[i];
if (!task->sc || task->state == ISCSI_TASK_FREE)
continue;
if (lun != -1 && lun != task->sc->device->lun)
continue;
- ISCSI_DBG_SESSION(conn->session,
+ __iscsi_get_task(task);
+ spin_unlock_bh(&session->back_lock);
+
+ ISCSI_DBG_SESSION(session,
"failing sc %p itt 0x%x state %d\n",
task->sc, task->itt, task->state);
fail_scsi_task(task, error);
+
+ spin_unlock_bh(&session->frwd_lock);
+ iscsi_put_task(task);
+ spin_lock_bh(&session->frwd_lock);
+
+ spin_lock_bh(&session->back_lock);
}
+
+ spin_unlock_bh(&session->back_lock);
}
/**
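The loop above is an instance of a common pattern: pin an entry with a reference under one lock, then drop the locks that the teardown work may need before taking them back. A minimal sketch of the shape, with the filter predicate hypothetical:

    spin_lock_bh(&session->back_lock);
    for (i = 0; i < session->cmds_max; i++) {
            task = session->cmds[i];
            if (!should_fail(task))            /* hypothetical filter */
                    continue;

            __iscsi_get_task(task);            /* pin under back_lock */
            spin_unlock_bh(&session->back_lock);

            fail_scsi_task(task, error);       /* needs frwd_lock held */

            spin_unlock_bh(&session->frwd_lock);
            iscsi_put_task(task);              /* final put may complete the
                                                * task, so no frwd_lock here */
            spin_lock_bh(&session->frwd_lock);

            spin_lock_bh(&session->back_lock); /* resume the scan */
    }
    spin_unlock_bh(&session->back_lock);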
@@ -1953,6 +1993,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc);
spin_lock_bh(&session->frwd_lock);
+ spin_lock(&session->back_lock);
task = (struct iscsi_task *)sc->SCp.ptr;
if (!task) {
/*
@@ -1960,8 +2001,11 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
* so let timeout code complete it now.
*/
rc = BLK_EH_DONE;
+ spin_unlock(&session->back_lock);
goto done;
}
+ __iscsi_get_task(task);
+ spin_unlock(&session->back_lock);
if (session->state != ISCSI_STATE_LOGGED_IN) {
/*
@@ -2020,6 +2064,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
goto done;
}
+ spin_lock(&session->back_lock);
for (i = 0; i < conn->session->cmds_max; i++) {
running_task = conn->session->cmds[i];
if (!running_task->sc || running_task == task ||
@@ -2052,10 +2097,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
"last xfer %lu/%lu. Last check %lu.\n",
task->last_xfer, running_task->last_xfer,
task->last_timeout);
+ spin_unlock(&session->back_lock);
rc = BLK_EH_RESET_TIMER;
goto done;
}
}
+ spin_unlock(&session->back_lock);
/* Assumes nop timeout is shorter than scsi cmd timeout */
if (task->have_checked_conn)
@@ -2077,9 +2124,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
rc = BLK_EH_RESET_TIMER;
done:
- if (task)
- task->last_timeout = jiffies;
spin_unlock_bh(&session->frwd_lock);
+
+ if (task) {
+ task->last_timeout = jiffies;
+ iscsi_put_task(task);
+ }
ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?
"timer reset" : "shutdown or nh");
return rc;
@@ -2187,15 +2237,20 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
conn->eh_abort_cnt++;
age = session->age;
+ spin_lock(&session->back_lock);
task = (struct iscsi_task *)sc->SCp.ptr;
- ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n",
- sc, task->itt);
-
- /* task completed before time out */
- if (!task->sc) {
+ if (!task || !task->sc) {
+ /* task completed before time out */
ISCSI_DBG_EH(session, "sc completed while abort in progress\n");
- goto success;
+
+ spin_unlock(&session->back_lock);
+ spin_unlock_bh(&session->frwd_lock);
+ mutex_unlock(&session->eh_mutex);
+ return SUCCESS;
}
+ ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", sc, task->itt);
+ __iscsi_get_task(task);
+ spin_unlock(&session->back_lock);
if (task->state == ISCSI_TASK_PENDING) {
fail_scsi_task(task, DID_ABORT);
@@ -2257,6 +2312,7 @@ success:
success_unlocked:
ISCSI_DBG_EH(session, "abort success [sc %p itt 0x%x]\n",
sc, task->itt);
+ iscsi_put_task(task);
mutex_unlock(&session->eh_mutex);
return SUCCESS;
@@ -2265,6 +2321,7 @@ failed:
failed_unlocked:
ISCSI_DBG_EH(session, "abort failed [sc %p itt 0x%x]\n", sc,
task ? task->itt : 0);
+ iscsi_put_task(task);
mutex_unlock(&session->eh_mutex);
return FAILED;
}
@@ -2591,6 +2648,56 @@ void iscsi_pool_free(struct iscsi_pool *q)
}
EXPORT_SYMBOL_GPL(iscsi_pool_free);
+int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
+ uint16_t requested_cmds_max)
+{
+ int scsi_cmds, total_cmds = requested_cmds_max;
+
+check:
+ if (!total_cmds)
+ total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;
+ /*
+ * The iscsi layer needs some tasks for nop handling and tmfs, so
+ * cmds_max must be at least ISCSI_MGMT_CMDS_MAX + 1, leaving one
+ * command free for scsi IO.
+ */
+ if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
+ printk(KERN_ERR "iscsi: invalid max cmds of %d. Must be a power of two that is at least %d.\n",
+ total_cmds, ISCSI_TOTAL_CMDS_MIN);
+ return -EINVAL;
+ }
+
+ if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {
+ printk(KERN_INFO "iscsi: invalid max cmds of %d. Must be a power of 2 less than or equal to %d. Using %d.\n",
+ requested_cmds_max, ISCSI_TOTAL_CMDS_MAX,
+ ISCSI_TOTAL_CMDS_MAX);
+ total_cmds = ISCSI_TOTAL_CMDS_MAX;
+ }
+
+ if (!is_power_of_2(total_cmds)) {
+ total_cmds = rounddown_pow_of_two(total_cmds);
+ if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
+ printk(KERN_ERR "iscsi: invalid max cmds of %d. Must be a power of 2 greater than %d.\n", requested_cmds_max, ISCSI_TOTAL_CMDS_MIN);
+ return -EINVAL;
+ }
+
+ printk(KERN_INFO "iscsi: invalid max cmds %d. Must be a power of 2. Rounding max cmds down to %d.\n",
+ requested_cmds_max, total_cmds);
+ }
+
+ scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;
+ if (shost->can_queue && scsi_cmds > shost->can_queue) {
+ total_cmds = shost->can_queue;
+
+ printk(KERN_INFO "iscsi: requested max cmds %u is higher than driver limit. Using driver limit %u\n",
+ requested_cmds_max, shost->can_queue);
+ goto check;
+ }
+
+ return scsi_cmds;
+}
+EXPORT_SYMBOL_GPL(iscsi_host_get_max_scsi_cmds);
+
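A worked example of the rounding rules above, as a standalone program; the ISCSI_MGMT_CMDS_MAX value is an assumption for illustration (it is a transport-layer constant):

    #include <stdio.h>

    /* Same effect as the kernel's rounddown_pow_of_two() for nonzero input:
     * clear the lowest set bit until a single bit remains. */
    static unsigned int round_down_pow2(unsigned int n)
    {
            while (n & (n - 1))
                    n &= n - 1;
            return n;
    }

    int main(void)
    {
            unsigned int requested = 1000;      /* not a power of 2 */
            unsigned int mgmt_cmds = 8;         /* illustrative value */
            unsigned int total = round_down_pow2(requested);

            /* requested 1000 -> total 512 -> scsi_cmds 504 */
            printf("total=%u scsi_cmds=%u\n", total, total - mgmt_cmds);
            return 0;
    }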
/**
* iscsi_host_add - add host to system
* @shost: scsi host
@@ -2681,8 +2788,6 @@ void iscsi_host_remove(struct Scsi_Host *shost)
flush_signals(current);
scsi_remove_host(shost);
- if (ihost->workq)
- destroy_workqueue(ihost->workq);
}
EXPORT_SYMBOL_GPL(iscsi_host_remove);
@@ -2690,6 +2795,9 @@ void iscsi_host_free(struct Scsi_Host *shost)
{
struct iscsi_host *ihost = shost_priv(shost);
+ if (ihost->workq)
+ destroy_workqueue(ihost->workq);
+
kfree(ihost->netdev);
kfree(ihost->hwaddress);
kfree(ihost->initiatorname);
@@ -2743,7 +2851,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
struct iscsi_host *ihost = shost_priv(shost);
struct iscsi_session *session;
struct iscsi_cls_session *cls_session;
- int cmd_i, scsi_cmds, total_cmds = cmds_max;
+ int cmd_i, scsi_cmds;
unsigned long flags;
spin_lock_irqsave(&ihost->lock, flags);
@@ -2754,37 +2862,9 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
ihost->num_sessions++;
spin_unlock_irqrestore(&ihost->lock, flags);
- if (!total_cmds)
- total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;
- /*
- * The iscsi layer needs some tasks for nop handling and tmfs,
- * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX
- * + 1 command for scsi IO.
- */
- if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
- printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
- "must be a power of two that is at least %d.\n",
- total_cmds, ISCSI_TOTAL_CMDS_MIN);
+ scsi_cmds = iscsi_host_get_max_scsi_cmds(shost, cmds_max);
+ if (scsi_cmds < 0)
goto dec_session_count;
- }
-
- if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {
- printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
- "must be a power of 2 less than or equal to %d.\n",
- cmds_max, ISCSI_TOTAL_CMDS_MAX);
- total_cmds = ISCSI_TOTAL_CMDS_MAX;
- }
-
- if (!is_power_of_2(total_cmds)) {
- printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
- "must be a power of 2.\n", total_cmds);
- total_cmds = rounddown_pow_of_two(total_cmds);
- if (total_cmds < ISCSI_TOTAL_CMDS_MIN)
- goto dec_session_count;
- printk(KERN_INFO "iscsi: Rounding can_queue to %d.\n",
- total_cmds);
- }
- scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;
cls_session = iscsi_alloc_session(shost, iscsit,
sizeof(struct iscsi_session) +
@@ -2800,7 +2880,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
session->lu_reset_timeout = 15;
session->abort_timeout = 10;
session->scsi_cmds_max = scsi_cmds;
- session->cmds_max = total_cmds;
+ session->cmds_max = scsi_cmds + ISCSI_MGMT_CMDS_MAX;
session->queued_cmdsn = session->cmdsn = initial_cmdsn;
session->exp_cmdsn = initial_cmdsn + 1;
session->max_cmdsn = initial_cmdsn + 1;
@@ -2919,7 +2999,6 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
INIT_LIST_HEAD(&conn->mgmtqueue);
INIT_LIST_HEAD(&conn->cmdqueue);
INIT_LIST_HEAD(&conn->requeue);
- spin_lock_init(&conn->taskqueuelock);
INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
/* allocate login_task used for the login/text sequences */
@@ -3085,10 +3164,16 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
ISCSI_DBG_SESSION(conn->session,
"failing mgmt itt 0x%x state %d\n",
task->itt, task->state);
+
+ spin_lock_bh(&session->back_lock);
+ if (cleanup_queued_task(task)) {
+ spin_unlock_bh(&session->back_lock);
+ continue;
+ }
+
state = ISCSI_TASK_ABRT_SESS_RECOV;
if (task->state == ISCSI_TASK_PENDING)
state = ISCSI_TASK_COMPLETED;
- spin_lock_bh(&session->back_lock);
iscsi_complete_task(task, state);
spin_unlock_bh(&session->back_lock);
}
@@ -3189,6 +3274,13 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
spin_unlock_bh(&session->frwd_lock);
/*
+ * The target could have reduced its window size between logins, so
+ * we have to reset max/exp cmdsn so we can see the new values.
+ */
+ spin_lock_bh(&session->back_lock);
+ session->max_cmdsn = session->exp_cmdsn = session->cmdsn + 1;
+ spin_unlock_bh(&session->back_lock);
+ /*
* Unblock xmitworker(), Login Phase will pass through.
*/
clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
@@ -3338,125 +3430,125 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
switch(param) {
case ISCSI_PARAM_FAST_ABORT:
- len = sprintf(buf, "%d\n", session->fast_abort);
+ len = sysfs_emit(buf, "%d\n", session->fast_abort);
break;
case ISCSI_PARAM_ABORT_TMO:
- len = sprintf(buf, "%d\n", session->abort_timeout);
+ len = sysfs_emit(buf, "%d\n", session->abort_timeout);
break;
case ISCSI_PARAM_LU_RESET_TMO:
- len = sprintf(buf, "%d\n", session->lu_reset_timeout);
+ len = sysfs_emit(buf, "%d\n", session->lu_reset_timeout);
break;
case ISCSI_PARAM_TGT_RESET_TMO:
- len = sprintf(buf, "%d\n", session->tgt_reset_timeout);
+ len = sysfs_emit(buf, "%d\n", session->tgt_reset_timeout);
break;
case ISCSI_PARAM_INITIAL_R2T_EN:
- len = sprintf(buf, "%d\n", session->initial_r2t_en);
+ len = sysfs_emit(buf, "%d\n", session->initial_r2t_en);
break;
case ISCSI_PARAM_MAX_R2T:
- len = sprintf(buf, "%hu\n", session->max_r2t);
+ len = sysfs_emit(buf, "%hu\n", session->max_r2t);
break;
case ISCSI_PARAM_IMM_DATA_EN:
- len = sprintf(buf, "%d\n", session->imm_data_en);
+ len = sysfs_emit(buf, "%d\n", session->imm_data_en);
break;
case ISCSI_PARAM_FIRST_BURST:
- len = sprintf(buf, "%u\n", session->first_burst);
+ len = sysfs_emit(buf, "%u\n", session->first_burst);
break;
case ISCSI_PARAM_MAX_BURST:
- len = sprintf(buf, "%u\n", session->max_burst);
+ len = sysfs_emit(buf, "%u\n", session->max_burst);
break;
case ISCSI_PARAM_PDU_INORDER_EN:
- len = sprintf(buf, "%d\n", session->pdu_inorder_en);
+ len = sysfs_emit(buf, "%d\n", session->pdu_inorder_en);
break;
case ISCSI_PARAM_DATASEQ_INORDER_EN:
- len = sprintf(buf, "%d\n", session->dataseq_inorder_en);
+ len = sysfs_emit(buf, "%d\n", session->dataseq_inorder_en);
break;
case ISCSI_PARAM_DEF_TASKMGMT_TMO:
- len = sprintf(buf, "%d\n", session->def_taskmgmt_tmo);
+ len = sysfs_emit(buf, "%d\n", session->def_taskmgmt_tmo);
break;
case ISCSI_PARAM_ERL:
- len = sprintf(buf, "%d\n", session->erl);
+ len = sysfs_emit(buf, "%d\n", session->erl);
break;
case ISCSI_PARAM_TARGET_NAME:
- len = sprintf(buf, "%s\n", session->targetname);
+ len = sysfs_emit(buf, "%s\n", session->targetname);
break;
case ISCSI_PARAM_TARGET_ALIAS:
- len = sprintf(buf, "%s\n", session->targetalias);
+ len = sysfs_emit(buf, "%s\n", session->targetalias);
break;
case ISCSI_PARAM_TPGT:
- len = sprintf(buf, "%d\n", session->tpgt);
+ len = sysfs_emit(buf, "%d\n", session->tpgt);
break;
case ISCSI_PARAM_USERNAME:
- len = sprintf(buf, "%s\n", session->username);
+ len = sysfs_emit(buf, "%s\n", session->username);
break;
case ISCSI_PARAM_USERNAME_IN:
- len = sprintf(buf, "%s\n", session->username_in);
+ len = sysfs_emit(buf, "%s\n", session->username_in);
break;
case ISCSI_PARAM_PASSWORD:
- len = sprintf(buf, "%s\n", session->password);
+ len = sysfs_emit(buf, "%s\n", session->password);
break;
case ISCSI_PARAM_PASSWORD_IN:
- len = sprintf(buf, "%s\n", session->password_in);
+ len = sysfs_emit(buf, "%s\n", session->password_in);
break;
case ISCSI_PARAM_IFACE_NAME:
- len = sprintf(buf, "%s\n", session->ifacename);
+ len = sysfs_emit(buf, "%s\n", session->ifacename);
break;
case ISCSI_PARAM_INITIATOR_NAME:
- len = sprintf(buf, "%s\n", session->initiatorname);
+ len = sysfs_emit(buf, "%s\n", session->initiatorname);
break;
case ISCSI_PARAM_BOOT_ROOT:
- len = sprintf(buf, "%s\n", session->boot_root);
+ len = sysfs_emit(buf, "%s\n", session->boot_root);
break;
case ISCSI_PARAM_BOOT_NIC:
- len = sprintf(buf, "%s\n", session->boot_nic);
+ len = sysfs_emit(buf, "%s\n", session->boot_nic);
break;
case ISCSI_PARAM_BOOT_TARGET:
- len = sprintf(buf, "%s\n", session->boot_target);
+ len = sysfs_emit(buf, "%s\n", session->boot_target);
break;
case ISCSI_PARAM_AUTO_SND_TGT_DISABLE:
- len = sprintf(buf, "%u\n", session->auto_snd_tgt_disable);
+ len = sysfs_emit(buf, "%u\n", session->auto_snd_tgt_disable);
break;
case ISCSI_PARAM_DISCOVERY_SESS:
- len = sprintf(buf, "%u\n", session->discovery_sess);
+ len = sysfs_emit(buf, "%u\n", session->discovery_sess);
break;
case ISCSI_PARAM_PORTAL_TYPE:
- len = sprintf(buf, "%s\n", session->portal_type);
+ len = sysfs_emit(buf, "%s\n", session->portal_type);
break;
case ISCSI_PARAM_CHAP_AUTH_EN:
- len = sprintf(buf, "%u\n", session->chap_auth_en);
+ len = sysfs_emit(buf, "%u\n", session->chap_auth_en);
break;
case ISCSI_PARAM_DISCOVERY_LOGOUT_EN:
- len = sprintf(buf, "%u\n", session->discovery_logout_en);
+ len = sysfs_emit(buf, "%u\n", session->discovery_logout_en);
break;
case ISCSI_PARAM_BIDI_CHAP_EN:
- len = sprintf(buf, "%u\n", session->bidi_chap_en);
+ len = sysfs_emit(buf, "%u\n", session->bidi_chap_en);
break;
case ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL:
- len = sprintf(buf, "%u\n", session->discovery_auth_optional);
+ len = sysfs_emit(buf, "%u\n", session->discovery_auth_optional);
break;
case ISCSI_PARAM_DEF_TIME2WAIT:
- len = sprintf(buf, "%d\n", session->time2wait);
+ len = sysfs_emit(buf, "%d\n", session->time2wait);
break;
case ISCSI_PARAM_DEF_TIME2RETAIN:
- len = sprintf(buf, "%d\n", session->time2retain);
+ len = sysfs_emit(buf, "%d\n", session->time2retain);
break;
case ISCSI_PARAM_TSID:
- len = sprintf(buf, "%u\n", session->tsid);
+ len = sysfs_emit(buf, "%u\n", session->tsid);
break;
case ISCSI_PARAM_ISID:
- len = sprintf(buf, "%02x%02x%02x%02x%02x%02x\n",
+ len = sysfs_emit(buf, "%02x%02x%02x%02x%02x%02x\n",
session->isid[0], session->isid[1],
session->isid[2], session->isid[3],
session->isid[4], session->isid[5]);
break;
case ISCSI_PARAM_DISCOVERY_PARENT_IDX:
- len = sprintf(buf, "%u\n", session->discovery_parent_idx);
+ len = sysfs_emit(buf, "%u\n", session->discovery_parent_idx);
break;
case ISCSI_PARAM_DISCOVERY_PARENT_TYPE:
if (session->discovery_parent_type)
- len = sprintf(buf, "%s\n",
+ len = sysfs_emit(buf, "%s\n",
session->discovery_parent_type);
else
- len = sprintf(buf, "\n");
+ len = sysfs_emit(buf, "\n");
break;
default:
return -ENOSYS;
@@ -3488,16 +3580,16 @@ int iscsi_conn_get_addr_param(struct sockaddr_storage *addr,
case ISCSI_PARAM_CONN_ADDRESS:
case ISCSI_HOST_PARAM_IPADDRESS:
if (sin)
- len = sprintf(buf, "%pI4\n", &sin->sin_addr.s_addr);
+ len = sysfs_emit(buf, "%pI4\n", &sin->sin_addr.s_addr);
else
- len = sprintf(buf, "%pI6\n", &sin6->sin6_addr);
+ len = sysfs_emit(buf, "%pI6\n", &sin6->sin6_addr);
break;
case ISCSI_PARAM_CONN_PORT:
case ISCSI_PARAM_LOCAL_PORT:
if (sin)
- len = sprintf(buf, "%hu\n", be16_to_cpu(sin->sin_port));
+ len = sysfs_emit(buf, "%hu\n", be16_to_cpu(sin->sin_port));
else
- len = sprintf(buf, "%hu\n",
+ len = sysfs_emit(buf, "%hu\n",
be16_to_cpu(sin6->sin6_port));
break;
default:
@@ -3516,88 +3608,88 @@ int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
switch(param) {
case ISCSI_PARAM_PING_TMO:
- len = sprintf(buf, "%u\n", conn->ping_timeout);
+ len = sysfs_emit(buf, "%u\n", conn->ping_timeout);
break;
case ISCSI_PARAM_RECV_TMO:
- len = sprintf(buf, "%u\n", conn->recv_timeout);
+ len = sysfs_emit(buf, "%u\n", conn->recv_timeout);
break;
case ISCSI_PARAM_MAX_RECV_DLENGTH:
- len = sprintf(buf, "%u\n", conn->max_recv_dlength);
+ len = sysfs_emit(buf, "%u\n", conn->max_recv_dlength);
break;
case ISCSI_PARAM_MAX_XMIT_DLENGTH:
- len = sprintf(buf, "%u\n", conn->max_xmit_dlength);
+ len = sysfs_emit(buf, "%u\n", conn->max_xmit_dlength);
break;
case ISCSI_PARAM_HDRDGST_EN:
- len = sprintf(buf, "%d\n", conn->hdrdgst_en);
+ len = sysfs_emit(buf, "%d\n", conn->hdrdgst_en);
break;
case ISCSI_PARAM_DATADGST_EN:
- len = sprintf(buf, "%d\n", conn->datadgst_en);
+ len = sysfs_emit(buf, "%d\n", conn->datadgst_en);
break;
case ISCSI_PARAM_IFMARKER_EN:
- len = sprintf(buf, "%d\n", conn->ifmarker_en);
+ len = sysfs_emit(buf, "%d\n", conn->ifmarker_en);
break;
case ISCSI_PARAM_OFMARKER_EN:
- len = sprintf(buf, "%d\n", conn->ofmarker_en);
+ len = sysfs_emit(buf, "%d\n", conn->ofmarker_en);
break;
case ISCSI_PARAM_EXP_STATSN:
- len = sprintf(buf, "%u\n", conn->exp_statsn);
+ len = sysfs_emit(buf, "%u\n", conn->exp_statsn);
break;
case ISCSI_PARAM_PERSISTENT_PORT:
- len = sprintf(buf, "%d\n", conn->persistent_port);
+ len = sysfs_emit(buf, "%d\n", conn->persistent_port);
break;
case ISCSI_PARAM_PERSISTENT_ADDRESS:
- len = sprintf(buf, "%s\n", conn->persistent_address);
+ len = sysfs_emit(buf, "%s\n", conn->persistent_address);
break;
case ISCSI_PARAM_STATSN:
- len = sprintf(buf, "%u\n", conn->statsn);
+ len = sysfs_emit(buf, "%u\n", conn->statsn);
break;
case ISCSI_PARAM_MAX_SEGMENT_SIZE:
- len = sprintf(buf, "%u\n", conn->max_segment_size);
+ len = sysfs_emit(buf, "%u\n", conn->max_segment_size);
break;
case ISCSI_PARAM_KEEPALIVE_TMO:
- len = sprintf(buf, "%u\n", conn->keepalive_tmo);
+ len = sysfs_emit(buf, "%u\n", conn->keepalive_tmo);
break;
case ISCSI_PARAM_LOCAL_PORT:
- len = sprintf(buf, "%u\n", conn->local_port);
+ len = sysfs_emit(buf, "%u\n", conn->local_port);
break;
case ISCSI_PARAM_TCP_TIMESTAMP_STAT:
- len = sprintf(buf, "%u\n", conn->tcp_timestamp_stat);
+ len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_stat);
break;
case ISCSI_PARAM_TCP_NAGLE_DISABLE:
- len = sprintf(buf, "%u\n", conn->tcp_nagle_disable);
+ len = sysfs_emit(buf, "%u\n", conn->tcp_nagle_disable);
break;
case ISCSI_PARAM_TCP_WSF_DISABLE:
- len = sprintf(buf, "%u\n", conn->tcp_wsf_disable);
+ len = sysfs_emit(buf, "%u\n", conn->tcp_wsf_disable);
break;
case ISCSI_PARAM_TCP_TIMER_SCALE:
- len = sprintf(buf, "%u\n", conn->tcp_timer_scale);
+ len = sysfs_emit(buf, "%u\n", conn->tcp_timer_scale);
break;
case ISCSI_PARAM_TCP_TIMESTAMP_EN:
- len = sprintf(buf, "%u\n", conn->tcp_timestamp_en);
+ len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_en);
break;
case ISCSI_PARAM_IP_FRAGMENT_DISABLE:
- len = sprintf(buf, "%u\n", conn->fragment_disable);
+ len = sysfs_emit(buf, "%u\n", conn->fragment_disable);
break;
case ISCSI_PARAM_IPV4_TOS:
- len = sprintf(buf, "%u\n", conn->ipv4_tos);
+ len = sysfs_emit(buf, "%u\n", conn->ipv4_tos);
break;
case ISCSI_PARAM_IPV6_TC:
- len = sprintf(buf, "%u\n", conn->ipv6_traffic_class);
+ len = sysfs_emit(buf, "%u\n", conn->ipv6_traffic_class);
break;
case ISCSI_PARAM_IPV6_FLOW_LABEL:
- len = sprintf(buf, "%u\n", conn->ipv6_flow_label);
+ len = sysfs_emit(buf, "%u\n", conn->ipv6_flow_label);
break;
case ISCSI_PARAM_IS_FW_ASSIGNED_IPV6:
- len = sprintf(buf, "%u\n", conn->is_fw_assigned_ipv6);
+ len = sysfs_emit(buf, "%u\n", conn->is_fw_assigned_ipv6);
break;
case ISCSI_PARAM_TCP_XMIT_WSF:
- len = sprintf(buf, "%u\n", conn->tcp_xmit_wsf);
+ len = sysfs_emit(buf, "%u\n", conn->tcp_xmit_wsf);
break;
case ISCSI_PARAM_TCP_RECV_WSF:
- len = sprintf(buf, "%u\n", conn->tcp_recv_wsf);
+ len = sysfs_emit(buf, "%u\n", conn->tcp_recv_wsf);
break;
case ISCSI_PARAM_LOCAL_IPADDR:
- len = sprintf(buf, "%s\n", conn->local_ipaddr);
+ len = sysfs_emit(buf, "%s\n", conn->local_ipaddr);
break;
default:
return -ENOSYS;
@@ -3615,13 +3707,13 @@ int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
switch (param) {
case ISCSI_HOST_PARAM_NETDEV_NAME:
- len = sprintf(buf, "%s\n", ihost->netdev);
+ len = sysfs_emit(buf, "%s\n", ihost->netdev);
break;
case ISCSI_HOST_PARAM_HWADDRESS:
- len = sprintf(buf, "%s\n", ihost->hwaddress);
+ len = sysfs_emit(buf, "%s\n", ihost->hwaddress);
break;
case ISCSI_HOST_PARAM_INITIATOR_NAME:
- len = sprintf(buf, "%s\n", ihost->initiatorname);
+ len = sysfs_emit(buf, "%s\n", ihost->initiatorname);
break;
default:
return -ENOSYS;
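The sprintf() -> sysfs_emit() conversions throughout this patch are mechanical: sysfs_emit() verifies that buf is the page-aligned, PAGE_SIZE buffer sysfs hands to show() callbacks and bounds the output accordingly, where a bare sprintf() would silently trust the caller. The resulting show-function shape, as a sketch with the attribute plumbing elided:

    static ssize_t example_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
    {
            /* never writes past PAGE_SIZE; returns bytes emitted */
            return sysfs_emit(buf, "%d\n", 42);
    }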
diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 83f14b2c8804..2e9ffe3d1a55 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -524,48 +524,79 @@ static int iscsi_tcp_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
/**
* iscsi_tcp_r2t_rsp - iSCSI R2T Response processing
* @conn: iscsi connection
- * @task: scsi command task
+ * @hdr: PDU header
*/
-static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
+static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
{
struct iscsi_session *session = conn->session;
- struct iscsi_tcp_task *tcp_task = task->dd_data;
- struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
- struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
+ struct iscsi_tcp_task *tcp_task;
+ struct iscsi_tcp_conn *tcp_conn;
+ struct iscsi_r2t_rsp *rhdr;
struct iscsi_r2t_info *r2t;
- int r2tsn = be32_to_cpu(rhdr->r2tsn);
+ struct iscsi_task *task;
u32 data_length;
u32 data_offset;
+ int r2tsn;
int rc;
+ spin_lock(&session->back_lock);
+ task = iscsi_itt_to_ctask(conn, hdr->itt);
+ if (!task) {
+ spin_unlock(&session->back_lock);
+ return ISCSI_ERR_BAD_ITT;
+ } else if (task->sc->sc_data_direction != DMA_TO_DEVICE) {
+ spin_unlock(&session->back_lock);
+ return ISCSI_ERR_PROTO;
+ }
+ /*
+ * A bad target might complete the cmd before we have handled its
+ * R2Ts, so take a ref to the task; the ref is dropped in the xmit path.
+ */
+ if (task->state != ISCSI_TASK_RUNNING) {
+ spin_unlock(&session->back_lock);
+ /* Let the path that got the early rsp complete it */
+ return 0;
+ }
+ task->last_xfer = jiffies;
+ __iscsi_get_task(task);
+
+ tcp_conn = conn->dd_data;
+ rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
+ /* fill-in new R2T associated with the task */
+ iscsi_update_cmdsn(session, (struct iscsi_nopin *)rhdr);
+ spin_unlock(&session->back_lock);
+
if (tcp_conn->in.datalen) {
iscsi_conn_printk(KERN_ERR, conn,
"invalid R2t with datalen %d\n",
tcp_conn->in.datalen);
- return ISCSI_ERR_DATALEN;
+ rc = ISCSI_ERR_DATALEN;
+ goto put_task;
}
+ tcp_task = task->dd_data;
+ r2tsn = be32_to_cpu(rhdr->r2tsn);
if (tcp_task->exp_datasn != r2tsn){
ISCSI_DBG_TCP(conn, "task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
tcp_task->exp_datasn, r2tsn);
- return ISCSI_ERR_R2TSN;
+ rc = ISCSI_ERR_R2TSN;
+ goto put_task;
}
- /* fill-in new R2T associated with the task */
- iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
-
- if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
+ if (session->state != ISCSI_STATE_LOGGED_IN) {
iscsi_conn_printk(KERN_INFO, conn,
"dropping R2T itt %d in recovery.\n",
task->itt);
- return 0;
+ rc = 0;
+ goto put_task;
}
data_length = be32_to_cpu(rhdr->data_length);
if (data_length == 0) {
iscsi_conn_printk(KERN_ERR, conn,
"invalid R2T with zero data len\n");
- return ISCSI_ERR_DATALEN;
+ rc = ISCSI_ERR_DATALEN;
+ goto put_task;
}
if (data_length > session->max_burst)
@@ -579,7 +610,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
"invalid R2T with data len %u at offset %u "
"and total length %d\n", data_length,
data_offset, task->sc->sdb.length);
- return ISCSI_ERR_DATALEN;
+ rc = ISCSI_ERR_DATALEN;
+ goto put_task;
}
spin_lock(&tcp_task->pool2queue);
@@ -589,7 +621,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
"Target has sent more R2Ts than it "
"negotiated for or driver has leaked.\n");
spin_unlock(&tcp_task->pool2queue);
- return ISCSI_ERR_PROTO;
+ rc = ISCSI_ERR_PROTO;
+ goto put_task;
}
r2t->exp_statsn = rhdr->statsn;
@@ -607,6 +640,10 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
iscsi_requeue_task(task);
return 0;
+
+put_task:
+ iscsi_put_task(task);
+ return rc;
}
/*
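Pieced together, the reference lifecycle for an R2T now looks like this — a summary of the hunk above plus the iscsi_requeue_task() change earlier, not an excerpt:

    /*
     * iscsi_tcp_r2t_rsp():
     *   __iscsi_get_task(task)            taken under session->back_lock
     *   success: iscsi_requeue_task(task) keeps the ref if the task was
     *            newly queued, or puts it if already on conn->requeue
     *   failure: goto put_task            -> iscsi_put_task(task)
     * xmit path: drops the remaining ref once the data-out is sent
     */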
@@ -730,20 +767,11 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
break;
case ISCSI_OP_R2T:
- spin_lock(&conn->session->back_lock);
- task = iscsi_itt_to_ctask(conn, hdr->itt);
- spin_unlock(&conn->session->back_lock);
- if (!task)
- rc = ISCSI_ERR_BAD_ITT;
- else if (ahslen)
+ if (ahslen) {
rc = ISCSI_ERR_AHSLEN;
- else if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
- task->last_xfer = jiffies;
- spin_lock(&conn->session->frwd_lock);
- rc = iscsi_tcp_r2t_rsp(conn, task);
- spin_unlock(&conn->session->frwd_lock);
- } else
- rc = ISCSI_ERR_PROTO;
+ break;
+ }
+ rc = iscsi_tcp_r2t_rsp(conn, hdr);
break;
case ISCSI_OP_LOGIN_RSP:
case ISCSI_OP_TEXT_RSP:
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c
index f5582c8e77c9..ac066f86bb14 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.c
@@ -3648,25 +3648,16 @@ _base_get_msix_index(struct MPT3SAS_ADAPTER *ioc,
base_mod64(atomic64_add_return(1,
&ioc->total_io_cnt), ioc->reply_queue_count) : 0;
- return ioc->cpu_msix_table[raw_smp_processor_id()];
-}
+ if (scmd && ioc->shost->nr_hw_queues > 1) {
+ u32 tag = blk_mq_unique_tag(scmd->request);
-/**
- * _base_sdev_nr_inflight_request -get number of inflight requests
- * of a request queue.
- * @q: request_queue object
- *
- * returns number of inflight request of a request queue.
- */
-inline unsigned long
-_base_sdev_nr_inflight_request(struct request_queue *q)
-{
- struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[0];
+ return blk_mq_unique_tag_to_hwq(tag) +
+ ioc->high_iops_queues;
+ }
- return atomic_read(&hctx->nr_active);
+ return ioc->cpu_msix_table[raw_smp_processor_id()];
}
-
/**
* _base_get_high_iops_msix_index - get the msix index of
* high iops queues
@@ -3686,7 +3677,8 @@ _base_get_high_iops_msix_index(struct MPT3SAS_ADAPTER *ioc,
* reply queues in terms of batch count 16 when outstanding
* IOs on the target device is >=8.
*/
- if (_base_sdev_nr_inflight_request(scmd->device->request_queue) >
+
+ if (atomic_read(&scmd->device->device_busy) >
MPT3SAS_DEVICE_HIGH_IOPS_DEPTH)
return base_mod64((
atomic64_add_return(1, &ioc->high_iops_outstanding) /
@@ -3739,8 +3731,23 @@ mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx,
struct scsi_cmnd *scmd)
{
struct scsiio_tracker *request = scsi_cmd_priv(scmd);
- unsigned int tag = scmd->request->tag;
u16 smid;
+ u32 tag, unique_tag;
+
+ unique_tag = blk_mq_unique_tag(scmd->request);
+ tag = blk_mq_unique_tag_to_tag(unique_tag);
+
+ /*
+ * Store the hw queue number corresponding to the tag.
+ * This hw queue number is used later to determine
+ * the unique_tag using the logic below. This unique_tag
+ * is used to retrieve the scmd pointer corresponding
+ * to the tag using the scsi_host_find_tag() API.
+ *
+ * tag = smid - 1;
+ * unique_tag = ioc->io_queue_num[tag] << BLK_MQ_UNIQUE_TAG_BITS | tag;
+ */
+ ioc->io_queue_num[tag] = blk_mq_unique_tag_to_hwq(unique_tag);
smid = tag + 1;
request->cb_idx = cb_idx;
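The encoding in that comment is plain bit-packing; a standalone illustration of the round trip (BLK_MQ_UNIQUE_TAG_BITS is 16 in include/linux/blk-mq.h):

    #include <stdint.h>
    #include <stdio.h>

    #define BLK_MQ_UNIQUE_TAG_BITS 16
    #define BLK_MQ_UNIQUE_TAG_MASK ((1U << BLK_MQ_UNIQUE_TAG_BITS) - 1)

    int main(void)
    {
            uint16_t smid = 42, hwq = 3;    /* example values */
            uint32_t tag = smid - 1;
            uint32_t unique_tag =
                    (uint32_t)hwq << BLK_MQ_UNIQUE_TAG_BITS | tag;

            /* recovers hwq=3 and tag=41 */
            printf("hwq=%u tag=%u\n",
                   unique_tag >> BLK_MQ_UNIQUE_TAG_BITS,
                   unique_tag & BLK_MQ_UNIQUE_TAG_MASK);
            return 0;
    }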
@@ -3831,6 +3838,7 @@ mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid)
mpt3sas_base_clear_st(ioc, st);
_base_recovery_check(ioc);
+ ioc->io_queue_num[smid - 1] = 0;
return;
}
@@ -5362,6 +5370,9 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc)
kfree(ioc->chain_lookup);
ioc->chain_lookup = NULL;
}
+
+ kfree(ioc->io_queue_num);
+ ioc->io_queue_num = NULL;
}
/**
@@ -5641,7 +5652,8 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
reply_post_free_sz = ioc->reply_post_queue_depth *
sizeof(Mpi2DefaultReplyDescriptor_t);
rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK;
- if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable)
+ if ((_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable)
+ || (ioc->reply_queue_count < RDPQ_MAX_INDEX_IN_ONE_CHUNK))
rdpq_sz = reply_post_free_sz * ioc->reply_queue_count;
ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz);
if (ret == -EAGAIN) {
@@ -5772,6 +5784,11 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
ioc_info(ioc, "internal(0x%p): depth(%d), start smid(%d)\n",
ioc->internal,
ioc->internal_depth, ioc->internal_smid));
+
+ ioc->io_queue_num = kcalloc(ioc->scsiio_depth,
+ sizeof(u16), GFP_KERNEL);
+ if (!ioc->io_queue_num)
+ goto out;
/*
* The number of NVMe page sized blocks needed is:
* (((sg_tablesize * 8) - 1) / (page_size - 8)) + 1
@@ -8174,8 +8191,11 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
ioc_state = mpt3sas_base_get_iocstate(ioc, 0);
if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT ||
(ioc_state & MPI2_IOC_STATE_MASK) ==
- MPI2_IOC_STATE_COREDUMP)
+ MPI2_IOC_STATE_COREDUMP) {
is_fault = 1;
+ ioc->htb_rel.trigger_info_dwords[1] =
+ (ioc_state & MPI2_DOORBELL_DATA_MASK);
+ }
}
_base_pre_reset_handler(ioc);
mpt3sas_wait_for_commands_to_complete(ioc);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.h b/drivers/scsi/mpt3sas/mpt3sas_base.h
index 2def7a340616..315aee6ef86f 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_base.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_base.h
@@ -77,8 +77,8 @@
#define MPT3SAS_DRIVER_NAME "mpt3sas"
#define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
#define MPT3SAS_DESCRIPTION "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION "36.100.00.00"
-#define MPT3SAS_MAJOR_VERSION 36
+#define MPT3SAS_DRIVER_VERSION "37.100.00.00"
+#define MPT3SAS_MAJOR_VERSION 37
#define MPT3SAS_MINOR_VERSION 100
#define MPT3SAS_BUILD_VERSION 0
#define MPT3SAS_RELEASE_VERSION 00
@@ -1073,6 +1073,50 @@ struct hba_port {
#define MULTIPATH_DISABLED_PORT_ID 0xFF
+/**
+ * struct htb_rel_query - diagnostic buffer release reason
+ * @buffer_rel_condition - Release condition ioctl/sysfs/reset
+ * @reserved
+ * @trigger_type - Master/Event/scsi/MPI
+ * @trigger_info_dwords - Data corresponding to trigger type
+ */
+struct htb_rel_query {
+ u16 buffer_rel_condition;
+ u16 reserved;
+ u32 trigger_type;
+ u32 trigger_info_dwords[2];
+};
+
+/* Buffer_rel_condition bit fields */
+
+/* Bit 0 - Diag Buffer not Released */
+#define MPT3_DIAG_BUFFER_NOT_RELEASED (0x00)
+/* Bit 0 - Diag Buffer Released */
+#define MPT3_DIAG_BUFFER_RELEASED (0x01)
+
+/*
+ * Bit 1 - Diag Buffer Released by IOCTL,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_IOCTL (0x02 | MPT3_DIAG_BUFFER_RELEASED)
+
+/*
+ * Bit 2 - Diag Buffer Released by Trigger,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_TRIGGER (0x04 | MPT3_DIAG_BUFFER_RELEASED)
+
+/*
+ * Bit 3 - Diag Buffer Released by SysFs,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_SYSFS (0x08 | MPT3_DIAG_BUFFER_RELEASED)
+
+/* DIAG RESET Master trigger flags */
+#define MPT_DIAG_RESET_ISSUED_BY_DRIVER 0x00000000
+#define MPT_DIAG_RESET_ISSUED_BY_USER 0x00000001
+
typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc);
/**
* struct MPT3SAS_ADAPTER - per adapter struct
@@ -1439,6 +1483,7 @@ struct MPT3SAS_ADAPTER {
spinlock_t scsi_lookup_lock;
int pending_io_count;
wait_queue_head_t reset_wq;
+ u16 *io_queue_num;
/* PCIe SGL */
struct dma_pool *pcie_sgl_dma_pool;
@@ -1529,6 +1574,8 @@ struct MPT3SAS_ADAPTER {
u32 diagnostic_flags[MPI2_DIAG_BUF_TYPE_COUNT];
u32 ring_buffer_offset;
u32 ring_buffer_sz;
+ struct htb_rel_query htb_rel;
+ u8 reset_from_user;
u8 is_warpdrive;
u8 is_mcpu_endpoint;
u8 hide_ir_msg;
@@ -1565,6 +1612,7 @@ struct mpt3sas_debugfs_buffer {
};
#define MPT_DRV_SUPPORT_BITMAP_MEMMOVE 0x00000001
+#define MPT_DRV_SUPPORT_BITMAP_ADDNLQUERY 0x00000002
typedef u8 (*MPT_CALLBACK)(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
u32 reply);
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.c b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
index c8a0ce18f2c5..44f9a05db94e 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.c
@@ -479,6 +479,8 @@ void mpt3sas_ctl_pre_reset_handler(struct MPT3SAS_ADAPTER *ioc)
ioc_info(ioc,
"%s: Releasing the trace buffer due to adapter reset.",
__func__);
+ ioc->htb_rel.buffer_rel_condition =
+ MPT3_DIAG_BUFFER_REL_TRIGGER;
mpt3sas_send_diag_release(ioc, i, &issue_reset);
}
}
@@ -1334,6 +1336,7 @@ _ctl_do_reset(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
dctlprintk(ioc, ioc_info(ioc, "%s: enter\n",
__func__));
+ ioc->reset_from_user = 1;
retval = mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER);
ioc_info(ioc,
"Ioctl: host reset: %s\n", ((!retval) ? "SUCCESS" : "FAILED"));
@@ -1687,6 +1690,9 @@ _ctl_diag_register_2(struct MPT3SAS_ADAPTER *ioc,
request_data = ioc->diag_buffer[buffer_type];
request_data_sz = diag_register->requested_buffer_size;
ioc->unique_id[buffer_type] = diag_register->unique_id;
+ /* Reset ioc variables used for additional query commands */
+ ioc->reset_from_user = 0;
+ memset(&ioc->htb_rel, 0, sizeof(struct htb_rel_query));
ioc->diag_buffer_status[buffer_type] &=
MPT3_DIAG_BUFFER_IS_DRIVER_ALLOCATED;
memcpy(ioc->product_specific[buffer_type],
@@ -2469,7 +2475,61 @@ _ctl_diag_read_buffer(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
return rc;
}
+/**
+ * _ctl_addnl_diag_query - query relevant info associated with diag buffers
+ * @ioc: per adapter object
+ * @arg: user space buffer containing ioctl content
+ *
+ * The application sends only a unique_id. The driver validates the
+ * unique_id first and, if it is valid, fills in the details describing
+ * why the diag buffer was released.
+ */
+static long
+_ctl_addnl_diag_query(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
+{
+ struct mpt3_addnl_diag_query karg;
+ u32 buffer_type = 0;
+ if (copy_from_user(&karg, arg, sizeof(karg))) {
+ pr_err("%s: failure at %s:%d/%s()!\n",
+ ioc->name, __FILE__, __LINE__, __func__);
+ return -EFAULT;
+ }
+ dctlprintk(ioc, ioc_info(ioc, "%s\n", __func__));
+ if (karg.unique_id == 0) {
+ ioc_err(ioc, "%s: unique_id is(0x%08x)\n",
+ __func__, karg.unique_id);
+ return -EPERM;
+ }
+ buffer_type = _ctl_diag_get_bufftype(ioc, karg.unique_id);
+ if (buffer_type == MPT3_DIAG_UID_NOT_FOUND) {
+ ioc_err(ioc, "%s: buffer with unique_id(0x%08x) not found\n",
+ __func__, karg.unique_id);
+ return -EPERM;
+ }
+ memset(&karg.buffer_rel_condition, 0, sizeof(struct htb_rel_query));
+ if ((ioc->diag_buffer_status[buffer_type] &
+ MPT3_DIAG_BUFFER_IS_REGISTERED) == 0) {
+ ioc_info(ioc, "%s: buffer_type(0x%02x) is not registered\n",
+ __func__, buffer_type);
+ goto out;
+ }
+ if ((ioc->diag_buffer_status[buffer_type] &
+ MPT3_DIAG_BUFFER_IS_RELEASED) == 0) {
+ ioc_err(ioc, "%s: buffer_type(0x%02x) is not released\n",
+ __func__, buffer_type);
+ return -EPERM;
+ }
+ memcpy(&karg.buffer_rel_condition, &ioc->htb_rel,
+ sizeof(struct htb_rel_query));
+out:
+ if (copy_to_user(arg, &karg, sizeof(struct mpt3_addnl_diag_query))) {
+ ioc_err(ioc, "%s: unable to write mpt3_addnl_diag_query data @ %p\n",
+ __func__, arg);
+ return -EFAULT;
+ }
+ return 0;
+}
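A hypothetical userspace caller for the new ioctl, assuming the /dev/mpt3ctl control node and the mpt3sas_ctl.h layout added below; the header field name (iocnum) is an assumption based on the existing ioctl header:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include "mpt3sas_ctl.h"        /* MPT3ADDNLDIAGQUERY + struct layout */

    int main(void)
    {
            struct mpt3_addnl_diag_query karg = { 0 };
            int fd = open("/dev/mpt3ctl", O_RDWR);

            if (fd < 0)
                    return 1;
            karg.hdr.iocnum = 0;            /* assumed field name */
            karg.unique_id = 0x07075900;    /* MPT2DIAGBUFFUNIQUEID (trace) */
            if (ioctl(fd, MPT3ADDNLDIAGQUERY, &karg) == 0)
                    printf("rel_condition=0x%x trigger_type=%u\n",
                           karg.buffer_rel_condition, karg.trigger_type);
            return 0;
    }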
#ifdef CONFIG_COMPAT
/**
@@ -2533,7 +2593,7 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
struct MPT3SAS_ADAPTER *ioc;
struct mpt3_ioctl_header ioctl_header;
enum block_state state;
- long ret = -EINVAL;
+ long ret = -ENOIOCTLCMD;
/* get IOCTL header */
if (copy_from_user(&ioctl_header, (char __user *)arg,
@@ -2643,6 +2703,10 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
if (_IOC_SIZE(cmd) == sizeof(struct mpt3_diag_read_buffer))
ret = _ctl_diag_read_buffer(ioc, arg);
break;
+ case MPT3ADDNLDIAGQUERY:
+ if (_IOC_SIZE(cmd) == sizeof(struct mpt3_addnl_diag_query))
+ ret = _ctl_addnl_diag_query(ioc, arg);
+ break;
default:
dctlprintk(ioc,
ioc_info(ioc, "unsupported ioctl opcode(0x%08x)\n",
@@ -3425,6 +3489,7 @@ host_trace_buffer_enable_store(struct device *cdev,
MPT3_DIAG_BUFFER_IS_RELEASED))
goto out;
ioc_info(ioc, "releasing host trace buffer\n");
+ ioc->htb_rel.buffer_rel_condition = MPT3_DIAG_BUFFER_REL_SYSFS;
mpt3sas_send_diag_release(ioc, MPI2_DIAG_BUF_TYPE_TRACE,
&issue_reset);
}
diff --git a/drivers/scsi/mpt3sas/mpt3sas_ctl.h b/drivers/scsi/mpt3sas/mpt3sas_ctl.h
index 0f7aa4ddade0..d2ccdafb8df2 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_ctl.h
+++ b/drivers/scsi/mpt3sas/mpt3sas_ctl.h
@@ -94,6 +94,8 @@
struct mpt3_diag_query)
#define MPT3DIAGREADBUFFER _IOWR(MPT3_MAGIC_NUMBER, 30, \
struct mpt3_diag_read_buffer)
+#define MPT3ADDNLDIAGQUERY _IOWR(MPT3_MAGIC_NUMBER, 32, \
+ struct mpt3_addnl_diag_query)
/* Trace Buffer default UniqueId */
#define MPT2DIAGBUFFUNIQUEID (0x07075900)
@@ -430,4 +432,24 @@ struct mpt3_diag_read_buffer {
uint32_t diagnostic_data[1];
};
+/**
+ * struct mpt3_addnl_diag_query - diagnostic buffer release reason
+ * @hdr - generic header
+ * @unique_id - unique id associated with this buffer.
+ * @buffer_rel_condition - Release condition ioctl/sysfs/reset
+ * @reserved1
+ * @trigger_type - Master/Event/scsi/MPI
+ * @trigger_info_dwords - Data corresponding to trigger type
+ * @reserved2
+ */
+struct mpt3_addnl_diag_query {
+ struct mpt3_ioctl_header hdr;
+ uint32_t unique_id;
+ uint16_t buffer_rel_condition;
+ uint16_t reserved1;
+ uint32_t trigger_type;
+ uint32_t trigger_info_dwords[2];
+ uint32_t reserved2[2];
+};
+
#endif /* MPT3SAS_CTL_H_INCLUDED */
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index c8b09a81834d..ffca03064797 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -54,6 +54,7 @@
#include <linux/interrupt.h>
#include <linux/aer.h>
#include <linux/raid_class.h>
+#include <linux/blk-mq-pci.h>
#include <asm/unaligned.h>
#include "mpt3sas_base.h"
@@ -168,6 +169,11 @@ MODULE_PARM_DESC(multipath_on_hba,
"\t SAS 2.0 & SAS 3.0 HBA - This will be disabled,\n\t\t"
"\t SAS 3.5 HBA - This will be enabled)");
+static int host_tagset_enable = 1;
+module_param(host_tagset_enable, int, 0444);
+MODULE_PARM_DESC(host_tagset_enable,
+ "Shared host tagset enable/disable Default: enable(1)");
+
/* raid transport support */
static struct raid_template *mpt3sas_raid_template;
static struct raid_template *mpt2sas_raid_template;
@@ -1743,10 +1749,12 @@ mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid)
struct scsi_cmnd *scmd = NULL;
struct scsiio_tracker *st;
Mpi25SCSIIORequest_t *mpi_request;
+ u16 tag = smid - 1;
if (smid > 0 &&
smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) {
- u32 unique_tag = smid - 1;
+ u32 unique_tag =
+ ioc->io_queue_num[tag] << BLK_MQ_UNIQUE_TAG_BITS | tag;
mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
@@ -11599,6 +11607,22 @@ scsih_scan_finished(struct Scsi_Host *shost, unsigned long time)
return 1;
}
+/**
+ * scsih_map_queues - map reply queues with request queues
+ * @shost: SCSI host pointer
+ */
+static int scsih_map_queues(struct Scsi_Host *shost)
+{
+ struct MPT3SAS_ADAPTER *ioc =
+ (struct MPT3SAS_ADAPTER *)shost->hostdata;
+
+ if (ioc->shost->nr_hw_queues == 1)
+ return 0;
+
+ return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+ ioc->pdev, ioc->high_iops_queues);
+}
+
/* shost template for SAS 2.0 HBA devices */
static struct scsi_host_template mpt2sas_driver_template = {
.module = THIS_MODULE,
@@ -11666,6 +11690,7 @@ static struct scsi_host_template mpt3sas_driver_template = {
.sdev_attrs = mpt3sas_dev_attrs,
.track_queue_depth = 1,
.cmd_size = sizeof(struct scsiio_tracker),
+ .map_queues = scsih_map_queues,
};
/* raid transport support for SAS 3.0 HBA devices */
@@ -11922,6 +11947,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
* Enable MEMORY MOVE support flag.
*/
ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_MEMMOVE;
+ /* Enable ADDITIONAL QUERY support flag. */
+ ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_ADDNLQUERY;
ioc->enable_sdev_max_qd = enable_sdev_max_qd;
@@ -12028,6 +12055,21 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
} else
ioc->hide_drives = 0;
+ shost->host_tagset = 0;
+ shost->nr_hw_queues = 1;
+
+ if (ioc->is_gen35_ioc && ioc->reply_queue_count > 1 &&
+ host_tagset_enable && ioc->smp_affinity_enable) {
+
+ shost->host_tagset = 1;
+ shost->nr_hw_queues =
+ ioc->reply_queue_count - ioc->high_iops_queues;
+
+ dev_info(&ioc->pdev->dev,
+ "Max SCSIIO MPT commands: %d shared with nr_hw_queues = %d\n",
+ shost->can_queue, shost->nr_hw_queues);
+ }
+
rv = scsi_add_host(shost, &pdev->dev);
if (rv) {
ioc_err(ioc, "failure at %s:%d/%s()!\n",
diff --git a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
index 8ec9bab20ec4..d9b7d0ee25b0 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
@@ -132,6 +132,35 @@ mpt3sas_process_trigger_data(struct MPT3SAS_ADAPTER *ioc,
&issue_reset);
}
+ ioc->htb_rel.buffer_rel_condition = MPT3_DIAG_BUFFER_REL_TRIGGER;
+ if (event_data) {
+ ioc->htb_rel.trigger_type = event_data->trigger_type;
+ switch (event_data->trigger_type) {
+ case MPT3SAS_TRIGGER_SCSI:
+ memcpy(&ioc->htb_rel.trigger_info_dwords,
+ &event_data->u.scsi,
+ sizeof(struct SL_WH_SCSI_TRIGGER_T));
+ break;
+ case MPT3SAS_TRIGGER_MPI:
+ memcpy(&ioc->htb_rel.trigger_info_dwords,
+ &event_data->u.mpi,
+ sizeof(struct SL_WH_MPI_TRIGGER_T));
+ break;
+ case MPT3SAS_TRIGGER_MASTER:
+ ioc->htb_rel.trigger_info_dwords[0] =
+ event_data->u.master.MasterData;
+ break;
+ case MPT3SAS_TRIGGER_EVENT:
+ memcpy(&ioc->htb_rel.trigger_info_dwords,
+ &event_data->u.event,
+ sizeof(struct SL_WH_EVENT_TRIGGER_T));
+ break;
+ default:
+ ioc_err(ioc, "%d - Is not a valid Trigger type\n",
+ event_data->trigger_type);
+ break;
+ }
+ }
_mpt3sas_raise_sigio(ioc, event_data);
dTriggerDiagPrintk(ioc, ioc_info(ioc, "%s: exit\n",
@@ -201,9 +230,14 @@ mpt3sas_trigger_master(struct MPT3SAS_ADAPTER *ioc, u32 trigger_bitmask)
event_data.u.master.MasterData = trigger_bitmask;
if (trigger_bitmask & MASTER_TRIGGER_FW_FAULT ||
- trigger_bitmask & MASTER_TRIGGER_ADAPTER_RESET)
+ trigger_bitmask & MASTER_TRIGGER_ADAPTER_RESET) {
+ ioc->htb_rel.trigger_type = MPT3SAS_TRIGGER_MASTER;
+ ioc->htb_rel.trigger_info_dwords[0] = trigger_bitmask;
+ if (ioc->reset_from_user)
+ ioc->htb_rel.trigger_info_dwords[1] =
+ MPT_DIAG_RESET_ISSUED_BY_USER;
_mpt3sas_raise_sigio(ioc, &event_data);
- else
+ } else
mpt3sas_send_trigger_data_event(ioc, &event_data);
out:
diff --git a/drivers/scsi/pmcraid.h b/drivers/scsi/pmcraid.h
index 15c962108075..6d36debde18e 100644
--- a/drivers/scsi/pmcraid.h
+++ b/drivers/scsi/pmcraid.h
@@ -244,7 +244,7 @@ struct pmcraid_ioarcb {
__u8 hrrq_id;
__u8 cdb[PMCRAID_MAX_CDB_LEN];
struct pmcraid_ioarcb_add_data add_data;
-} __attribute__((packed, aligned(PMCRAID_IOARCB_ALIGNMENT)));
+};
/* well known resource handle values */
#define PMCRAID_IOA_RES_HANDLE 0xffffffff
@@ -1040,8 +1040,8 @@ struct pmcraid_passthrough_ioctl_buffer {
struct pmcraid_ioctl_header ioctl_header;
struct pmcraid_ioarcb ioarcb;
struct pmcraid_ioasa ioasa;
- u8 request_buffer[1];
-} __attribute__ ((packed));
+ u8 request_buffer[];
+} __attribute__ ((packed, aligned(PMCRAID_IOARCB_ALIGNMENT)));
/*
* keys to differentiate between driver handled IOCTLs and passthrough
diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index 0d09480b66cd..c48daf52725d 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -981,8 +981,7 @@ void qlt_free_session_done(struct work_struct *work)
int rc;
if (!own ||
- (own &&
- (own->iocb.u.isp24.status_subcode == ELS_PLOGI))) {
+ (own->iocb.u.isp24.status_subcode == ELS_PLOGI)) {
rc = qla2x00_post_async_logout_work(vha, sess,
NULL);
if (rc != QLA_SUCCESS)
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index a4b014e1cd8c..7bd9a4a04ad5 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -841,7 +841,7 @@ static int __qla4xxx_is_chap_active(struct device *dev, void *data)
sess = cls_session->dd_data;
ddb_entry = sess->dd_data;
- if (iscsi_session_chkready(cls_session))
+ if (iscsi_is_session_online(cls_session))
goto exit_is_chap_active;
if (ddb_entry->chap_tbl_idx == *chap_tbl_idx)
diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index 2e68c0a87698..91074fd97f64 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -132,7 +132,11 @@ show_transport_handle(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct iscsi_internal *priv = dev_to_iscsi_internal(dev);
- return sprintf(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport));
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ return sysfs_emit(buf, "%llu\n",
+ (unsigned long long)iscsi_handle(priv->iscsi_transport));
}
static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL);
@@ -142,7 +146,7 @@ show_transport_##name(struct device *dev, \
struct device_attribute *attr,char *buf) \
{ \
struct iscsi_internal *priv = dev_to_iscsi_internal(dev); \
- return sprintf(buf, format"\n", priv->iscsi_transport->name); \
+ return sysfs_emit(buf, format"\n", priv->iscsi_transport->name);\
} \
static DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL);
@@ -183,7 +187,7 @@ static ssize_t
show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf)
{
struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
- return sprintf(buf, "%llu\n", (unsigned long long) ep->id);
+ return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id);
}
static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL);
@@ -1701,10 +1705,8 @@ static const char *iscsi_session_state_name(int state)
int iscsi_session_chkready(struct iscsi_cls_session *session)
{
- unsigned long flags;
int err;
- spin_lock_irqsave(&session->lock, flags);
switch (session->state) {
case ISCSI_SESSION_LOGGED_IN:
err = 0;
@@ -1719,7 +1721,6 @@ int iscsi_session_chkready(struct iscsi_cls_session *session)
err = DID_NO_CONNECT << 16;
break;
}
- spin_unlock_irqrestore(&session->lock, flags);
return err;
}
EXPORT_SYMBOL_GPL(iscsi_session_chkready);
@@ -2883,6 +2884,9 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
struct iscsi_cls_session *session;
int err = 0, value = 0;
+ if (ev->u.set_param.len > PAGE_SIZE)
+ return -EINVAL;
+
session = iscsi_session_lookup(ev->u.set_param.sid);
conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid);
if (!conn || !session)
@@ -3030,6 +3034,9 @@ iscsi_set_host_param(struct iscsi_transport *transport,
if (!transport->set_host_param)
return -ENOSYS;
+ if (ev->u.set_host_param.len > PAGE_SIZE)
+ return -EINVAL;
+
shost = scsi_host_lookup(ev->u.set_host_param.host_no);
if (!shost) {
printk(KERN_ERR "set_host_param could not find host no %u\n",
@@ -3617,6 +3624,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
{
int err = 0;
u32 portid;
+ u32 pdu_len;
struct iscsi_uevent *ev = nlmsg_data(nlh);
struct iscsi_transport *transport = NULL;
struct iscsi_internal *priv;
@@ -3624,6 +3632,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
struct iscsi_cls_conn *conn;
struct iscsi_endpoint *ep = NULL;
+ if (!netlink_capable(skb, CAP_SYS_ADMIN))
+ return -EPERM;
+
if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE)
*group = ISCSI_NL_GRP_UIP;
else
@@ -3756,6 +3767,14 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
err = -EINVAL;
break;
case ISCSI_UEVENT_SEND_PDU:
+ pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
+
+ if ((ev->u.send_pdu.hdr_size > pdu_len) ||
+ (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) {
+ err = -EINVAL;
+ break;
+ }
+
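The order of those two comparisons matters: checking hdr_size against pdu_len first guarantees that pdu_len - hdr_size cannot wrap around as an unsigned subtraction. A standalone illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Returns 1 if the PDU sizes are consistent with the netlink payload. */
    static int pdu_sizes_ok(uint32_t pdu_len, uint32_t hdr_size,
                            uint32_t data_size)
    {
            if (hdr_size > pdu_len)
                    return 0;       /* also keeps the subtraction from wrapping */
            return data_size <= pdu_len - hdr_size;
    }

    int main(void)
    {
            printf("%d\n", pdu_sizes_ok(100, 48, 52));  /* 1: exactly fits */
            printf("%d\n", pdu_sizes_ok(100, 120, 0));  /* 0: header overruns */
            printf("%d\n", pdu_sizes_ok(100, 48, 53));  /* 0: data overruns */
            return 0;
    }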
conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid);
if (conn) {
mutex_lock(&conn_mutex);
@@ -3960,7 +3979,7 @@ static ssize_t show_conn_state(struct device *dev,
conn->state < ARRAY_SIZE(connection_state_names))
state = connection_state_names[conn->state];
- return sprintf(buf, "%s\n", state);
+ return sysfs_emit(buf, "%s\n", state);
}
static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state,
NULL);
@@ -4188,7 +4207,7 @@ show_priv_session_state(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
- return sprintf(buf, "%s\n", iscsi_session_state_name(session->state));
+ return sysfs_emit(buf, "%s\n", iscsi_session_state_name(session->state));
}
static ISCSI_CLASS_ATTR(priv_sess, state, S_IRUGO, show_priv_session_state,
NULL);
@@ -4197,7 +4216,7 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
- return sprintf(buf, "%d\n", session->creator);
+ return sysfs_emit(buf, "%d\n", session->creator);
}
static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator,
NULL);
@@ -4206,7 +4225,7 @@ show_priv_session_target_id(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
- return sprintf(buf, "%d\n", session->target_id);
+ return sysfs_emit(buf, "%d\n", session->target_id);
}
static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO,
show_priv_session_target_id, NULL);
@@ -4219,8 +4238,8 @@ show_priv_session_##field(struct device *dev, \
struct iscsi_cls_session *session = \
iscsi_dev_to_session(dev->parent); \
if (session->field == -1) \
- return sprintf(buf, "off\n"); \
- return sprintf(buf, format"\n", session->field); \
+ return sysfs_emit(buf, "off\n"); \
+ return sysfs_emit(buf, format"\n", session->field); \
}
#define iscsi_priv_session_attr_store(field) \
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index a3d2d4bc4a3d..ed0b1bb99f08 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -707,9 +707,9 @@ static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
put_unaligned_be16(spsp, &cdb[2]);
put_unaligned_be32(len, &cdb[6]);
- ret = scsi_execute_req(sdev, cdb,
- send ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
- buffer, len, NULL, SD_TIMEOUT, sdkp->max_retries, NULL);
+ ret = scsi_execute(sdev, cdb, send ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
+ buffer, len, NULL, NULL, SD_TIMEOUT, sdkp->max_retries, 0,
+ RQF_PM, NULL);
return ret <= 0 ? ret : -EIO;
}
#endif /* CONFIG_BLK_SED_OPAL */
@@ -3379,10 +3379,12 @@ static int sd_probe(struct device *dev)
sdp->type != TYPE_RBC)
goto out;
-#ifndef CONFIG_BLK_DEV_ZONED
- if (sdp->type == TYPE_ZBC)
+ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && sdp->type == TYPE_ZBC) {
+ sdev_printk(KERN_WARNING, sdp,
+ "Unsupported ZBC host-managed device.\n");
goto out;
-#endif
+ }
+
SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
"sd_probe\n"));
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index 03adb39293c2..ee558675eab4 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -704,6 +704,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
unsigned int nr_zones = sdkp->rev_nr_zones;
u32 max_append;
int ret = 0;
+ unsigned int flags;
/*
* For all zoned disks, initialize zone append emulation data if not
@@ -736,16 +737,19 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
disk->queue->nr_zones == nr_zones)
goto unlock;
+ flags = memalloc_noio_save();
sdkp->zone_blocks = zone_blocks;
sdkp->nr_zones = nr_zones;
- sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO);
+ sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
if (!sdkp->rev_wp_offset) {
ret = -ENOMEM;
+ memalloc_noio_restore(flags);
goto unlock;
}
ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
+ memalloc_noio_restore(flags);
kvfree(sdkp->rev_wp_offset);
sdkp->rev_wp_offset = NULL;
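The memalloc_noio scope used above makes every allocation inside it behave as GFP_NOIO, including allocations made by callees such as blk_revalidate_disk_zones(), which is why the kvcalloc() flag could be relaxed to GFP_KERNEL. The pattern in isolation, as a sketch with a hypothetical callee:

    unsigned int flags = memalloc_noio_save();

    /* any allocation here is implicitly GFP_NOIO, even with GFP_KERNEL */
    buf = kvcalloc(nr, sizeof(u32), GFP_KERNEL);
    ret = do_revalidate();          /* hypothetical callee that allocates */

    memalloc_noio_restore(flags);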
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 721f55db181f..77161750c9fb 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -451,6 +451,8 @@ static void ufshcd_print_evt(struct ufs_hba *hba, u32 id,
if (!found)
dev_err(hba->dev, "No record of %s\n", err_name);
+ else
+ dev_err(hba->dev, "%s: total cnt=%llu\n", err_name, e->cnt);
}
static void ufshcd_print_evt_hist(struct ufs_hba *hba)
@@ -1866,7 +1868,7 @@ static ssize_t ufshcd_clkgate_delay_show(struct device *dev,
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return snprintf(buf, PAGE_SIZE, "%lu\n", hba->clk_gating.delay_ms);
+ return sysfs_emit(buf, "%lu\n", hba->clk_gating.delay_ms);
}
static ssize_t ufshcd_clkgate_delay_store(struct device *dev,
@@ -1889,7 +1891,7 @@ static ssize_t ufshcd_clkgate_enable_show(struct device *dev,
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_gating.is_enabled);
+ return sysfs_emit(buf, "%d\n", hba->clk_gating.is_enabled);
}
static ssize_t ufshcd_clkgate_enable_store(struct device *dev,
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index ee61f821f75d..18e56c1c1b30 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -570,7 +570,7 @@ enum ufshcd_quirks {
/*
* This quirk allows only sg entries aligned with page size.
*/
- UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE = 1 << 13,
+ UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE = 1 << 14,
};
enum ufshcd_caps {
diff --git a/drivers/soc/Kconfig b/drivers/soc/Kconfig
index f357c6c659d2..e8a30c4c5aec 100644
--- a/drivers/soc/Kconfig
+++ b/drivers/soc/Kconfig
@@ -6,6 +6,7 @@ source "drivers/soc/amlogic/Kconfig"
source "drivers/soc/aspeed/Kconfig"
source "drivers/soc/atmel/Kconfig"
source "drivers/soc/bcm/Kconfig"
+source "drivers/soc/canaan/Kconfig"
source "drivers/soc/fsl/Kconfig"
source "drivers/soc/imx/Kconfig"
source "drivers/soc/ixp4xx/Kconfig"
@@ -22,6 +23,5 @@ source "drivers/soc/ti/Kconfig"
source "drivers/soc/ux500/Kconfig"
source "drivers/soc/versatile/Kconfig"
source "drivers/soc/xilinx/Kconfig"
-source "drivers/soc/kendryte/Kconfig"
endmenu
diff --git a/drivers/soc/Makefile b/drivers/soc/Makefile
index 9bceb12b291d..f678e4d9e585 100644
--- a/drivers/soc/Makefile
+++ b/drivers/soc/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_ARCH_ACTIONS) += actions/
obj-y += aspeed/
obj-$(CONFIG_ARCH_AT91) += atmel/
obj-y += bcm/
+obj-$(CONFIG_SOC_CANAAN) += canaan/
obj-$(CONFIG_ARCH_DOVE) += dove/
obj-$(CONFIG_MACH_DOVE) += dove/
obj-y += fsl/
@@ -28,4 +29,3 @@ obj-y += ti/
obj-$(CONFIG_ARCH_U8500) += ux500/
obj-$(CONFIG_PLAT_VERSATILE) += versatile/
obj-y += xilinx/
-obj-$(CONFIG_SOC_KENDRYTE) += kendryte/
diff --git a/drivers/soc/canaan/Kconfig b/drivers/soc/canaan/Kconfig
new file mode 100644
index 000000000000..8179b69518b4
--- /dev/null
+++ b/drivers/soc/canaan/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config SOC_K210_SYSCTL
+ bool "Canaan Kendryte K210 SoC system controller"
+ depends on RISCV && SOC_CANAAN && OF
+ default SOC_CANAAN
+ select PM
+ select SIMPLE_PM_BUS
+ select SYSCON
+ select MFD_SYSCON
+ help
+ Canaan Kendryte K210 SoC system controller driver.
diff --git a/drivers/soc/canaan/Makefile b/drivers/soc/canaan/Makefile
new file mode 100644
index 000000000000..570280ad7967
--- /dev/null
+++ b/drivers/soc/canaan/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SOC_K210_SYSCTL) += k210-sysctl.o
diff --git a/drivers/soc/canaan/k210-sysctl.c b/drivers/soc/canaan/k210-sysctl.c
new file mode 100644
index 000000000000..27a346c406bc
--- /dev/null
+++ b/drivers/soc/canaan/k210-sysctl.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ */
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/clk.h>
+#include <asm/soc.h>
+
+#include <soc/canaan/k210-sysctl.h>
+
+static int k210_sysctl_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct clk *pclk;
+ int ret;
+
+ dev_info(dev, "K210 system controller\n");
+
+ /* Get power bus clock */
+ pclk = devm_clk_get(dev, NULL);
+ if (IS_ERR(pclk))
+ return dev_err_probe(dev, PTR_ERR(pclk),
+ "Get bus clock failed\n");
+
+ ret = clk_prepare_enable(pclk);
+ if (ret) {
+ dev_err(dev, "Enable bus clock failed\n");
+ return ret;
+ }
+
+ /* Populate children */
+ ret = devm_of_platform_populate(dev);
+ if (ret)
+ dev_err(dev, "Populate platform failed %d\n", ret);
+
+ return ret;
+}
+
+static const struct of_device_id k210_sysctl_of_match[] = {
+ { .compatible = "canaan,k210-sysctl", },
+ { /* sentinel */ },
+};
+
+static struct platform_driver k210_sysctl_driver = {
+ .driver = {
+ .name = "k210-sysctl",
+ .of_match_table = k210_sysctl_of_match,
+ },
+ .probe = k210_sysctl_probe,
+};
+builtin_platform_driver(k210_sysctl_driver);
+
+/*
+ * System controller registers base address and size.
+ */
+#define K210_SYSCTL_BASE_ADDR 0x50440000ULL
+#define K210_SYSCTL_BASE_SIZE 0x1000
+
+/*
+ * This needs to be called very early during initialization, given that
+ * PLL1 needs to be enabled to be able to use all SRAM.
+ */
+static void __init k210_soc_early_init(const void *fdt)
+{
+ void __iomem *sysctl_base;
+
+ sysctl_base = ioremap(K210_SYSCTL_BASE_ADDR, K210_SYSCTL_BASE_SIZE);
+ if (!sysctl_base)
+ panic("k210-sysctl: ioremap failed");
+
+ k210_clk_early_init(sysctl_base);
+
+ iounmap(sysctl_base);
+}
+SOC_EARLY_INIT_DECLARE(k210_soc, "canaan,kendryte-k210", k210_soc_early_init);
diff --git a/drivers/soc/kendryte/Kconfig b/drivers/soc/kendryte/Kconfig
deleted file mode 100644
index 49785b1b0217..000000000000
--- a/drivers/soc/kendryte/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-if SOC_KENDRYTE
-
-config K210_SYSCTL
- bool "Kendryte K210 system controller"
- default y
- depends on RISCV
- help
- Enables controlling the K210 various clocks and to enable
- general purpose use of the extra 2MB of SRAM normally
- reserved for the AI engine.
-
-endif
diff --git a/drivers/soc/kendryte/Makefile b/drivers/soc/kendryte/Makefile
deleted file mode 100644
index 002d9ce95c0d..000000000000
--- a/drivers/soc/kendryte/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_K210_SYSCTL) += k210-sysctl.o
diff --git a/drivers/soc/kendryte/k210-sysctl.c b/drivers/soc/kendryte/k210-sysctl.c
deleted file mode 100644
index 707019223dd8..000000000000
--- a/drivers/soc/kendryte/k210-sysctl.c
+++ /dev/null
@@ -1,260 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) 2019 Christoph Hellwig.
- * Copyright (c) 2019 Western Digital Corporation or its affiliates.
- */
-#include <linux/types.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/clk-provider.h>
-#include <linux/clkdev.h>
-#include <linux/bitfield.h>
-#include <asm/soc.h>
-
-#define K210_SYSCTL_CLK0_FREQ 26000000UL
-
-/* Registers base address */
-#define K210_SYSCTL_SYSCTL_BASE_ADDR 0x50440000ULL
-
-/* Registers */
-#define K210_SYSCTL_PLL0 0x08
-#define K210_SYSCTL_PLL1 0x0c
-/* clkr: 4bits, clkf1: 6bits, clkod: 4bits, bwadj: 4bits */
-#define PLL_RESET (1 << 20)
-#define PLL_PWR (1 << 21)
-#define PLL_INTFB (1 << 22)
-#define PLL_BYPASS (1 << 23)
-#define PLL_TEST (1 << 24)
-#define PLL_OUT_EN (1 << 25)
-#define PLL_TEST_EN (1 << 26)
-#define K210_SYSCTL_PLL_LOCK 0x18
-#define PLL0_LOCK1 (1 << 0)
-#define PLL0_LOCK2 (1 << 1)
-#define PLL0_SLIP_CLEAR (1 << 2)
-#define PLL0_TEST_CLK_OUT (1 << 3)
-#define PLL1_LOCK1 (1 << 8)
-#define PLL1_LOCK2 (1 << 9)
-#define PLL1_SLIP_CLEAR (1 << 10)
-#define PLL1_TEST_CLK_OUT (1 << 11)
-#define PLL2_LOCK1 (1 << 16)
-#define PLL2_LOCK2 (1 << 16)
-#define PLL2_SLIP_CLEAR (1 << 18)
-#define PLL2_TEST_CLK_OUT (1 << 19)
-#define K210_SYSCTL_CLKSEL0 0x20
-#define CLKSEL_ACLK (1 << 0)
-#define K210_SYSCTL_CLKEN_CENT 0x28
-#define CLKEN_CPU (1 << 0)
-#define CLKEN_SRAM0 (1 << 1)
-#define CLKEN_SRAM1 (1 << 2)
-#define CLKEN_APB0 (1 << 3)
-#define CLKEN_APB1 (1 << 4)
-#define CLKEN_APB2 (1 << 5)
-#define K210_SYSCTL_CLKEN_PERI 0x2c
-#define CLKEN_ROM (1 << 0)
-#define CLKEN_DMA (1 << 1)
-#define CLKEN_AI (1 << 2)
-#define CLKEN_DVP (1 << 3)
-#define CLKEN_FFT (1 << 4)
-#define CLKEN_GPIO (1 << 5)
-#define CLKEN_SPI0 (1 << 6)
-#define CLKEN_SPI1 (1 << 7)
-#define CLKEN_SPI2 (1 << 8)
-#define CLKEN_SPI3 (1 << 9)
-#define CLKEN_I2S0 (1 << 10)
-#define CLKEN_I2S1 (1 << 11)
-#define CLKEN_I2S2 (1 << 12)
-#define CLKEN_I2C0 (1 << 13)
-#define CLKEN_I2C1 (1 << 14)
-#define CLKEN_I2C2 (1 << 15)
-#define CLKEN_UART1 (1 << 16)
-#define CLKEN_UART2 (1 << 17)
-#define CLKEN_UART3 (1 << 18)
-#define CLKEN_AES (1 << 19)
-#define CLKEN_FPIO (1 << 20)
-#define CLKEN_TIMER0 (1 << 21)
-#define CLKEN_TIMER1 (1 << 22)
-#define CLKEN_TIMER2 (1 << 23)
-#define CLKEN_WDT0 (1 << 24)
-#define CLKEN_WDT1 (1 << 25)
-#define CLKEN_SHA (1 << 26)
-#define CLKEN_OTP (1 << 27)
-#define CLKEN_RTC (1 << 29)
-
-struct k210_sysctl {
- void __iomem *regs;
- struct clk_hw hw;
-};
-
-static void k210_set_bits(u32 val, void __iomem *reg)
-{
- writel(readl(reg) | val, reg);
-}
-
-static void k210_clear_bits(u32 val, void __iomem *reg)
-{
- writel(readl(reg) & ~val, reg);
-}
-
-static void k210_pll1_enable(void __iomem *regs)
-{
- u32 val;
-
- val = readl(regs + K210_SYSCTL_PLL1);
- val &= ~GENMASK(19, 0); /* clkr1 = 0 */
- val |= FIELD_PREP(GENMASK(9, 4), 0x3B); /* clkf1 = 59 */
- val |= FIELD_PREP(GENMASK(13, 10), 0x3); /* clkod1 = 3 */
- val |= FIELD_PREP(GENMASK(19, 14), 0x3B); /* bwadj1 = 59 */
- writel(val, regs + K210_SYSCTL_PLL1);
-
- k210_clear_bits(PLL_BYPASS, regs + K210_SYSCTL_PLL1);
- k210_set_bits(PLL_PWR, regs + K210_SYSCTL_PLL1);
-
- /*
- * Reset the pll. The magic NOPs come from the Kendryte reference SDK.
- */
- k210_clear_bits(PLL_RESET, regs + K210_SYSCTL_PLL1);
- k210_set_bits(PLL_RESET, regs + K210_SYSCTL_PLL1);
- nop();
- nop();
- k210_clear_bits(PLL_RESET, regs + K210_SYSCTL_PLL1);
-
- for (;;) {
- val = readl(regs + K210_SYSCTL_PLL_LOCK);
- if (val & PLL1_LOCK2)
- break;
- writel(val | PLL1_SLIP_CLEAR, regs + K210_SYSCTL_PLL_LOCK);
- }
-
- k210_set_bits(PLL_OUT_EN, regs + K210_SYSCTL_PLL1);
-}
-
-static unsigned long k210_sysctl_clk_recalc_rate(struct clk_hw *hw,
- unsigned long parent_rate)
-{
- struct k210_sysctl *s = container_of(hw, struct k210_sysctl, hw);
- u32 clksel0, pll0;
- u64 pll0_freq, clkr0, clkf0, clkod0;
-
- /*
- * If the clock selector is not set, use the base frequency.
- * Otherwise, use PLL0 frequency with a frequency divisor.
- */
- clksel0 = readl(s->regs + K210_SYSCTL_CLKSEL0);
- if (!(clksel0 & CLKSEL_ACLK))
- return K210_SYSCTL_CLK0_FREQ;
-
- /*
- * Get PLL0 frequency:
- * freq = base frequency * clkf0 / (clkr0 * clkod0)
- */
- pll0 = readl(s->regs + K210_SYSCTL_PLL0);
- clkr0 = 1 + FIELD_GET(GENMASK(3, 0), pll0);
- clkf0 = 1 + FIELD_GET(GENMASK(9, 4), pll0);
- clkod0 = 1 + FIELD_GET(GENMASK(13, 10), pll0);
- pll0_freq = clkf0 * K210_SYSCTL_CLK0_FREQ / (clkr0 * clkod0);
-
- /* Get the frequency divisor from the clock selector */
- return pll0_freq / (2ULL << FIELD_GET(0x00000006, clksel0));
-}
-
-static const struct clk_ops k210_sysctl_clk_ops = {
- .recalc_rate = k210_sysctl_clk_recalc_rate,
-};
-
-static const struct clk_init_data k210_clk_init_data = {
- .name = "k210-sysctl-pll1",
- .ops = &k210_sysctl_clk_ops,
-};
-
-static int k210_sysctl_probe(struct platform_device *pdev)
-{
- struct k210_sysctl *s;
- int error;
-
- pr_info("Kendryte K210 SoC sysctl\n");
-
- s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
-
- s->regs = devm_ioremap_resource(&pdev->dev,
- platform_get_resource(pdev, IORESOURCE_MEM, 0));
- if (IS_ERR(s->regs))
- return PTR_ERR(s->regs);
-
- s->hw.init = &k210_clk_init_data;
- error = devm_clk_hw_register(&pdev->dev, &s->hw);
- if (error) {
- dev_err(&pdev->dev, "failed to register clk");
- return error;
- }
-
- error = devm_of_clk_add_hw_provider(&pdev->dev, of_clk_hw_simple_get,
- &s->hw);
- if (error) {
- dev_err(&pdev->dev, "adding clk provider failed\n");
- return error;
- }
-
- return 0;
-}
-
-static const struct of_device_id k210_sysctl_of_match[] = {
- { .compatible = "kendryte,k210-sysctl", },
- {}
-};
-
-static struct platform_driver k210_sysctl_driver = {
- .driver = {
- .name = "k210-sysctl",
- .of_match_table = k210_sysctl_of_match,
- },
- .probe = k210_sysctl_probe,
-};
-
-static int __init k210_sysctl_init(void)
-{
- return platform_driver_register(&k210_sysctl_driver);
-}
-core_initcall(k210_sysctl_init);
-
-/*
- * This needs to be called very early during initialization, given that
- * PLL1 needs to be enabled to be able to use all SRAM.
- */
-static void __init k210_soc_early_init(const void *fdt)
-{
- void __iomem *regs;
-
- regs = ioremap(K210_SYSCTL_SYSCTL_BASE_ADDR, 0x1000);
- if (!regs)
- panic("K210 sysctl ioremap");
-
- /* Enable PLL1 to make the KPU SRAM useable */
- k210_pll1_enable(regs);
-
- k210_set_bits(PLL_OUT_EN, regs + K210_SYSCTL_PLL0);
-
- k210_set_bits(CLKEN_CPU | CLKEN_SRAM0 | CLKEN_SRAM1,
- regs + K210_SYSCTL_CLKEN_CENT);
- k210_set_bits(CLKEN_ROM | CLKEN_TIMER0 | CLKEN_RTC,
- regs + K210_SYSCTL_CLKEN_PERI);
-
- k210_set_bits(CLKSEL_ACLK, regs + K210_SYSCTL_CLKSEL0);
-
- iounmap(regs);
-}
-SOC_EARLY_INIT_DECLARE(generic_k210, "kendryte,k210", k210_soc_early_init);
-
-#ifdef CONFIG_SOC_KENDRYTE_K210_DTB_BUILTIN
-/*
- * Generic entry for the default k210.dtb embedded DTB for boards with:
- * - Vendor ID: 0x4B5
- * - Arch ID: 0xE59889E6A5A04149 (= "Canaan AI" in UTF-8 encoded Chinese)
- * - Impl ID: 0x4D41495832303030 (= "MAIX2000")
- * These values are reported by the SiPEED MAXDUINO, SiPEED MAIX GO and
- * SiPEED Dan dock boards.
- */
-SOC_BUILTIN_DTB_DECLARE(k210, 0x4B5, 0xE59889E6A5A04149, 0x4D41495832303030);
-#endif
diff --git a/drivers/soc/litex/Kconfig b/drivers/soc/litex/Kconfig
index 7a7c38282e11..e7011d665b15 100644
--- a/drivers/soc/litex/Kconfig
+++ b/drivers/soc/litex/Kconfig
@@ -12,9 +12,21 @@ config LITEX_SOC_CONTROLLER
select LITEX
help
This option enables the SoC Controller Driver which verifies
- LiteX CSR access and provides common litex_get_reg/litex_set_reg
+ LiteX CSR access and provides common litex_[read|write]*
accessors.
All drivers that use functions from litex.h must depend on
LITEX.
+config LITEX_SUBREG_SIZE
+ int "Size of a LiteX CSR subregister, in bytes"
+ depends on LITEX
+ range 1 4
+ default 4
+ help
+ LiteX MMIO registers (referred to as Configuration and Status
+ registers, or CSRs) are spread across adjacent 8- or 32-bit
+ subregisters, located at 32-bit aligned MMIO addresses. Use
+ this to select the appropriate size (1 or 4 bytes) matching
+ your particular LiteX build.
+
endmenu
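
The new help text describes the layout this option selects between: a logical multi-byte CSR may be spread over consecutive 32-bit-aligned subregisters, each carrying 1 or 4 bytes of data. A rough sketch of how a driver reassembles a logical CSR in the 1-byte-subregister configuration, mirroring the litex_get_reg() logic removed in the next file (helper and constant names here are placeholders):

#include <linux/io.h>

#define SUBREG_SIZE		1	/* data bytes per 32-bit slot */
#define SUBREG_SIZE_BIT		(SUBREG_SIZE * 8)
#define SUBREG_ALIGN		4	/* slots are 32-bit aligned */

/* Reassemble a reg_size-byte logical CSR from its subregisters. */
static unsigned long csr_read(void __iomem *reg, unsigned long reg_size)
{
	unsigned long i, shift, result = 0;

	for (i = 0; i < reg_size; i++) {
		shift = (reg_size - i - 1) * SUBREG_SIZE_BIT;
		/* only the low SUBREG_SIZE bytes carry data, the rest
		 * read back as zero, so no masking is needed */
		result |= (unsigned long)readl(reg + i * SUBREG_ALIGN)
			  << shift;
	}
	return result;
}
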
diff --git a/drivers/soc/litex/litex_soc_ctrl.c b/drivers/soc/litex/litex_soc_ctrl.c
index 9b0766384570..6268bfa7f0d6 100644
--- a/drivers/soc/litex/litex_soc_ctrl.c
+++ b/drivers/soc/litex/litex_soc_ctrl.c
@@ -15,79 +15,11 @@
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/io.h>
+#include <linux/reboot.h>
-/*
- * LiteX SoC Generator, depending on the configuration, can split a single
- * logical CSR (Control&Status Register) into a series of consecutive physical
- * registers.
- *
- * For example, in the configuration with 8-bit CSR Bus, 32-bit aligned (the
- * default one for 32-bit CPUs) a 32-bit logical CSR will be generated as four
- * 32-bit physical registers, each one containing one byte of meaningful data.
- *
- * For details see: https://github.com/enjoy-digital/litex/wiki/CSR-Bus
- *
- * The purpose of `litex_set_reg`/`litex_get_reg` is to implement the logic
- * of writing to/reading from the LiteX CSR in a single place that can be
- * then reused by all LiteX drivers.
- */
-
-/**
- * litex_set_reg() - Writes the value to the LiteX CSR (Control&Status Register)
- * @reg: Address of the CSR
- * @reg_size: The width of the CSR expressed in the number of bytes
- * @val: Value to be written to the CSR
- *
- * In the currently supported LiteX configuration (8-bit CSR Bus, 32-bit aligned),
- * a 32-bit LiteX CSR is generated as 4 consecutive 32-bit physical registers,
- * each one containing one byte of meaningful data.
- *
- * This function splits a single possibly multi-byte write into a series of
- * single-byte writes with a proper offset.
- */
-void litex_set_reg(void __iomem *reg, unsigned long reg_size,
- unsigned long val)
-{
- unsigned long shifted_data, shift, i;
-
- for (i = 0; i < reg_size; ++i) {
- shift = ((reg_size - i - 1) * LITEX_SUBREG_SIZE_BIT);
- shifted_data = val >> shift;
-
- WRITE_LITEX_SUBREGISTER(shifted_data, reg, i);
- }
-}
-EXPORT_SYMBOL_GPL(litex_set_reg);
-
-/**
- * litex_get_reg() - Reads the value of the LiteX CSR (Control&Status Register)
- * @reg: Address of the CSR
- * @reg_size: The width of the CSR expressed in the number of bytes
- *
- * Return: Value read from the CSR
- *
- * In the currently supported LiteX configuration (8-bit CSR Bus, 32-bit aligned),
- * a 32-bit LiteX CSR is generated as 4 consecutive 32-bit physical registers,
- * each one containing one byte of meaningful data.
- *
- * This function generates a series of single-byte reads with a proper offset
- * and joins their results into a single multi-byte value.
- */
-unsigned long litex_get_reg(void __iomem *reg, unsigned long reg_size)
-{
- unsigned long shifted_data, shift, i;
- unsigned long result = 0;
-
- for (i = 0; i < reg_size; ++i) {
- shifted_data = READ_LITEX_SUBREGISTER(reg, i);
-
- shift = ((reg_size - i - 1) * LITEX_SUBREG_SIZE_BIT);
- result |= (shifted_data << shift);
- }
-
- return result;
-}
-EXPORT_SYMBOL_GPL(litex_get_reg);
+/* reset register located at the base address */
+#define RESET_REG_OFF 0x00
+#define RESET_REG_VALUE 0x00000001
#define SCRATCH_REG_OFF 0x04
#define SCRATCH_REG_VALUE 0x12345678
@@ -131,15 +63,27 @@ static int litex_check_csr_access(void __iomem *reg_addr)
/* restore original value of the SCRATCH register */
litex_write32(reg_addr + SCRATCH_REG_OFF, SCRATCH_REG_VALUE);
- pr_info("LiteX SoC Controller driver initialized");
+ pr_info("LiteX SoC Controller driver initialized: subreg:%d, align:%d",
+ LITEX_SUBREG_SIZE, LITEX_SUBREG_ALIGN);
return 0;
}
struct litex_soc_ctrl_device {
void __iomem *base;
+ struct notifier_block reset_nb;
};
+static int litex_reset_handler(struct notifier_block *this, unsigned long mode,
+ void *cmd)
+{
+ struct litex_soc_ctrl_device *soc_ctrl_dev =
+ container_of(this, struct litex_soc_ctrl_device, reset_nb);
+
+ litex_write32(soc_ctrl_dev->base + RESET_REG_OFF, RESET_REG_VALUE);
+ return NOTIFY_DONE;
+}
+
#ifdef CONFIG_OF
static const struct of_device_id litex_soc_ctrl_of_match[] = {
{.compatible = "litex,soc-controller"},
@@ -151,6 +95,7 @@ MODULE_DEVICE_TABLE(of, litex_soc_ctrl_of_match);
static int litex_soc_ctrl_probe(struct platform_device *pdev)
{
struct litex_soc_ctrl_device *soc_ctrl_dev;
+ int error;
soc_ctrl_dev = devm_kzalloc(&pdev->dev, sizeof(*soc_ctrl_dev), GFP_KERNEL);
if (!soc_ctrl_dev)
@@ -160,7 +105,29 @@ static int litex_soc_ctrl_probe(struct platform_device *pdev)
if (IS_ERR(soc_ctrl_dev->base))
return PTR_ERR(soc_ctrl_dev->base);
- return litex_check_csr_access(soc_ctrl_dev->base);
+ error = litex_check_csr_access(soc_ctrl_dev->base);
+ if (error)
+ return error;
+
+ platform_set_drvdata(pdev, soc_ctrl_dev);
+
+ soc_ctrl_dev->reset_nb.notifier_call = litex_reset_handler;
+ soc_ctrl_dev->reset_nb.priority = 128;
+ error = register_restart_handler(&soc_ctrl_dev->reset_nb);
+ if (error) {
+ dev_warn(&pdev->dev, "cannot register restart handler: %d\n",
+ error);
+ }
+
+ return 0;
+}
+
+static int litex_soc_ctrl_remove(struct platform_device *pdev)
+{
+ struct litex_soc_ctrl_device *soc_ctrl_dev = platform_get_drvdata(pdev);
+
+ unregister_restart_handler(&soc_ctrl_dev->reset_nb);
+ return 0;
}
static struct platform_driver litex_soc_ctrl_driver = {
@@ -169,6 +136,7 @@ static struct platform_driver litex_soc_ctrl_driver = {
.of_match_table = of_match_ptr(litex_soc_ctrl_of_match)
},
.probe = litex_soc_ctrl_probe,
+ .remove = litex_soc_ctrl_remove,
};
module_platform_driver(litex_soc_ctrl_driver);
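
The probe now also registers a restart handler, so rebooting a LiteX system goes through a write of RESET_REG_VALUE to the controller's reset CSR. Restart notifiers are run by the kernel's restart chain at reboot; by convention priority 128 marks a default handler (0 is a last resort, 255 overrides the platform reset). A stripped-down sketch of the pattern with hypothetical names:

#include <linux/io.h>
#include <linux/notifier.h>
#include <linux/reboot.h>

struct foo_ctrl {
	void __iomem *base;
	struct notifier_block restart_nb;
};

static int foo_restart(struct notifier_block *nb, unsigned long mode,
		       void *cmd)
{
	struct foo_ctrl *ctrl =
		container_of(nb, struct foo_ctrl, restart_nb);

	writel(1, ctrl->base);	/* hypothetical reset register */
	return NOTIFY_DONE;
}

static int foo_register(struct foo_ctrl *ctrl)
{
	ctrl->restart_nb.notifier_call = foo_restart;
	ctrl->restart_nb.priority = 128;	/* default priority */
	return register_restart_handler(&ctrl->restart_nb);
}
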
diff --git a/drivers/soc/sifive/sifive_l2_cache.c b/drivers/soc/sifive/sifive_l2_cache.c
index 44d7e1951da3..59640a1d0b28 100644
--- a/drivers/soc/sifive/sifive_l2_cache.c
+++ b/drivers/soc/sifive/sifive_l2_cache.c
@@ -17,6 +17,10 @@
#define SIFIVE_L2_DIRECCFIX_HIGH 0x104
#define SIFIVE_L2_DIRECCFIX_COUNT 0x108
+#define SIFIVE_L2_DIRECCFAIL_LOW 0x120
+#define SIFIVE_L2_DIRECCFAIL_HIGH 0x124
+#define SIFIVE_L2_DIRECCFAIL_COUNT 0x128
+
#define SIFIVE_L2_DATECCFIX_LOW 0x140
#define SIFIVE_L2_DATECCFIX_HIGH 0x144
#define SIFIVE_L2_DATECCFIX_COUNT 0x148
@@ -29,7 +33,7 @@
#define SIFIVE_L2_WAYENABLE 0x08
#define SIFIVE_L2_ECCINJECTERR 0x40
-#define SIFIVE_L2_MAX_ECCINTR 3
+#define SIFIVE_L2_MAX_ECCINTR 4
static void __iomem *l2_base;
static int g_irq[SIFIVE_L2_MAX_ECCINTR];
@@ -39,6 +43,7 @@ enum {
DIR_CORR = 0,
DATA_CORR,
DATA_UNCORR,
+ DIR_UNCORR,
};
#ifdef CONFIG_DEBUG_FS
@@ -93,6 +98,7 @@ static void l2_config_read(void)
static const struct of_device_id sifive_l2_ids[] = {
{ .compatible = "sifive,fu540-c000-ccache" },
+ { .compatible = "sifive,fu740-c000-ccache" },
{ /* end of table */ },
};
@@ -155,6 +161,15 @@ static irqreturn_t l2_int_handler(int irq, void *device)
atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE,
"DirECCFix");
}
+ if (irq == g_irq[DIR_UNCORR]) {
+ add_h = readl(l2_base + SIFIVE_L2_DIRECCFAIL_HIGH);
+ add_l = readl(l2_base + SIFIVE_L2_DIRECCFAIL_LOW);
+ /* Reading this register clears the DirFail interrupt signal */
+ readl(l2_base + SIFIVE_L2_DIRECCFAIL_COUNT);
+ atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_UE,
+ "DirECCFail");
+ panic("L2CACHE: DirFail @ 0x%08X.%08X\n", add_h, add_l);
+ }
if (irq == g_irq[DATA_CORR]) {
add_h = readl(l2_base + SIFIVE_L2_DATECCFIX_HIGH);
add_l = readl(l2_base + SIFIVE_L2_DATECCFIX_LOW);
@@ -181,7 +196,7 @@ static int __init sifive_l2_init(void)
{
struct device_node *np;
struct resource res;
- int i, rc;
+ int i, rc, intr_num;
np = of_find_matching_node(NULL, sifive_l2_ids);
if (!np)
@@ -194,7 +209,13 @@ static int __init sifive_l2_init(void)
if (!l2_base)
return -ENOMEM;
- for (i = 0; i < SIFIVE_L2_MAX_ECCINTR; i++) {
+ intr_num = of_property_count_u32_elems(np, "interrupts");
+ if (!intr_num) {
+ pr_err("L2CACHE: no interrupts property\n");
+ return -ENODEV;
+ }
+
+ for (i = 0; i < intr_num; i++) {
g_irq[i] = irq_of_parse_and_map(np, i);
rc = request_irq(g_irq[i], l2_int_handler, 0, "l2_ecc", NULL);
if (rc) {
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index 76820d0b9deb..06bac8ba14e9 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -48,8 +48,6 @@ struct sdw_intel {
#endif
};
-#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1)
-
int intel_master_startup(struct platform_device *pdev);
int intel_master_process_wakeen_event(struct platform_device *pdev);
diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c
index bc8520eb385e..05b726cdfebc 100644
--- a/drivers/soundwire/intel_init.c
+++ b/drivers/soundwire/intel_init.c
@@ -18,42 +18,12 @@
#include "cadence_master.h"
#include "intel.h"
-#define SDW_LINK_TYPE 4 /* from Intel ACPI documentation */
-#define SDW_MAX_LINKS 4
#define SDW_SHIM_LCAP 0x0
#define SDW_SHIM_BASE 0x2C000
#define SDW_ALH_BASE 0x2C800
#define SDW_LINK_BASE 0x30000
#define SDW_LINK_SIZE 0x10000
-static int ctrl_link_mask;
-module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444);
-MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)");
-
-static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
-{
- struct fwnode_handle *link;
- char name[32];
- u32 quirk_mask = 0;
-
- /* Find master handle */
- snprintf(name, sizeof(name),
- "mipi-sdw-link-%d-subproperties", i);
-
- link = fwnode_get_named_child_node(fw_node, name);
- if (!link)
- return false;
-
- fwnode_property_read_u32(link,
- "intel-quirk-mask",
- &quirk_mask);
-
- if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
- return false;
-
- return true;
-}
-
static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx)
{
struct sdw_intel_link_res *link = ctx->links;
@@ -81,74 +51,6 @@ static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx)
return 0;
}
-static int
-sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
-{
- struct acpi_device *adev;
- int ret, i;
- u8 count;
-
- if (acpi_bus_get_device(info->handle, &adev))
- return -EINVAL;
-
- /* Found controller, find links supported */
- count = 0;
- ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
- "mipi-sdw-master-count", &count, 1);
-
- /*
- * In theory we could check the number of links supported in
- * hardware, but in that step we cannot assume SoundWire IP is
- * powered.
- *
- * In addition, if the BIOS doesn't even provide this
- * 'master-count' property then all the inits based on link
- * masks will fail as well.
- *
- * We will check the hardware capabilities in the startup() step
- */
-
- if (ret) {
- dev_err(&adev->dev,
- "Failed to read mipi-sdw-master-count: %d\n", ret);
- return -EINVAL;
- }
-
- /* Check count is within bounds */
- if (count > SDW_MAX_LINKS) {
- dev_err(&adev->dev, "Link count %d exceeds max %d\n",
- count, SDW_MAX_LINKS);
- return -EINVAL;
- }
-
- if (!count) {
- dev_warn(&adev->dev, "No SoundWire links detected\n");
- return -EINVAL;
- }
- dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count);
-
- info->count = count;
- info->link_mask = 0;
-
- for (i = 0; i < count; i++) {
- if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) {
- dev_dbg(&adev->dev,
- "Link %d masked, will not be enabled\n", i);
- continue;
- }
-
- if (!is_link_enabled(acpi_fwnode_handle(adev), i)) {
- dev_dbg(&adev->dev,
- "Link %d not selected in firmware\n", i);
- continue;
- }
-
- info->link_mask |= BIT(i);
- }
-
- return 0;
-}
-
#define HDA_DSP_REG_ADSPIC2 (0x10)
#define HDA_DSP_REG_ADSPIS2 (0x14)
#define HDA_DSP_REG_ADSPIC2_SNDW BIT(5)
@@ -357,66 +259,6 @@ sdw_intel_startup_controller(struct sdw_intel_ctx *ctx)
return 0;
}
-static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
- void *cdata, void **return_value)
-{
- struct sdw_intel_acpi_info *info = cdata;
- struct acpi_device *adev;
- acpi_status status;
- u64 adr;
-
- status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr);
- if (ACPI_FAILURE(status))
- return AE_OK; /* keep going */
-
- if (acpi_bus_get_device(handle, &adev)) {
- pr_err("%s: Couldn't find ACPI handle\n", __func__);
- return AE_NOT_FOUND;
- }
-
- info->handle = handle;
-
- /*
- * On some Intel platforms, multiple children of the HDAS
- * device can be found, but only one of them is the SoundWire
- * controller. The SNDW device is always exposed with
- * Name(_ADR, 0x40000000), with bits 31..28 representing the
- * SoundWire link so filter accordingly
- */
- if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
- return AE_OK; /* keep going */
-
- /* device found, stop namespace walk */
- return AE_CTRL_TERMINATE;
-}
-
-/**
- * sdw_intel_acpi_scan() - SoundWire Intel init routine
- * @parent_handle: ACPI parent handle
- * @info: description of what firmware/DSDT tables expose
- *
- * This scans the namespace and queries firmware to figure out which
- * links to enable. A follow-up use of sdw_intel_probe() and
- * sdw_intel_startup() is required for creation of devices and bus
- * startup
- */
-int sdw_intel_acpi_scan(acpi_handle *parent_handle,
- struct sdw_intel_acpi_info *info)
-{
- acpi_status status;
-
- info->handle = NULL;
- status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
- parent_handle, 1,
- sdw_intel_acpi_cb,
- NULL, info, NULL);
- if (ACPI_FAILURE(status) || info->handle == NULL)
- return -ENODEV;
-
- return sdw_intel_scan_controller(info);
-}
-EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SOUNDWIRE_INTEL_INIT);
-
/**
* sdw_intel_probe() - SoundWire Intel probe routine
* @res: resource data
diff --git a/drivers/staging/vme/devices/vme_user.c b/drivers/staging/vme/devices/vme_user.c
index 35d7260e2271..e3fa38bd7f12 100644
--- a/drivers/staging/vme/devices/vme_user.c
+++ b/drivers/staging/vme/devices/vme_user.c
@@ -175,7 +175,7 @@ static ssize_t buffer_from_user(unsigned int minor, const char __user *buf,
static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
- unsigned int minor = MINOR(file_inode(file)->i_rdev);
+ unsigned int minor = iminor(file_inode(file));
ssize_t retval;
size_t image_size;
@@ -218,7 +218,7 @@ static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count,
static ssize_t vme_user_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- unsigned int minor = MINOR(file_inode(file)->i_rdev);
+ unsigned int minor = iminor(file_inode(file));
ssize_t retval;
size_t image_size;
@@ -260,7 +260,7 @@ static ssize_t vme_user_write(struct file *file, const char __user *buf,
static loff_t vme_user_llseek(struct file *file, loff_t off, int whence)
{
- unsigned int minor = MINOR(file_inode(file)->i_rdev);
+ unsigned int minor = iminor(file_inode(file));
size_t image_size;
loff_t res;
@@ -294,7 +294,7 @@ static int vme_user_ioctl(struct inode *inode, struct file *file,
struct vme_slave slave;
struct vme_irq_id irq_req;
unsigned long copied;
- unsigned int minor = MINOR(inode->i_rdev);
+ unsigned int minor = iminor(inode);
int retval;
dma_addr_t pci_addr;
void __user *argp = (void __user *)arg;
@@ -412,7 +412,7 @@ vme_user_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
int ret;
struct inode *inode = file_inode(file);
- unsigned int minor = MINOR(inode->i_rdev);
+ unsigned int minor = iminor(inode);
mutex_lock(&image[minor].mutex);
ret = vme_user_ioctl(inode, file, cmd, arg);
@@ -481,7 +481,7 @@ static int vme_user_master_mmap(unsigned int minor, struct vm_area_struct *vma)
static int vme_user_mmap(struct file *file, struct vm_area_struct *vma)
{
- unsigned int minor = MINOR(file_inode(file)->i_rdev);
+ unsigned int minor = iminor(file_inode(file));
if (type[minor] == MASTER_MINOR)
return vme_user_master_mmap(minor, vma);
diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index e4a9b9fe3dfb..2a6165febd3b 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c
@@ -1006,7 +1006,7 @@ static void tgt_agent_fetch_work(struct work_struct *work)
agent->state = AGENT_STATE_SUSPENDED;
spin_unlock_bh(&agent->lock);
- };
+ }
}
static struct sbp_target_agent *sbp_target_agent_register(
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 8ed93fd205c7..ee3d52061281 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -315,10 +315,8 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, int op,
* Only allocate as many vector entries as the bio code allows us to,
* we'll loop later on until we have handled the whole request.
*/
- if (sg_num > BIO_MAX_PAGES)
- sg_num = BIO_MAX_PAGES;
-
- bio = bio_alloc_bioset(GFP_NOIO, sg_num, &ib_dev->ibd_bio_set);
+ bio = bio_alloc_bioset(GFP_NOIO, bio_max_segs(sg_num),
+ &ib_dev->ibd_bio_set);
if (!bio) {
pr_err("Unable to allocate memory for bio\n");
return NULL;
@@ -638,8 +636,7 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio,
return -ENODEV;
}
- bip = bio_integrity_alloc(bio, GFP_NOIO,
- min_t(unsigned int, cmd->t_prot_nents, BIO_MAX_PAGES));
+ bip = bio_integrity_alloc(bio, GFP_NOIO, bio_max_segs(cmd->t_prot_nents));
if (IS_ERR(bip)) {
pr_err("Unable to allocate bio_integrity_payload\n");
return PTR_ERR(bip);
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 14db5e568f22..d4cc43afe05b 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -3739,6 +3739,7 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
spin_unlock(&dev->t10_pr.registration_lock);
put_unaligned_be32(add_len, &buf[4]);
+ target_set_cmd_data_length(cmd, 8 + add_len);
transport_kunmap_data_sg(cmd);
@@ -3757,7 +3758,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
struct t10_pr_registration *pr_reg;
unsigned char *buf;
u64 pr_res_key;
- u32 add_len = 16; /* Hardcoded to 16 when a reservation is held. */
+ u32 add_len = 0;
if (cmd->data_length < 8) {
pr_err("PRIN SA READ_RESERVATIONS SCSI Data Length: %u"
@@ -3775,8 +3776,9 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
pr_reg = dev->dev_pr_res_holder;
if (pr_reg) {
/*
- * Set the hardcoded Additional Length
+ * Set the Additional Length to 16 when a reservation is held
*/
+ add_len = 16;
put_unaligned_be32(add_len, &buf[4]);
if (cmd->data_length < 22)
@@ -3812,6 +3814,8 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
(pr_reg->pr_res_type & 0x0f);
}
+ target_set_cmd_data_length(cmd, 8 + add_len);
+
err:
spin_unlock(&dev->dev_reservation_lock);
transport_kunmap_data_sg(cmd);
@@ -3830,7 +3834,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
struct se_device *dev = cmd->se_dev;
struct t10_reservation *pr_tmpl = &dev->t10_pr;
unsigned char *buf;
- u16 add_len = 8; /* Hardcoded to 8. */
+ u16 len = 8; /* Hardcoded to 8. */
if (cmd->data_length < 6) {
pr_err("PRIN SA REPORT_CAPABILITIES SCSI Data Length:"
@@ -3842,7 +3846,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
if (!buf)
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
- put_unaligned_be16(add_len, &buf[0]);
+ put_unaligned_be16(len, &buf[0]);
buf[2] |= 0x10; /* CRH: Compatible Reservation Handling bit. */
buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */
buf[2] |= 0x04; /* ATP_C: All Target Ports Capable bit */
@@ -3871,6 +3875,8 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
buf[4] |= 0x02; /* PR_TYPE_WRITE_EXCLUSIVE */
buf[5] |= 0x01; /* PR_TYPE_EXCLUSIVE_ACCESS_ALLREG */
+ target_set_cmd_data_length(cmd, len);
+
transport_kunmap_data_sg(cmd);
return 0;
@@ -4031,6 +4037,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
* Set ADDITIONAL_LENGTH
*/
put_unaligned_be32(add_len, &buf[4]);
+ target_set_cmd_data_length(cmd, 8 + add_len);
transport_kunmap_data_sg(cmd);
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 33770e5808ce..3cbc074992bc 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -881,7 +881,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
if (!bio) {
new_bio:
- nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
+ nr_vecs = bio_max_segs(nr_pages);
nr_pages -= nr_vecs;
/*
* Calls bio_kmalloc() and sets bio->bi_end_io()
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 93ea17cbad79..5ecb9f18a53d 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -879,11 +879,9 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
}
EXPORT_SYMBOL(target_complete_cmd);
-void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+void target_set_cmd_data_length(struct se_cmd *cmd, int length)
{
- if ((scsi_status == SAM_STAT_GOOD ||
- cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) &&
- length < cmd->data_length) {
+ if (length < cmd->data_length) {
if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
cmd->residual_count += cmd->data_length - length;
} else {
@@ -893,6 +891,15 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len
cmd->data_length = length;
}
+}
+EXPORT_SYMBOL(target_set_cmd_data_length);
+
+void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+{
+ if (scsi_status == SAM_STAT_GOOD ||
+ cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) {
+ target_set_cmd_data_length(cmd, length);
+ }
target_complete_cmd(cmd, scsi_status);
}
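
Splitting target_set_cmd_data_length() out of target_complete_cmd_with_length() lets the PRIN handlers above shrink the transfer length without completing the command. The rule is the SCSI underflow/residual convention: when the payload actually built is shorter than the initiator's allocation length, record the difference as a residual and truncate the transfer. A simplified model of that arithmetic (the in-tree version also accumulates into residual_count when an underflow was already flagged):

/* Simplified model of the underflow/residual rule. */
struct cmd {
	unsigned int data_length;	/* initiator allocation length */
	unsigned int residual_count;
	bool underflow;
};

static void set_data_length(struct cmd *cmd, unsigned int length)
{
	if (length >= cmd->data_length)
		return;			/* nothing to report */

	cmd->underflow = true;
	cmd->residual_count = cmd->data_length - length;
	cmd->data_length = length;	/* truncate the transfer */
}
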
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index a5991df23581..bf73cd5f4b04 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -1566,6 +1566,88 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
return &udev->se_dev;
}
+static void tcmu_dev_call_rcu(struct rcu_head *p)
+{
+ struct se_device *dev = container_of(p, struct se_device, rcu_head);
+ struct tcmu_dev *udev = TCMU_DEV(dev);
+
+ kfree(udev->uio_info.name);
+ kfree(udev->name);
+ kfree(udev);
+}
+
+static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
+{
+ if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+ kmem_cache_free(tcmu_cmd_cache, cmd);
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static void tcmu_blocks_release(struct radix_tree_root *blocks,
+ int start, int end)
+{
+ int i;
+ struct page *page;
+
+ for (i = start; i < end; i++) {
+ page = radix_tree_delete(blocks, i);
+ if (page) {
+ __free_page(page);
+ atomic_dec(&global_db_count);
+ }
+ }
+}
+
+static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
+{
+ struct tcmu_tmr *tmr, *tmp;
+
+ list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
+ list_del_init(&tmr->queue_entry);
+ kfree(tmr);
+ }
+}
+
+static void tcmu_dev_kref_release(struct kref *kref)
+{
+ struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
+ struct se_device *dev = &udev->se_dev;
+ struct tcmu_cmd *cmd;
+ bool all_expired = true;
+ int i;
+
+ vfree(udev->mb_addr);
+ udev->mb_addr = NULL;
+
+ spin_lock_bh(&timed_out_udevs_lock);
+ if (!list_empty(&udev->timedout_entry))
+ list_del(&udev->timedout_entry);
+ spin_unlock_bh(&timed_out_udevs_lock);
+
+ /* Upper layer should drain all requests before calling this */
+ mutex_lock(&udev->cmdr_lock);
+ idr_for_each_entry(&udev->commands, cmd, i) {
+ if (tcmu_check_and_free_pending_cmd(cmd) != 0)
+ all_expired = false;
+ }
+ /* There can be left over TMR cmds. Remove them. */
+ tcmu_remove_all_queued_tmr(udev);
+ if (!list_empty(&udev->qfull_queue))
+ all_expired = false;
+ idr_destroy(&udev->commands);
+ WARN_ON(!all_expired);
+
+ tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
+ bitmap_free(udev->data_bitmap);
+ mutex_unlock(&udev->cmdr_lock);
+
+ pr_debug("dev_kref_release\n");
+
+ call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
+}
+
static void run_qfull_queue(struct tcmu_dev *udev, bool fail)
{
struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
@@ -1678,6 +1760,25 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
return page;
}
+static void tcmu_vma_open(struct vm_area_struct *vma)
+{
+ struct tcmu_dev *udev = vma->vm_private_data;
+
+ pr_debug("vma_open\n");
+
+ kref_get(&udev->kref);
+}
+
+static void tcmu_vma_close(struct vm_area_struct *vma)
+{
+ struct tcmu_dev *udev = vma->vm_private_data;
+
+ pr_debug("vma_close\n");
+
+ /* release ref from tcmu_vma_open */
+ kref_put(&udev->kref, tcmu_dev_kref_release);
+}
+
static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
{
struct tcmu_dev *udev = vmf->vma->vm_private_data;
@@ -1716,6 +1817,8 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
}
static const struct vm_operations_struct tcmu_vm_ops = {
+ .open = tcmu_vma_open,
+ .close = tcmu_vma_close,
.fault = tcmu_vma_fault,
};
@@ -1732,6 +1835,8 @@ static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
if (vma_pages(vma) != (udev->ring_size >> PAGE_SHIFT))
return -EINVAL;
+ tcmu_vma_open(vma);
+
return 0;
}
@@ -1744,93 +1849,12 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
return -EBUSY;
udev->inode = inode;
- kref_get(&udev->kref);
pr_debug("open\n");
return 0;
}
-static void tcmu_dev_call_rcu(struct rcu_head *p)
-{
- struct se_device *dev = container_of(p, struct se_device, rcu_head);
- struct tcmu_dev *udev = TCMU_DEV(dev);
-
- kfree(udev->uio_info.name);
- kfree(udev->name);
- kfree(udev);
-}
-
-static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
-{
- if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
- kmem_cache_free(tcmu_cmd_cache, cmd);
- return 0;
- }
- return -EINVAL;
-}
-
-static void tcmu_blocks_release(struct radix_tree_root *blocks,
- int start, int end)
-{
- int i;
- struct page *page;
-
- for (i = start; i < end; i++) {
- page = radix_tree_delete(blocks, i);
- if (page) {
- __free_page(page);
- atomic_dec(&global_db_count);
- }
- }
-}
-
-static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
-{
- struct tcmu_tmr *tmr, *tmp;
-
- list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
- list_del_init(&tmr->queue_entry);
- kfree(tmr);
- }
-}
-
-static void tcmu_dev_kref_release(struct kref *kref)
-{
- struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
- struct se_device *dev = &udev->se_dev;
- struct tcmu_cmd *cmd;
- bool all_expired = true;
- int i;
-
- vfree(udev->mb_addr);
- udev->mb_addr = NULL;
-
- spin_lock_bh(&timed_out_udevs_lock);
- if (!list_empty(&udev->timedout_entry))
- list_del(&udev->timedout_entry);
- spin_unlock_bh(&timed_out_udevs_lock);
-
- /* Upper layer should drain all requests before calling this */
- mutex_lock(&udev->cmdr_lock);
- idr_for_each_entry(&udev->commands, cmd, i) {
- if (tcmu_check_and_free_pending_cmd(cmd) != 0)
- all_expired = false;
- }
- /* There can be left over TMR cmds. Remove them. */
- tcmu_remove_all_queued_tmr(udev);
- if (!list_empty(&udev->qfull_queue))
- all_expired = false;
- idr_destroy(&udev->commands);
- WARN_ON(!all_expired);
-
- tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
- bitmap_free(udev->data_bitmap);
- mutex_unlock(&udev->cmdr_lock);
-
- call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
-}
-
static int tcmu_release(struct uio_info *info, struct inode *inode)
{
struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
@@ -1838,8 +1862,7 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
pr_debug("close\n");
- /* release ref from open */
- kref_put(&udev->kref, tcmu_dev_kref_release);
+
return 0;
}
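
The tcmu hunks above tie the device reference to the mapping rather than to the uio file: tcmu_mmap() takes a reference via tcmu_vma_open(), and the VMA's open/close ops keep the count balanced across fork and munmap, so a mapping that outlives the file descriptor still pins the device. Note that .open is not invoked for the initial mmap(), which is why the mmap handler must take the first reference itself. The generic shape of the pattern, with hypothetical names:

#include <linux/kref.h>
#include <linux/mm.h>

struct foo_dev {
	struct kref kref;
};

static void foo_release(struct kref *kref)
{
	/* free the device once the last reference is dropped */
}

static void foo_vma_open(struct vm_area_struct *vma)
{
	struct foo_dev *dev = vma->vm_private_data;

	kref_get(&dev->kref);	/* also runs on fork/VMA split */
}

static void foo_vma_close(struct vm_area_struct *vma)
{
	struct foo_dev *dev = vma->vm_private_data;

	kref_put(&dev->kref, foo_release);
}

static const struct vm_operations_struct foo_vm_ops = {
	.open  = foo_vma_open,
	.close = foo_vma_close,
};
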
diff --git a/drivers/tty/hvc/hvcs.c b/drivers/tty/hvc/hvcs.c
index c90848919644..9afa1dcef2c2 100644
--- a/drivers/tty/hvc/hvcs.c
+++ b/drivers/tty/hvc/hvcs.c
@@ -317,7 +317,6 @@ static void hvcs_hangup(struct tty_struct * tty);
static int hvcs_probe(struct vio_dev *dev,
const struct vio_device_id *id);
-static int hvcs_remove(struct vio_dev *dev);
static int __init hvcs_module_init(void);
static void __exit hvcs_module_exit(void);
static int hvcs_initialize(void);
@@ -819,7 +818,7 @@ static int hvcs_probe(
return 0;
}
-static int hvcs_remove(struct vio_dev *dev)
+static void hvcs_remove(struct vio_dev *dev)
{
struct hvcs_struct *hvcsd = dev_get_drvdata(&dev->dev);
unsigned long flags;
@@ -849,7 +848,6 @@ static int hvcs_remove(struct vio_dev *dev)
printk(KERN_INFO "HVCS: vty-server@%X removed from the"
" vio bus.\n", dev->unit_address);
- return 0;
};
static struct vio_driver hvcs_vio_driver = {
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 85a272c9978e..10ec60d81e84 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -623,7 +623,7 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr,
/* Memory might get onlined immediately. */
atomic64_add(size, &vm->offline_size);
rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name,
- MEMHP_MERGE_RESOURCE);
+ MHP_MERGE_RESOURCE);
if (rc) {
atomic64_sub(size, &vm->offline_size);
dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
@@ -2222,7 +2222,7 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
*/
static void virtio_mem_refresh_config(struct virtio_mem *vm)
{
- const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
+ const struct range pluggable_range = mhp_get_pluggable_range(true);
uint64_t new_plugged_size, usable_region_size, end_addr;
/* the plugged_size is just a reflection of what _we_ did previously */
@@ -2234,15 +2234,25 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm)
/* calculate the last usable memory block id */
virtio_cread_le(vm->vdev, struct virtio_mem_config,
usable_region_size, &usable_region_size);
- end_addr = vm->addr + usable_region_size;
- end_addr = min(end_addr, phys_limit);
+ end_addr = min(vm->addr + usable_region_size - 1,
+ pluggable_range.end);
- if (vm->in_sbm)
- vm->sbm.last_usable_mb_id =
- virtio_mem_phys_to_mb_id(end_addr) - 1;
- else
- vm->bbm.last_usable_bb_id =
- virtio_mem_phys_to_bb_id(vm, end_addr) - 1;
+ if (vm->in_sbm) {
+ vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr);
+ if (!IS_ALIGNED(end_addr + 1, memory_block_size_bytes()))
+ vm->sbm.last_usable_mb_id--;
+ } else {
+ vm->bbm.last_usable_bb_id = virtio_mem_phys_to_bb_id(vm,
+ end_addr);
+ if (!IS_ALIGNED(end_addr + 1, vm->bbm.bb_size))
+ vm->bbm.last_usable_bb_id--;
+ }
+ /*
+ * If we cannot plug any of our device memory (e.g., nothing in the
+ * usable region is addressable), the last usable memory block id will
+ * be smaller than the first usable memory block id. We'll stop
+ * attempting to add memory with -ENOSPC from our main loop.
+ */
/* see if there is a request to change the size */
virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
@@ -2364,7 +2374,7 @@ static int virtio_mem_init_vq(struct virtio_mem *vm)
static int virtio_mem_init(struct virtio_mem *vm)
{
- const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
+ const struct range pluggable_range = mhp_get_pluggable_range(true);
uint64_t sb_size, addr;
uint16_t node_id;
@@ -2405,9 +2415,10 @@ static int virtio_mem_init(struct virtio_mem *vm)
if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes()))
dev_warn(&vm->vdev->dev,
"The alignment of the physical end address can make some memory unusable.\n");
- if (vm->addr + vm->region_size > phys_limit)
+ if (vm->addr < pluggable_range.start ||
+ vm->addr + vm->region_size - 1 > pluggable_range.end)
dev_warn(&vm->vdev->dev,
- "Some memory is not addressable. This can make some memory unusable.\n");
+ "Some device memory is not addressable/pluggable. This can make some memory unusable.\n");
/*
* We want subblocks to span at least MAX_ORDER_NR_PAGES and
@@ -2429,7 +2440,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
vm->sbm.sb_size;
/* Round up to the next full memory block */
- addr = vm->addr + memory_block_size_bytes() - 1;
+ addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
+ memory_block_size_bytes() - 1;
vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr);
vm->sbm.next_mb_id = vm->sbm.first_mb_id;
} else {
@@ -2450,7 +2462,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
}
/* Round up to the next aligned big block */
- addr = vm->addr + vm->bbm.bb_size - 1;
+ addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
+ vm->bbm.bb_size - 1;
vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
vm->bbm.next_bb_id = vm->bbm.first_bb_id;
}
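
The last-usable-block math above switches to an inclusive end address clamped to the platform's pluggable range: take the smaller of the device's last usable byte and the pluggable end, map it to a block id, and step back one block when the byte after the end is not block-aligned, since a partially covered block cannot be plugged. A standalone sketch of that computation (helper names are placeholders):

#include <stdint.h>

static uint64_t phys_to_block_id(uint64_t addr, uint64_t block_size)
{
	return addr / block_size;
}

static uint64_t last_usable_block_id(uint64_t dev_addr, uint64_t usable,
				     uint64_t pluggable_end,
				     uint64_t block_size)
{
	/* inclusive last usable byte, clamped to the pluggable range */
	uint64_t end = dev_addr + usable - 1;

	if (end > pluggable_end)
		end = pluggable_end;

	/* a block only counts if it is covered up to its last byte;
	 * the result may end up below the first usable id, which the
	 * caller treats as "nothing pluggable" (-ENOSPC) */
	if ((end + 1) % block_size != 0)
		return phys_to_block_id(end, block_size) - 1;
	return phys_to_block_id(end, block_size);
}
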
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index b57b2067ecbf..671c71245a7b 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -331,7 +331,7 @@ static enum bp_state reserve_additional_memory(void)
mutex_unlock(&balloon_mutex);
/* add_memory_resource() requires the device_hotplug lock */
lock_device_hotplug();
- rc = add_memory_resource(nid, resource, MEMHP_MERGE_RESOURCE);
+ rc = add_memory_resource(nid, resource, MHP_MERGE_RESOURCE);
unlock_device_hotplug();
mutex_lock(&balloon_mutex);
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index b249f2d6b0cc..adb7260e94b2 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -323,6 +323,8 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
info->u.interdomain = dev;
+ if (dev)
+ atomic_inc(&dev->event_channels);
return ret;
}
@@ -568,18 +570,28 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
return;
if (spurious) {
+ struct xenbus_device *dev = info->u.interdomain;
+ unsigned int threshold = 1;
+
+ if (dev && dev->spurious_threshold)
+ threshold = dev->spurious_threshold;
+
if ((1 << info->spurious_cnt) < (HZ << 2)) {
if (info->spurious_cnt != 0xFF)
info->spurious_cnt++;
}
- if (info->spurious_cnt > 1) {
- delay = 1 << (info->spurious_cnt - 2);
+ if (info->spurious_cnt > threshold) {
+ delay = 1 << (info->spurious_cnt - 1 - threshold);
if (delay > HZ)
delay = HZ;
if (!info->eoi_time)
info->eoi_cpu = smp_processor_id();
info->eoi_time = get_jiffies_64() + delay;
+ if (dev)
+ atomic_add(delay, &dev->jiffies_eoi_delayed);
}
+ if (dev)
+ atomic_inc(&dev->spurious_events);
} else {
info->spurious_cnt = 0;
}
@@ -908,6 +920,7 @@ static void __unbind_from_irq(unsigned int irq)
if (VALID_EVTCHN(evtchn)) {
unsigned int cpu = cpu_from_irq(irq);
+ struct xenbus_device *dev;
xen_evtchn_close(evtchn);
@@ -918,6 +931,11 @@ static void __unbind_from_irq(unsigned int irq)
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
break;
+ case IRQT_EVTCHN:
+ dev = info->u.interdomain;
+ if (dev)
+ atomic_dec(&dev->event_channels);
+ break;
default:
break;
}
@@ -1581,6 +1599,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
{
int irq;
struct irq_info *info;
+ struct xenbus_device *dev;
irq = get_evtchn_to_irq(port);
if (irq == -1)
@@ -1610,6 +1629,10 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
info = info_for_irq(irq);
+ dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
+ if (dev)
+ atomic_inc(&dev->events);
+
if (ctrl->defer_eoi) {
info->eoi_cpu = smp_processor_id();
info->irq_epoch = __this_cpu_read(irq_epoch);
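
With the per-device threshold, the EOI delay for spurious events stays zero until spurious_cnt exceeds the threshold, then grows as delay = 1 << (spurious_cnt - 1 - threshold) jiffies, capped at HZ (one second). With the default threshold of 1 this matches the old hard-coded rule: the first delayed EOI waits one jiffy, then 2, 4, and so on. A toy version of the computation:

/* Toy model of the delayed-EOI backoff, for illustration. */
static unsigned int eoi_delay(unsigned int spurious_cnt,
			      unsigned int threshold, unsigned int hz)
{
	unsigned int delay;

	if (spurious_cnt <= threshold)
		return 0;			/* EOI immediately */

	delay = 1u << (spurious_cnt - 1 - threshold);
	return delay > hz ? hz : delay;		/* cap at one second */
}
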
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index a7a85719a8c8..c99415a70051 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -162,6 +162,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
{
struct user_evtchn *evtchn = data;
struct per_user_data *u = evtchn->user;
+ unsigned int prod, cons;
WARN(!evtchn->enabled,
"Interrupt for port %u, but apparently not enabled; per-user %p\n",
@@ -171,10 +172,14 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
spin_lock(&u->ring_prod_lock);
- if ((u->ring_prod - u->ring_cons) < u->ring_size) {
- *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
- wmb(); /* Ensure ring contents visible */
- if (u->ring_cons == u->ring_prod++) {
+ prod = READ_ONCE(u->ring_prod);
+ cons = READ_ONCE(u->ring_cons);
+
+ if ((prod - cons) < u->ring_size) {
+ *evtchn_ring_entry(u, prod) = evtchn->port;
+ smp_wmb(); /* Ensure ring contents visible */
+ WRITE_ONCE(u->ring_prod, prod + 1);
+ if (cons == prod) {
wake_up_interruptible(&u->evtchn_wait);
kill_fasync(&u->evtchn_async_queue,
SIGIO, POLL_IN);
@@ -210,8 +215,8 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
if (u->ring_overflow)
goto unlock_out;
- c = u->ring_cons;
- p = u->ring_prod;
+ c = READ_ONCE(u->ring_cons);
+ p = READ_ONCE(u->ring_prod);
if (c != p)
break;
@@ -221,7 +226,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
return -EAGAIN;
rc = wait_event_interruptible(u->evtchn_wait,
- u->ring_cons != u->ring_prod);
+ READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod));
if (rc)
return rc;
}
@@ -245,13 +250,13 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
}
rc = -EFAULT;
- rmb(); /* Ensure that we see the port before we copy it. */
+ smp_rmb(); /* Ensure that we see the port before we copy it. */
if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
((bytes2 != 0) &&
copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
goto unlock_out;
- u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+ WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t));
rc = bytes1 + bytes2;
unlock_out:
@@ -552,7 +557,9 @@ static long evtchn_ioctl(struct file *file,
/* Initialise the ring to empty. Clear errors. */
mutex_lock(&u->ring_cons_mutex);
spin_lock_irq(&u->ring_prod_lock);
- u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+ WRITE_ONCE(u->ring_cons, 0);
+ WRITE_ONCE(u->ring_prod, 0);
+ u->ring_overflow = 0;
spin_unlock_irq(&u->ring_prod_lock);
mutex_unlock(&u->ring_cons_mutex);
rc = 0;
@@ -595,7 +602,7 @@ static __poll_t evtchn_poll(struct file *file, poll_table *wait)
struct per_user_data *u = file->private_data;
poll_wait(file, &u->evtchn_wait, wait);
- if (u->ring_cons != u->ring_prod)
+ if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod))
mask |= EPOLLIN | EPOLLRDNORM;
if (u->ring_overflow)
mask = EPOLLERR;
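
The evtchn ring is a single-producer (interrupt handler) / single-consumer (reader) queue; the patch makes the index accesses explicit with READ_ONCE()/WRITE_ONCE() and pairs the producer's smp_wmb() with the consumer's smp_rmb(), so the consumer can never observe the new producer index before the entry it covers. A simplified sketch of the ordering, with free-running indices masked into a power-of-two ring as the real code does:

/* Single-producer / single-consumer ring, simplified sketch. */
struct ring {
	unsigned int prod, cons, size;	/* size is a power of two */
	unsigned int entry[];
};

static bool ring_push(struct ring *r, unsigned int val)
{
	unsigned int prod = READ_ONCE(r->prod);
	unsigned int cons = READ_ONCE(r->cons);

	if (prod - cons >= r->size)
		return false;			/* full */
	r->entry[prod & (r->size - 1)] = val;
	smp_wmb();	/* publish the entry before the index */
	WRITE_ONCE(r->prod, prod + 1);
	return true;
}

static bool ring_pop(struct ring *r, unsigned int *val)
{
	unsigned int cons = READ_ONCE(r->cons);

	if (cons == READ_ONCE(r->prod))
		return false;			/* empty */
	smp_rmb();	/* read the index before the entry */
	*val = r->entry[cons & (r->size - 1)];
	WRITE_ONCE(r->cons, cons + 1);
	return true;
}
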
diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c
index ce8ffb595a46..df7cab870be5 100644
--- a/drivers/xen/xen-acpi-processor.c
+++ b/drivers/xen/xen-acpi-processor.c
@@ -3,7 +3,8 @@
* Copyright 2012 by Oracle Inc
* Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
*
- * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
+ * This code borrows ideas from
+ * https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com
* so many thanks go to Kevin Tian <kevin.tian@intel.com>
* and Yu Ke <ke.yu@intel.com>.
*/
diff --git a/drivers/xen/xen-front-pgdir-shbuf.c b/drivers/xen/xen-front-pgdir-shbuf.c
index 48a658dc7ccf..81b6e13fa5ec 100644
--- a/drivers/xen/xen-front-pgdir-shbuf.c
+++ b/drivers/xen/xen-front-pgdir-shbuf.c
@@ -305,11 +305,18 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf)
/* Save handles even if error, so we can unmap. */
for (cur_page = 0; cur_page < buf->num_pages; cur_page++) {
- buf->backend_map_handles[cur_page] = map_ops[cur_page].handle;
- if (unlikely(map_ops[cur_page].status != GNTST_okay))
+ if (likely(map_ops[cur_page].status == GNTST_okay)) {
+ buf->backend_map_handles[cur_page] =
+ map_ops[cur_page].handle;
+ } else {
+ buf->backend_map_handles[cur_page] =
+ INVALID_GRANT_HANDLE;
+ if (!ret)
+ ret = -ENXIO;
dev_err(&buf->xb_dev->dev,
"Failed to map page %d: %d\n",
cur_page, map_ops[cur_page].status);
+ }
}
if (ret) {
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 8a75092bb148..97f0d234482d 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -206,6 +206,65 @@ void xenbus_otherend_changed(struct xenbus_watch *watch,
}
EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
+#define XENBUS_SHOW_STAT(name) \
+static ssize_t show_##name(struct device *_dev, \
+ struct device_attribute *attr, \
+ char *buf) \
+{ \
+ struct xenbus_device *dev = to_xenbus_device(_dev); \
+ \
+ return sprintf(buf, "%d\n", atomic_read(&dev->name)); \
+} \
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+XENBUS_SHOW_STAT(event_channels);
+XENBUS_SHOW_STAT(events);
+XENBUS_SHOW_STAT(spurious_events);
+XENBUS_SHOW_STAT(jiffies_eoi_delayed);
+
+static ssize_t show_spurious_threshold(struct device *_dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+
+ return sprintf(buf, "%d\n", dev->spurious_threshold);
+}
+
+static ssize_t set_spurious_threshold(struct device *_dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct xenbus_device *dev = to_xenbus_device(_dev);
+ unsigned int val;
+ ssize_t ret;
+
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ dev->spurious_threshold = val;
+
+ return count;
+}
+
+static DEVICE_ATTR(spurious_threshold, 0644, show_spurious_threshold,
+ set_spurious_threshold);
+
+static struct attribute *xenbus_attrs[] = {
+ &dev_attr_event_channels.attr,
+ &dev_attr_events.attr,
+ &dev_attr_spurious_events.attr,
+ &dev_attr_jiffies_eoi_delayed.attr,
+ &dev_attr_spurious_threshold.attr,
+ NULL
+};
+
+static const struct attribute_group xenbus_group = {
+ .name = "xenbus",
+ .attrs = xenbus_attrs,
+};
+
int xenbus_dev_probe(struct device *_dev)
{
struct xenbus_device *dev = to_xenbus_device(_dev);
@@ -253,6 +312,11 @@ int xenbus_dev_probe(struct device *_dev)
return err;
}
+ dev->spurious_threshold = 1;
+ if (sysfs_create_group(&dev->dev.kobj, &xenbus_group))
+ dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n",
+ dev->nodename);
+
return 0;
fail_put:
module_put(drv->driver.owner);
@@ -269,6 +333,8 @@ int xenbus_dev_remove(struct device *_dev)
DPRINTK("%s", dev->nodename);
+ sysfs_remove_group(&dev->dev.kobj, &xenbus_group);
+
free_otherend_watch(dev);
if (drv->remove) {
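
XENBUS_SHOW_STAT() stamps out one read-only attribute per counter; XENBUS_SHOW_STAT(events), for instance, expands to essentially:

static ssize_t show_events(struct device *_dev,
			   struct device_attribute *attr, char *buf)
{
	struct xenbus_device *dev = to_xenbus_device(_dev);

	return sprintf(buf, "%d\n", atomic_read(&dev->events));
}
static DEVICE_ATTR(events, 0444, show_events, NULL);

Since the attributes are collected into a named group, the counters appear under each device as xenbus/events, xenbus/spurious_events, and so on once xenbus_dev_probe() creates the group.
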
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 648eb4c4cf7f..8d97f0b45e9c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1139,9 +1139,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
struct super_block *sb, unsigned int flags)
{
umode_t mode;
- char ext[32];
- char tag_name[14];
- unsigned int i_nlink;
struct v9fs_session_info *v9ses = sb->s_fs_info;
struct v9fs_inode *v9inode = V9FS_I(inode);
@@ -1159,18 +1156,18 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
inode->i_gid = stat->n_gid;
}
if ((S_ISREG(inode->i_mode)) || (S_ISDIR(inode->i_mode))) {
- if (v9fs_proto_dotu(v9ses) && (stat->extension[0] != '\0')) {
+ if (v9fs_proto_dotu(v9ses)) {
+ unsigned int i_nlink;
/*
- * Hadlink support got added later to
- * to the .u extension. So there can be
- * server out there that doesn't support
- * this even with .u extension. So check
- * for non NULL stat->extension
+ * Hardlink support was added later to the .u extension.
+ * So there can be servers out there that don't
+ * support it even with the .u extension. That would
+ * just leave us with stat->extension being an empty
+ * string, though.
*/
- strlcpy(ext, stat->extension, sizeof(ext));
/* HARDLINKCOUNT %u */
- sscanf(ext, "%13s %u", tag_name, &i_nlink);
- if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
+ if (sscanf(stat->extension,
+ " HARDLINKCOUNT %u", &i_nlink) == 1)
set_nlink(inode, i_nlink);
}
}
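
The rewrite drops the fixed-size ext/tag_name buffers and matches the extension string directly: the leading space in the format skips any whitespace, the literal must match "HARDLINKCOUNT", and sscanf() returns 1 only when the count actually converts. A quick userspace illustration of the format string's behavior:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	unsigned int n = 0;

	/* whitespace skipped, literal matched, %u converted: returns 1 */
	assert(sscanf("HARDLINKCOUNT 2", " HARDLINKCOUNT %u", &n) == 1);
	assert(n == 2);

	/* literal differs, no conversion happens: returns 0 */
	assert(sscanf("SOMETHINGELSE 2", " HARDLINKCOUNT %u", &n) == 0);
	return 0;
}
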
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 91ff9bfd7c1a..4aa1f88d5bf8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -221,7 +221,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
static ssize_t
__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
- int nr_pages)
+ unsigned int nr_pages)
{
struct file *file = iocb->ki_filp;
struct block_device *bdev = I_BDEV(bdev_file_inode(file));
@@ -355,8 +355,8 @@ static void blkdev_bio_end_io(struct bio *bio)
}
}
-static ssize_t
-__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
+static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ unsigned int nr_pages)
{
struct file *file = iocb->ki_filp;
struct inode *inode = bdev_file_inode(file);
@@ -486,7 +486,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
static ssize_t
blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
- int nr_pages;
+ unsigned int nr_pages;
if (!iov_iter_count(iter))
return 0;
@@ -495,7 +495,7 @@ blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
- return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
+ return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
}
static __init int blkdev_init(void)
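
bio_max_segs() centralizes the open-coded min(nr, BIO_MAX_PAGES) clamp used when sizing a bio's vector array, here and in the target code earlier in the series. In essence (a sketch, not the verbatim header):

#define BIO_MAX_PAGES	256U	/* kernel value at the time */

/* Clamp a segment count to what a single bio can carry. */
static inline unsigned int bio_max_segs(unsigned int nr_segs)
{
	return nr_segs < BIO_MAX_PAGES ? nr_segs : BIO_MAX_PAGES;
}
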
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 5064be59dac5..744b99ddc28c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1162,6 +1162,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
spin_lock(&sinfo->lock);
spin_lock(&cache->lock);
+ if (cache->swap_extents) {
+ ret = -ETXTBSY;
+ goto out;
+ }
+
if (cache->ro) {
cache->ro++;
ret = 0;
@@ -2307,7 +2312,7 @@ again:
}
ret = inc_block_group_ro(cache, 0);
- if (!do_chunk_alloc)
+ if (!do_chunk_alloc || ret == -ETXTBSY)
goto unlock_out;
if (!ret)
goto out;
@@ -2316,6 +2321,8 @@ again:
if (ret < 0)
goto out;
ret = inc_block_group_ro(cache, 0);
+ if (ret == -ETXTBSY)
+ goto unlock_out;
out:
if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
@@ -3406,6 +3413,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
ASSERT(list_empty(&block_group->io_list));
ASSERT(list_empty(&block_group->bg_list));
ASSERT(refcount_read(&block_group->refs) == 1);
+ ASSERT(block_group->swap_extents == 0);
btrfs_put_block_group(block_group);
spin_lock(&info->block_group_cache_lock);
@@ -3472,3 +3480,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
__btrfs_remove_free_space_cache(block_group->free_space_ctl);
}
}
+
+bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg)
+{
+ bool ret = true;
+
+ spin_lock(&bg->lock);
+ if (bg->ro)
+ ret = false;
+ else
+ bg->swap_extents++;
+ spin_unlock(&bg->lock);
+
+ return ret;
+}
+
+void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount)
+{
+ spin_lock(&bg->lock);
+ ASSERT(!bg->ro);
+ ASSERT(bg->swap_extents >= amount);
+ bg->swap_extents -= amount;
+ spin_unlock(&bg->lock);
+}
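Together with the -ETXTBSY check added to inc_block_group_ro() above, these helpers form a mutual-exclusion pair under bg->lock: a read-only block group refuses new swap extents, and a block group backing a swapfile refuses to go read-only. A userspace model of that exclusion (names made up; a pthread mutex stands in for the spinlock):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct bg {
	pthread_mutex_t lock;
	bool ro;
	int swap_extents;
};

static bool inc_swap_extents(struct bg *bg)
{
	bool ok = true;

	pthread_mutex_lock(&bg->lock);
	if (bg->ro)
		ok = false;		/* read-only: refuse the swapfile */
	else
		bg->swap_extents++;
	pthread_mutex_unlock(&bg->lock);
	return ok;
}

static int set_ro(struct bg *bg)
{
	int ret = 0;

	pthread_mutex_lock(&bg->lock);
	if (bg->swap_extents)
		ret = -1;		/* swapfile pinned: like -ETXTBSY */
	else
		bg->ro = true;
	pthread_mutex_unlock(&bg->lock);
	return ret;
}

int main(void)
{
	struct bg bg = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

	printf("inc swap extents: %d\n", inc_swap_extents(&bg)); /* 1 */
	printf("set ro:           %d\n", set_ro(&bg));           /* -1 */
	return 0;
}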
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index 29678426247d..3ecc3372a5ce 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -186,6 +186,12 @@ struct btrfs_block_group {
/* Flag indicating this block group is placed on a sequential zone */
bool seq_zone;
+ /*
+ * Number of extents in this block group used for swap files.
+ * All accesses protected by the spinlock 'lock'.
+ */
+ int swap_extents;
+
/* Record locked full stripes for RAID5/6 block group */
struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
@@ -312,4 +318,7 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
void btrfs_freeze_block_group(struct btrfs_block_group *cache);
void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
+bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
+void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
+
#endif /* BTRFS_BLOCK_GROUP_H */
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 6d203acfdeb3..3f4c832abfed 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -141,6 +141,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
const u32 csum_size = fs_info->csum_size;
+ const u32 sectorsize = fs_info->sectorsize;
struct page *page;
unsigned long i;
char *kaddr;
@@ -154,22 +155,34 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
shash->tfm = fs_info->csum_shash;
for (i = 0; i < cb->nr_pages; i++) {
+ u32 pg_offset;
+ u32 bytes_left = PAGE_SIZE;
page = cb->compressed_pages[i];
- kaddr = kmap_atomic(page);
- crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
- kunmap_atomic(kaddr);
-
- if (memcmp(&csum, cb_sum, csum_size)) {
- btrfs_print_data_csum_error(inode, disk_start,
- csum, cb_sum, cb->mirror_num);
- if (btrfs_io_bio(bio)->device)
- btrfs_dev_stat_inc_and_print(
- btrfs_io_bio(bio)->device,
- BTRFS_DEV_STAT_CORRUPTION_ERRS);
- return -EIO;
+ /* Determine the remaining bytes inside the page first */
+ if (i == cb->nr_pages - 1)
+ bytes_left = cb->compressed_len - i * PAGE_SIZE;
+
+ /* Hash through the page sector by sector */
+ for (pg_offset = 0; pg_offset < bytes_left;
+ pg_offset += sectorsize) {
+ kaddr = kmap_atomic(page);
+ crypto_shash_digest(shash, kaddr + pg_offset,
+ sectorsize, csum);
+ kunmap_atomic(kaddr);
+
+ if (memcmp(&csum, cb_sum, csum_size) != 0) {
+ btrfs_print_data_csum_error(inode, disk_start,
+ csum, cb_sum, cb->mirror_num);
+ if (btrfs_io_bio(bio)->device)
+ btrfs_dev_stat_inc_and_print(
+ btrfs_io_bio(bio)->device,
+ BTRFS_DEV_STAT_CORRUPTION_ERRS);
+ return -EIO;
+ }
+ cb_sum += csum_size;
+ disk_start += sectorsize;
}
- cb_sum += csum_size;
}
return 0;
}
@@ -640,7 +653,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree,
page_offset(bio_first_page_all(bio)),
- PAGE_SIZE);
+ fs_info->sectorsize);
read_unlock(&em_tree->lock);
if (!em)
return BLK_STS_IOERR;
@@ -698,19 +711,30 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
refcount_set(&cb->pending_bios, 1);
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+ u32 pg_len = PAGE_SIZE;
int submit = 0;
+ /*
+ * To handle the subpage case, we need to make sure the bio
+ * only covers the range we need.
+ *
+ * If we're at the last page, truncate the length to only cover
+ * the remaining part.
+ */
+ if (pg_index == nr_pages - 1)
+ pg_len = min_t(u32, PAGE_SIZE,
+ compressed_len - pg_index * PAGE_SIZE);
+
page = cb->compressed_pages[pg_index];
page->mapping = inode->i_mapping;
page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
- submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE,
+ submit = btrfs_bio_fits_in_stripe(page, pg_len,
comp_bio, 0);
page->mapping = NULL;
- if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
- PAGE_SIZE) {
+ if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) {
unsigned int nr_sectors;
ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
@@ -743,9 +767,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
- bio_add_page(comp_bio, page, PAGE_SIZE, 0);
+ bio_add_page(comp_bio, page, pg_len, 0);
}
- cur_disk_byte += PAGE_SIZE;
+ cur_disk_byte += pg_len;
}
ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
@@ -1237,7 +1261,6 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
unsigned long prev_start_byte;
unsigned long working_bytes = total_out - buf_start;
unsigned long bytes;
- char *kaddr;
struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter);
/*
@@ -1268,9 +1291,8 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
PAGE_SIZE - (buf_offset % PAGE_SIZE));
bytes = min(bytes, working_bytes);
- kaddr = kmap_atomic(bvec.bv_page);
- memcpy(kaddr + bvec.bv_offset, buf + buf_offset, bytes);
- kunmap_atomic(kaddr);
+ memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + buf_offset,
+ bytes);
flush_dcache_page(bvec.bv_page);
buf_offset += bytes;
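memcpy_to_page() and memcpy_from_page(), adopted here and in the reflink, send, zlib and zstd hunks below, bundle the map-copy-unmap sequence into one helper. Roughly, for the to-page direction (a sketch; the authoritative definitions are in include/linux/highmem.h):

/* Rough shape of the helper; the real version may also flush or
 * sanity-check, so treat this as a sketch. */
static inline void memcpy_to_page(struct page *page, size_t offset,
				  const char *from, size_t len)
{
	char *to = kmap_local_page(page);

	memcpy(to + offset, from, len);
	kunmap_local(to);
}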
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bd659354d043..9ae776ab3967 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -524,6 +524,11 @@ struct btrfs_swapfile_pin {
* points to a struct btrfs_device.
*/
bool is_block_group;
+ /*
+ * Only used when 'is_block_group' is true and it is the number of
+ * extents used by a swapfile for this block group ('ptr' field).
+ */
+ int bg_extent_count;
};
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index ec0b50b8c5d6..bf25401c9768 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -627,7 +627,8 @@ static int btrfs_delayed_inode_reserve_metadata(
*/
if (!src_rsv || (!trans->bytes_reserved &&
src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
- ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+ ret = btrfs_qgroup_reserve_meta(root, num_bytes,
+ BTRFS_QGROUP_RSV_META_PREALLOC, true);
if (ret < 0)
return ret;
ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
@@ -649,7 +650,7 @@ static int btrfs_delayed_inode_reserve_metadata(
btrfs_ino(inode),
num_bytes, 1);
} else {
- btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
+ btrfs_qgroup_free_meta_prealloc(root, num_bytes);
}
return ret;
}
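The one-line fix matters because an error path must release exactly what the earlier reservation took: freeing fs_info->nodesize when num_bytes was reserved leaves the difference stuck in the qgroup accounting. A toy model with made-up sizes:

#include <stdio.h>

int main(void)
{
	long rsv = 0;
	long nodesize = 16384;		/* hypothetical */
	long num_bytes = 3 * nodesize;	/* what was actually reserved */

	rsv += num_bytes;	/* btrfs_qgroup_reserve_meta(...) */
	rsv -= nodesize;	/* old code: freed the wrong amount */
	printf("leaked: %ld\n", rsv);	/* 32768 bytes never freed */

	rsv = num_bytes;
	rsv -= num_bytes;	/* fixed: free what was reserved */
	printf("leaked: %ld\n", rsv);	/* 0 */
	return 0;
}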
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4dfb3ead1175..4671c99d468d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3008,12 +3008,23 @@ readpage_ok:
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
pgoff_t end_index = i_size >> PAGE_SHIFT;
- unsigned off;
- /* Zero out the end if this page straddles i_size */
- off = offset_in_page(i_size);
- if (page->index == end_index && off)
- zero_user_segment(page, off, PAGE_SIZE);
+ /*
+ * Zero out the remaining part if this range straddles
+ * i_size.
+ *
+ * Here we should only zero the range inside the bvec,
+ * not touch anything else.
+ *
+ * NOTE: i_size is exclusive while end is inclusive.
+ */
+ if (page->index == end_index && i_size <= end) {
+ u32 zero_start = max(offset_in_page(i_size),
+ offset_in_page(start));
+
+ zero_user_segment(page, zero_start,
+ offset_in_page(end) + 1);
+ }
}
ASSERT(bio_offset + len > bio_offset);
bio_offset += len;
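The new bounds clear only the bytes of this bvec that sit past EOF. A worked example with made-up numbers (PAGE_SIZE, offset_in_page() and max() are userspace stand-ins here):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define offset_in_page(p) ((unsigned long)(p) & (PAGE_SIZE - 1))
#define max(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	/* Hypothetical last page of a file: the bvec spans file bytes
	 * [0x11000, 0x11fff] and EOF sits at i_size = 0x11200. */
	unsigned long i_size = 0x11200, start = 0x11000, end = 0x11fff;

	unsigned long zero_start = max(offset_in_page(i_size),
				       offset_in_page(start));

	/* zero_user_segment(page, 0x200, 0x1000): only the part of
	 * this bvec past EOF is cleared; bytes before 'start' (which
	 * belong to other bvecs) are left untouched. */
	printf("zero [0x%lx, 0x%lx)\n", zero_start,
	       offset_in_page(end) + 1);
	return 0;
}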
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bf2c51a9607a..0e155f013839 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -3260,8 +3260,11 @@ reserve_space:
goto out;
ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
alloc_start, bytes_to_reserve);
- if (ret)
+ if (ret) {
+ unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+ lockend, &cached_state);
goto out;
+ }
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
i_blocksize(inode),
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 5400294bd271..9988decd5717 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2555,7 +2555,12 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
to_unusable = size - to_free;
ctl->free_space += to_free;
- block_group->zone_unusable += to_unusable;
+ /*
+ * If the block group is read-only, we should account freed space into
+ * bytes_readonly.
+ */
+ if (!block_group->ro)
+ block_group->zone_unusable += to_unusable;
spin_unlock(&ctl->tree_lock);
if (!used) {
spin_lock(&block_group->lock);
@@ -2801,8 +2806,10 @@ static void __btrfs_return_cluster_to_free_space(
struct rb_node *node;
spin_lock(&cluster->lock);
- if (cluster->block_group != block_group)
- goto out;
+ if (cluster->block_group != block_group) {
+ spin_unlock(&cluster->lock);
+ return;
+ }
cluster->block_group = NULL;
cluster->window_start = 0;
@@ -2840,8 +2847,6 @@ static void __btrfs_return_cluster_to_free_space(
entry->offset, &entry->offset_index, bitmap);
}
cluster->root = RB_ROOT;
-
-out:
spin_unlock(&cluster->lock);
btrfs_put_block_group(block_group);
}
@@ -3125,8 +3130,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
entry->bytes -= bytes;
}
- if (entry->bytes == 0)
- rb_erase(&entry->offset_index, &cluster->root);
break;
}
out:
@@ -3143,7 +3146,10 @@ out:
ctl->free_space -= bytes;
if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
+
+ spin_lock(&cluster->lock);
if (entry->bytes == 0) {
+ rb_erase(&entry->offset_index, &cluster->root);
ctl->free_extents--;
if (entry->bitmap) {
kmem_cache_free(btrfs_free_space_bitmap_cachep,
@@ -3156,6 +3162,7 @@ out:
kmem_cache_free(btrfs_free_space_cachep, entry);
}
+ spin_unlock(&cluster->lock);
spin_unlock(&ctl->tree_lock);
return ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2e1c282c202d..35bfa0533f23 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1674,9 +1674,6 @@ next_slot:
*/
btrfs_release_path(path);
- /* If extent is RO, we must COW it */
- if (btrfs_extent_readonly(fs_info, disk_bytenr))
- goto out_check;
ret = btrfs_cross_ref_exist(root, ino,
found_key.offset -
extent_offset, disk_bytenr, false);
@@ -1723,6 +1720,7 @@ next_slot:
WARN_ON_ONCE(freespace_inode);
goto out_check;
}
+ /* If the extent's block group is RO, we must COW */
if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
goto out_check;
nocow = true;
@@ -6085,7 +6083,7 @@ static int btrfs_dirty_inode(struct inode *inode)
return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
- if (ret && ret == -ENOSPC) {
+ if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
/* whoops, let's try again with the full transaction */
btrfs_end_transaction(trans);
trans = btrfs_start_transaction(root, 1);
@@ -10200,6 +10198,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
sp->ptr = ptr;
sp->inode = inode;
sp->is_block_group = is_block_group;
+ sp->bg_extent_count = 1;
spin_lock(&fs_info->swapfile_pins_lock);
p = &fs_info->swapfile_pins.rb_node;
@@ -10213,6 +10212,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
(sp->ptr == entry->ptr && sp->inode > entry->inode)) {
p = &(*p)->rb_right;
} else {
+ if (is_block_group)
+ entry->bg_extent_count++;
spin_unlock(&fs_info->swapfile_pins_lock);
kfree(sp);
return 1;
@@ -10238,8 +10239,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode)
sp = rb_entry(node, struct btrfs_swapfile_pin, node);
if (sp->inode == inode) {
rb_erase(&sp->node, &fs_info->swapfile_pins);
- if (sp->is_block_group)
+ if (sp->is_block_group) {
+ btrfs_dec_block_group_swap_extents(sp->ptr,
+ sp->bg_extent_count);
btrfs_put_block_group(sp->ptr);
+ }
kfree(sp);
}
node = next;
@@ -10300,7 +10304,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_state *cached_state = NULL;
struct extent_map *em = NULL;
@@ -10351,13 +10356,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
"cannot activate swapfile while exclusive operation is running");
return -EBUSY;
}
+
+ /*
+ * Prevent snapshot creation while we are activating the swap file.
+ * We do not want to race with snapshot creation. If snapshot creation
+ * already started before we bumped nr_swapfiles from 0 to 1 and
+ * completes before the first write into the swap file after it is
+ * activated, then that write would fall back to COW.
+ */
+ if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
+ btrfs_exclop_finish(fs_info);
+ btrfs_warn(fs_info,
+ "cannot activate swapfile because snapshot creation is in progress");
+ return -EINVAL;
+ }
/*
* Snapshots can create extents which require COW even if NODATACOW is
* set. We use this counter to prevent snapshots. We must increment it
* before walking the extents because we don't want a concurrent
* snapshot to run after we've already checked the extents.
*/
- atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
+ atomic_inc(&root->nr_swapfiles);
isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
@@ -10454,6 +10473,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
goto out;
}
+ if (!btrfs_inc_block_group_swap_extents(bg)) {
+ btrfs_warn(fs_info,
+ "block group for swapfile at %llu is read-only%s",
+ bg->start,
+ atomic_read(&fs_info->scrubs_running) ?
+ " (scrub running)" : "");
+ btrfs_put_block_group(bg);
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = btrfs_add_swapfile_pin(inode, bg, true);
if (ret) {
btrfs_put_block_group(bg);
@@ -10492,6 +10522,8 @@ out:
if (ret)
btrfs_swap_deactivate(file);
+ btrfs_drew_write_unlock(&root->snapshot_lock);
+
btrfs_exclop_finish(fs_info);
if (ret)
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 072e77726e94..e8d53fea4c61 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1936,7 +1936,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
- if (vol_args->size > PAGE_SIZE) {
+ u64 nums;
+
+ if (vol_args->size < sizeof(*inherit) ||
+ vol_args->size > PAGE_SIZE) {
ret = -EINVAL;
goto free_args;
}
@@ -1945,6 +1948,20 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
ret = PTR_ERR(inherit);
goto free_args;
}
+
+ if (inherit->num_qgroups > PAGE_SIZE ||
+ inherit->num_ref_copies > PAGE_SIZE ||
+ inherit->num_excl_copies > PAGE_SIZE) {
+ ret = -EINVAL;
+ goto free_inherit;
+ }
+
+ nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
+ 2 * inherit->num_excl_copies;
+ if (vol_args->size != struct_size(inherit, qgroups, nums)) {
+ ret = -EINVAL;
+ goto free_inherit;
+ }
}
ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
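struct_size() from include/linux/overflow.h computes sizeof(*p) + count * sizeof(element) for a flexible-array member, saturating rather than wrapping on overflow, which lets the ioctl insist that the user-supplied size agrees exactly with the counts embedded in the payload. A simplified userspace illustration (the struct layout and macro are stand-ins, not the kernel definitions):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Userspace stand-in for struct_size(); the kernel macro in
 * <linux/overflow.h> additionally saturates on overflow. */
#define struct_size(p, member, count) \
	(sizeof(*(p)) + (count) * sizeof((p)->member[0]))

/* Simplified stand-in for struct btrfs_qgroup_inherit. */
struct inherit {
	uint64_t num_qgroups;
	uint64_t num_ref_copies;
	uint64_t num_excl_copies;
	uint64_t qgroups[];	/* flexible array member */
};

int main(void)
{
	struct inherit *in = calloc(1, sizeof(*in) + 3 * sizeof(uint64_t));

	in->num_qgroups = 1;
	in->num_ref_copies = 1;	/* each ref/excl copy uses two u64s */

	uint64_t nums = in->num_qgroups + 2 * in->num_ref_copies +
			2 * in->num_excl_copies;

	/* The ioctl now rejects any vol_args->size that disagrees
	 * with the counts embedded in the payload. */
	printf("expected size: %llu\n",
	       (unsigned long long)struct_size(in, qgroups, nums));
	free(in);
	return 0;
}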
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index aa9cd11f4b78..9084a950dc09 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -467,7 +467,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes = min_t(unsigned long, destlen, out_len - start_byte);
- kaddr = kmap_atomic(dest_page);
+ kaddr = kmap_local_page(dest_page);
memcpy(kaddr, workspace->buf + start_byte, bytes);
/*
@@ -477,7 +477,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
*/
if (bytes < destlen)
memset(kaddr+bytes, 0, destlen-bytes);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
out:
return ret;
}
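kmap_local_page()/kunmap_local(), adopted here and elsewhere in this series, create a CPU-local mapping without disabling preemption and pagefaults the way kmap_atomic() does; the one rule is that unmaps must nest in reverse order of the maps. A kernel-style sketch of the converted copy-and-pad sequence (not a standalone program; workspace_buf is a stand-in name):

char *kaddr = kmap_local_page(dest_page);

memcpy(kaddr, workspace_buf + start_byte, bytes);
if (bytes < destlen)
	memset(kaddr + bytes, 0, destlen - bytes);	/* zero the tail */
kunmap_local(kaddr);	/* reverse order if multiple maps are live */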
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 808370ada888..14ff388fd3bd 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3841,8 +3841,8 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
return num_bytes;
}
-static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
- enum btrfs_qgroup_rsv_type type, bool enforce)
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+ enum btrfs_qgroup_rsv_type type, bool enforce)
{
struct btrfs_fs_info *fs_info = root->fs_info;
int ret;
@@ -3873,14 +3873,14 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
{
int ret;
- ret = qgroup_reserve_meta(root, num_bytes, type, enforce);
+ ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
if (ret <= 0 && ret != -EDQUOT)
return ret;
ret = try_flush_qgroup(root);
if (ret < 0)
return ret;
- return qgroup_reserve_meta(root, num_bytes, type, enforce);
+ return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
}
void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 50dea9a2d8fb..7283e4f549af 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -361,6 +361,8 @@ int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
int btrfs_qgroup_free_data(struct btrfs_inode *inode,
struct extent_changeset *reserved, u64 start,
u64 len);
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+ enum btrfs_qgroup_rsv_type type, bool enforce);
int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
enum btrfs_qgroup_rsv_type type, bool enforce);
/* Reserve metadata space for pertrans and prealloc type */
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 8ec34ecb6d68..8c31357f08ed 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -249,8 +249,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
{
int i;
- char *s;
- char *d;
int ret;
ret = alloc_rbio_pages(rbio);
@@ -261,13 +259,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
if (!rbio->bio_pages[i])
continue;
- s = kmap(rbio->bio_pages[i]);
- d = kmap(rbio->stripe_pages[i]);
-
- copy_page(d, s);
-
- kunmap(rbio->bio_pages[i]);
- kunmap(rbio->stripe_pages[i]);
+ copy_highpage(rbio->stripe_pages[i], rbio->bio_pages[i]);
SetPageUptodate(rbio->stripe_pages[i]);
}
set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -2359,16 +2351,21 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
SetPageUptodate(p_page);
if (has_qstripe) {
+ /* RAID6, allocate and map temp space for the Q stripe */
q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
if (!q_page) {
__free_page(p_page);
goto cleanup;
}
SetPageUptodate(q_page);
+ pointers[rbio->real_stripes - 1] = kmap(q_page);
}
atomic_set(&rbio->error, 0);
+ /* Map the parity stripe just once */
+ pointers[nr_data] = kmap(p_page);
+
for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
struct page *p;
void *parity;
@@ -2378,16 +2375,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
pointers[stripe] = kmap(p);
}
- /* then add the parity stripe */
- pointers[stripe++] = kmap(p_page);
-
if (has_qstripe) {
- /*
- * raid6, add the qstripe and call the
- * library function to fill in our p/q
- */
- pointers[stripe++] = kmap(q_page);
-
+ /* RAID6, call the library function to fill in our P/Q */
raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
pointers);
} else {
@@ -2408,12 +2397,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
for (stripe = 0; stripe < nr_data; stripe++)
kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
- kunmap(p_page);
}
+ kunmap(p_page);
__free_page(p_page);
- if (q_page)
+ if (q_page) {
+ kunmap(q_page);
__free_page(q_page);
+ }
writeback:
/*
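The net effect of this hunk: p_page (and q_page on RAID6) are mapped once before the per-sector loop and unmapped once after it, where previously every iteration kunmap()ed pages that had been kmap()ed only once. A balanced-pair sketch (kernel-style and heavily simplified; data_page() and gen_parity() are hypothetical stand-ins):

/* Long-lived parity pages are mapped once outside the loop; only
 * per-sector data pages are mapped and unmapped inside it, so every
 * kmap() is balanced by exactly one kunmap(). */
pointers[nr_data] = kmap(p_page);
if (has_qstripe)
	pointers[nr_data + 1] = kmap(q_page);

for (pagenr = 0; pagenr < stripe_npages; pagenr++) {
	for (stripe = 0; stripe < nr_data; stripe++)
		pointers[stripe] = kmap(data_page(stripe, pagenr));

	gen_parity(pointers);

	for (stripe = 0; stripe < nr_data; stripe++)
		kunmap(data_page(stripe, pagenr));
}

kunmap(p_page);
if (has_qstripe)
	kunmap(q_page);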
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index 2b490becbe67..8e026de74c44 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -218,11 +218,11 @@ static void __print_stack_trace(struct btrfs_fs_info *fs_info,
stack_trace_print(ra->trace, ra->trace_len, 2);
}
#else
-static void inline __save_stack_trace(struct ref_action *ra)
+static inline void __save_stack_trace(struct ref_action *ra)
{
}
-static void inline __print_stack_trace(struct btrfs_fs_info *fs_info,
+static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
struct ref_action *ra)
{
btrfs_err(fs_info, " ref-verify: no stacktrace support");
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index b24396cf2f99..762881b777b3 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -106,12 +106,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
if (comp_type == BTRFS_COMPRESS_NONE) {
- char *map;
-
- map = kmap(page);
- memcpy(map, data_start, datal);
+ memcpy_to_page(page, 0, data_start, datal);
flush_dcache_page(page);
- kunmap(page);
} else {
ret = btrfs_decompress(comp_type, data_start, page, 0,
inline_size, datal);
@@ -553,6 +549,24 @@ process_slot:
*/
btrfs_release_path(path);
+ /*
+ * When using NO_HOLES and we are cloning a range that covers
+ * only a hole (no extents) into a range beyond the current
+ * i_size, punching a hole in the target range will not create
+ * an extent map defining a hole, because the range starts at or
+ * beyond current i_size. If the file previously had an i_size
+ * greater than the new i_size set by this clone operation, we
+ * need to make sure the next fsync is a full fsync, so that it
+ * detects and logs a hole covering a range from the current
+ * i_size to the new i_size. If the clone range covers extents,
+ * besides a hole, then we know the full sync flag was already
+ * set by previous calls to btrfs_replace_file_extents() that
+ * replaced file extent items.
+ */
+ if (last_dest_end >= i_size_read(inode))
+ set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+ &BTRFS_I(inode)->runtime_flags);
+
ret = btrfs_replace_file_extents(inode, path, last_dest_end,
destoff + len - 1, NULL, &trans);
if (ret)
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 582df11d298a..c2900ebf767a 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -3767,6 +3767,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
* commit_transactions.
*/
ro_set = 0;
+ } else if (ret == -ETXTBSY) {
+ btrfs_warn(fs_info,
+ "skipping scrub of block group %llu due to active swapfile",
+ cache->start);
+ scrub_pause_off(fs_info);
+ ret = 0;
+ goto skip_unfreeze;
} else {
btrfs_warn(fs_info,
"failed setting block group ro: %d", ret);
@@ -3862,7 +3869,7 @@ done:
} else {
spin_unlock(&cache->lock);
}
-
+skip_unfreeze:
btrfs_unfreeze_block_group(cache);
btrfs_put_block_group(cache);
if (ret)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index f87878274e9f..8f323859156b 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -4932,7 +4932,6 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
struct btrfs_fs_info *fs_info = root->fs_info;
struct inode *inode;
struct page *page;
- char *addr;
pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t last_index;
unsigned pg_offset = offset_in_page(offset);
@@ -4985,10 +4984,8 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
}
}
- addr = kmap(page);
- memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset,
- cur_len);
- kunmap(page);
+ memcpy_from_page(sctx->send_buf + sctx->send_size, page,
+ pg_offset, cur_len);
unlock_page(page);
put_page(page);
index++;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f8435641b912..f7a4ad86adee 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1918,8 +1918,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
- if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
- btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+ if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
+ (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
(!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
btrfs_warn(fs_info,
"remount supports changing free space tree only from ro to rw");
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 582061c7b547..f4ade821307d 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1453,22 +1453,14 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
return -EUCLEAN;
}
for (; ptr < end; ptr += sizeof(*dref)) {
- u64 root_objectid;
- u64 owner;
u64 offset;
- u64 hash;
+ /*
+ * We cannot check the extent_data_ref hash, since hash
+ * collisions can make it overflow from the leaf.
+ */
dref = (struct btrfs_extent_data_ref *)ptr;
- root_objectid = btrfs_extent_data_ref_root(leaf, dref);
- owner = btrfs_extent_data_ref_objectid(leaf, dref);
offset = btrfs_extent_data_ref_offset(leaf, dref);
- hash = hash_extent_data_ref(root_objectid, owner, offset);
- if (unlikely(hash != key->offset)) {
- extent_err(leaf, slot,
- "invalid extent data ref hash, item has 0x%016llx key has 0x%016llx",
- hash, key->offset);
- return -EUCLEAN;
- }
if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
extent_err(leaf, slot,
"invalid extent data backref offset, have %llu expect aligned to %u",
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index d90695c1ab6c..2f1acc9aea9e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3174,16 +3174,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
root_log_ctx.log_transid = log_root_tree->log_transid;
if (btrfs_is_zoned(fs_info)) {
- mutex_lock(&fs_info->tree_root->log_mutex);
if (!log_root_tree->node) {
ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
if (ret) {
- mutex_unlock(&fs_info->tree_log_mutex);
mutex_unlock(&log_root_tree->log_mutex);
goto out;
}
}
- mutex_unlock(&fs_info->tree_root->log_mutex);
}
/*
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b025102e435f..8a4514283a4b 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
+ const bool start_trans = (current->journal_info == NULL);
int ret;
- trans = btrfs_start_transaction(root, 2);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
+ if (start_trans) {
+ /*
+ * 1 unit for inserting/updating/deleting the xattr
+ * 1 unit for the inode item update
+ */
+ trans = btrfs_start_transaction(root, 2);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+ } else {
+ /*
+ * This can happen when smack is enabled and a directory is being
+ * created. It happens through d_instantiate_new(), which calls
+ * smack_d_instantiate(), which in turn calls __vfs_setxattr() to
+ * set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the
+ * inode. We have already reserved space for the xattr and inode
+ * update at btrfs_mkdir(), so just use the transaction handle.
+ * We don't join or start a transaction, as that will reset the
+ * block_rsv of the handle and trigger a warning for the start
+ * case.
+ */
+ ASSERT(strncmp(name, XATTR_SECURITY_PREFIX,
+ XATTR_SECURITY_PREFIX_LEN) == 0);
+ trans = current->journal_info;
+ }
ret = btrfs_setxattr(trans, inode, name, value, size, flags);
if (ret)
@@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
BUG_ON(ret);
out:
- btrfs_end_transaction(trans);
+ if (start_trans)
+ btrfs_end_transaction(trans);
return ret;
}
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 05615a1099db..d524acf7b3e5 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -432,9 +432,8 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in,
PAGE_SIZE - (buf_offset % PAGE_SIZE));
bytes = min(bytes, bytes_left);
- kaddr = kmap_atomic(dest_page);
- memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
- kunmap_atomic(kaddr);
+ memcpy_to_page(dest_page, pg_offset,
+ workspace->buf + buf_offset, bytes);
pg_offset += bytes;
bytes_left -= bytes;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index d0eb0c8d6269..1f972b75a9ab 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -269,7 +269,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
sector_t sector = 0;
struct blk_zone *zones = NULL;
unsigned int i, nreported = 0, nr_zones;
- unsigned int zone_sectors;
+ sector_t zone_sectors;
char *model, *emulated;
int ret;
@@ -658,7 +658,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
u64 *bytenr_ret)
{
struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES];
- unsigned int zone_sectors;
+ sector_t zone_sectors;
u32 sb_zone;
int ret;
u8 zone_sectors_shift;
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
index 9a4871636c6c..8e9626d63976 100644
--- a/fs/btrfs/zstd.c
+++ b/fs/btrfs/zstd.c
@@ -688,10 +688,8 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
bytes = min_t(unsigned long, destlen - pg_offset,
workspace->out_buf.size - buf_offset);
- kaddr = kmap_atomic(dest_page);
- memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
- bytes);
- kunmap_atomic(kaddr);
+ memcpy_to_page(dest_page, pg_offset,
+ workspace->out_buf.dst + buf_offset, bytes);
pg_offset += bytes;
}
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index aa697ccfa9dc..3aedc484e440 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -133,11 +133,12 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
{
struct TCP_Server_Info *server = chan->server;
- seq_printf(m, "\t\tChannel %d Number of credits: %d Dialect 0x%x "
- "TCP status: %d Instance: %d Local Users To Server: %d "
- "SecMode: 0x%x Req On Wire: %d In Send: %d "
- "In MaxReq Wait: %d\n",
- i+1,
+ seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx"
+ "\n\t\tNumber of credits: %d Dialect 0x%x"
+ "\n\t\tTCP status: %d Instance: %d"
+ "\n\t\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d"
+ "\n\t\tIn Send: %d In MaxReq Wait: %d",
+ i+1, server->conn_id,
server->credits,
server->dialect,
server->tcpStatus,
@@ -227,7 +228,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- int i, j;
+ int c, i, j;
seq_puts(m,
"Display Internal CIFS Data Structures for Debugging\n"
@@ -275,14 +276,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
seq_putc(m, '\n');
seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
- seq_printf(m, "Servers:");
- i = 0;
+ seq_printf(m, "\nServers: ");
+
+ c = 0;
spin_lock(&cifs_tcp_ses_lock);
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
tcp_ses_list);
+ /* channel info will be printed as a part of sessions below */
+ if (server->is_channel)
+ continue;
+
+ c++;
+ seq_printf(m, "\n%d) ConnectionId: 0x%llx ",
+ c, server->conn_id);
+
+ if (server->hostname)
+ seq_printf(m, "Hostname: %s ", server->hostname);
#ifdef CONFIG_CIFS_SMB_DIRECT
if (!server->rdma)
goto skip_rdma;
@@ -362,46 +374,48 @@ skip_rdma:
if (server->posix_ext_supported)
seq_printf(m, " posix");
- i++;
+ if (server->rdma)
+ seq_printf(m, "\nRDMA ");
+ seq_printf(m, "\nTCP status: %d Instance: %d"
+ "\nLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d",
+ server->tcpStatus,
+ server->reconnect_instance,
+ server->srv_count,
+ server->sec_mode, in_flight(server));
+
+ seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d",
+ atomic_read(&server->in_send),
+ atomic_read(&server->num_waiters));
+
+ seq_printf(m, "\n\n\tSessions: ");
+ i = 0;
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
smb_ses_list);
+ i++;
if ((ses->serverDomain == NULL) ||
(ses->serverOS == NULL) ||
(ses->serverNOS == NULL)) {
- seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
- i, ses->serverName, ses->ses_count,
+ seq_printf(m, "\n\t%d) Address: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
+ i, ses->ip_addr, ses->ses_count,
ses->capabilities, ses->status);
if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
- seq_printf(m, "Guest\t");
+ seq_printf(m, "Guest ");
else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
- seq_printf(m, "Anonymous\t");
+ seq_printf(m, "Anonymous ");
} else {
seq_printf(m,
- "\n%d) Name: %s Domain: %s Uses: %d OS:"
- " %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB"
- " session status: %d ",
- i, ses->serverName, ses->serverDomain,
+ "\n\t%d) Name: %s Domain: %s Uses: %d OS: %s "
+ "\n\tNOS: %s\tCapability: 0x%x"
+ "\n\tSMB session status: %d ",
+ i, ses->ip_addr, ses->serverDomain,
ses->ses_count, ses->serverOS, ses->serverNOS,
ses->capabilities, ses->status);
}
- seq_printf(m,"Security type: %s\n",
+ seq_printf(m, "\n\tSecurity type: %s ",
get_security_type_str(server->ops->select_sectype(server, ses->sectype)));
- if (server->rdma)
- seq_printf(m, "RDMA\n\t");
- seq_printf(m, "TCP status: %d Instance: %d\n\tLocal Users To "
- "Server: %d SecMode: 0x%x Req On Wire: %d",
- server->tcpStatus,
- server->reconnect_instance,
- server->srv_count,
- server->sec_mode, in_flight(server));
-
- seq_printf(m, " In Send: %d In MaxReq Wait: %d",
- atomic_read(&server->in_send),
- atomic_read(&server->num_waiters));
-
/* dump session id helpful for use with network trace */
seq_printf(m, " SessionId: 0x%llx", ses->Suid);
if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
@@ -414,13 +428,13 @@ skip_rdma:
from_kuid(&init_user_ns, ses->cred_uid));
if (ses->chan_count > 1) {
- seq_printf(m, "\n\n\tExtra Channels: %zu\n",
+ seq_printf(m, "\n\n\tExtra Channels: %zu ",
ses->chan_count-1);
for (j = 1; j < ses->chan_count; j++)
cifs_dump_channel(m, j, &ses->chans[j]);
}
- seq_puts(m, "\n\n\tShares:");
+ seq_puts(m, "\n\n\tShares: ");
j = 0;
seq_printf(m, "\n\t%d) IPC: ", j);
@@ -437,38 +451,43 @@ skip_rdma:
cifs_debug_tcon(m, tcon);
}
- seq_puts(m, "\n\tMIDs:\n");
-
- spin_lock(&GlobalMid_Lock);
- list_for_each(tmp3, &server->pending_mid_q) {
- mid_entry = list_entry(tmp3, struct mid_q_entry,
- qhead);
- seq_printf(m, "\tState: %d com: %d pid:"
- " %d cbdata: %p mid %llu\n",
- mid_entry->mid_state,
- le16_to_cpu(mid_entry->command),
- mid_entry->pid,
- mid_entry->callback_data,
- mid_entry->mid);
- }
- spin_unlock(&GlobalMid_Lock);
-
spin_lock(&ses->iface_lock);
if (ses->iface_count)
- seq_printf(m, "\n\tServer interfaces: %zu\n",
+ seq_printf(m, "\n\n\tServer interfaces: %zu",
ses->iface_count);
for (j = 0; j < ses->iface_count; j++) {
struct cifs_server_iface *iface;
iface = &ses->iface_list[j];
- seq_printf(m, "\t%d)", j);
+ seq_printf(m, "\n\t%d)", j+1);
cifs_dump_iface(m, iface);
if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n");
}
spin_unlock(&ses->iface_lock);
}
+ if (i == 0)
+ seq_printf(m, "\n\t\t[NONE]");
+
+ seq_puts(m, "\n\n\tMIDs: ");
+ spin_lock(&GlobalMid_Lock);
+ list_for_each(tmp3, &server->pending_mid_q) {
+ mid_entry = list_entry(tmp3, struct mid_q_entry,
+ qhead);
+ seq_printf(m, "\n\tState: %d com: %d pid:"
+ " %d cbdata: %p mid %llu\n",
+ mid_entry->mid_state,
+ le16_to_cpu(mid_entry->command),
+ mid_entry->pid,
+ mid_entry->callback_data,
+ mid_entry->mid);
+ }
+ spin_unlock(&GlobalMid_Lock);
+ seq_printf(m, "\n--\n");
}
+ if (c == 0)
+ seq_printf(m, "\n\t[NONE]");
+
spin_unlock(&cifs_tcp_ses_lock);
seq_putc(m, '\n');
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
index d35f599aa00e..f2d730fffccb 100644
--- a/fs/cifs/cifs_swn.c
+++ b/fs/cifs/cifs_swn.c
@@ -272,7 +272,7 @@ static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
if (IS_ERR(share_name)) {
int ret;
- ret = PTR_ERR(net_name);
+ ret = PTR_ERR(share_name);
cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n",
__func__, tcon->treeName, ret);
kfree(net_name);
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 562913e2b3f2..9d29eb9660c2 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -267,10 +267,11 @@ is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group)
return true; /* well known sid found, uid returned */
}
-static void
+static __u16
cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
{
int i;
+ __u16 size = 1 + 1 + 6;
dst->revision = src->revision;
dst->num_subauth = min_t(u8, src->num_subauth, SID_MAX_SUB_AUTHORITIES);
@@ -278,6 +279,9 @@ cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
dst->authority[i] = src->authority[i];
for (i = 0; i < dst->num_subauth; ++i)
dst->sub_auth[i] = src->sub_auth[i];
+ size += (dst->num_subauth * 4);
+
+ return size;
}
static int
@@ -521,8 +525,11 @@ exit_cifs_idmap(void)
}
/* copy ntsd, owner sid, and group sid from a security descriptor to another */
-static void copy_sec_desc(const struct cifs_ntsd *pntsd,
- struct cifs_ntsd *pnntsd, __u32 sidsoffset)
+static __u32 copy_sec_desc(const struct cifs_ntsd *pntsd,
+ struct cifs_ntsd *pnntsd,
+ __u32 sidsoffset,
+ struct cifs_sid *pownersid,
+ struct cifs_sid *pgrpsid)
{
struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
@@ -536,19 +543,25 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
pnntsd->gsidoffset = cpu_to_le32(sidsoffset + sizeof(struct cifs_sid));
/* copy owner sid */
- owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+ if (pownersid)
+ owner_sid_ptr = pownersid;
+ else
+ owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->osidoffset));
nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
/* copy group sid */
- group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+ if (pgrpsid)
+ group_sid_ptr = pgrpsid;
+ else
+ group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
le32_to_cpu(pntsd->gsidoffset));
ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
sizeof(struct cifs_sid));
cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
- return;
+ return sidsoffset + (2 * sizeof(struct cifs_sid));
}
@@ -663,6 +676,25 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
return;
}
+static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct cifs_sid *psid)
+{
+ __u16 size = 1 + 1 + 2 + 4;
+
+ dst->type = src->type;
+ dst->flags = src->flags;
+ dst->access_req = src->access_req;
+
+ /* Check if there's a replacement sid specified */
+ if (psid)
+ size += cifs_copy_sid(&dst->sid, psid);
+ else
+ size += cifs_copy_sid(&dst->sid, &src->sid);
+
+ dst->size = cpu_to_le16(size);
+
+ return size;
+}
+
static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
const struct cifs_sid *psid, __u64 nmode,
umode_t bits, __u8 access_type,
@@ -907,29 +939,30 @@ unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace)
return ace_size;
}
-static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
- struct cifs_sid *pgrpsid, __u64 *pnmode, bool modefromsid)
+static void populate_new_aces(char *nacl_base,
+ struct cifs_sid *pownersid,
+ struct cifs_sid *pgrpsid,
+ __u64 *pnmode, u32 *pnum_aces, u16 *pnsize,
+ bool modefromsid)
{
- u16 size = 0;
- u32 num_aces = 0;
- struct cifs_acl *pnndacl;
__u64 nmode;
+ u32 num_aces = 0;
+ u16 nsize = 0;
__u64 user_mode;
__u64 group_mode;
__u64 other_mode;
__u64 deny_user_mode = 0;
__u64 deny_group_mode = 0;
bool sticky_set = false;
-
- pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl));
+ struct cifs_ace *pnntace = NULL;
nmode = *pnmode;
+ num_aces = *pnum_aces;
+ nsize = *pnsize;
if (modefromsid) {
- struct cifs_ace *pntace =
- (struct cifs_ace *)((char *)pnndacl + size);
-
- size += setup_special_mode_ACE(pntace, nmode);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += setup_special_mode_ACE(pnntace, nmode);
num_aces++;
goto set_size;
}
@@ -966,40 +999,170 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
sticky_set = true;
if (deny_user_mode) {
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- pownersid, deny_user_mode, 0700, ACCESS_DENIED, false);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pownersid, deny_user_mode,
+ 0700, ACCESS_DENIED, false);
num_aces++;
}
+
/* Group DENY ACE does not conflict with owner ALLOW ACE. Keep in preferred order*/
if (deny_group_mode && !(deny_group_mode & (user_mode >> 3))) {
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode,
+ 0070, ACCESS_DENIED, false);
num_aces++;
}
- size += fill_ace_for_sid((struct cifs_ace *) ((char *)pnndacl + size),
- pownersid, user_mode, 0700, ACCESS_ALLOWED, true);
+
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pownersid, user_mode,
+ 0700, ACCESS_ALLOWED, true);
num_aces++;
+
/* Group DENY ACE conflicts with owner ALLOW ACE. So keep it after. */
if (deny_group_mode && (deny_group_mode & (user_mode >> 3))) {
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode,
+ 0070, ACCESS_DENIED, false);
num_aces++;
}
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- pgrpsid, group_mode, 0070, ACCESS_ALLOWED, !sticky_set);
+
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, pgrpsid, group_mode,
+ 0070, ACCESS_ALLOWED, !sticky_set);
num_aces++;
- size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
- &sid_everyone, other_mode, 0007, ACCESS_ALLOWED, !sticky_set);
+
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+ nsize += fill_ace_for_sid(pnntace, &sid_everyone, other_mode,
+ 0007, ACCESS_ALLOWED, !sticky_set);
num_aces++;
set_size:
+ *pnum_aces = num_aces;
+ *pnsize = nsize;
+}
+
+static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
+ struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
+ struct cifs_sid *pnownersid, struct cifs_sid *pngrpsid)
+{
+ int i;
+ u16 size = 0;
+ struct cifs_ace *pntace = NULL;
+ char *acl_base = NULL;
+ u32 src_num_aces = 0;
+ u16 nsize = 0;
+ struct cifs_ace *pnntace = NULL;
+ char *nacl_base = NULL;
+ u16 ace_size = 0;
+
+ acl_base = (char *)pdacl;
+ size = sizeof(struct cifs_acl);
+ src_num_aces = le32_to_cpu(pdacl->num_aces);
+
+ nacl_base = (char *)pndacl;
+ nsize = sizeof(struct cifs_acl);
+
+ /* Go through all the ACEs */
+ for (i = 0; i < src_num_aces; ++i) {
+ pntace = (struct cifs_ace *) (acl_base + size);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
+ if (pnownersid && compare_sids(&pntace->sid, pownersid) == 0)
+ ace_size = cifs_copy_ace(pnntace, pntace, pnownersid);
+ else if (pngrpsid && compare_sids(&pntace->sid, pgrpsid) == 0)
+ ace_size = cifs_copy_ace(pnntace, pntace, pngrpsid);
+ else
+ ace_size = cifs_copy_ace(pnntace, pntace, NULL);
+
+ size += le16_to_cpu(pntace->size);
+ nsize += ace_size;
+ }
+
+ return nsize;
+}
+
+static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
+ struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
+ __u64 *pnmode, bool mode_from_sid)
+{
+ int i;
+ u16 size = 0;
+ struct cifs_ace *pntace = NULL;
+ char *acl_base = NULL;
+ u32 src_num_aces = 0;
+ u16 nsize = 0;
+ struct cifs_ace *pnntace = NULL;
+ char *nacl_base = NULL;
+ u32 num_aces = 0;
+ __u64 nmode;
+ bool new_aces_set = false;
+
+ /* Assuming that pndacl and pnmode are never NULL */
+ nmode = *pnmode;
+ nacl_base = (char *)pndacl;
+ nsize = sizeof(struct cifs_acl);
+
+ /* If pdacl is NULL, we don't have a src. Simply populate new ACL. */
+ if (!pdacl) {
+ populate_new_aces(nacl_base,
+ pownersid, pgrpsid,
+ pnmode, &num_aces, &nsize,
+ mode_from_sid);
+ goto finalize_dacl;
+ }
+
+ acl_base = (char *)pdacl;
+ size = sizeof(struct cifs_acl);
+ src_num_aces = le32_to_cpu(pdacl->num_aces);
+
+ /* Copy over the old ACEs that we want to retain */
+ for (i = 0; i < src_num_aces; ++i) {
+ pntace = (struct cifs_ace *) (acl_base + size);
+ pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
+ if (!new_aces_set && (pntace->flags & INHERITED_ACE)) {
+ /* Place the new ACEs between the existing explicit and inherited ones */
+ populate_new_aces(nacl_base,
+ pownersid, pgrpsid,
+ pnmode, &num_aces, &nsize,
+ mode_from_sid);
+
+ new_aces_set = true;
+ }
+
+ /* If it's any one of the ACEs we're replacing, skip! */
+ if ((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) ||
+ (compare_sids(&pntace->sid, pownersid) == 0) ||
+ (compare_sids(&pntace->sid, pgrpsid) == 0) ||
+ (compare_sids(&pntace->sid, &sid_everyone) == 0) ||
+ (compare_sids(&pntace->sid, &sid_authusers) == 0)) {
+ goto next_ace;
+ }
+
+ nsize += cifs_copy_ace(pnntace, pntace, NULL);
+ num_aces++;
+
+next_ace:
+ size += le16_to_cpu(pntace->size);
+ }
+
+ /* If inherited ACEs are not present, place the new ones at the tail */
+ if (!new_aces_set) {
+ populate_new_aces(nacl_base,
+ pownersid, pgrpsid,
+ pnmode, &num_aces, &nsize,
+ mode_from_sid);
+
+ new_aces_set = true;
+ }
+
+finalize_dacl:
pndacl->num_aces = cpu_to_le32(num_aces);
- pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
+ pndacl->size = cpu_to_le16(nsize);
return 0;
}
-
static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
{
/* BB need to add parm so we can store the SID BB */
@@ -1094,7 +1257,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
/* Convert permission bits from mode to equivalent CIFS ACL */
static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
- __u32 secdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
+ __u32 secdesclen, __u32 *pnsecdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
bool mode_from_sid, bool id_from_sid, int *aclflag)
{
int rc = 0;
@@ -1102,39 +1265,59 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
__u32 ndacloffset;
__u32 sidsoffset;
struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
- struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
+ struct cifs_sid *nowner_sid_ptr = NULL, *ngroup_sid_ptr = NULL;
struct cifs_acl *dacl_ptr = NULL; /* no need for SACL ptr */
struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
+ char *end_of_acl = ((char *)pntsd) + secdesclen;
+ u16 size = 0;
- if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
- owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
- le32_to_cpu(pntsd->osidoffset));
- group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
- le32_to_cpu(pntsd->gsidoffset));
- dacloffset = le32_to_cpu(pntsd->dacloffset);
+ dacloffset = le32_to_cpu(pntsd->dacloffset);
+ if (dacloffset) {
dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
+ if (end_of_acl < (char *)dacl_ptr + le16_to_cpu(dacl_ptr->size)) {
+ cifs_dbg(VFS, "Server returned illegal ACL size\n");
+ return -EINVAL;
+ }
+ }
+
+ owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+ le32_to_cpu(pntsd->osidoffset));
+ group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+ le32_to_cpu(pntsd->gsidoffset));
+
+ if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
ndacloffset = sizeof(struct cifs_ntsd);
ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
- ndacl_ptr->revision = dacl_ptr->revision;
- ndacl_ptr->size = 0;
- ndacl_ptr->num_aces = 0;
+ ndacl_ptr->revision =
+ dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
- rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr,
+ ndacl_ptr->size = cpu_to_le16(0);
+ ndacl_ptr->num_aces = cpu_to_le32(0);
+
+ rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr,
pnmode, mode_from_sid);
+
sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
- /* copy sec desc control portion & owner and group sids */
- copy_sec_desc(pntsd, pnntsd, sidsoffset);
- *aclflag = CIFS_ACL_DACL;
+ /* copy the non-dacl portion of secdesc */
+ *pnsecdesclen = copy_sec_desc(pntsd, pnntsd, sidsoffset,
+ NULL, NULL);
+
+ *aclflag |= CIFS_ACL_DACL;
} else {
- memcpy(pnntsd, pntsd, secdesclen);
+ ndacloffset = sizeof(struct cifs_ntsd);
+ ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
+ ndacl_ptr->revision =
+ dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
+ ndacl_ptr->num_aces = dacl_ptr->num_aces;
+
if (uid_valid(uid)) { /* chown */
uid_t id;
- owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
- le32_to_cpu(pnntsd->osidoffset));
nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
GFP_KERNEL);
- if (!nowner_sid_ptr)
- return -ENOMEM;
+ if (!nowner_sid_ptr) {
+ rc = -ENOMEM;
+ goto chown_chgrp_exit;
+ }
id = from_kuid(&init_user_ns, uid);
if (id_from_sid) {
struct owner_sid *osid = (struct owner_sid *)nowner_sid_ptr;
@@ -1145,27 +1328,25 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
osid->SubAuthorities[0] = cpu_to_le32(88);
osid->SubAuthorities[1] = cpu_to_le32(1);
osid->SubAuthorities[2] = cpu_to_le32(id);
+
} else { /* lookup sid with upcall */
rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr);
if (rc) {
cifs_dbg(FYI, "%s: Mapping error %d for owner id %d\n",
__func__, rc, id);
- kfree(nowner_sid_ptr);
- return rc;
+ goto chown_chgrp_exit;
}
}
- cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
- kfree(nowner_sid_ptr);
- *aclflag = CIFS_ACL_OWNER;
+ *aclflag |= CIFS_ACL_OWNER;
}
if (gid_valid(gid)) { /* chgrp */
gid_t id;
- group_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
- le32_to_cpu(pnntsd->gsidoffset));
ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
GFP_KERNEL);
- if (!ngroup_sid_ptr)
- return -ENOMEM;
+ if (!ngroup_sid_ptr) {
+ rc = -ENOMEM;
+ goto chown_chgrp_exit;
+ }
id = from_kgid(&init_user_ns, gid);
if (id_from_sid) {
struct owner_sid *gsid = (struct owner_sid *)ngroup_sid_ptr;
@@ -1176,19 +1357,35 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
gsid->SubAuthorities[0] = cpu_to_le32(88);
gsid->SubAuthorities[1] = cpu_to_le32(2);
gsid->SubAuthorities[2] = cpu_to_le32(id);
+
} else { /* lookup sid with upcall */
rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr);
if (rc) {
cifs_dbg(FYI, "%s: Mapping error %d for group id %d\n",
__func__, rc, id);
- kfree(ngroup_sid_ptr);
- return rc;
+ goto chown_chgrp_exit;
}
}
- cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
- kfree(ngroup_sid_ptr);
- *aclflag = CIFS_ACL_GROUP;
+ *aclflag |= CIFS_ACL_GROUP;
+ }
+
+ if (dacloffset) {
+ /* Replace ACEs for old owner with new one */
+ size = replace_sids_and_copy_aces(dacl_ptr, ndacl_ptr,
+ owner_sid_ptr, group_sid_ptr,
+ nowner_sid_ptr, ngroup_sid_ptr);
+ ndacl_ptr->size = cpu_to_le16(size);
}
+
+ sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
+ /* copy the non-dacl portion of secdesc */
+ *pnsecdesclen = copy_sec_desc(pntsd, pnntsd, sidsoffset,
+ nowner_sid_ptr, ngroup_sid_ptr);
+
+chown_chgrp_exit:
+ /* errors could jump here. So make sure we return soon after this */
+ kfree(nowner_sid_ptr);
+ kfree(ngroup_sid_ptr);
}
return rc;
@@ -1384,6 +1581,9 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
int rc = 0;
int aclflag = CIFS_ACL_DACL; /* default flag to set */
__u32 secdesclen = 0;
+ __u32 nsecdesclen = 0;
+ __u32 dacloffset = 0;
+ struct cifs_acl *dacl_ptr = NULL;
struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -1414,31 +1614,52 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
return rc;
}
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
+ mode_from_sid = true;
+ else
+ mode_from_sid = false;
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
+ id_from_sid = true;
+ else
+ id_from_sid = false;
+
+ /* Potentially, five new ACEs can be added to the ACL for U,G,O mapping */
+ nsecdesclen = secdesclen;
+ if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
+ if (mode_from_sid)
+ nsecdesclen += sizeof(struct cifs_ace);
+ else /* cifsacl */
+ nsecdesclen += 5 * sizeof(struct cifs_ace);
+ } else { /* chown */
+ /* When ownership changes, the new owner sid length could be different */
+ nsecdesclen = sizeof(struct cifs_ntsd) + (sizeof(struct cifs_sid) * 2);
+ dacloffset = le32_to_cpu(pntsd->dacloffset);
+ if (dacloffset) {
+ dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
+ if (mode_from_sid)
+ nsecdesclen +=
+ le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct cifs_ace);
+ else /* cifsacl */
+ nsecdesclen += le16_to_cpu(dacl_ptr->size);
+ }
+ }
+
/*
* Add three ACEs for owner, group, everyone, getting rid of other ACEs
* as chmod disables ACEs and sets the security descriptor. Allocate
* memory for the smb header, set security descriptor request security
* descriptor parameters, and the security descriptor itself
*/
- secdesclen = max_t(u32, secdesclen, DEFAULT_SEC_DESC_LEN);
- pnntsd = kmalloc(secdesclen, GFP_KERNEL);
+ nsecdesclen = max_t(u32, nsecdesclen, DEFAULT_SEC_DESC_LEN);
+ pnntsd = kmalloc(nsecdesclen, GFP_KERNEL);
if (!pnntsd) {
kfree(pntsd);
cifs_put_tlink(tlink);
return -ENOMEM;
}
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
- mode_from_sid = true;
- else
- mode_from_sid = false;
-
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
- id_from_sid = true;
- else
- id_from_sid = false;
-
- rc = build_sec_desc(pntsd, pnntsd, secdesclen, pnmode, uid, gid,
+ rc = build_sec_desc(pntsd, pnntsd, secdesclen, &nsecdesclen, pnmode, uid, gid,
mode_from_sid, id_from_sid, &aclflag);
cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
@@ -1448,7 +1669,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
if (!rc) {
/* Set the security descriptor */
- rc = ops->set_acl(pnntsd, secdesclen, inode, path, aclflag);
+ rc = ops->set_acl(pnntsd, nsecdesclen, inode, path, aclflag);
cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
}
cifs_put_tlink(tlink);
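The sizes returned by cifs_copy_sid() and cifs_copy_ace() above follow the wire layout: a SID is 1 byte of revision, 1 byte of sub-authority count, 6 bytes of authority and 4 bytes per sub-authority, and an ACE prepends a 1 + 1 + 2 + 4 byte header. A worked example with a typical five-sub-authority SID:

#include <stdio.h>

int main(void)
{
	/* Wire layout used by the helpers above: revision (1) +
	 * sub-authority count (1) + authority (6) + 4 bytes per
	 * sub-authority; an ACE adds type/flags/size/access_req
	 * (1 + 1 + 2 + 4) in front of its SID. */
	unsigned int num_subauth = 5;	/* e.g. S-1-5-21-x-y-z-rid */
	unsigned int sid_size = 1 + 1 + 6 + 4 * num_subauth;
	unsigned int ace_size = 1 + 1 + 2 + 4 + sid_size;

	printf("sid %u bytes, ace %u bytes\n", sid_size, ace_size);
	/* prints: sid 28 bytes, ace 36 bytes */
	return 0;
}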
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index ff7fd0862e28..d9e704979d99 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -31,8 +31,8 @@
#define EXEC_BIT 0x1
#define ACL_OWNER_MASK 0700
-#define ACL_GROUP_MASK 0770
-#define ACL_EVERYONE_MASK 0777
+#define ACL_GROUP_MASK 0070
+#define ACL_EVERYONE_MASK 0007
#define UBITSHIFT 6
#define GBITSHIFT 3
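The corrected masks isolate one rwx triplet each; with the old 0770/0777 values the group and everyone ACEs also absorbed the higher triplets. A quick demonstration with a made-up mode:

#include <stdio.h>

#define ACL_OWNER_MASK		0700
#define ACL_GROUP_MASK		0070
#define ACL_EVERYONE_MASK	0007

int main(void)
{
	unsigned int mode = 0751;	/* rwxr-x--x, made-up example */

	/* With the old 0770/0777 masks, the group ACE would also
	 * carry the owner bits and the everyone ACE all the bits. */
	printf("owner %03o group %03o other %03o\n",
	       mode & ACL_OWNER_MASK,
	       mode & ACL_GROUP_MASK,
	       mode & ACL_EVERYONE_MASK);	/* 700 050 001 */
	return 0;
}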
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 51d53e4bdf6b..b8f1ff9a83f3 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -568,15 +568,15 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
return rc;
}
} else {
- /* We use ses->serverName if no domain name available */
- len = strlen(ses->serverName);
+ /* We use ses->ip_addr if no domain name available */
+ len = strlen(ses->ip_addr);
server = kmalloc(2 + (len * 2), GFP_KERNEL);
if (server == NULL) {
rc = -ENOMEM;
return rc;
}
- len = cifs_strtoUTF16((__le16 *)server, ses->serverName, len,
+ len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len,
nls_cp);
rc =
crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 38534e066cc7..d43e935d2df4 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -638,8 +638,18 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
if (tcon->handle_timeout)
seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
- /* convert actimeo and display it in seconds */
- seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->actimeo / HZ);
+
+ /*
+ * Display file and directory attribute timeout in seconds.
+ * If file and directory attribute timeout the same then actimeo
+ * was likely specified on mount
+ */
+ if (cifs_sb->ctx->acdirmax == cifs_sb->ctx->acregmax)
+ seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->acregmax / HZ);
+ else {
+ seq_printf(s, ",acdirmax=%lu", cifs_sb->ctx->acdirmax / HZ);
+ seq_printf(s, ",acregmax=%lu", cifs_sb->ctx->acregmax / HZ);
+ }
if (tcon->ses->chan_max > 1)
seq_printf(s, ",multichannel,max_channels=%zu",
@@ -1526,6 +1536,7 @@ init_cifs(void)
*/
atomic_set(&sesInfoAllocCount, 0);
atomic_set(&tconInfoAllocCount, 0);
+ atomic_set(&tcpSesNextId, 0);
atomic_set(&tcpSesAllocCount, 0);
atomic_set(&tcpSesReconnectCount, 0);
atomic_set(&tconInfoReconnectCount, 0);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 71e9c6abb2a6..0d7ef150dbb2 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -165,5 +165,5 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.30"
+#define CIFS_VERSION "2.31"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 50fcb65920e8..3de3c5908a72 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -21,6 +21,7 @@
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/inet.h>
#include <linux/slab.h>
#include <linux/mempool.h>
#include <linux/workqueue.h>
@@ -504,6 +505,8 @@ struct smb_version_operations {
loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int);
/* Check for STATUS_IO_TIMEOUT */
bool (*is_status_io_timeout)(char *buf);
+ /* Check for STATUS_NETWORK_NAME_DELETED */
+ void (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
};
struct smb_version_values {
@@ -577,6 +580,7 @@ inc_rfc1001_len(void *buf, int count)
struct TCP_Server_Info {
struct list_head tcp_ses_list;
struct list_head smb_ses_list;
+ __u64 conn_id; /* connection identifier (useful for debugging) */
int srv_count; /* reference counter */
/* 15 character server name + 0x20 16th byte indicating type = srv */
char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
@@ -901,7 +905,7 @@ struct cifs_ses {
kuid_t linux_uid; /* overriding owner of files on the mount */
kuid_t cred_uid; /* owner of credentials */
unsigned int capabilities;
- char serverName[SERVER_NAME_LEN_WITH_NULL];
+ char ip_addr[INET6_ADDRSTRLEN + 1]; /* Max ipv6 (or v4) addr string len */
char *user_name; /* must not be null except during init of sess
and after mount option parsing we fill it */
char *domainName;
@@ -1704,7 +1708,9 @@ static inline bool is_retryable_error(int error)
#define CIFS_ECHO_OP 0x080 /* echo request */
#define CIFS_OBREAK_OP 0x0100 /* oplock break request */
#define CIFS_NEG_OP 0x0200 /* negotiate request */
-#define CIFS_OP_MASK 0x0380 /* mask request type */
+/* Lower bitmask values are reserved by others below. */
+#define CIFS_SESS_OP 0x2000 /* session setup request */
+#define CIFS_OP_MASK 0x2380 /* mask request type */
#define CIFS_HAS_CREDITS 0x0400 /* already has credits */
#define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */
@@ -1844,6 +1850,7 @@ GLOBAL_EXTERN spinlock_t GlobalMid_Lock; /* protects above & list operations */
*/
GLOBAL_EXTERN atomic_t sesInfoAllocCount;
GLOBAL_EXTERN atomic_t tconInfoAllocCount;
+GLOBAL_EXTERN atomic_t tcpSesNextId;
GLOBAL_EXTERN atomic_t tcpSesAllocCount;
GLOBAL_EXTERN atomic_t tcpSesReconnectCount;
GLOBAL_EXTERN atomic_t tconInfoReconnectCount;
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 32f7a013402e..75ce6f742b8d 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -232,6 +232,8 @@ extern unsigned int setup_special_user_owner_ACE(struct cifs_ace *pace);
extern void dequeue_mid(struct mid_q_entry *mid, bool malformed);
extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
unsigned int to_read);
+extern ssize_t cifs_discard_from_socket(struct TCP_Server_Info *server,
+ size_t to_read);
extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
struct page *page,
unsigned int page_offset,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 0496934feecb..c279527aae92 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1451,9 +1451,9 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server)
while (remaining > 0) {
int length;
- length = cifs_read_from_socket(server, server->bigbuf,
- min_t(unsigned int, remaining,
- CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
+ length = cifs_discard_from_socket(server,
+ min_t(size_t, remaining,
+ CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
if (length < 0)
return length;
server->total_read += length;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 4bb9decbbf27..112692300fb6 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -242,7 +242,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
server->max_read = 0;
cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
- trace_smb3_reconnect(server->CurrentMid, server->hostname);
+ trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
/* before reconnecting the tcp session, mark the smb session (uid)
and the tid bad so they are not used until reconnected */
@@ -564,6 +564,23 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
return cifs_readv_from_socket(server, &smb_msg);
}
+ssize_t
+cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
+{
+ struct msghdr smb_msg;
+
+ /*
+ * iov_iter_discard() already sets smb_msg.type, count, and iov_offset,
+ * and cifs_readv_from_socket() sets msg_control and msg_controllen,
+ * so there is little left to initialize in struct msghdr.
+ */
+ smb_msg.msg_name = NULL;
+ smb_msg.msg_namelen = 0;
+ iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
+
+ return cifs_readv_from_socket(server, &smb_msg);
+}
+
int
cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
unsigned int page_offset, unsigned int to_read)
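For contrast, here is a userspace analogue of what the new cifs_discard_from_socket() achieves with iov_iter_discard(): consume exactly to_read bytes from a descriptor without allocating a payload-sized buffer. This is only a sketch; the kernel path reuses cifs_readv_from_socket() with a discarding iterator rather than looping over read(2).

#include <sys/types.h>
#include <unistd.h>

/* Drain and discard to_read bytes from fd using a small scratch buffer. */
static ssize_t discard_from_fd(int fd, size_t to_read)
{
	char scratch[4096];
	size_t total = 0;

	while (total < to_read) {
		size_t chunk = to_read - total;
		ssize_t n;

		if (chunk > sizeof(scratch))
			chunk = sizeof(scratch);
		n = read(fd, scratch, chunk);
		if (n < 0)
			return -1;   /* propagate the read error */
		if (n == 0)
			break;       /* EOF before to_read bytes arrived */
		total += (size_t)n;
	}
	return (ssize_t)total;
}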
@@ -846,7 +863,7 @@ static void
smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
{
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer;
- int scredits = server->credits;
+ int scredits, in_flight;
/*
* SMB1 does not use credits.
@@ -857,12 +874,14 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
if (shdr->CreditRequest) {
spin_lock(&server->req_lock);
server->credits += le16_to_cpu(shdr->CreditRequest);
+ scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits,
- le16_to_cpu(shdr->CreditRequest));
+ server->conn_id, server->hostname, scredits,
+ le16_to_cpu(shdr->CreditRequest), in_flight);
cifs_server_dbg(FYI, "%s: added %u credits total=%d\n",
__func__, le16_to_cpu(shdr->CreditRequest),
scredits);
@@ -993,6 +1012,10 @@ next_pdu:
if (mids[i] != NULL) {
mids[i]->resp_buf_size = server->pdu_size;
+ if (bufs[i] && server->ops->is_network_name_deleted)
+ server->ops->is_network_name_deleted(bufs[i],
+ server);
+
if (!mids[i]->multiRsp || mids[i]->multiEnd)
mids[i]->callback(mids[i]);
@@ -1317,6 +1340,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
goto out_err_crypto_release;
}
+ tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
tcp_ses->noblockcnt = ctx->rootfs;
tcp_ses->noblocksnd = ctx->noblocksnd || ctx->rootfs;
tcp_ses->noautotune = ctx->noautotune;
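The conn_id assigned above is nothing more than a global monotonically increasing counter sampled once per TCP session, so trace lines from different connections can be told apart. A minimal C11 sketch of the same idea (tcpSesNextId and atomic_inc_return() are the kernel names; the stdatomic calls below are stand-ins):

#include <stdatomic.h>
#include <stdint.h>

static atomic_uint_fast64_t next_conn_id;  /* mirrors tcpSesNextId */

static uint64_t new_conn_id(void)
{
	/* atomic_fetch_add() returns the old value; add 1 to match
	 * the kernel's atomic_inc_return() semantics. */
	return atomic_fetch_add(&next_conn_id, 1) + 1;
}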
@@ -1838,9 +1862,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
/* new SMB session uses our server ref */
ses->server = server;
if (server->dstaddr.ss_family == AF_INET6)
- sprintf(ses->serverName, "%pI6", &addr6->sin6_addr);
+ sprintf(ses->ip_addr, "%pI6", &addr6->sin6_addr);
else
- sprintf(ses->serverName, "%pI4", &addr->sin_addr);
+ sprintf(ses->ip_addr, "%pI4", &addr->sin_addr);
if (ctx->username) {
ses->user_name = kstrdup(ctx->username, GFP_KERNEL);
@@ -2269,7 +2293,9 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
if (strcmp(old->local_nls->charset, new->local_nls->charset))
return 0;
- if (old->ctx->actimeo != new->ctx->actimeo)
+ if (old->ctx->acregmax != new->ctx->acregmax)
+ return 0;
+ if (old->ctx->acdirmax != new->ctx->acdirmax)
return 0;
return 1;
@@ -2911,7 +2937,7 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
#ifdef CONFIG_CIFS_DFS_UPCALL
/*
* cifs_build_path_to_root returns full path to root when we do not have an
- * exiting connection (tcon)
+ * existing connection (tcon)
*/
static char *
build_unc_path_to_root(const struct smb3_fs_context *ctx,
@@ -3038,96 +3064,91 @@ static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
return 0;
}
-static int setup_dfs_tgt_conn(const char *path, const char *full_path,
- const struct dfs_cache_tgt_iterator *tgt_it,
- struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
- unsigned int *xid, struct TCP_Server_Info **server,
- struct cifs_ses **ses, struct cifs_tcon **tcon)
-{
- int rc;
- struct dfs_info3_param ref = {0};
- char *mdata = NULL;
- struct smb3_fs_context fake_ctx = {NULL};
- char *fake_devname = NULL;
-
- cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
-
- rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
- if (rc)
- return rc;
-
- mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
- full_path + 1, &ref,
- &fake_devname);
- free_dfs_info_param(&ref);
-
- if (IS_ERR(mdata)) {
- rc = PTR_ERR(mdata);
- mdata = NULL;
- } else
- rc = cifs_setup_volume_info(&fake_ctx, mdata, fake_devname);
-
- kfree(mdata);
- kfree(fake_devname);
-
- if (!rc) {
- /*
- * We use a 'fake_ctx' here because we need pass it down to the
- * mount_{get,put} functions to test connection against new DFS
- * targets.
- */
- mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
- rc = mount_get_conns(&fake_ctx, cifs_sb, xid, server, ses,
- tcon);
- if (!rc || (*server && *ses)) {
- /*
- * We were able to connect to new target server.
- * Update current context with new target server.
- */
- rc = update_vol_info(tgt_it, &fake_ctx, ctx);
- }
- }
- smb3_cleanup_fs_context_contents(&fake_ctx);
- return rc;
-}
-
static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
unsigned int *xid, struct TCP_Server_Info **server,
struct cifs_ses **ses, struct cifs_tcon **tcon)
{
int rc;
- struct dfs_cache_tgt_list tgt_list;
+ struct dfs_cache_tgt_list tgt_list = {0};
struct dfs_cache_tgt_iterator *tgt_it = NULL;
+ struct smb3_fs_context tmp_ctx = {NULL};
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
return -EOPNOTSUPP;
+ cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, path, full_path);
+
rc = dfs_cache_noreq_find(path, NULL, &tgt_list);
if (rc)
return rc;
+ /*
+ * We use a 'tmp_ctx' here because we need to pass it down to the mount_{get,put} functions
+ * to test connections against new DFS targets.
+ */
+ rc = smb3_fs_context_dup(&tmp_ctx, ctx);
+ if (rc)
+ goto out;
for (;;) {
+ struct dfs_info3_param ref = {0};
+ char *fake_devname = NULL, *mdata = NULL;
+
/* Get next DFS target server - if any */
rc = get_next_dfs_tgt(path, &tgt_list, &tgt_it);
if (rc)
break;
- /* Connect to next DFS target */
- rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx, xid, server, ses,
- tcon);
- if (!rc || (*server && *ses))
+
+ rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
+ if (rc)
+ break;
+
+ cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+ tmp_ctx.prepath);
+
+ mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref,
+ &fake_devname);
+ free_dfs_info_param(&ref);
+
+ if (IS_ERR(mdata)) {
+ rc = PTR_ERR(mdata);
+ mdata = NULL;
+ } else
+ rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname);
+
+ kfree(mdata);
+ kfree(fake_devname);
+
+ if (rc)
break;
+
+ cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+ tmp_ctx.prepath);
+
+ mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
+ rc = mount_get_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
+ if (!rc || (*server && *ses)) {
+ /*
+ * We were able to connect to new target server. Update current context with
+ * new target server.
+ */
+ rc = update_vol_info(tgt_it, &tmp_ctx, ctx);
+ break;
+ }
}
if (!rc) {
+ cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+ tmp_ctx.prepath);
/*
- * Update DFS target hint in DFS referral cache with the target
- * server we successfully reconnected to.
+ * Update DFS target hint in DFS referral cache with the target server we
+ * successfully reconnected to.
*/
- rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses,
- cifs_sb->local_nls,
- cifs_remap(cifs_sb), path,
- tgt_it);
+ rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls,
+ cifs_remap(cifs_sb), path, tgt_it);
}
+
+out:
+ smb3_cleanup_fs_context_contents(&tmp_ctx);
dfs_cache_free_tgts(&tgt_list);
return rc;
}
@@ -3285,77 +3306,77 @@ static void put_root_ses(struct cifs_ses *ses)
cifs_put_smb_ses(ses);
}
-/* Check if a path component is remote and then update @dfs_path accordingly */
-static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
- const unsigned int xid, struct TCP_Server_Info *server,
- struct cifs_tcon *tcon, char **dfs_path)
+/* Set up next dfs prefix path in @dfs_path */
+static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
+ const unsigned int xid, struct TCP_Server_Info *server,
+ struct cifs_tcon *tcon, char **dfs_path)
{
- char *path, *s;
- char sep = CIFS_DIR_SEP(cifs_sb), tmp;
- char *npath;
- int rc = 0;
- int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS;
- int skip = added_treename;
+ char *path, *npath;
+ int added_treename = is_tcon_dfs(tcon);
+ int rc;
path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
if (!path)
return -ENOMEM;
- /*
- * Walk through the path components in @path and check if they're accessible. In case any of
- * the components is -EREMOTE, then update @dfs_path with the next DFS referral request path
- * (NOT including the remaining components).
- */
- s = path;
- do {
- /* skip separators */
- while (*s && *s == sep)
- s++;
- if (!*s)
- break;
- /* next separator */
- while (*s && *s != sep)
- s++;
- /*
- * if the treename is added, we then have to skip the first
- * part within the separators
- */
- if (skip) {
- skip = 0;
- continue;
+ rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+ if (rc == -EREMOTE) {
+ struct smb3_fs_context v = {NULL};
+ /* if @path contains a tree name, skip it in the prefix path */
+ if (added_treename) {
+ rc = smb3_parse_devname(path, &v);
+ if (rc)
+ goto out;
+ npath = build_unc_path_to_root(&v, cifs_sb, true);
+ smb3_cleanup_fs_context_contents(&v);
+ } else {
+ v.UNC = ctx->UNC;
+ v.prepath = path + 1;
+ npath = build_unc_path_to_root(&v, cifs_sb, true);
}
- tmp = *s;
- *s = 0;
- rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path);
- if (rc && rc == -EREMOTE) {
- struct smb3_fs_context v = {NULL};
- /* if @path contains a tree name, skip it in the prefix path */
- if (added_treename) {
- rc = smb3_parse_devname(path, &v);
- if (rc)
- break;
- rc = -EREMOTE;
- npath = build_unc_path_to_root(&v, cifs_sb, true);
- smb3_cleanup_fs_context_contents(&v);
- } else {
- v.UNC = ctx->UNC;
- v.prepath = path + 1;
- npath = build_unc_path_to_root(&v, cifs_sb, true);
- }
- if (IS_ERR(npath)) {
- rc = PTR_ERR(npath);
- break;
- }
- kfree(*dfs_path);
- *dfs_path = npath;
+
+ if (IS_ERR(npath)) {
+ rc = PTR_ERR(npath);
+ goto out;
}
- *s = tmp;
- } while (rc == 0);
+ kfree(*dfs_path);
+ *dfs_path = npath;
+ rc = -EREMOTE;
+ }
+
+out:
kfree(path);
return rc;
}
+/* Check if resolved targets can handle any DFS referrals */
+static int is_referral_server(const char *ref_path, struct cifs_tcon *tcon, bool *ref_server)
+{
+ int rc;
+ struct dfs_info3_param ref = {0};
+
+ if (is_tcon_dfs(tcon)) {
+ *ref_server = true;
+ } else {
+ cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
+
+ rc = dfs_cache_noreq_find(ref_path, &ref, NULL);
+ if (rc) {
+ cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc);
+ return rc;
+ }
+ cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags);
+ /*
+ * Check if all targets are capable of handling DFS referrals as per
+ * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
+ */
+ *ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER);
+ free_dfs_info_param(&ref);
+ }
+ return 0;
+}
+
int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
{
int rc = 0;
@@ -3367,18 +3388,19 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
char *ref_path = NULL, *full_path = NULL;
char *oldmnt = NULL;
char *mntdata = NULL;
+ bool ref_server = false;
rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
/*
- * Unconditionally try to get an DFS referral (even cached) to determine whether it is an
- * DFS mount.
+ * If called with the 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally
+ * try to get a DFS referral (even cached) to determine whether it is a DFS mount.
*
* Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
* to respond with PATH_NOT_COVERED to requests that include the prefix.
*/
- if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
+ dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
NULL)) {
- /* No DFS referral was returned. Looks like a regular share. */
if (rc)
goto error;
/* Check if it is fully accessible and then mount it */
@@ -3432,13 +3454,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
break;
if (!tcon)
continue;
+
/* Make sure that requests go through new root servers */
- if (is_tcon_dfs(tcon)) {
+ rc = is_referral_server(ref_path + 1, tcon, &ref_server);
+ if (rc)
+ break;
+ if (ref_server) {
put_root_ses(root_ses);
set_root_ses(cifs_sb, ses, &root_ses);
}
- /* Check for remaining path components and then continue chasing them (-EREMOTE) */
- rc = check_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
+
+ /* Get the next DFS path and then continue chasing it if -EREMOTE */
+ rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
/* Prevent recursion on broken link referrals */
if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
rc = -ELOOP;
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 4950ab0486ae..098b4bc8da59 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -37,11 +37,12 @@ struct cache_dfs_tgt {
struct cache_entry {
struct hlist_node hlist;
const char *path;
- int ttl;
- int srvtype;
- int flags;
+ int hdr_flags; /* RESP_GET_DFS_REFERRAL.ReferralHeaderFlags */
+ int ttl; /* DFS_REREFERRAL_V3.TimeToLive */
+ int srvtype; /* DFS_REREFERRAL_V3.ServerType */
+ int ref_flags; /* DFS_REREFERRAL_V3.ReferralEntryFlags */
struct timespec64 etime;
- int path_consumed;
+ int path_consumed; /* RESP_GET_DFS_REFERRAL.PathConsumed */
int numtgts;
struct list_head tlist;
struct cache_dfs_tgt *tgthint;
@@ -166,14 +167,11 @@ static int dfscache_proc_show(struct seq_file *m, void *v)
continue;
seq_printf(m,
- "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,"
- "interlink=%s,path_consumed=%d,expired=%s\n",
- ce->path,
- ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
- ce->ttl, ce->etime.tv_nsec,
- IS_INTERLINK_SET(ce->flags) ? "yes" : "no",
- ce->path_consumed,
- cache_entry_expired(ce) ? "yes" : "no");
+ "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
+ ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
+ ce->ttl, ce->etime.tv_nsec, ce->hdr_flags, ce->ref_flags,
+ IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
+ ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no");
list_for_each_entry(t, &ce->tlist, list) {
seq_printf(m, " %s%s\n",
@@ -236,11 +234,12 @@ static inline void dump_tgts(const struct cache_entry *ce)
static inline void dump_ce(const struct cache_entry *ce)
{
- cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,interlink=%s,path_consumed=%d,expired=%s\n",
+ cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
ce->path,
ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ttl,
ce->etime.tv_nsec,
- IS_INTERLINK_SET(ce->flags) ? "yes" : "no",
+ ce->hdr_flags, ce->ref_flags,
+ IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
ce->path_consumed,
cache_entry_expired(ce) ? "yes" : "no");
dump_tgts(ce);
@@ -381,7 +380,8 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
ce->ttl = refs[0].ttl;
ce->etime = get_expire_time(ce->ttl);
ce->srvtype = refs[0].server_type;
- ce->flags = refs[0].ref_flag;
+ ce->hdr_flags = refs[0].flags;
+ ce->ref_flags = refs[0].ref_flag;
ce->path_consumed = refs[0].path_consumed;
for (i = 0; i < numrefs; i++) {
@@ -799,7 +799,8 @@ static int setup_referral(const char *path, struct cache_entry *ce,
ref->path_consumed = ce->path_consumed;
ref->ttl = ce->ttl;
ref->server_type = ce->srvtype;
- ref->ref_flag = ce->flags;
+ ref->ref_flag = ce->ref_flags;
+ ref->flags = ce->hdr_flags;
return 0;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6d001905c8e5..26de4329d161 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -580,7 +580,7 @@ int cifs_open(struct inode *inode, struct file *file)
} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
if (tcon->ses->serverNOS)
cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
- tcon->ses->serverName,
+ tcon->ses->ip_addr,
tcon->ses->serverNOS);
tcon->broken_posix_open = true;
} else if ((rc != -EIO) && (rc != -EREMOTE) &&
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 12a5da0230b5..892f51a21278 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -140,6 +140,8 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
fsparam_u32("rsize", Opt_rsize),
fsparam_u32("wsize", Opt_wsize),
fsparam_u32("actimeo", Opt_actimeo),
+ fsparam_u32("acdirmax", Opt_acdirmax),
+ fsparam_u32("acregmax", Opt_acregmax),
fsparam_u32("echo_interval", Opt_echo_interval),
fsparam_u32("max_credits", Opt_max_credits),
fsparam_u32("handletimeout", Opt_handletimeout),
@@ -397,7 +399,7 @@ cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
ctx->vals = &smb3any_values;
break;
case Smb_default:
- ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+ ctx->ops = &smb30_operations;
ctx->vals = &smbdefault_values;
break;
default:
@@ -542,20 +544,37 @@ static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
/* BB Need to add support for sep= here TBD */
while ((key = strsep(&options, ",")) != NULL) {
- if (*key) {
- size_t v_len = 0;
- char *value = strchr(key, '=');
-
- if (value) {
- if (value == key)
- continue;
- *value++ = 0;
- v_len = strlen(value);
- }
- ret = vfs_parse_fs_string(fc, key, value, v_len);
- if (ret < 0)
- break;
+ size_t len;
+ char *value;
+
+ if (*key == 0)
+ break;
+
+ /* Check if the following character is the delimiter. If yes,
+ * we have encountered a doubled delimiter (an escaped comma):
+ * splice the remainder back onto the key, restoring the comma
+ * that strsep() replaced with a NUL.
+ */
+ while (options && options[0] == ',') {
+ len = strlen(key);
+ strcpy(key + len, options);
+ options = strchr(options, ',');
+ if (options)
+ *options++ = 0;
+ }
+
+ len = 0;
+ value = strchr(key, '=');
+ if (value) {
+ if (value == key)
+ continue;
+ *value++ = 0;
+ len = strlen(value);
}
+
+ ret = vfs_parse_fs_string(fc, key, value, len);
+ if (ret < 0)
+ break;
}
return ret;
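A standalone sketch of the doubled-delimiter handling introduced above, runnable in userspace. parse_opts() is illustrative, not the kernel function; strsep() is the glibc/BSD extension the kernel code mirrors, and the buffer must be writable because, like the kernel code, the parser splices tokens back together in place. For input "pass=a,,b,ver=3" it yields the tokens "pass=a,b" and "ver=3".

#include <stdio.h>
#include <string.h>

static void parse_opts(char *options)  /* e.g. a strdup() of the option string */
{
	char *key;

	while ((key = strsep(&options, ",")) != NULL) {
		/* A leading ',' in the remainder means the token ended on a
		 * doubled delimiter: rejoin to restore the literal comma. */
		while (options && options[0] == ',') {
			size_t len = strlen(key);

			strcpy(key + len, options);  /* overwrite strsep's NUL */
			options = strchr(options, ',');
			if (options)
				*options++ = 0;
		}
		if (*key)
			printf("token: %s\n", key);
	}
}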
@@ -929,12 +948,31 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
ctx->wsize = result.uint_32;
ctx->got_wsize = true;
break;
+ case Opt_acregmax:
+ ctx->acregmax = HZ * result.uint_32;
+ if (ctx->acregmax > CIFS_MAX_ACTIMEO) {
+ cifs_dbg(VFS, "acregmax too large\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
+ case Opt_acdirmax:
+ ctx->acdirmax = HZ * result.uint_32;
+ if (ctx->acdirmax > CIFS_MAX_ACTIMEO) {
+ cifs_dbg(VFS, "acdirmax too large\n");
+ goto cifs_parse_mount_err;
+ }
+ break;
case Opt_actimeo:
- ctx->actimeo = HZ * result.uint_32;
- if (ctx->actimeo > CIFS_MAX_ACTIMEO) {
- cifs_dbg(VFS, "attribute cache timeout too large\n");
+ if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) {
+ cifs_dbg(VFS, "timeout too large\n");
goto cifs_parse_mount_err;
}
+ if ((ctx->acdirmax != CIFS_DEF_ACTIMEO) ||
+ (ctx->acregmax != CIFS_DEF_ACTIMEO)) {
+ cifs_dbg(VFS, "actimeo ignored since acregmax or acdirmax specified\n");
+ break;
+ }
+ ctx->acdirmax = ctx->acregmax = HZ * result.uint_32;
break;
case Opt_echo_interval:
ctx->echo_interval = result.uint_32;
@@ -1361,7 +1399,8 @@ int smb3_init_fs_context(struct fs_context *fc)
/* default is to use strict cifs caching semantics */
ctx->strict_io = true;
- ctx->actimeo = CIFS_DEF_ACTIMEO;
+ ctx->acregmax = CIFS_DEF_ACTIMEO;
+ ctx->acdirmax = CIFS_DEF_ACTIMEO;
/* Most clients set timeout to 0, allows server to use its default */
ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
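The Opt_actimeo case above enforces a simple precedence rule: actimeo is honored only while both acregmax and acdirmax still sit at their defaults. A compact sketch follows; the constants mirror the kernel's CIFS_DEF_ACTIMEO and CIFS_MAX_ACTIMEO, and HZ is fixed at 100 purely for illustration.

#include <stdio.h>

#define HZ               100UL        /* illustrative tick rate */
#define CIFS_DEF_ACTIMEO (1 * HZ)
#define CIFS_MAX_ACTIMEO (1UL << 30)

struct attr_timeouts { unsigned long acregmax, acdirmax; };

static int set_actimeo(struct attr_timeouts *t, unsigned int secs)
{
	if (HZ * secs > CIFS_MAX_ACTIMEO)
		return -1;                            /* reject: too large */
	if (t->acregmax != CIFS_DEF_ACTIMEO ||
	    t->acdirmax != CIFS_DEF_ACTIMEO) {
		fprintf(stderr, "actimeo ignored\n"); /* ac*max already set */
		return 0;
	}
	t->acregmax = t->acdirmax = HZ * secs;        /* one knob sets both */
	return 0;
}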
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index 1c44a460e2c0..87dd1f7168f2 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -118,6 +118,8 @@ enum cifs_param {
Opt_rsize,
Opt_wsize,
Opt_actimeo,
+ Opt_acdirmax,
+ Opt_acregmax,
Opt_echo_interval,
Opt_max_credits,
Opt_snapshot,
@@ -232,7 +234,9 @@ struct smb3_fs_context {
unsigned int wsize;
unsigned int min_offload;
bool sockopt_tcp_nodelay:1;
- unsigned long actimeo; /* attribute cache timeout (jiffies) */
+ /* attribute cache timeout for files and directories in jiffies */
+ unsigned long acregmax;
+ unsigned long acdirmax;
struct smb_version_operations *ops;
struct smb_version_values *vals;
char *prepath;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 3e9c7bb23f26..7c61bc9573c0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2199,12 +2199,23 @@ cifs_inode_needs_reval(struct inode *inode)
if (!lookupCacheEnabled)
return true;
- if (!cifs_sb->ctx->actimeo)
- return true;
-
- if (!time_in_range(jiffies, cifs_i->time,
- cifs_i->time + cifs_sb->ctx->actimeo))
- return true;
+ /*
+ * Depending on the inode type, check whether attribute caching is
+ * disabled for files or for directories.
+ */
+ if (S_ISDIR(inode->i_mode)) {
+ if (!cifs_sb->ctx->acdirmax)
+ return true;
+ if (!time_in_range(jiffies, cifs_i->time,
+ cifs_i->time + cifs_sb->ctx->acdirmax))
+ return true;
+ } else { /* file */
+ if (!cifs_sb->ctx->acregmax)
+ return true;
+ if (!time_in_range(jiffies, cifs_i->time,
+ cifs_i->time + cifs_sb->ctx->acregmax))
+ return true;
+ }
/* hardlinked files w/ noserverino get "special" treatment */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
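The revalidation test above, reduced to its essence: an inode needs revalidating when caching for its type is disabled (a timeout of zero) or its cached attributes have outlived the per-type limit. The flat comparison below is a sketch for a non-wrapping clock; the kernel's time_in_range() additionally copes with jiffies wraparound.

#include <stdbool.h>

static bool needs_reval(unsigned long now, unsigned long cached_at,
			unsigned long max_age /* acregmax or acdirmax */)
{
	if (!max_age)
		return true;   /* attribute caching disabled for this type */
	return now < cached_at || now > cached_at + max_age;
}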
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 213465718fa8..183a3a868d7b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -218,7 +218,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
/* UNC and paths */
/* XXX: Use ses->server->hostname? */
- sprintf(unc, unc_fmt, ses->serverName);
+ sprintf(unc, unc_fmt, ses->ip_addr);
ctx.UNC = unc;
ctx.prepath = "";
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f19274857292..f5087295424c 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -63,17 +63,19 @@ smb2_add_credits(struct TCP_Server_Info *server,
const struct cifs_credits *credits, const int optype)
{
int *val, rc = -1;
+ int scredits, in_flight;
unsigned int add = credits->value;
unsigned int instance = credits->instance;
bool reconnect_detected = false;
+ bool reconnect_with_invalid_credits = false;
spin_lock(&server->req_lock);
val = server->ops->get_credits_field(server, optype);
/* eg found case where write overlapping reconnect messed up credits */
if (((optype & CIFS_OP_MASK) == CIFS_NEG_OP) && (*val != 0))
- trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
- server->hostname, *val, add);
+ reconnect_with_invalid_credits = true;
+
if ((instance == 0) || (instance == server->reconnect_instance))
*val += add;
else
@@ -84,7 +86,9 @@ smb2_add_credits(struct TCP_Server_Info *server,
pr_warn_once("server overflowed SMB3 credits\n");
}
server->in_flight--;
- if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
+ if (server->in_flight == 0 &&
+ ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) &&
+ ((optype & CIFS_OP_MASK) != CIFS_SESS_OP))
rc = change_conf(server);
/*
* Sometimes server returns 0 credits on oplock break ack - we need to
@@ -97,14 +101,26 @@ smb2_add_credits(struct TCP_Server_Info *server,
server->oplock_credits++;
}
}
+ scredits = *val;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
if (reconnect_detected) {
+ trace_smb3_reconnect_detected(server->CurrentMid,
+ server->conn_id, server->hostname, scredits, add, in_flight);
+
cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n",
add, instance);
}
+ if (reconnect_with_invalid_credits) {
+ trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
+ server->conn_id, server->hostname, scredits, add, in_flight);
+ cifs_dbg(FYI, "Negotiate operation when server credits are non-zero. Optype: %d, server credits: %d, credits added: %d\n",
+ optype, scredits, add);
+ }
+
if (server->tcpStatus == CifsNeedReconnect
|| server->tcpStatus == CifsExiting)
return;
@@ -123,23 +139,30 @@ smb2_add_credits(struct TCP_Server_Info *server,
cifs_dbg(FYI, "disabling oplocks\n");
break;
default:
- trace_smb3_add_credits(server->CurrentMid,
- server->hostname, rc, add);
- cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, rc);
+ /* change_conf rebalanced credits for different types */
+ break;
}
+
+ trace_smb3_add_credits(server->CurrentMid,
+ server->conn_id, server->hostname, scredits, add, in_flight);
+ cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, scredits);
}
static void
smb2_set_credits(struct TCP_Server_Info *server, const int val)
{
+ int scredits, in_flight;
+
spin_lock(&server->req_lock);
server->credits = val;
if (val == 1)
server->reconnect_instance++;
+ scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
trace_smb3_set_credits(server->CurrentMid,
- server->hostname, val, val);
+ server->conn_id, server->hostname, scredits, val, in_flight);
cifs_dbg(FYI, "%s: set %u credits\n", __func__, val);
/* don't log while holding the lock */
@@ -171,7 +194,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
unsigned int *num, struct cifs_credits *credits)
{
int rc = 0;
- unsigned int scredits;
+ unsigned int scredits, in_flight;
spin_lock(&server->req_lock);
while (1) {
@@ -208,17 +231,18 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
credits->instance = server->reconnect_instance;
server->credits -= credits->value;
- scredits = server->credits;
server->in_flight++;
if (server->in_flight > server->max_in_flight)
server->max_in_flight = server->in_flight;
break;
}
}
+ scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits, -(credits->value));
+ server->conn_id, server->hostname, scredits, -(credits->value), in_flight);
cifs_dbg(FYI, "%s: removed %u credits total=%d\n",
__func__, credits->value, scredits);
@@ -231,14 +255,14 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
const unsigned int payload_size)
{
int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE);
- int scredits;
+ int scredits, in_flight;
if (!credits->value || credits->value == new_val)
return 0;
if (credits->value < new_val) {
trace_smb3_too_many_credits(server->CurrentMid,
- server->hostname, 0, credits->value - new_val);
+ server->conn_id, server->hostname, 0, credits->value - new_val, 0);
cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)",
credits->value, new_val);
@@ -248,9 +272,13 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
spin_lock(&server->req_lock);
if (server->reconnect_instance != credits->instance) {
+ scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
+
trace_smb3_reconnect_detected(server->CurrentMid,
- server->hostname, 0, 0);
+ server->conn_id, server->hostname, scredits,
+ credits->value - new_val, in_flight);
cifs_server_dbg(VFS, "trying to return %d credits to old session\n",
credits->value - new_val);
return -EAGAIN;
@@ -258,15 +286,18 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
server->credits += credits->value - new_val;
scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
- credits->value = new_val;
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits, credits->value - new_val);
+ server->conn_id, server->hostname, scredits,
+ credits->value - new_val, in_flight);
cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n",
__func__, credits->value - new_val, scredits);
+ credits->value = new_val;
+
return 0;
}
@@ -2369,7 +2400,7 @@ static bool
smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
{
struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
- int scredits;
+ int scredits, in_flight;
if (shdr->Status != STATUS_PENDING)
return false;
@@ -2378,11 +2409,13 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
spin_lock(&server->req_lock);
server->credits += le16_to_cpu(shdr->CreditRequest);
scredits = server->credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
wake_up(&server->request_q);
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits, le16_to_cpu(shdr->CreditRequest));
+ server->conn_id, server->hostname, scredits,
+ le16_to_cpu(shdr->CreditRequest), in_flight);
cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n",
__func__, le16_to_cpu(shdr->CreditRequest), scredits);
}
@@ -2418,6 +2451,34 @@ smb2_is_status_io_timeout(char *buf)
return false;
}
+static void
+smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
+{
+ struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+ struct list_head *tmp, *tmp1;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+
+ if (shdr->Status != STATUS_NETWORK_NAME_DELETED)
+ return;
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &server->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ list_for_each(tmp1, &ses->tcon_list) {
+ tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+ if (tcon->tid == shdr->TreeId) {
+ tcon->need_reconnect = true;
+ spin_unlock(&cifs_tcp_ses_lock);
+ pr_warn_once("Server share %s deleted.\n",
+ tcon->treeName);
+ return;
+ }
+ }
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+}
+
static int
smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
struct cifsInodeInfo *cinode)
@@ -4605,6 +4666,10 @@ static void smb2_decrypt_offload(struct work_struct *work)
#ifdef CONFIG_CIFS_STATS2
mid->when_received = jiffies;
#endif
+ if (dw->server->ops->is_network_name_deleted)
+ dw->server->ops->is_network_name_deleted(dw->buf,
+ dw->server);
+
mid->callback(mid);
} else {
spin_lock(&GlobalMid_Lock);
@@ -4723,6 +4788,12 @@ non_offloaded_decrypt:
rc = handle_read_data(server, *mid, buf,
server->vals->read_rsp_size,
pages, npages, len, false);
+ if (rc >= 0) {
+ if (server->ops->is_network_name_deleted) {
+ server->ops->is_network_name_deleted(buf,
+ server);
+ }
+ }
}
free_pages:
@@ -5072,6 +5143,7 @@ struct smb_version_operations smb20_operations = {
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
};
struct smb_version_operations smb21_operations = {
@@ -5173,6 +5245,7 @@ struct smb_version_operations smb21_operations = {
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
};
struct smb_version_operations smb30_operations = {
@@ -5286,6 +5359,7 @@ struct smb_version_operations smb30_operations = {
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
};
struct smb_version_operations smb311_operations = {
@@ -5399,6 +5473,7 @@ struct smb_version_operations smb311_operations = {
.fiemap = smb3_fiemap,
.llseek = smb3_llseek,
.is_status_io_timeout = smb2_is_status_io_timeout,
+ .is_network_name_deleted = smb2_is_network_name_deleted,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 794fc3b68b4f..4bbb6126b14d 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -814,8 +814,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
SMB3ANY_VERSION_STRING) == 0) {
req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
- req->DialectCount = cpu_to_le16(2);
- total_len += 4;
+ req->Dialects[2] = cpu_to_le16(SMB311_PROT_ID);
+ req->DialectCount = cpu_to_le16(3);
+ total_len += 6;
} else if (strcmp(server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
@@ -849,6 +850,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
SMB2_CLIENT_GUID_SIZE);
if ((server->vals->protocol_id == SMB311_PROT_ID) ||
(strcmp(server->vals->version_string,
+ SMB3ANY_VERSION_STRING) == 0) ||
+ (strcmp(server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0))
assemble_neg_contexts(req, server, &total_len);
}
@@ -883,6 +886,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
cifs_server_dbg(VFS,
"SMB2.1 dialect returned but not requested\n");
return -EIO;
+ } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
+ /* ops were set to 3.0 by default for the default dialect, so update */
+ server->ops = &smb311_operations;
+ server->vals = &smb311_values;
}
} else if (strcmp(server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
@@ -1042,10 +1049,11 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
SMB3ANY_VERSION_STRING) == 0) {
pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
- pneg_inbuf->DialectCount = cpu_to_le16(2);
- /* structure is big enough for 3 dialects, sending only 2 */
+ pneg_inbuf->Dialects[2] = cpu_to_le16(SMB311_PROT_ID);
+ pneg_inbuf->DialectCount = cpu_to_le16(3);
+ /* SMB 2.1 is not included, so subtract one dialect from the length */
inbuflen = sizeof(*pneg_inbuf) -
- (2 * sizeof(pneg_inbuf->Dialects[0]));
+ (sizeof(pneg_inbuf->Dialects[0]));
} else if (strcmp(server->vals->version_string,
SMBDEFAULT_VERSION_STRING) == 0) {
pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
@@ -1053,7 +1061,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
pneg_inbuf->Dialects[3] = cpu_to_le16(SMB311_PROT_ID);
pneg_inbuf->DialectCount = cpu_to_le16(4);
- /* structure is big enough for 3 dialects */
+ /* structure is big enough for 4 dialects */
inbuflen = sizeof(*pneg_inbuf);
} else {
/* otherwise specific dialect was requested */
@@ -1253,7 +1261,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
cifs_ses_server(sess_data->ses),
&rqst,
&sess_data->buf0_type,
- CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
+ CIFS_LOG_ERROR | CIFS_SESS_OP, &rsp_iov);
cifs_small_buf_release(sess_data->iov[0].iov_base);
memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec));
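Why total_len grows by exactly 6 in the SMB3ANY negotiate hunks above: each advertised dialect is one 16-bit identifier on the wire, so listing three dialects instead of two appends one more __le16. A sketch of the array fill (the SMB*_PROT_ID values are the standard dialect identifiers; cpu_to_le16() is elided by assuming a little-endian host):

#include <stddef.h>
#include <stdint.h>

#define SMB30_PROT_ID  0x0300
#define SMB302_PROT_ID 0x0302
#define SMB311_PROT_ID 0x0311

static size_t fill_smb3any_dialects(uint16_t *dst)
{
	const uint16_t ids[] = { SMB30_PROT_ID, SMB302_PROT_ID, SMB311_PROT_ID };
	size_t i;

	for (i = 0; i < 3; i++)
		dst[i] = ids[i];
	return sizeof(ids);   /* 6 bytes: the total_len += 6 above */
}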
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index c3d1a584f251..d6df908dccad 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -851,17 +851,21 @@ DEFINE_SMB3_LEASE_ERR_EVENT(lease_err);
DECLARE_EVENT_CLASS(smb3_reconnect_class,
TP_PROTO(__u64 currmid,
+ __u64 conn_id,
char *hostname),
- TP_ARGS(currmid, hostname),
+ TP_ARGS(currmid, conn_id, hostname),
TP_STRUCT__entry(
__field(__u64, currmid)
+ __field(__u64, conn_id)
__field(char *, hostname)
),
TP_fast_assign(
__entry->currmid = currmid;
+ __entry->conn_id = conn_id;
__entry->hostname = hostname;
),
- TP_printk("server=%s current_mid=0x%llx",
+ TP_printk("conn_id=0x%llx server=%s current_mid=%llu",
+ __entry->conn_id,
__entry->hostname,
__entry->currmid)
)
@@ -869,44 +873,56 @@ DECLARE_EVENT_CLASS(smb3_reconnect_class,
#define DEFINE_SMB3_RECONNECT_EVENT(name) \
DEFINE_EVENT(smb3_reconnect_class, smb3_##name, \
TP_PROTO(__u64 currmid, \
- char *hostname), \
- TP_ARGS(currmid, hostname))
+ __u64 conn_id, \
+ char *hostname), \
+ TP_ARGS(currmid, conn_id, hostname))
DEFINE_SMB3_RECONNECT_EVENT(reconnect);
DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
DECLARE_EVENT_CLASS(smb3_credit_class,
TP_PROTO(__u64 currmid,
+ __u64 conn_id,
char *hostname,
int credits,
- int credits_to_add),
- TP_ARGS(currmid, hostname, credits, credits_to_add),
+ int credits_to_add,
+ int in_flight),
+ TP_ARGS(currmid, conn_id, hostname, credits, credits_to_add, in_flight),
TP_STRUCT__entry(
__field(__u64, currmid)
+ __field(__u64, conn_id)
__field(char *, hostname)
__field(int, credits)
__field(int, credits_to_add)
+ __field(int, in_flight)
),
TP_fast_assign(
__entry->currmid = currmid;
+ __entry->conn_id = conn_id;
__entry->hostname = hostname;
__entry->credits = credits;
__entry->credits_to_add = credits_to_add;
+ __entry->in_flight = in_flight;
),
- TP_printk("server=%s current_mid=0x%llx credits=%d credits_to_add=%d",
+ TP_printk("conn_id=0x%llx server=%s current_mid=%llu "
+ "credits=%d credit_change=%d in_flight=%d",
+ __entry->conn_id,
__entry->hostname,
__entry->currmid,
__entry->credits,
- __entry->credits_to_add)
+ __entry->credits_to_add,
+ __entry->in_flight)
)
#define DEFINE_SMB3_CREDIT_EVENT(name) \
DEFINE_EVENT(smb3_credit_class, smb3_##name, \
TP_PROTO(__u64 currmid, \
+ __u64 conn_id, \
char *hostname, \
int credits, \
- int credits_to_add), \
- TP_ARGS(currmid, hostname, credits, credits_to_add))
+ int credits_to_add, \
+ int in_flight), \
+ TP_ARGS(currmid, conn_id, hostname, credits, credits_to_add, in_flight))
DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
DEFINE_SMB3_CREDIT_EVENT(reconnect_detected);
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 4a2b836eb017..e90a1d1380b0 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -445,7 +445,7 @@ unmask:
*/
server->tcpStatus = CifsNeedReconnect;
trace_smb3_partial_send_reconnect(server->CurrentMid,
- server->hostname);
+ server->conn_id, server->hostname);
}
smbd_done:
if (rc < 0 && rc != -EINTR)
@@ -527,7 +527,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
int *credits;
int optype;
long int t;
- int scredits = server->credits;
+ int scredits, in_flight;
if (timeout < 0)
t = MAX_JIFFY_OFFSET;
@@ -551,23 +551,39 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
server->max_in_flight = server->in_flight;
*credits -= 1;
*instance = server->reconnect_instance;
+ scredits = *credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
+
+ trace_smb3_add_credits(server->CurrentMid,
+ server->conn_id, server->hostname, scredits, -1, in_flight);
+ cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
+ __func__, 1, scredits);
+
return 0;
}
while (1) {
if (*credits < num_credits) {
+ scredits = *credits;
spin_unlock(&server->req_lock);
+
cifs_num_waiters_inc(server);
rc = wait_event_killable_timeout(server->request_q,
has_credits(server, credits, num_credits), t);
cifs_num_waiters_dec(server);
if (!rc) {
+ spin_lock(&server->req_lock);
+ scredits = *credits;
+ in_flight = server->in_flight;
+ spin_unlock(&server->req_lock);
+
trace_smb3_credit_timeout(server->CurrentMid,
- server->hostname, num_credits, 0);
+ server->conn_id, server->hostname, scredits,
+ num_credits, in_flight);
cifs_server_dbg(VFS, "wait timed out after %d ms\n",
- timeout);
- return -ENOTSUPP;
+ timeout);
+ return -EBUSY;
}
if (rc == -ERESTARTSYS)
return -ERESTARTSYS;
@@ -595,6 +611,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
server->in_flight > 2 * MAX_COMPOUND &&
*credits <= MAX_COMPOUND) {
spin_unlock(&server->req_lock);
+
cifs_num_waiters_inc(server);
rc = wait_event_killable_timeout(
server->request_q,
@@ -603,13 +620,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
t);
cifs_num_waiters_dec(server);
if (!rc) {
+ spin_lock(&server->req_lock);
+ scredits = *credits;
+ in_flight = server->in_flight;
+ spin_unlock(&server->req_lock);
+
trace_smb3_credit_timeout(
- server->CurrentMid,
- server->hostname, num_credits,
- 0);
+ server->CurrentMid,
+ server->conn_id, server->hostname,
+ scredits, num_credits, in_flight);
cifs_server_dbg(VFS, "wait timed out after %d ms\n",
- timeout);
- return -ENOTSUPP;
+ timeout);
+ return -EBUSY;
}
if (rc == -ERESTARTSYS)
return -ERESTARTSYS;
@@ -625,16 +647,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
/* update # of requests on the wire to server */
if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
*credits -= num_credits;
- scredits = *credits;
server->in_flight += num_credits;
if (server->in_flight > server->max_in_flight)
server->max_in_flight = server->in_flight;
*instance = server->reconnect_instance;
}
+ scredits = *credits;
+ in_flight = server->in_flight;
spin_unlock(&server->req_lock);
trace_smb3_add_credits(server->CurrentMid,
- server->hostname, scredits, -(num_credits));
+ server->conn_id, server->hostname, scredits,
+ -(num_credits), in_flight);
cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
__func__, num_credits, scredits);
break;
@@ -656,13 +680,13 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
const int flags, unsigned int *instance)
{
int *credits;
- int scredits, sin_flight;
+ int scredits, in_flight;
credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
spin_lock(&server->req_lock);
scredits = *credits;
- sin_flight = server->in_flight;
+ in_flight = server->in_flight;
if (*credits < num) {
/*
@@ -684,10 +708,11 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
if (server->in_flight == 0) {
spin_unlock(&server->req_lock);
trace_smb3_insufficient_credits(server->CurrentMid,
- server->hostname, scredits, sin_flight);
+ server->conn_id, server->hostname, scredits,
+ num, in_flight);
cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
- __func__, sin_flight, num, scredits);
- return -ENOTSUPP;
+ __func__, in_flight, num, scredits);
+ return -EDEADLK;
}
}
spin_unlock(&server->req_lock);
@@ -1171,7 +1196,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP))
smb311_update_preauth_hash(ses, rqst[0].rq_iov,
rqst[0].rq_nvec);
@@ -1236,7 +1261,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
/*
* Compounding is never used during session establish.
*/
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
struct kvec iov = {
.iov_base = resp_iov[0].iov_base,
.iov_len = resp_iov[0].iov_len
diff --git a/fs/coredump.c b/fs/coredump.c
index ae778937a1ff..1c0fdc1aa70b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -897,10 +897,10 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
*/
page = get_dump_page(addr);
if (page) {
- void *kaddr = kmap(page);
+ void *kaddr = kmap_local_page(page);
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
- kunmap(page);
+ kunmap_local(kaddr);
put_page(page);
} else {
stop = !dump_skip(cprm, PAGE_SIZE);
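The coredump hunk above moves from kmap() to kmap_local_page(), trading the globally synchronized highmem mapping for a cheap CPU-local one that need only be released in LIFO order. A sketch of the pattern, assuming kernel context (this is not code from the patch):

#include <linux/highmem.h>
#include <linux/string.h>

/* Copy one page's contents through a short-lived local mapping. */
static void copy_page_contents(struct page *page, void *dst)
{
	void *kaddr = kmap_local_page(page);

	memcpy(dst, kaddr, PAGE_SIZE);
	kunmap_local(kaddr);   /* nested mappings must unmap in LIFO order */
}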
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 0957e1bb8eb2..b61491bf3166 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -695,7 +695,7 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
if (ret)
goto out;
sector = start_sector << (sdio->blkbits - 9);
- nr_pages = min(sdio->pages_in_io, BIO_MAX_PAGES);
+ nr_pages = bio_max_segs(sdio->pages_in_io);
BUG_ON(nr_pages <= 0);
dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
sdio->boundary = 0;
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index ea4f693bee22..f88851c5c250 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -215,10 +215,8 @@ submit_bio_retry:
/* max # of continuous pages */
if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
- if (nblocks > BIO_MAX_PAGES)
- nblocks = BIO_MAX_PAGES;
- bio = bio_alloc(GFP_NOIO, nblocks);
+ bio = bio_alloc(GFP_NOIO, bio_max_segs(nblocks));
bio->bi_end_io = erofs_readendio;
bio_set_dev(bio, sb->s_bdev);
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index f014c5e473a9..3db923403505 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -371,8 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
* bio_alloc will _always_ be able to allocate a bio if
* __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
*/
- bio = bio_alloc(GFP_KERNEL,
- min_t(int, nr_pages, BIO_MAX_PAGES));
+ bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages));
fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
GFP_KERNEL);
ext4_set_bio_post_read_ctx(bio, inode, page->index);
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b9721c8f116c..7c95818639a6 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -969,8 +969,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
unsigned int post_read_steps = 0;
bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
- min_t(int, nr_pages, BIO_MAX_PAGES),
- &f2fs_bioset);
+ bio_max_segs(nr_pages), &f2fs_bioset);
if (!bio)
return ERR_PTR(-ENOMEM);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index a8a0fb890e8d..4b0e2e3c2c88 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -2747,7 +2747,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
sum_entry = &sum->entries[0];
for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
- nrpages = min(last_offset - i, BIO_MAX_PAGES);
+ nrpages = bio_max_segs(last_offset - i);
/* readahead node pages */
f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);
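The erofs, ext4, and f2fs hunks above all replace open-coded min(nr, BIO_MAX_PAGES) clamps with the new bio_max_segs() helper. At this point in the tree the helper amounts to the following sketch (BIO_MAX_PAGES was 256 at the time; defined here only so the snippet stands alone):

#define BIO_MAX_PAGES 256U

static inline unsigned int bio_max_segs(unsigned int nr_segs)
{
	return nr_segs < BIO_MAX_PAGES ? nr_segs : BIO_MAX_PAGES;
}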
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 68376fa03880..c9775d5c6594 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -491,8 +491,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
di = (struct gfs2_dinode *)dibh->b_data;
gfs2_dinode_out(ip, di);
- di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev));
- di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
+ di->di_major = cpu_to_be32(imajor(&ip->i_inode));
+ di->di_minor = cpu_to_be32(iminor(&ip->i_inode));
di->__pad1 = 0;
di->__pad2 = 0;
di->__pad3 = 0;
diff --git a/fs/inode.c b/fs/inode.c
index 6dba963d3f6d..a047ab306f9a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -142,6 +142,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
atomic_set(&inode->i_count, 1);
inode->i_op = &empty_iops;
inode->i_fop = &no_open_fops;
+ inode->i_ino = 0;
inode->__i_nlink = 1;
inode->i_opflags = 0;
if (sb->s_xattr)
diff --git a/fs/io-wq.c b/fs/io-wq.c
index c36bbcd823ce..28868eb4cd09 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -13,13 +13,10 @@
#include <linux/sched/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
-#include <linux/kthread.h>
#include <linux/rculist_nulls.h>
-#include <linux/fs_struct.h>
-#include <linux/task_work.h>
-#include <linux/blk-cgroup.h>
-#include <linux/audit.h>
#include <linux/cpu.h>
+#include <linux/tracehook.h>
+#include <linux/freezer.h>
#include "../kernel/sched/sched.h"
#include "io-wq.h"
@@ -36,7 +33,6 @@ enum {
enum {
IO_WQ_BIT_EXIT = 0, /* wq exiting */
- IO_WQ_BIT_ERROR = 1, /* error on setup */
};
enum {
@@ -57,14 +53,9 @@ struct io_worker {
struct io_wq_work *cur_work;
spinlock_t lock;
+ struct completion ref_done;
+
struct rcu_head rcu;
- struct mm_struct *mm;
-#ifdef CONFIG_BLK_CGROUP
- struct cgroup_subsys_state *blkcg_css;
-#endif
- const struct cred *cur_creds;
- const struct cred *saved_creds;
- struct nsproxy *restore_nsproxy;
};
#if BITS_PER_LONG == 64
@@ -93,7 +84,6 @@ struct io_wqe {
struct {
raw_spinlock_t lock;
struct io_wq_work_list work_list;
- unsigned long hash_map;
unsigned flags;
} ____cacheline_aligned_in_smp;
@@ -103,6 +93,8 @@ struct io_wqe {
struct hlist_nulls_head free_list;
struct list_head all_list;
+ struct wait_queue_entry wait;
+
struct io_wq *wq;
struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
};
@@ -119,16 +111,33 @@ struct io_wq {
struct task_struct *manager;
struct user_struct *user;
+
+ struct io_wq_hash *hash;
+
refcount_t refs;
- struct completion done;
+ struct completion exited;
+
+ atomic_t worker_refs;
+ struct completion worker_done;
struct hlist_node cpuhp_node;
- refcount_t use_refs;
+ pid_t task_pid;
};
static enum cpuhp_state io_wq_online;
+struct io_cb_cancel_data {
+ work_cancel_fn *fn;
+ void *data;
+ int nr_running;
+ int nr_pending;
+ bool cancel_all;
+};
+
+static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+ struct io_cb_cancel_data *match);
+
static bool io_worker_get(struct io_worker *worker)
{
return refcount_inc_not_zero(&worker->ref);
@@ -137,62 +146,7 @@ static bool io_worker_get(struct io_worker *worker)
static void io_worker_release(struct io_worker *worker)
{
if (refcount_dec_and_test(&worker->ref))
- wake_up_process(worker->task);
-}
-
-/*
- * Note: drops the wqe->lock if returning true! The caller must re-acquire
- * the lock in that case. Some callers need to restart handling if this
- * happens, so we can't just re-acquire the lock on behalf of the caller.
- */
-static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
-{
- bool dropped_lock = false;
-
- if (worker->saved_creds) {
- revert_creds(worker->saved_creds);
- worker->cur_creds = worker->saved_creds = NULL;
- }
-
- if (current->files) {
- __acquire(&wqe->lock);
- raw_spin_unlock_irq(&wqe->lock);
- dropped_lock = true;
-
- task_lock(current);
- current->files = NULL;
- current->nsproxy = worker->restore_nsproxy;
- task_unlock(current);
- }
-
- if (current->fs)
- current->fs = NULL;
-
- /*
- * If we have an active mm, we need to drop the wq lock before unusing
- * it. If we do, return true and let the caller retry the idle loop.
- */
- if (worker->mm) {
- if (!dropped_lock) {
- __acquire(&wqe->lock);
- raw_spin_unlock_irq(&wqe->lock);
- dropped_lock = true;
- }
- __set_current_state(TASK_RUNNING);
- kthread_unuse_mm(worker->mm);
- mmput(worker->mm);
- worker->mm = NULL;
- }
-
-#ifdef CONFIG_BLK_CGROUP
- if (worker->blkcg_css) {
- kthread_associate_blkcg(NULL);
- worker->blkcg_css = NULL;
- }
-#endif
- if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
- return dropped_lock;
+ complete(&worker->ref_done);
}
static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
@@ -204,9 +158,10 @@ static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
return &wqe->acct[IO_WQ_ACCT_BOUND];
}
-static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
- struct io_worker *worker)
+static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker)
{
+ struct io_wqe *wqe = worker->wqe;
+
if (worker->flags & IO_WORKER_F_BOUND)
return &wqe->acct[IO_WQ_ACCT_BOUND];
@@ -216,39 +171,33 @@ static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
static void io_worker_exit(struct io_worker *worker)
{
struct io_wqe *wqe = worker->wqe;
- struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+ unsigned flags;
- /*
- * If we're not at zero, someone else is holding a brief reference
- * to the worker. Wait for that to go away.
- */
- set_current_state(TASK_INTERRUPTIBLE);
- if (!refcount_dec_and_test(&worker->ref))
- schedule();
- __set_current_state(TASK_RUNNING);
+ if (refcount_dec_and_test(&worker->ref))
+ complete(&worker->ref_done);
+ wait_for_completion(&worker->ref_done);
preempt_disable();
current->flags &= ~PF_IO_WORKER;
- if (worker->flags & IO_WORKER_F_RUNNING)
+ flags = worker->flags;
+ worker->flags = 0;
+ if (flags & IO_WORKER_F_RUNNING)
atomic_dec(&acct->nr_running);
- if (!(worker->flags & IO_WORKER_F_BOUND))
- atomic_dec(&wqe->wq->user->processes);
worker->flags = 0;
preempt_enable();
raw_spin_lock_irq(&wqe->lock);
- hlist_nulls_del_rcu(&worker->nulls_node);
+ if (flags & IO_WORKER_F_FREE)
+ hlist_nulls_del_rcu(&worker->nulls_node);
list_del_rcu(&worker->all_list);
- if (__io_worker_unuse(wqe, worker)) {
- __release(&wqe->lock);
- raw_spin_lock_irq(&wqe->lock);
- }
acct->nr_workers--;
raw_spin_unlock_irq(&wqe->lock);
kfree_rcu(worker, rcu);
- if (refcount_dec_and_test(&wqe->wq->refs))
- complete(&wqe->wq->done);
+ if (atomic_dec_and_test(&wqe->wq->worker_refs))
+ complete(&wqe->wq->worker_done);
+ do_exit(0);
}
static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@@ -306,35 +255,23 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
wake_up_process(wqe->wq->manager);
}
-static void io_wqe_inc_running(struct io_wqe *wqe, struct io_worker *worker)
+static void io_wqe_inc_running(struct io_worker *worker)
{
- struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
atomic_inc(&acct->nr_running);
}
-static void io_wqe_dec_running(struct io_wqe *wqe, struct io_worker *worker)
+static void io_wqe_dec_running(struct io_worker *worker)
__must_hold(wqe->lock)
{
- struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+ struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+ struct io_wqe *wqe = worker->wqe;
if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe))
io_wqe_wake_worker(wqe, acct);
}
-static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
-{
- allow_kernel_signal(SIGINT);
-
- current->flags |= PF_IO_WORKER;
- current->fs = NULL;
- current->files = NULL;
-
- worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
- worker->restore_nsproxy = current->nsproxy;
- io_wqe_inc_running(wqe, worker);
-}
-
/*
* Worker will start processing some work. Move it to the busy list, if
* it's currently on the freelist
@@ -357,19 +294,17 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
if (worker_bound != work_bound) {
- io_wqe_dec_running(wqe, worker);
+ io_wqe_dec_running(worker);
if (work_bound) {
worker->flags |= IO_WORKER_F_BOUND;
wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
- atomic_dec(&wqe->wq->user->processes);
} else {
worker->flags &= ~IO_WORKER_F_BOUND;
wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
- atomic_inc(&wqe->wq->user->processes);
}
- io_wqe_inc_running(wqe, worker);
+ io_wqe_inc_running(worker);
}
}
@@ -380,15 +315,13 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
* retry the loop in that case (we changed task state), we don't regrab
* the lock if we return success.
*/
-static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
+static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
__must_hold(wqe->lock)
{
if (!(worker->flags & IO_WORKER_F_FREE)) {
worker->flags |= IO_WORKER_F_FREE;
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
}
-
- return __io_worker_unuse(wqe, worker);
}
static inline unsigned int io_get_work_hash(struct io_wq_work *work)
@@ -396,14 +329,31 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work)
return work->flags >> IO_WQ_HASH_SHIFT;
}
+static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+{
+ struct io_wq *wq = wqe->wq;
+
+ spin_lock(&wq->hash->wait.lock);
+ if (list_empty(&wqe->wait.entry)) {
+ __add_wait_queue(&wq->hash->wait, &wqe->wait);
+ if (!test_bit(hash, &wq->hash->map)) {
+ __set_current_state(TASK_RUNNING);
+ list_del_init(&wqe->wait.entry);
+ }
+ }
+ spin_unlock(&wq->hash->wait.lock);
+}
+
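
io_wait_on_hash() queues the wqe on the shared hash waitqueue and then re-tests the hash bit under the waitqueue lock; if the bit cleared in that window it puts itself back to TASK_RUNNING and dequeues, which is the standard way to avoid a lost wakeup. The same enqueue-then-recheck discipline, sketched with a pthread condition variable (hash_busy stands in for test_bit() on the shared map; this is a userspace analogue, not the kernel API):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  cond = PTHREAD_COND_INITIALIZER;
    static bool hash_busy;        /* stands in for test_bit(hash, map) */

    /* re-test the condition under the lock after queueing to sleep, so
     * a clear that raced with us can never be missed */
    static void wait_on_hash(void)
    {
        pthread_mutex_lock(&lock);
        while (hash_busy)
            pthread_cond_wait(&cond, &lock);
        pthread_mutex_unlock(&lock);
    }

    static void release_hash(void)
    {
        pthread_mutex_lock(&lock);
        hash_busy = false;
        pthread_cond_signal(&cond);  /* wake_up(&wq->hash->wait) analogue */
        pthread_mutex_unlock(&lock);
    }

    static void *releaser(void *arg)
    {
        (void)arg;
        sleep(1);
        release_hash();
        return NULL;
    }

    int main(void)
    {
        pthread_t t;
        hash_busy = true;
        pthread_create(&t, NULL, releaser, NULL);
        wait_on_hash();
        puts("hash slot free, work can run");
        pthread_join(t, NULL);
        return 0;
    }
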
static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
__must_hold(wqe->lock)
{
struct io_wq_work_node *node, *prev;
struct io_wq_work *work, *tail;
- unsigned int hash;
+ unsigned int stall_hash = -1U;
wq_list_for_each(node, prev, &wqe->work_list) {
+ unsigned int hash;
+
work = container_of(node, struct io_wq_work, list);
/* not hashed, can run anytime */
@@ -412,111 +362,48 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
return work;
}
- /* hashed, can run if not already running */
hash = io_get_work_hash(work);
- if (!(wqe->hash_map & BIT(hash))) {
- wqe->hash_map |= BIT(hash);
- /* all items with this hash lie in [work, tail] */
- tail = wqe->hash_tail[hash];
+ /* all items with this hash lie in [work, tail] */
+ tail = wqe->hash_tail[hash];
+
+ /* hashed, can run if not already running */
+ if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
wqe->hash_tail[hash] = NULL;
wq_list_cut(&wqe->work_list, &tail->list, prev);
return work;
}
+ if (stall_hash == -1U)
+ stall_hash = hash;
+ /* fast forward to a next hash, for-each will fix up @prev */
+ node = &tail->list;
}
- return NULL;
-}
-
-static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
-{
- if (worker->mm) {
- kthread_unuse_mm(worker->mm);
- mmput(worker->mm);
- worker->mm = NULL;
- }
-
- if (mmget_not_zero(work->identity->mm)) {
- kthread_use_mm(work->identity->mm);
- worker->mm = work->identity->mm;
- return;
- }
-
- /* failed grabbing mm, ensure work gets cancelled */
- work->flags |= IO_WQ_WORK_CANCEL;
-}
-
-static inline void io_wq_switch_blkcg(struct io_worker *worker,
- struct io_wq_work *work)
-{
-#ifdef CONFIG_BLK_CGROUP
- if (!(work->flags & IO_WQ_WORK_BLKCG))
- return;
- if (work->identity->blkcg_css != worker->blkcg_css) {
- kthread_associate_blkcg(work->identity->blkcg_css);
- worker->blkcg_css = work->identity->blkcg_css;
+ if (stall_hash != -1U) {
+ raw_spin_unlock(&wqe->lock);
+ io_wait_on_hash(wqe, stall_hash);
+ raw_spin_lock(&wqe->lock);
}
-#endif
-}
-
-static void io_wq_switch_creds(struct io_worker *worker,
- struct io_wq_work *work)
-{
- const struct cred *old_creds = override_creds(work->identity->creds);
- worker->cur_creds = work->identity->creds;
- if (worker->saved_creds)
- put_cred(old_creds); /* creds set by previous switch */
- else
- worker->saved_creds = old_creds;
+ return NULL;
}
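
The rewritten scan returns the first work item that is either unhashed or whose hash bit it wins with test_and_set_bit() on the now-shared map; for a losing chain it records the first stalled hash and fast-forwards node to the chain tail so the rest of that series is skipped, and if nothing was runnable it sleeps on that hash. A compact model of just the selection policy over a plain array (illustrative only; the real code walks wq_list nodes and hash_tail pointers):

    #include <stdbool.h>
    #include <stdio.h>

    struct work { int hash; const char *name; };   /* hash < 0: unhashed */

    static bool busy[8];   /* mirrors the shared hash bitmap */

    static struct work *pick_next(struct work *q, int n, int *stall_hash)
    {
        *stall_hash = -1;
        for (int i = 0; i < n; i++) {
            if (q[i].hash < 0)
                return &q[i];               /* unhashed: run any time */
            if (!busy[q[i].hash]) {
                busy[q[i].hash] = true;     /* test_and_set_bit() analogue */
                return &q[i];
            }
            if (*stall_hash < 0)
                *stall_hash = q[i].hash;    /* remember first stalled hash */
            while (i + 1 < n && q[i + 1].hash == q[i].hash)
                i++;                        /* fast-forward past this chain */
        }
        return NULL;                        /* caller sleeps on *stall_hash */
    }

    int main(void)
    {
        struct work q[] = { {2, "a"}, {2, "b"}, {-1, "c"} };
        int stall;
        busy[2] = true;                     /* hash 2 already running */
        struct work *w = pick_next(q, 3, &stall);
        printf("picked %s, first stalled hash %d\n",
               w ? w->name : "none", stall);
        return 0;
    }
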
-static void io_impersonate_work(struct io_worker *worker,
- struct io_wq_work *work)
+static void io_flush_signals(void)
{
- if ((work->flags & IO_WQ_WORK_FILES) &&
- current->files != work->identity->files) {
- task_lock(current);
- current->files = work->identity->files;
- current->nsproxy = work->identity->nsproxy;
- task_unlock(current);
- if (!work->identity->files) {
- /* failed grabbing files, ensure work gets cancelled */
- work->flags |= IO_WQ_WORK_CANCEL;
- }
+ if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) {
+ if (current->task_works)
+ task_work_run();
+ clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL);
}
- if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
- current->fs = work->identity->fs;
- if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
- io_wq_switch_mm(worker, work);
- if ((work->flags & IO_WQ_WORK_CREDS) &&
- worker->cur_creds != work->identity->creds)
- io_wq_switch_creds(worker, work);
- if (work->flags & IO_WQ_WORK_FSIZE)
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
- else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
- current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
- io_wq_switch_blkcg(worker, work);
-#ifdef CONFIG_AUDIT
- current->loginuid = work->identity->loginuid;
- current->sessionid = work->identity->sessionid;
-#endif
}
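
With workers now being real threads of the submitting task, the old flush_signals(current) path gives way to TIF_NOTIFY_SIGNAL: a lightweight flag that breaks a worker out of interruptible sleep so it can run pending task_work, without queueing an actual signal. A userspace analogue of that test-clear-run step (an atomic flag standing in for the thread-info bit):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_bool notify;   /* stands in for TIF_NOTIFY_SIGNAL */

    static void run_task_work(void) { puts("running queued task_work"); }

    /* called by the worker between jobs, mirroring io_flush_signals() */
    static void flush_notifications(void)
    {
        if (atomic_exchange(&notify, false))  /* test and clear in one step */
            run_task_work();
    }

    int main(void)
    {
        atomic_store(&notify, true);   /* someone poked us */
        flush_notifications();         /* runs the pending work */
        flush_notifications();         /* nothing left, no-op */
        return 0;
    }
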
static void io_assign_current_work(struct io_worker *worker,
struct io_wq_work *work)
{
if (work) {
- /* flush pending signals before assigning new work */
- if (signal_pending(current))
- flush_signals(current);
+ io_flush_signals();
cond_resched();
}
-#ifdef CONFIG_AUDIT
- current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
- current->sessionid = AUDIT_SID_UNSET;
-#endif
-
spin_lock_irq(&worker->lock);
worker->cur_work = work;
spin_unlock_irq(&worker->lock);
@@ -550,6 +437,7 @@ get_next:
if (!work)
break;
io_assign_current_work(worker, work);
+ __set_current_state(TASK_RUNNING);
/* handle a whole dependent link */
do {
@@ -557,7 +445,6 @@ get_next:
unsigned int hash = io_get_work_hash(work);
next_hashed = wq_next_work(work);
- io_impersonate_work(worker, work);
wq->do_work(work);
io_assign_current_work(worker, NULL);
@@ -572,8 +459,10 @@ get_next:
io_wqe_enqueue(wqe, linked);
if (hash != -1U && !next_hashed) {
+ clear_bit(hash, &wq->hash->map);
+ if (wq_has_sleeper(&wq->hash->wait))
+ wake_up(&wq->hash->wait);
raw_spin_lock_irq(&wqe->lock);
- wqe->hash_map &= ~BIT_ULL(hash);
wqe->flags &= ~IO_WQE_FLAG_STALLED;
/* skip unnecessary unlock-lock wqe->lock */
if (!work)
@@ -591,28 +480,29 @@ static int io_wqe_worker(void *data)
struct io_worker *worker = data;
struct io_wqe *wqe = worker->wqe;
struct io_wq *wq = wqe->wq;
+ char buf[TASK_COMM_LEN];
+
+ worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
+ io_wqe_inc_running(worker);
- io_worker_start(wqe, worker);
+ sprintf(buf, "iou-wrk-%d", wq->task_pid);
+ set_task_comm(current, buf);
while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
set_current_state(TASK_INTERRUPTIBLE);
loop:
raw_spin_lock_irq(&wqe->lock);
if (io_wqe_run_queue(wqe)) {
- __set_current_state(TASK_RUNNING);
io_worker_handle_work(worker);
goto loop;
}
- /* drops the lock on success, retry */
- if (__io_worker_idle(wqe, worker)) {
- __release(&wqe->lock);
- goto loop;
- }
+ __io_worker_idle(wqe, worker);
raw_spin_unlock_irq(&wqe->lock);
- if (signal_pending(current))
- flush_signals(current);
+ io_flush_signals();
if (schedule_timeout(WORKER_IDLE_TIMEOUT))
continue;
+ if (fatal_signal_pending(current))
+ break;
/* timed out, exit unless we're the fixed worker */
if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
!(worker->flags & IO_WORKER_F_FIXED))
@@ -636,15 +526,16 @@ loop:
*/
void io_wq_worker_running(struct task_struct *tsk)
{
- struct io_worker *worker = kthread_data(tsk);
- struct io_wqe *wqe = worker->wqe;
+ struct io_worker *worker = tsk->pf_io_worker;
+ if (!worker)
+ return;
if (!(worker->flags & IO_WORKER_F_UP))
return;
if (worker->flags & IO_WORKER_F_RUNNING)
return;
worker->flags |= IO_WORKER_F_RUNNING;
- io_wqe_inc_running(wqe, worker);
+ io_wqe_inc_running(worker);
}
/*
@@ -654,9 +545,10 @@ void io_wq_worker_running(struct task_struct *tsk)
*/
void io_wq_worker_sleeping(struct task_struct *tsk)
{
- struct io_worker *worker = kthread_data(tsk);
- struct io_wqe *wqe = worker->wqe;
+ struct io_worker *worker = tsk->pf_io_worker;
+ if (!worker)
+ return;
if (!(worker->flags & IO_WORKER_F_UP))
return;
if (!(worker->flags & IO_WORKER_F_RUNNING))
@@ -664,15 +556,18 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
worker->flags &= ~IO_WORKER_F_RUNNING;
- raw_spin_lock_irq(&wqe->lock);
- io_wqe_dec_running(wqe, worker);
- raw_spin_unlock_irq(&wqe->lock);
+ raw_spin_lock_irq(&worker->wqe->lock);
+ io_wqe_dec_running(worker);
+ raw_spin_unlock_irq(&worker->wqe->lock);
}
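
These scheduler hooks now find the worker through tsk->pf_io_worker rather than kthread_data(), and the sleeping side still funnels into io_wqe_dec_running(): if the last runnable worker blocks while work is queued, another worker is woken or forked. The accounting shape, reduced to a sketch (queue_nonempty stands in for io_wqe_run_queue()):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_int nr_running;
    static bool queue_nonempty = true;   /* io_wqe_run_queue() stand-in */

    static void wake_or_create_worker(void) { puts("waking another worker"); }

    /* mirrors io_wqe_dec_running(): if the last runnable worker blocks
     * while work is still queued, bring up help */
    static void worker_going_to_sleep(void)
    {
        if (atomic_fetch_sub(&nr_running, 1) == 1 && queue_nonempty)
            wake_or_create_worker();
    }

    int main(void)
    {
        atomic_init(&nr_running, 1);
        worker_going_to_sleep();   /* last runner blocked, work pending */
        return 0;
    }
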
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{
struct io_wqe_acct *acct = &wqe->acct[index];
struct io_worker *worker;
+ struct task_struct *tsk;
+
+ __set_current_state(TASK_RUNNING);
worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
if (!worker)
@@ -682,14 +577,22 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
worker->nulls_node.pprev = NULL;
worker->wqe = wqe;
spin_lock_init(&worker->lock);
+ init_completion(&worker->ref_done);
+
+ atomic_inc(&wq->worker_refs);
- worker->task = kthread_create_on_node(io_wqe_worker, worker, wqe->node,
- "io_wqe_worker-%d/%d", index, wqe->node);
- if (IS_ERR(worker->task)) {
+ tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+ if (IS_ERR(tsk)) {
+ if (atomic_dec_and_test(&wq->worker_refs))
+ complete(&wq->worker_done);
kfree(worker);
return false;
}
- kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));
+
+ tsk->pf_io_worker = worker;
+ worker->task = tsk;
+ set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
+ tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY;
raw_spin_lock_irq(&wqe->lock);
hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
@@ -701,12 +604,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
worker->flags |= IO_WORKER_F_FIXED;
acct->nr_workers++;
raw_spin_unlock_irq(&wqe->lock);
-
- if (index == IO_WQ_ACCT_UNBOUND)
- atomic_inc(&wq->user->processes);
-
- refcount_inc(&wq->refs);
- wake_up_process(worker->task);
+ wake_up_new_task(tsk);
return true;
}
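
create_io_thread() forks the worker from the task that owns the ring, so mm, files, creds and cgroup context are inherited by construction and none of the old identity switching is needed; note the error path, which must drop the worker ref it optimistically took and complete worker_done if it was the last one. That take-ref-before-create / drop-on-failure shape in a userspace sketch (pthread_create standing in for create_io_thread; names illustrative):

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int worker_refs;
    static sem_t worker_done;

    static void *worker_fn(void *arg) { (void)arg; return NULL; }

    /* take the ref before the thread exists; on failure, drop it and
     * signal worker_done if we were the last, as the error path above */
    static int create_worker(void)
    {
        pthread_t t;

        atomic_fetch_add(&worker_refs, 1);
        if (pthread_create(&t, NULL, worker_fn, NULL) != 0) {
            if (atomic_fetch_sub(&worker_refs, 1) == 1)
                sem_post(&worker_done);
            return -1;
        }
        pthread_detach(t);
        return 0;
    }

    int main(void)
    {
        sem_init(&worker_done, 0, 0);
        printf("create_worker: %d\n", create_worker());
        return 0;
    }
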
@@ -715,6 +613,8 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
{
struct io_wqe_acct *acct = &wqe->acct[index];
+ if (acct->nr_workers && test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state))
+ return false;
/* if we have available workers or no work, no need */
if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
return false;
@@ -748,97 +648,92 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
static bool io_wq_worker_wake(struct io_worker *worker, void *data)
{
+ set_notify_signal(worker->task);
wake_up_process(worker->task);
return false;
}
-/*
- * Manager thread. Tasked with creating new workers, if we need them.
- */
-static int io_wq_manager(void *data)
+static void io_wq_check_workers(struct io_wq *wq)
{
- struct io_wq *wq = data;
int node;
- /* create fixed workers */
- refcount_set(&wq->refs, 1);
for_each_node(node) {
+ struct io_wqe *wqe = wq->wqes[node];
+ bool fork_worker[2] = { false, false };
+
if (!node_online(node))
continue;
- if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
- continue;
- set_bit(IO_WQ_BIT_ERROR, &wq->state);
- set_bit(IO_WQ_BIT_EXIT, &wq->state);
- goto out;
+
+ raw_spin_lock_irq(&wqe->lock);
+ if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
+ fork_worker[IO_WQ_ACCT_BOUND] = true;
+ if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
+ fork_worker[IO_WQ_ACCT_UNBOUND] = true;
+ raw_spin_unlock_irq(&wqe->lock);
+ if (fork_worker[IO_WQ_ACCT_BOUND])
+ create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
+ if (fork_worker[IO_WQ_ACCT_UNBOUND])
+ create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
}
+}
- complete(&wq->done);
+static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
+{
+ return true;
+}
- while (!kthread_should_stop()) {
- if (current->task_works)
- task_work_run();
+static void io_wq_cancel_pending(struct io_wq *wq)
+{
+ struct io_cb_cancel_data match = {
+ .fn = io_wq_work_match_all,
+ .cancel_all = true,
+ };
+ int node;
- for_each_node(node) {
- struct io_wqe *wqe = wq->wqes[node];
- bool fork_worker[2] = { false, false };
+ for_each_node(node)
+ io_wqe_cancel_pending_work(wq->wqes[node], &match);
+}
+
+/*
+ * Manager thread. Tasked with creating new workers, if we need them.
+ */
+static int io_wq_manager(void *data)
+{
+ struct io_wq *wq = data;
+ char buf[TASK_COMM_LEN];
+ int node;
- if (!node_online(node))
- continue;
+ sprintf(buf, "iou-mgr-%d", wq->task_pid);
+ set_task_comm(current, buf);
- raw_spin_lock_irq(&wqe->lock);
- if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
- fork_worker[IO_WQ_ACCT_BOUND] = true;
- if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
- fork_worker[IO_WQ_ACCT_UNBOUND] = true;
- raw_spin_unlock_irq(&wqe->lock);
- if (fork_worker[IO_WQ_ACCT_BOUND])
- create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
- if (fork_worker[IO_WQ_ACCT_UNBOUND])
- create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
- }
+ do {
set_current_state(TASK_INTERRUPTIBLE);
+ io_wq_check_workers(wq);
schedule_timeout(HZ);
- }
-
- if (current->task_works)
- task_work_run();
+ try_to_freeze();
+ if (fatal_signal_pending(current))
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
-out:
- if (refcount_dec_and_test(&wq->refs)) {
- complete(&wq->done);
- return 0;
- }
- /* if ERROR is set and we get here, we have workers to wake */
- if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
- rcu_read_lock();
- for_each_node(node)
- io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
- rcu_read_unlock();
- }
- return 0;
-}
-
-static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
- struct io_wq_work *work)
-{
- bool free_worker;
-
- if (!(work->flags & IO_WQ_WORK_UNBOUND))
- return true;
- if (atomic_read(&acct->nr_running))
- return true;
+ io_wq_check_workers(wq);
rcu_read_lock();
- free_worker = !hlist_nulls_empty(&wqe->free_list);
+ for_each_node(node)
+ io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
rcu_read_unlock();
- if (free_worker)
- return true;
- if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
- !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
- return false;
+ /* we might not ever have created any workers */
+ if (atomic_read(&wq->worker_refs))
+ wait_for_completion(&wq->worker_done);
- return true;
+ spin_lock_irq(&wq->hash->wait.lock);
+ for_each_node(node)
+ list_del_init(&wq->wqes[node]->wait.entry);
+ spin_unlock_irq(&wq->hash->wait.lock);
+
+ io_wq_cancel_pending(wq);
+ complete(&wq->exited);
+ do_exit(0);
}
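
The manager is itself an io thread now: it tops the pools up once a second, leaves the loop on a fatal signal or the EXIT bit, runs one final top-up pass, wakes every worker, waits for worker_refs to drain, unhooks the wqes from the hash waitqueue and cancels leftover work before completing exited. Just the loop skeleton, as a sketch:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <unistd.h>

    static atomic_bool exit_requested;

    static void check_workers(void) { /* top up worker pools here */ }

    /* the manager's shape: poke the pools once a second until told to
     * stop, then run one final pass before draining and exiting */
    static void manager_loop(void)
    {
        do {
            check_workers();
            sleep(1);                 /* schedule_timeout(HZ) analogue */
        } while (!atomic_load(&exit_requested));
        check_workers();
        puts("draining workers, then exiting");
    }

    int main(void)
    {
        atomic_store(&exit_requested, true);   /* simulate shutdown */
        manager_loop();
        return 0;
    }
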
static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
@@ -872,20 +767,35 @@ append:
wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
}
+static int io_wq_fork_manager(struct io_wq *wq)
+{
+ struct task_struct *tsk;
+
+ if (wq->manager)
+ return 0;
+
+ reinit_completion(&wq->worker_done);
+ tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE);
+ if (!IS_ERR(tsk)) {
+ wq->manager = get_task_struct(tsk);
+ wake_up_new_task(tsk);
+ return 0;
+ }
+
+ return PTR_ERR(tsk);
+}
+
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
int work_flags;
unsigned long flags;
- /*
- * Do early check to see if we need a new unbound worker, and if we do,
- * if we're allowed to do so. This isn't 100% accurate as there's a
- * gap between this check and incrementing the value, but that's OK.
- * It's close enough to not be an issue, fork() has the same delay.
- */
- if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
- io_run_cancel(work, wqe);
+ /* Can only happen if manager creation fails after exec */
+ if (io_wq_fork_manager(wqe->wq) ||
+ test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
+ work->flags |= IO_WQ_WORK_CANCEL;
+ wqe->wq->do_work(work);
return;
}
@@ -919,14 +829,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
}
-struct io_cb_cancel_data {
- work_cancel_fn *fn;
- void *data;
- int nr_running;
- int nr_pending;
- bool cancel_all;
-};
-
static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
{
struct io_cb_cancel_data *match = data;
@@ -939,7 +841,7 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
spin_lock_irqsave(&worker->lock, flags);
if (worker->cur_work &&
match->fn(worker->cur_work, match->data)) {
- send_sig(SIGINT, worker->task, 1);
+ set_notify_signal(worker->task);
match->nr_running++;
}
spin_unlock_irqrestore(&worker->lock, flags);
@@ -1043,6 +945,24 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
return IO_WQ_CANCEL_NOTFOUND;
}
+static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
+ int sync, void *key)
+{
+ struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
+ int ret;
+
+ list_del_init(&wait->entry);
+
+ rcu_read_lock();
+ ret = io_wqe_activate_free_worker(wqe);
+ rcu_read_unlock();
+
+ if (!ret)
+ wake_up_process(wqe->wq->manager);
+
+ return 1;
+}
+
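
io_wqe_hash_wake() is installed as the wait_queue_entry callback; since the entry is embedded in struct io_wqe, it recovers the wqe with container_of() before trying to activate a free worker, falling back to waking the manager so one can be forked. The container_of() recovery step, demonstrated standalone:

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct wait_entry { int dummy; };
    struct wqe { const char *name; struct wait_entry wait; };

    /* the wake callback only receives &wqe->wait; recover the wqe */
    static void wake_cb(struct wait_entry *w)
    {
        struct wqe *wqe = container_of(w, struct wqe, wait);
        printf("woken: %s\n", wqe->name);
    }

    int main(void)
    {
        struct wqe q = { .name = "node0" };
        wake_cb(&q.wait);
        return 0;
    }
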
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
{
int ret = -ENOMEM, node;
@@ -1063,12 +983,11 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
if (ret)
goto err_wqes;
+ refcount_inc(&data->hash->refs);
+ wq->hash = data->hash;
wq->free_work = data->free_work;
wq->do_work = data->do_work;
- /* caller must already hold a reference to this */
- wq->user = data->user;
-
ret = -ENOMEM;
for_each_node(node) {
struct io_wqe *wqe;
@@ -1083,11 +1002,11 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
wqe->node = alloc_node;
wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
- if (wq->user) {
- wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
+ wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
task_rlimit(current, RLIMIT_NPROC);
- }
atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
+ wqe->wait.func = io_wqe_hash_wake;
+ INIT_LIST_HEAD(&wqe->wait.entry);
wqe->wq = wq;
raw_spin_lock_init(&wqe->lock);
INIT_WQ_LIST(&wqe->work_list);
@@ -1095,24 +1014,19 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
INIT_LIST_HEAD(&wqe->all_list);
}
- init_completion(&wq->done);
+ wq->task_pid = current->pid;
+ init_completion(&wq->exited);
+ refcount_set(&wq->refs, 1);
- wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
- if (!IS_ERR(wq->manager)) {
- wake_up_process(wq->manager);
- wait_for_completion(&wq->done);
- if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
- ret = -ENOMEM;
- goto err;
- }
- refcount_set(&wq->use_refs, 1);
- reinit_completion(&wq->done);
+ init_completion(&wq->worker_done);
+ atomic_set(&wq->worker_refs, 0);
+
+ ret = io_wq_fork_manager(wq);
+ if (!ret)
return wq;
- }
- ret = PTR_ERR(wq->manager);
- complete(&wq->done);
err:
+ io_wq_put_hash(data->hash);
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node)
kfree(wq->wqes[node]);
@@ -1123,46 +1037,46 @@ err_wq:
return ERR_PTR(ret);
}
-bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
+static void io_wq_destroy_manager(struct io_wq *wq)
{
- if (data->free_work != wq->free_work || data->do_work != wq->do_work)
- return false;
-
- return refcount_inc_not_zero(&wq->use_refs);
+ if (wq->manager) {
+ wake_up_process(wq->manager);
+ wait_for_completion(&wq->exited);
+ put_task_struct(wq->manager);
+ wq->manager = NULL;
+ }
}
-static void __io_wq_destroy(struct io_wq *wq)
+static void io_wq_destroy(struct io_wq *wq)
{
int node;
cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
set_bit(IO_WQ_BIT_EXIT, &wq->state);
- if (wq->manager)
- kthread_stop(wq->manager);
-
- rcu_read_lock();
- for_each_node(node)
- io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
- rcu_read_unlock();
+ io_wq_destroy_manager(wq);
- wait_for_completion(&wq->done);
-
- for_each_node(node)
- kfree(wq->wqes[node]);
+ for_each_node(node) {
+ struct io_wqe *wqe = wq->wqes[node];
+ WARN_ON_ONCE(!wq_list_empty(&wqe->work_list));
+ kfree(wqe);
+ }
+ io_wq_put_hash(wq->hash);
kfree(wq->wqes);
kfree(wq);
}
-void io_wq_destroy(struct io_wq *wq)
+void io_wq_put(struct io_wq *wq)
{
- if (refcount_dec_and_test(&wq->use_refs))
- __io_wq_destroy(wq);
+ if (refcount_dec_and_test(&wq->refs))
+ io_wq_destroy(wq);
}
-struct task_struct *io_wq_get_task(struct io_wq *wq)
+void io_wq_put_and_exit(struct io_wq *wq)
{
- return wq->manager;
+ set_bit(IO_WQ_BIT_EXIT, &wq->state);
+ io_wq_destroy_manager(wq);
+ io_wq_put(wq);
}
static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 096f1021018e..5fbf7997149e 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -1,6 +1,7 @@
#ifndef INTERNAL_IO_WQ_H
#define INTERNAL_IO_WQ_H
+#include <linux/refcount.h>
#include <linux/io_uring.h>
struct io_wq;
@@ -11,13 +12,6 @@ enum {
IO_WQ_WORK_UNBOUND = 4,
IO_WQ_WORK_CONCURRENT = 16,
- IO_WQ_WORK_FILES = 32,
- IO_WQ_WORK_FS = 64,
- IO_WQ_WORK_MM = 128,
- IO_WQ_WORK_CREDS = 256,
- IO_WQ_WORK_BLKCG = 512,
- IO_WQ_WORK_FSIZE = 1024,
-
IO_WQ_HASH_SHIFT = 24, /* upper 8 bits are used for hash key */
};
@@ -85,8 +79,8 @@ static inline void wq_list_del(struct io_wq_work_list *list,
struct io_wq_work {
struct io_wq_work_node list;
- struct io_identity *identity;
unsigned flags;
+ unsigned short personality;
};
static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
@@ -100,16 +94,27 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
typedef void (io_wq_work_fn)(struct io_wq_work *);
-struct io_wq_data {
- struct user_struct *user;
+struct io_wq_hash {
+ refcount_t refs;
+ unsigned long map;
+ struct wait_queue_head wait;
+};
+
+static inline void io_wq_put_hash(struct io_wq_hash *hash)
+{
+ if (refcount_dec_and_test(&hash->refs))
+ kfree(hash);
+}
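
The per-wqe hash_map bitmap becomes a standalone, refcounted io_wq_hash shared by every io-wq instance a ring dispatches to, so hashed (buffered-write) serialization holds across all of them; io_wq_put_hash() frees it on the last put. The same free-on-last-put idiom in userspace form:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct shared_hash {
        atomic_int refs;
        unsigned long map;   /* one bit per hash bucket, as in io_wq_hash */
    };

    static struct shared_hash *hash_get(struct shared_hash *h)
    {
        atomic_fetch_add(&h->refs, 1);
        return h;
    }

    /* free on the last put, mirroring io_wq_put_hash() */
    static void hash_put(struct shared_hash *h)
    {
        if (atomic_fetch_sub(&h->refs, 1) == 1)
            free(h);
    }

    int main(void)
    {
        struct shared_hash *h = calloc(1, sizeof(*h));
        atomic_init(&h->refs, 1);   /* creator's reference */
        hash_get(h);                /* a second io-wq shares it */
        hash_put(h);
        hash_put(h);                /* last put frees */
        return 0;
    }
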
+struct io_wq_data {
+ struct io_wq_hash *hash;
io_wq_work_fn *do_work;
free_work_fn *free_work;
};
struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-bool io_wq_get(struct io_wq *wq, struct io_wq_data *data);
-void io_wq_destroy(struct io_wq *wq);
+void io_wq_put(struct io_wq *wq);
+void io_wq_put_and_exit(struct io_wq *wq);
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);
@@ -124,8 +129,6 @@ typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
void *data, bool cancel_all);
-struct task_struct *io_wq_get_task(struct io_wq *wq);
-
#if defined(CONFIG_IO_WQ)
extern void io_wq_worker_sleeping(struct task_struct *);
extern void io_wq_worker_running(struct task_struct *);
@@ -140,6 +143,7 @@ static inline void io_wq_worker_running(struct task_struct *tsk)
static inline bool io_wq_current_is_worker(void)
{
- return in_task() && (current->flags & PF_IO_WORKER);
+ return in_task() && (current->flags & PF_IO_WORKER) &&
+ current->pf_io_worker;
}
#endif
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 14ce789927e4..92c25b5f1349 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -57,7 +57,6 @@
#include <linux/mman.h>
#include <linux/percpu.h>
#include <linux/slab.h>
-#include <linux/kthread.h>
#include <linux/blkdev.h>
#include <linux/bvec.h>
#include <linux/net.h>
@@ -75,13 +74,11 @@
#include <linux/fsnotify.h>
#include <linux/fadvise.h>
#include <linux/eventpoll.h>
-#include <linux/fs_struct.h>
#include <linux/splice.h>
#include <linux/task_work.h>
#include <linux/pagemap.h>
#include <linux/io_uring.h>
-#include <linux/blk-cgroup.h>
-#include <linux/audit.h>
+#include <linux/freezer.h>
#define CREATE_TRACE_POINTS
#include <trace/events/io_uring.h>
@@ -104,6 +101,10 @@
#define IORING_MAX_RESTRICTIONS (IORING_RESTRICTION_LAST + \
IORING_REGISTER_LAST + IORING_OP_LAST)
+#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
+ IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
+ IOSQE_BUFFER_SELECT)
+
struct io_uring {
u32 head ____cacheline_aligned_in_smp;
u32 tail ____cacheline_aligned_in_smp;
@@ -232,6 +233,7 @@ struct fixed_rsrc_data {
struct fixed_rsrc_ref_node *node;
struct percpu_ref refs;
struct completion done;
+ bool quiesce;
};
struct io_buffer {
@@ -249,6 +251,11 @@ struct io_restriction {
bool registered;
};
+enum {
+ IO_SQ_THREAD_SHOULD_STOP = 0,
+ IO_SQ_THREAD_SHOULD_PARK,
+};
+
struct io_sq_data {
refcount_t refs;
struct mutex lock;
@@ -262,6 +269,13 @@ struct io_sq_data {
struct wait_queue_head wait;
unsigned sq_thread_idle;
+ int sq_cpu;
+ pid_t task_pid;
+
+ unsigned long state;
+ struct completion startup;
+ struct completion parked;
+ struct completion exited;
};
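
The new state bits and startup/parked/exited completions give the ring owner a handshake with the SQPOLL thread: set SHOULD_PARK and wait for the poller to acknowledge quiescence, mutate shared state, then release it; SHOULD_STOP plus exited handles teardown. A runnable sketch of that park protocol (semaphores standing in for the completions; names illustrative):

    #include <pthread.h>
    #include <semaphore.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <unistd.h>

    enum { SHOULD_STOP = 1, SHOULD_PARK = 2 };

    static atomic_int state;
    static sem_t parked, unpark;

    /* poller side: acknowledge a park request, then block until released */
    static void poll_loop_step(void)
    {
        if (atomic_load(&state) & SHOULD_PARK) {
            sem_post(&parked);    /* "I am quiescent" */
            sem_wait(&unpark);
        }
    }

    static void *poller(void *arg)
    {
        (void)arg;
        while (!(atomic_load(&state) & SHOULD_STOP)) {
            poll_loop_step();
            usleep(1000);
        }
        return NULL;
    }

    /* owner side: request the park and wait for the acknowledgement */
    static void park_poller(void)
    {
        atomic_fetch_or(&state, SHOULD_PARK);
        sem_wait(&parked);
    }

    static void unpark_poller(void)
    {
        atomic_fetch_and(&state, ~SHOULD_PARK);
        sem_post(&unpark);
    }

    int main(void)
    {
        pthread_t t;
        sem_init(&parked, 0, 0);
        sem_init(&unpark, 0, 0);
        pthread_create(&t, NULL, poller, NULL);
        park_poller();
        puts("poller parked; safe to touch shared ring state");
        atomic_fetch_or(&state, SHOULD_STOP);
        unpark_poller();
        pthread_join(t, NULL);
        return 0;
    }
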
#define IO_IOPOLL_BATCH 8
@@ -279,8 +293,14 @@ struct io_comp_state {
struct list_head locked_free_list;
};
+struct io_submit_link {
+ struct io_kiocb *head;
+ struct io_kiocb *last;
+};
+
struct io_submit_state {
struct blk_plug plug;
+ struct io_submit_link link;
/*
* io_kiocb alloc cache
@@ -312,12 +332,11 @@ struct io_ring_ctx {
struct {
unsigned int flags;
unsigned int compat: 1;
- unsigned int limit_mem: 1;
unsigned int cq_overflow_flushed: 1;
unsigned int drain_next: 1;
unsigned int eventfd_async: 1;
unsigned int restricted: 1;
- unsigned int sqo_dead: 1;
+ unsigned int sqo_exec: 1;
/*
* Ring buffer of indices into array of io_uring_sqe, which is
@@ -339,6 +358,9 @@ struct io_ring_ctx {
unsigned cached_cq_overflow;
unsigned long sq_check_overflow;
+ /* hashed buffered write serialization */
+ struct io_wq_hash *hash_map;
+
struct list_head defer_list;
struct list_head timeout_list;
struct list_head cq_overflow_list;
@@ -355,22 +377,9 @@ struct io_ring_ctx {
struct io_rings *rings;
- /* IO offload */
- struct io_wq *io_wq;
-
- /*
- * For SQPOLL usage - we hold a reference to the parent task, so we
- * have access to the ->files
- */
- struct task_struct *sqo_task;
-
/* Only used for accounting purposes */
struct mm_struct *mm_account;
-#ifdef CONFIG_BLK_CGROUP
- struct cgroup_subsys_state *sqo_blkcg_css;
-#endif
-
struct io_sq_data *sq_data; /* if using sq thread polling */
struct wait_queue_head sqo_sq_wait;
@@ -390,13 +399,6 @@ struct io_ring_ctx {
struct user_struct *user;
- const struct cred *creds;
-
-#ifdef CONFIG_AUDIT
- kuid_t loginuid;
- unsigned int sessionid;
-#endif
-
struct completion ref_comp;
struct completion sq_thread_comp;
@@ -445,6 +447,11 @@ struct io_ring_ctx {
struct io_restriction restrictions;
+ /* exit task_work */
+ struct callback_head *exit_task_work;
+
+ struct wait_queue_head hash_wait;
+
/* Keep this last, we don't need it for the fast path */
struct work_struct exit_work;
};
@@ -673,7 +680,6 @@ enum {
REQ_F_POLLED_BIT,
REQ_F_BUFFER_SELECTED_BIT,
REQ_F_NO_FILE_TABLE_BIT,
- REQ_F_WORK_INITIALIZED_BIT,
REQ_F_LTIMEOUT_ACTIVE_BIT,
REQ_F_COMPLETE_INLINE_BIT,
@@ -697,7 +703,7 @@ enum {
/* fail rest of links */
REQ_F_FAIL_LINK = BIT(REQ_F_FAIL_LINK_BIT),
- /* on inflight list */
+ /* on inflight list, should be cancelled and waited on exit reliably */
REQ_F_INFLIGHT = BIT(REQ_F_INFLIGHT_BIT),
/* read/write uses file position */
REQ_F_CUR_POS = BIT(REQ_F_CUR_POS_BIT),
@@ -715,8 +721,6 @@ enum {
REQ_F_BUFFER_SELECTED = BIT(REQ_F_BUFFER_SELECTED_BIT),
/* doesn't need file table for this request */
REQ_F_NO_FILE_TABLE = BIT(REQ_F_NO_FILE_TABLE_BIT),
- /* io_wq_work is initialized */
- REQ_F_WORK_INITIALIZED = BIT(REQ_F_WORK_INITIALIZED_BIT),
/* linked timeout is active, i.e. prepared by link's head */
REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
/* completion is deferred through io_comp_state */
@@ -827,7 +831,6 @@ struct io_op_def {
unsigned plug : 1;
/* size of async data needed, if any */
unsigned short async_size;
- unsigned work_flags;
};
static const struct io_op_def io_op_defs[] = {
@@ -840,7 +843,6 @@ static const struct io_op_def io_op_defs[] = {
.needs_async_data = 1,
.plug = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_WRITEV] = {
.needs_file = 1,
@@ -850,12 +852,9 @@ static const struct io_op_def io_op_defs[] = {
.needs_async_data = 1,
.plug = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
- IO_WQ_WORK_FSIZE,
},
[IORING_OP_FSYNC] = {
.needs_file = 1,
- .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_READ_FIXED] = {
.needs_file = 1,
@@ -863,7 +862,6 @@ static const struct io_op_def io_op_defs[] = {
.pollin = 1,
.plug = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
},
[IORING_OP_WRITE_FIXED] = {
.needs_file = 1,
@@ -872,8 +870,6 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.plug = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE |
- IO_WQ_WORK_MM,
},
[IORING_OP_POLL_ADD] = {
.needs_file = 1,
@@ -882,7 +878,6 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_POLL_REMOVE] = {},
[IORING_OP_SYNC_FILE_RANGE] = {
.needs_file = 1,
- .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_SENDMSG] = {
.needs_file = 1,
@@ -890,8 +885,6 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_async_msghdr),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
- IO_WQ_WORK_FS,
},
[IORING_OP_RECVMSG] = {
.needs_file = 1,
@@ -900,29 +893,23 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_async_msghdr),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
- IO_WQ_WORK_FS,
},
[IORING_OP_TIMEOUT] = {
.needs_async_data = 1,
.async_size = sizeof(struct io_timeout_data),
- .work_flags = IO_WQ_WORK_MM,
},
[IORING_OP_TIMEOUT_REMOVE] = {
/* used by timeout updates' prep() */
- .work_flags = IO_WQ_WORK_MM,
},
[IORING_OP_ACCEPT] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollin = 1,
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES,
},
[IORING_OP_ASYNC_CANCEL] = {},
[IORING_OP_LINK_TIMEOUT] = {
.needs_async_data = 1,
.async_size = sizeof(struct io_timeout_data),
- .work_flags = IO_WQ_WORK_MM,
},
[IORING_OP_CONNECT] = {
.needs_file = 1,
@@ -930,26 +917,14 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.needs_async_data = 1,
.async_size = sizeof(struct io_async_connect),
- .work_flags = IO_WQ_WORK_MM,
},
[IORING_OP_FALLOCATE] = {
.needs_file = 1,
- .work_flags = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE,
- },
- [IORING_OP_OPENAT] = {
- .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
- IO_WQ_WORK_FS | IO_WQ_WORK_MM,
- },
- [IORING_OP_CLOSE] = {
- .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG,
- },
- [IORING_OP_FILES_UPDATE] = {
- .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM,
- },
- [IORING_OP_STATX] = {
- .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_MM |
- IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
},
+ [IORING_OP_OPENAT] = {},
+ [IORING_OP_CLOSE] = {},
+ [IORING_OP_FILES_UPDATE] = {},
+ [IORING_OP_STATX] = {},
[IORING_OP_READ] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
@@ -957,7 +932,6 @@ static const struct io_op_def io_op_defs[] = {
.buffer_select = 1,
.plug = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_WRITE] = {
.needs_file = 1,
@@ -965,42 +939,31 @@ static const struct io_op_def io_op_defs[] = {
.pollout = 1,
.plug = 1,
.async_size = sizeof(struct io_async_rw),
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
- IO_WQ_WORK_FSIZE,
},
[IORING_OP_FADVISE] = {
.needs_file = 1,
- .work_flags = IO_WQ_WORK_BLKCG,
- },
- [IORING_OP_MADVISE] = {
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
+ [IORING_OP_MADVISE] = {},
[IORING_OP_SEND] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_RECV] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollin = 1,
.buffer_select = 1,
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
},
[IORING_OP_OPENAT2] = {
- .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_FS |
- IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
},
[IORING_OP_EPOLL_CTL] = {
.unbound_nonreg_file = 1,
- .work_flags = IO_WQ_WORK_FILES,
},
[IORING_OP_SPLICE] = {
.needs_file = 1,
.hash_reg_file = 1,
.unbound_nonreg_file = 1,
- .work_flags = IO_WQ_WORK_BLKCG,
},
[IORING_OP_PROVIDE_BUFFERS] = {},
[IORING_OP_REMOVE_BUFFERS] = {},
@@ -1012,24 +975,18 @@ static const struct io_op_def io_op_defs[] = {
[IORING_OP_SHUTDOWN] = {
.needs_file = 1,
},
- [IORING_OP_RENAMEAT] = {
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
- IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
- },
- [IORING_OP_UNLINKAT] = {
- .work_flags = IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
- IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
- },
+ [IORING_OP_RENAMEAT] = {},
+ [IORING_OP_UNLINKAT] = {},
};
static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct task_struct *task,
struct files_struct *files);
+static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx);
static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node);
static struct fixed_rsrc_ref_node *alloc_fixed_rsrc_ref_node(
struct io_ring_ctx *ctx);
-static void init_fixed_file_ref_node(struct io_ring_ctx *ctx,
- struct fixed_rsrc_ref_node *ref_node);
+static void io_ring_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
static bool io_rw_reissue(struct io_kiocb *req);
static void io_cqring_fill_event(struct io_kiocb *req, long res);
@@ -1112,190 +1069,20 @@ static bool io_match_task(struct io_kiocb *head,
return true;
io_for_each_link(req, head) {
- if (!(req->flags & REQ_F_WORK_INITIALIZED))
- continue;
- if (req->file && req->file->f_op == &io_uring_fops)
+ if (req->flags & REQ_F_INFLIGHT)
return true;
- if ((req->work.flags & IO_WQ_WORK_FILES) &&
- req->work.identity->files == files)
+ if (req->task->files == files)
return true;
}
return false;
}
-static void io_sq_thread_drop_mm_files(void)
-{
- struct files_struct *files = current->files;
- struct mm_struct *mm = current->mm;
-
- if (mm) {
- kthread_unuse_mm(mm);
- mmput(mm);
- current->mm = NULL;
- }
- if (files) {
- struct nsproxy *nsproxy = current->nsproxy;
-
- task_lock(current);
- current->files = NULL;
- current->nsproxy = NULL;
- task_unlock(current);
- put_files_struct(files);
- put_nsproxy(nsproxy);
- }
-}
-
-static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx)
-{
- if (!current->files) {
- struct files_struct *files;
- struct nsproxy *nsproxy;
-
- task_lock(ctx->sqo_task);
- files = ctx->sqo_task->files;
- if (!files) {
- task_unlock(ctx->sqo_task);
- return -EOWNERDEAD;
- }
- atomic_inc(&files->count);
- get_nsproxy(ctx->sqo_task->nsproxy);
- nsproxy = ctx->sqo_task->nsproxy;
- task_unlock(ctx->sqo_task);
-
- task_lock(current);
- current->files = files;
- current->nsproxy = nsproxy;
- task_unlock(current);
- }
- return 0;
-}
-
-static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
-{
- struct mm_struct *mm;
-
- if (current->mm)
- return 0;
-
- task_lock(ctx->sqo_task);
- mm = ctx->sqo_task->mm;
- if (unlikely(!mm || !mmget_not_zero(mm)))
- mm = NULL;
- task_unlock(ctx->sqo_task);
-
- if (mm) {
- kthread_use_mm(mm);
- return 0;
- }
-
- return -EFAULT;
-}
-
-static int __io_sq_thread_acquire_mm_files(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- const struct io_op_def *def = &io_op_defs[req->opcode];
- int ret;
-
- if (def->work_flags & IO_WQ_WORK_MM) {
- ret = __io_sq_thread_acquire_mm(ctx);
- if (unlikely(ret))
- return ret;
- }
-
- if (def->needs_file || (def->work_flags & IO_WQ_WORK_FILES)) {
- ret = __io_sq_thread_acquire_files(ctx);
- if (unlikely(ret))
- return ret;
- }
-
- return 0;
-}
-
-static inline int io_sq_thread_acquire_mm_files(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- if (!(ctx->flags & IORING_SETUP_SQPOLL))
- return 0;
- return __io_sq_thread_acquire_mm_files(ctx, req);
-}
-
-static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx,
- struct cgroup_subsys_state **cur_css)
-
-{
-#ifdef CONFIG_BLK_CGROUP
- /* puts the old one when swapping */
- if (*cur_css != ctx->sqo_blkcg_css) {
- kthread_associate_blkcg(ctx->sqo_blkcg_css);
- *cur_css = ctx->sqo_blkcg_css;
- }
-#endif
-}
-
-static void io_sq_thread_unassociate_blkcg(void)
-{
-#ifdef CONFIG_BLK_CGROUP
- kthread_associate_blkcg(NULL);
-#endif
-}
-
static inline void req_set_fail_links(struct io_kiocb *req)
{
if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
req->flags |= REQ_F_FAIL_LINK;
}
-/*
- * None of these are dereferenced, they are simply used to check if any of
- * them have changed. If we're under current and check they are still the
- * same, we're fine to grab references to them for actual out-of-line use.
- */
-static void io_init_identity(struct io_identity *id)
-{
- id->files = current->files;
- id->mm = current->mm;
-#ifdef CONFIG_BLK_CGROUP
- rcu_read_lock();
- id->blkcg_css = blkcg_css();
- rcu_read_unlock();
-#endif
- id->creds = current_cred();
- id->nsproxy = current->nsproxy;
- id->fs = current->fs;
- id->fsize = rlimit(RLIMIT_FSIZE);
-#ifdef CONFIG_AUDIT
- id->loginuid = current->loginuid;
- id->sessionid = current->sessionid;
-#endif
- refcount_set(&id->count, 1);
-}
-
-static inline void __io_req_init_async(struct io_kiocb *req)
-{
- memset(&req->work, 0, sizeof(req->work));
- req->flags |= REQ_F_WORK_INITIALIZED;
-}
-
-/*
- * Note: must call io_req_init_async() for the first time you
- * touch any members of io_wq_work.
- */
-static inline void io_req_init_async(struct io_kiocb *req)
-{
- struct io_uring_task *tctx = current->io_uring;
-
- if (req->flags & REQ_F_WORK_INITIALIZED)
- return;
-
- __io_req_init_async(req);
-
- /* Grab a ref if this isn't our static identity */
- req->work.identity = tctx->identity;
- if (tctx->identity != &tctx->__identity)
- refcount_inc(&req->work.identity->count);
-}
-
static void io_ring_ctx_ref_free(struct percpu_ref *ref)
{
struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
@@ -1378,111 +1165,11 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
return false;
}
-static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
-{
- if (req->work.identity == &tctx->__identity)
- return;
- if (refcount_dec_and_test(&req->work.identity->count))
- kfree(req->work.identity);
-}
-
-static void io_req_clean_work(struct io_kiocb *req)
-{
- if (!(req->flags & REQ_F_WORK_INITIALIZED))
- return;
-
- if (req->work.flags & IO_WQ_WORK_MM)
- mmdrop(req->work.identity->mm);
-#ifdef CONFIG_BLK_CGROUP
- if (req->work.flags & IO_WQ_WORK_BLKCG)
- css_put(req->work.identity->blkcg_css);
-#endif
- if (req->work.flags & IO_WQ_WORK_CREDS)
- put_cred(req->work.identity->creds);
- if (req->work.flags & IO_WQ_WORK_FS) {
- struct fs_struct *fs = req->work.identity->fs;
-
- spin_lock(&req->work.identity->fs->lock);
- if (--fs->users)
- fs = NULL;
- spin_unlock(&req->work.identity->fs->lock);
- if (fs)
- free_fs_struct(fs);
- }
- if (req->work.flags & IO_WQ_WORK_FILES) {
- put_files_struct(req->work.identity->files);
- put_nsproxy(req->work.identity->nsproxy);
- }
- if (req->flags & REQ_F_INFLIGHT) {
- struct io_ring_ctx *ctx = req->ctx;
- struct io_uring_task *tctx = req->task->io_uring;
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->inflight_lock, flags);
- list_del(&req->inflight_entry);
- spin_unlock_irqrestore(&ctx->inflight_lock, flags);
- req->flags &= ~REQ_F_INFLIGHT;
- if (atomic_read(&tctx->in_idle))
- wake_up(&tctx->wait);
- }
-
- req->flags &= ~REQ_F_WORK_INITIALIZED;
- req->work.flags &= ~(IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | IO_WQ_WORK_FS |
- IO_WQ_WORK_CREDS | IO_WQ_WORK_FILES);
- io_put_identity(req->task->io_uring, req);
-}
-
-/*
- * Create a private copy of io_identity, since some fields don't match
- * the current context.
- */
-static bool io_identity_cow(struct io_kiocb *req)
-{
- struct io_uring_task *tctx = current->io_uring;
- const struct cred *creds = NULL;
- struct io_identity *id;
-
- if (req->work.flags & IO_WQ_WORK_CREDS)
- creds = req->work.identity->creds;
-
- id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL);
- if (unlikely(!id)) {
- req->work.flags |= IO_WQ_WORK_CANCEL;
- return false;
- }
-
- /*
- * We can safely just re-init the creds we copied Either the field
- * matches the current one, or we haven't grabbed it yet. The only
- * exception is ->creds, through registered personalities, so handle
- * that one separately.
- */
- io_init_identity(id);
- if (creds)
- id->creds = creds;
-
- /* add one for this request */
- refcount_inc(&id->count);
-
- /* drop tctx and req identity references, if needed */
- if (tctx->identity != &tctx->__identity &&
- refcount_dec_and_test(&tctx->identity->count))
- kfree(tctx->identity);
- if (req->work.identity != &tctx->__identity &&
- refcount_dec_and_test(&req->work.identity->count))
- kfree(req->work.identity);
-
- req->work.identity = id;
- tctx->identity = id;
- return true;
-}
-
static void io_req_track_inflight(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
if (!(req->flags & REQ_F_INFLIGHT)) {
- io_req_init_async(req);
req->flags |= REQ_F_INFLIGHT;
spin_lock_irq(&ctx->inflight_lock);
@@ -1491,86 +1178,11 @@ static void io_req_track_inflight(struct io_kiocb *req)
}
}
-static bool io_grab_identity(struct io_kiocb *req)
-{
- const struct io_op_def *def = &io_op_defs[req->opcode];
- struct io_identity *id = req->work.identity;
-
- if (def->work_flags & IO_WQ_WORK_FSIZE) {
- if (id->fsize != rlimit(RLIMIT_FSIZE))
- return false;
- req->work.flags |= IO_WQ_WORK_FSIZE;
- }
-#ifdef CONFIG_BLK_CGROUP
- if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
- (def->work_flags & IO_WQ_WORK_BLKCG)) {
- rcu_read_lock();
- if (id->blkcg_css != blkcg_css()) {
- rcu_read_unlock();
- return false;
- }
- /*
- * This should be rare, either the cgroup is dying or the task
- * is moving cgroups. Just punt to root for the handful of ios.
- */
- if (css_tryget_online(id->blkcg_css))
- req->work.flags |= IO_WQ_WORK_BLKCG;
- rcu_read_unlock();
- }
-#endif
- if (!(req->work.flags & IO_WQ_WORK_CREDS)) {
- if (id->creds != current_cred())
- return false;
- get_cred(id->creds);
- req->work.flags |= IO_WQ_WORK_CREDS;
- }
-#ifdef CONFIG_AUDIT
- if (!uid_eq(current->loginuid, id->loginuid) ||
- current->sessionid != id->sessionid)
- return false;
-#endif
- if (!(req->work.flags & IO_WQ_WORK_FS) &&
- (def->work_flags & IO_WQ_WORK_FS)) {
- if (current->fs != id->fs)
- return false;
- spin_lock(&id->fs->lock);
- if (!id->fs->in_exec) {
- id->fs->users++;
- req->work.flags |= IO_WQ_WORK_FS;
- } else {
- req->work.flags |= IO_WQ_WORK_CANCEL;
- }
- spin_unlock(&current->fs->lock);
- }
- if (!(req->work.flags & IO_WQ_WORK_FILES) &&
- (def->work_flags & IO_WQ_WORK_FILES) &&
- !(req->flags & REQ_F_NO_FILE_TABLE)) {
- if (id->files != current->files ||
- id->nsproxy != current->nsproxy)
- return false;
- atomic_inc(&id->files->count);
- get_nsproxy(id->nsproxy);
- req->work.flags |= IO_WQ_WORK_FILES;
- io_req_track_inflight(req);
- }
- if (!(req->work.flags & IO_WQ_WORK_MM) &&
- (def->work_flags & IO_WQ_WORK_MM)) {
- if (id->mm != current->mm)
- return false;
- mmgrab(id->mm);
- req->work.flags |= IO_WQ_WORK_MM;
- }
-
- return true;
-}
-
static void io_prep_async_work(struct io_kiocb *req)
{
const struct io_op_def *def = &io_op_defs[req->opcode];
struct io_ring_ctx *ctx = req->ctx;
- io_req_init_async(req);
-
if (req->flags & REQ_F_FORCE_ASYNC)
req->work.flags |= IO_WQ_WORK_CONCURRENT;
@@ -1581,17 +1193,6 @@ static void io_prep_async_work(struct io_kiocb *req)
if (def->unbound_nonreg_file)
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
-
- /* if we fail grabbing identity, we must COW, regrab, and retry */
- if (io_grab_identity(req))
- return;
-
- if (!io_identity_cow(req))
- return;
-
- /* can't fail at this point */
- if (!io_grab_identity(req))
- WARN_ON(1);
}
static void io_prep_async_link(struct io_kiocb *req)
@@ -1602,25 +1203,20 @@ static void io_prep_async_link(struct io_kiocb *req)
io_prep_async_work(cur);
}
-static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
+static void io_queue_async_work(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_kiocb *link = io_prep_linked_timeout(req);
+ struct io_uring_task *tctx = req->task->io_uring;
+
+ BUG_ON(!tctx);
+ BUG_ON(!tctx->io_wq);
trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
&req->work, req->flags);
- io_wq_enqueue(ctx->io_wq, &req->work);
- return link;
-}
-
-static void io_queue_async_work(struct io_kiocb *req)
-{
- struct io_kiocb *link;
-
/* init ->work of the whole link before punting */
io_prep_async_link(req);
- link = __io_queue_async_work(req);
-
+ io_wq_enqueue(tctx->io_wq, &req->work);
if (link)
io_queue_linked_timeout(link);
}
@@ -1855,18 +1451,22 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
return all_flushed;
}
-static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
struct task_struct *tsk,
struct files_struct *files)
{
+ bool ret = true;
+
if (test_bit(0, &ctx->cq_check_overflow)) {
/* iopoll syncs against uring_lock, not completion_lock */
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_lock(&ctx->uring_lock);
- __io_cqring_overflow_flush(ctx, force, tsk, files);
+ ret = __io_cqring_overflow_flush(ctx, force, tsk, files);
if (ctx->flags & IORING_SETUP_IOPOLL)
mutex_unlock(&ctx->uring_lock);
}
+
+ return ret;
}
static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
@@ -2048,9 +1648,19 @@ static void io_dismantle_req(struct io_kiocb *req)
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
if (req->fixed_rsrc_refs)
percpu_ref_put(req->fixed_rsrc_refs);
- io_req_clean_work(req);
+
+ if (req->flags & REQ_F_INFLIGHT) {
+ struct io_ring_ctx *ctx = req->ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->inflight_lock, flags);
+ list_del(&req->inflight_entry);
+ spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+ req->flags &= ~REQ_F_INFLIGHT;
+ }
}
+/* must be called somewhat shortly after putting a request */
static inline void io_put_task(struct task_struct *task, int nr)
{
struct io_uring_task *tctx = task->io_uring;
@@ -2134,15 +1744,7 @@ static void io_fail_links(struct io_kiocb *req)
trace_io_uring_fail_link(req, link);
io_cqring_fill_event(link, -ECANCELED);
- /*
- * It's ok to free under spinlock as they're not linked anymore,
- * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
- * work.fs->lock.
- */
- if (link->flags & REQ_F_WORK_INITIALIZED)
- io_put_req_deferred(link, 2);
- else
- io_double_put_req(link);
+ io_put_req_deferred(link, 2);
link = nxt;
}
io_commit_cqring(ctx);
@@ -2179,6 +1781,18 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
return __io_req_find_next(req);
}
+static void ctx_flush_and_put(struct io_ring_ctx *ctx)
+{
+ if (!ctx)
+ return;
+ if (ctx->submit_state.comp.nr) {
+ mutex_lock(&ctx->uring_lock);
+ io_submit_flush_completions(&ctx->submit_state.comp, ctx);
+ mutex_unlock(&ctx->uring_lock);
+ }
+ percpu_ref_put(&ctx->refs);
+}
+
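
__tctx_task_work() now batches by ring: it pins each ctx with a percpu ref while draining its requests and calls ctx_flush_and_put() only when the next request belongs to a different ctx, or once at the end. The flush-on-key-change loop shape, in miniature:

    #include <stdio.h>

    /* flush accumulated per-context state; -1 means "no context yet" */
    static void flush(int ctx) { if (ctx >= 0) printf("flush ctx %d\n", ctx); }

    int main(void)
    {
        int items[] = { 1, 1, 2, 2, 2, 3 };
        int cur = -1;

        for (unsigned i = 0; i < sizeof(items)/sizeof(items[0]); i++) {
            if (items[i] != cur) {
                flush(cur);        /* ctx_flush_and_put() analogue */
                cur = items[i];    /* percpu_ref_get() on the new ctx */
            }
            printf("handle item for ctx %d\n", cur);
        }
        flush(cur);                /* final flush after the loop */
        return 0;
    }
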
static bool __tctx_task_work(struct io_uring_task *tctx)
{
struct io_ring_ctx *ctx = NULL;
@@ -2196,30 +1810,20 @@ static bool __tctx_task_work(struct io_uring_task *tctx)
node = list.first;
while (node) {
struct io_wq_work_node *next = node->next;
- struct io_ring_ctx *this_ctx;
struct io_kiocb *req;
req = container_of(node, struct io_kiocb, io_task_work.node);
- this_ctx = req->ctx;
- req->task_work.func(&req->task_work);
- node = next;
-
- if (!ctx) {
- ctx = this_ctx;
- } else if (ctx != this_ctx) {
- mutex_lock(&ctx->uring_lock);
- io_submit_flush_completions(&ctx->submit_state.comp, ctx);
- mutex_unlock(&ctx->uring_lock);
- ctx = this_ctx;
+ if (req->ctx != ctx) {
+ ctx_flush_and_put(ctx);
+ ctx = req->ctx;
+ percpu_ref_get(&ctx->refs);
}
- }
- if (ctx && ctx->submit_state.comp.nr) {
- mutex_lock(&ctx->uring_lock);
- io_submit_flush_completions(&ctx->submit_state.comp, ctx);
- mutex_unlock(&ctx->uring_lock);
+ req->task_work.func(&req->task_work);
+ node = next;
}
+ ctx_flush_and_put(ctx);
return list.first != NULL;
}
@@ -2227,10 +1831,10 @@ static void tctx_task_work(struct callback_head *cb)
{
struct io_uring_task *tctx = container_of(cb, struct io_uring_task, task_work);
+ clear_bit(0, &tctx->task_state);
+
while (__tctx_task_work(tctx))
cond_resched();
-
- clear_bit(0, &tctx->task_state);
}
static int io_task_work_add(struct task_struct *tsk, struct io_kiocb *req,
@@ -2303,11 +1907,14 @@ static int io_req_task_work_add(struct io_kiocb *req)
static void io_req_task_work_add_fallback(struct io_kiocb *req,
task_work_func_t cb)
{
- struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);
+ struct io_ring_ctx *ctx = req->ctx;
+ struct callback_head *head;
init_task_work(&req->task_work, cb);
- task_work_add(tsk, &req->task_work, TWA_NONE);
- wake_up_process(tsk);
+ do {
+ head = READ_ONCE(ctx->exit_task_work);
+ req->task_work.next = head;
+ } while (cmpxchg(&ctx->exit_task_work, head, &req->task_work) != head);
}
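
With no manager kthread to task_work_add() against, the fallback pushes the callback onto ctx->exit_task_work with a cmpxchg loop; this is a classic lock-free LIFO push, safe here because the exit path consumes the whole stack at once. The same push/consume pair in portable C11 (exit_work stands in for the ctx field):

    #include <stdatomic.h>
    #include <stdio.h>

    struct cb { struct cb *next; void (*func)(struct cb *); };

    static _Atomic(struct cb *) exit_work;   /* ctx->exit_task_work analogue */

    /* lock-free LIFO push: retry until our snapshot of the head is still
     * current when we swing it to the new node */
    static void push(struct cb *node)
    {
        struct cb *head = atomic_load(&exit_work);

        do {
            node->next = head;
        } while (!atomic_compare_exchange_weak(&exit_work, &head, node));
    }

    static void hello(struct cb *c) { (void)c; puts("deferred callback"); }

    int main(void)
    {
        struct cb a = { .func = hello };

        push(&a);
        /* consumer side: take the whole list at once, then run it */
        for (struct cb *c = atomic_exchange(&exit_work, NULL); c; c = c->next)
            c->func(c);
        return 0;
    }
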
static void __io_req_task_cancel(struct io_kiocb *req, int error)
@@ -2329,7 +1936,9 @@ static void io_req_task_cancel(struct callback_head *cb)
struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
struct io_ring_ctx *ctx = req->ctx;
- __io_req_task_cancel(req, -ECANCELED);
+ mutex_lock(&ctx->uring_lock);
+ __io_req_task_cancel(req, req->result);
+ mutex_unlock(&ctx->uring_lock);
percpu_ref_put(&ctx->refs);
}
@@ -2339,15 +1948,11 @@ static void __io_req_task_submit(struct io_kiocb *req)
/* ctx stays valid until unlock, even if we drop all ours ctx->refs */
mutex_lock(&ctx->uring_lock);
- if (!ctx->sqo_dead && !(current->flags & PF_EXITING) &&
- !io_sq_thread_acquire_mm_files(ctx, req))
+ if (!(current->flags & PF_EXITING) && !current->in_execve)
__io_queue_sqe(req);
else
__io_req_task_cancel(req, -EFAULT);
mutex_unlock(&ctx->uring_lock);
-
- if (ctx->flags & IORING_SETUP_SQPOLL)
- io_sq_thread_drop_mm_files();
}
static void io_req_task_submit(struct callback_head *cb)
@@ -2364,11 +1969,22 @@ static void io_req_task_queue(struct io_kiocb *req)
req->task_work.func = io_req_task_submit;
ret = io_req_task_work_add(req);
if (unlikely(ret)) {
+ req->result = -ECANCELED;
percpu_ref_get(&req->ctx->refs);
io_req_task_work_add_fallback(req, io_req_task_cancel);
}
}
+static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
+{
+ percpu_ref_get(&req->ctx->refs);
+ req->result = ret;
+ req->task_work.func = io_req_task_cancel;
+
+ if (unlikely(io_req_task_work_add(req)))
+ io_req_task_work_add_fallback(req, io_req_task_cancel);
+}
+
static inline void io_queue_next(struct io_kiocb *req)
{
struct io_kiocb *nxt = io_req_find_next(req);
@@ -2794,24 +2410,37 @@ static bool io_resubmit_prep(struct io_kiocb *req)
return false;
return !io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
}
-#endif
-static bool io_rw_reissue(struct io_kiocb *req)
+static bool io_rw_should_reissue(struct io_kiocb *req)
{
-#ifdef CONFIG_BLOCK
umode_t mode = file_inode(req->file)->i_mode;
- int ret;
+ struct io_ring_ctx *ctx = req->ctx;
if (!S_ISBLK(mode) && !S_ISREG(mode))
return false;
- if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker())
+ if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() &&
+ !(ctx->flags & IORING_SETUP_IOPOLL)))
return false;
+ /*
+ * If ref is dying, we might be running poll reap from the exit work.
+ * Don't attempt to reissue from that path, just let it fail with
+ * -EAGAIN.
+ */
+ if (percpu_ref_is_dying(&ctx->refs))
+ return false;
+ return true;
+}
+#endif
- lockdep_assert_held(&req->ctx->uring_lock);
+static bool io_rw_reissue(struct io_kiocb *req)
+{
+#ifdef CONFIG_BLOCK
+ if (!io_rw_should_reissue(req))
+ return false;
- ret = io_sq_thread_acquire_mm_files(req->ctx, req);
+ lockdep_assert_held(&req->ctx->uring_lock);
- if (!ret && io_resubmit_prep(req)) {
+ if (io_resubmit_prep(req)) {
refcount_inc(&req->refs);
io_queue_async_work(req);
return true;
@@ -2849,6 +2478,19 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
{
struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+#ifdef CONFIG_BLOCK
+ /* Rewind iter, if we have one. iopoll path resubmits as usual */
+ if (res == -EAGAIN && io_rw_should_reissue(req)) {
+ struct io_async_rw *rw = req->async_data;
+
+ if (rw)
+ iov_iter_revert(&rw->iter,
+ req->result - iov_iter_count(&rw->iter));
+ else if (!io_resubmit_prep(req))
+ res = -EIO;
+ }
+#endif
+
if (kiocb->ki_flags & IOCB_WRITE)
kiocb_end_write(req);
@@ -3467,19 +3109,9 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- ssize_t ret;
-
- ret = io_prep_rw(req, sqe);
- if (ret)
- return ret;
-
if (unlikely(!(req->file->f_mode & FMODE_READ)))
return -EBADF;
-
- /* either don't need iovec imported or already have it */
- if (!req->async_data)
- return 0;
- return io_rw_prep_async(req, READ);
+ return io_prep_rw(req, sqe);
}
/*
@@ -3607,10 +3239,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
ret = io_iter_do_read(req, iter);
if (ret == -EIOCBQUEUED) {
- /* it's faster to check here then delegate to kfree */
- if (iovec)
- kfree(iovec);
- return 0;
+ if (req->async_data)
+ iov_iter_revert(iter, io_size - iov_iter_count(iter));
+ goto out_free;
} else if (ret == -EAGAIN) {
/* IOPOLL retry should happen for io-wq threads */
if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
@@ -3631,6 +3262,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (ret2)
return ret2;
+ iovec = NULL;
rw = req->async_data;
/* now use our persistent iterator, if we aren't already */
iter = &rw->iter;
@@ -3654,27 +3286,22 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
if (ret == -EIOCBQUEUED)
return 0;
/* we got some bytes, but not all. retry. */
+ kiocb->ki_flags &= ~IOCB_WAITQ;
} while (ret > 0 && ret < io_size);
done:
kiocb_done(kiocb, ret, issue_flags);
+out_free:
+ /* it's faster to check here than delegate to kfree */
+ if (iovec)
+ kfree(iovec);
return 0;
}
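
When the lower layer returns -EIOCBQUEUED while async data exists, the iterator may already have been advanced by a partial attempt, so io_read()/io_write() revert it by io_size - iov_iter_count(iter) before handing it off, ensuring a later retry resumes at the right position. A toy iterator showing what that revert arithmetic restores (illustrative, not the iov_iter API):

    #include <stddef.h>
    #include <stdio.h>

    struct iter { size_t count; };   /* bytes remaining, like iov_iter */

    static void advance(struct iter *it, size_t n) { it->count -= n; }

    /* undo a partial advance: restore 'bytes' of consumed progress,
     * mirroring iov_iter_revert(iter, io_size - iov_iter_count(iter)) */
    static void revert(struct iter *it, size_t bytes) { it->count += bytes; }

    int main(void)
    {
        size_t io_size = 4096;
        struct iter it = { .count = io_size };

        advance(&it, 1024);               /* partial attempt consumed 1K */
        revert(&it, io_size - it.count);  /* back to the full request */
        printf("remaining after revert: %zu\n", it.count);
        return 0;
    }
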
static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- ssize_t ret;
-
- ret = io_prep_rw(req, sqe);
- if (ret)
- return ret;
-
if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
return -EBADF;
-
- /* either don't need iovec imported or already have it */
- if (!req->async_data)
- return 0;
- return io_rw_prep_async(req, WRITE);
+ return io_prep_rw(req, sqe);
}
static int io_write(struct io_kiocb *req, unsigned int issue_flags)
@@ -3746,6 +3373,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
/* no retry on NONBLOCK nor RWF_NOWAIT */
if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
goto done;
+ if (ret2 == -EIOCBQUEUED && req->async_data)
+ iov_iter_revert(iter, io_size - iov_iter_count(iter));
if (!force_nonblock || ret2 != -EAGAIN) {
/* IOPOLL retry should happen for io-wq threads */
if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
@@ -3924,7 +3553,6 @@ static int __io_splice_prep(struct io_kiocb *req,
* Splice operation will be punted async, and here we need to
* modify io_wq_work.flags, so initialize io_wq_work first.
*/
- io_req_init_async(req);
req->work.flags |= IO_WQ_WORK_UNBOUND;
}
@@ -4011,7 +3639,7 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
-static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
@@ -4200,7 +3828,7 @@ err:
static int io_openat(struct io_kiocb *req, unsigned int issue_flags)
{
- return io_openat2(req, issue_flags & IO_URING_F_NONBLOCK);
+ return io_openat2(req, issue_flags);
}
static int io_remove_buffers_prep(struct io_kiocb *req,
@@ -4598,13 +4226,10 @@ err:
return 0;
}
-static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_ring_ctx *ctx = req->ctx;
- if (!req->file)
- return -EBADF;
-
if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
@@ -4664,11 +4289,21 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
req->sr_msg.msg_flags, &iomsg->free_iov);
}
+static int io_sendmsg_prep_async(struct io_kiocb *req)
+{
+ int ret;
+
+ if (!io_op_defs[req->opcode].needs_async_data)
+ return 0;
+ ret = io_sendmsg_copy_hdr(req, req->async_data);
+ if (!ret)
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return ret;
+}
+
static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- struct io_async_msghdr *async_msg = req->async_data;
struct io_sr_msg *sr = &req->sr_msg;
- int ret;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4681,13 +4316,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
-
- if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
- return 0;
- ret = io_sendmsg_copy_hdr(req, async_msg);
- if (!ret)
- req->flags |= REQ_F_NEED_CLEANUP;
- return ret;
+ return 0;
}
static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
@@ -4881,13 +4510,22 @@ static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req)
return io_put_kbuf(req, req->sr_msg.kbuf);
}
-static int io_recvmsg_prep(struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+static int io_recvmsg_prep_async(struct io_kiocb *req)
{
- struct io_async_msghdr *async_msg = req->async_data;
- struct io_sr_msg *sr = &req->sr_msg;
int ret;
+ if (!io_op_defs[req->opcode].needs_async_data)
+ return 0;
+ ret = io_recvmsg_copy_hdr(req, req->async_data);
+ if (!ret)
+ req->flags |= REQ_F_NEED_CLEANUP;
+ return ret;
+}
+
+static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+ struct io_sr_msg *sr = &req->sr_msg;
+
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4900,13 +4538,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
if (req->ctx->compat)
sr->msg_flags |= MSG_CMSG_COMPAT;
#endif
-
- if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
- return 0;
- ret = io_recvmsg_copy_hdr(req, async_msg);
- if (!ret)
- req->flags |= REQ_F_NEED_CLEANUP;
- return ret;
+ return 0;
}
static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
@@ -5059,10 +4691,17 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
return 0;
}
+static int io_connect_prep_async(struct io_kiocb *req)
+{
+ struct io_async_connect *io = req->async_data;
+ struct io_connect *conn = &req->connect;
+
+ return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
+}
+
static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_connect *conn = &req->connect;
- struct io_async_connect *io = req->async_data;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -5071,12 +4710,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
conn->addr_len = READ_ONCE(sqe->addr2);
-
- if (!io)
- return 0;
-
- return move_addr_to_kernel(conn->addr, conn->addr_len,
- &io->address);
+ return 0;
}
static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
@@ -5121,56 +4755,32 @@ out:
return 0;
}
#else /* !CONFIG_NET */
-static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_send(struct io_kiocb *req, unsigned int issue_flags)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_recvmsg_prep(struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- return -EOPNOTSUPP;
-}
-
-static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
-{
- return -EOPNOTSUPP;
-}
+#define IO_NETOP_FN(op) \
+static int io_##op(struct io_kiocb *req, unsigned int issue_flags) \
+{ \
+ return -EOPNOTSUPP; \
+}
+
+#define IO_NETOP_PREP(op) \
+IO_NETOP_FN(op) \
+static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
+{ \
+ return -EOPNOTSUPP; \
+} \
+
+#define IO_NETOP_PREP_ASYNC(op) \
+IO_NETOP_PREP(op) \
+static int io_##op##_prep_async(struct io_kiocb *req) \
+{ \
+ return -EOPNOTSUPP; \
+}
+
+IO_NETOP_PREP_ASYNC(sendmsg);
+IO_NETOP_PREP_ASYNC(recvmsg);
+IO_NETOP_PREP_ASYNC(connect);
+IO_NETOP_PREP(accept);
+IO_NETOP_FN(send);
+IO_NETOP_FN(recv);
#endif /* CONFIG_NET */
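For reference, the three stub macros expand purely by token pasting; a minimal sketch of what IO_NETOP_PREP_ASYNC(sendmsg) produces after preprocessing (modulo whitespace):

static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
{
	return -EOPNOTSUPP;
}
static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
	return -EOPNOTSUPP;
}
static int io_sendmsg_prep_async(struct io_kiocb *req)
{
	return -EOPNOTSUPP;
}

One macro invocation per opcode family thus replaces the ten hand-written stubs deleted above.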
struct io_poll_table {
@@ -5357,6 +4967,9 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
pt->error = -EINVAL;
return;
}
+ /* double add on the same waitqueue head, ignore */
+ if (poll->head == head)
+ return;
poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
if (!poll) {
pt->error = -ENOMEM;
@@ -5892,6 +5505,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
data->mode = io_translate_timeout_mode(flags);
hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
+ io_req_track_inflight(req);
return 0;
}
@@ -5952,12 +5566,15 @@ static bool io_cancel_cb(struct io_wq_work *work, void *data)
return req->user_data == (unsigned long) data;
}
-static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
+static int io_async_cancel_one(struct io_uring_task *tctx, void *sqe_addr)
{
enum io_wq_cancel cancel_ret;
int ret = 0;
- cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false);
+ if (!tctx->io_wq)
+ return -ENOENT;
+
+ cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, sqe_addr, false);
switch (cancel_ret) {
case IO_WQ_CANCEL_OK:
ret = 0;
@@ -5980,7 +5597,8 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
unsigned long flags;
int ret;
- ret = io_async_cancel_one(ctx, (void *) (unsigned long) sqe_addr);
+ ret = io_async_cancel_one(req->task->io_uring,
+ (void *) (unsigned long) sqe_addr);
if (ret != -ENOENT) {
spin_lock_irqsave(&ctx->completion_lock, flags);
goto done;
@@ -6084,9 +5702,9 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
case IORING_OP_POLL_REMOVE:
return io_poll_remove_prep(req, sqe);
case IORING_OP_FSYNC:
- return io_prep_fsync(req, sqe);
+ return io_fsync_prep(req, sqe);
case IORING_OP_SYNC_FILE_RANGE:
- return io_prep_sfr(req, sqe);
+ return io_sfr_prep(req, sqe);
case IORING_OP_SENDMSG:
case IORING_OP_SEND:
return io_sendmsg_prep(req, sqe);
@@ -6144,14 +5762,39 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -EINVAL;
}
-static int io_req_defer_prep(struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
+static int io_req_prep_async(struct io_kiocb *req)
+{
+ switch (req->opcode) {
+ case IORING_OP_READV:
+ case IORING_OP_READ_FIXED:
+ case IORING_OP_READ:
+ return io_rw_prep_async(req, READ);
+ case IORING_OP_WRITEV:
+ case IORING_OP_WRITE_FIXED:
+ case IORING_OP_WRITE:
+ return io_rw_prep_async(req, WRITE);
+ case IORING_OP_SENDMSG:
+ case IORING_OP_SEND:
+ return io_sendmsg_prep_async(req);
+ case IORING_OP_RECVMSG:
+ case IORING_OP_RECV:
+ return io_recvmsg_prep_async(req);
+ case IORING_OP_CONNECT:
+ return io_connect_prep_async(req);
+ }
+ return 0;
+}
+
+static int io_req_defer_prep(struct io_kiocb *req)
{
- if (!sqe)
+ if (!io_op_defs[req->opcode].needs_async_data)
return 0;
- if (io_alloc_async_data(req))
+ /* some opcodes init it during the initial prep */
+ if (req->async_data)
+ return 0;
+ if (__io_alloc_async_data(req))
return -EAGAIN;
- return io_req_prep(req, sqe);
+ return io_req_prep_async(req);
}
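The prep work is now split into two phases: a cheap synchronous prep at submission time, and an async prep only when a request is punted or deferred. A condensed view of who calls what, derived from the handlers above:

/*
 * submission:  io_init_req() -> io_req_prep()       (per-SQE fields only)
 * punt/defer:  io_req_defer_prep()
 *                -> __io_alloc_async_data()         (only if the opcode has
 *                                                    needs_async_data set)
 *                -> io_req_prep_async()             (copy iovec/msghdr/sockaddr
 *                                                    into req->async_data so
 *                                                    the SQE can be recycled)
 */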
static u32 io_get_sequence(struct io_kiocb *req)
@@ -6167,7 +5810,7 @@ static u32 io_get_sequence(struct io_kiocb *req)
return total_submitted - nr_reqs;
}
-static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_req_defer(struct io_kiocb *req)
{
struct io_ring_ctx *ctx = req->ctx;
struct io_defer_entry *de;
@@ -6184,11 +5827,9 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
return 0;
- if (!req->async_data) {
- ret = io_req_defer_prep(req, sqe);
- if (ret)
- return ret;
- }
+ ret = io_req_defer_prep(req);
+ if (ret)
+ return ret;
io_prep_async_link(req);
de = kmalloc(sizeof(*de), GFP_KERNEL);
if (!de)
@@ -6272,8 +5913,22 @@ static void __io_clean_op(struct io_kiocb *req)
static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_ring_ctx *ctx = req->ctx;
+ const struct cred *creds = NULL;
int ret;
+ if (req->work.personality) {
+ const struct cred *new_creds;
+
+ if (!(issue_flags & IO_URING_F_NONBLOCK))
+ mutex_lock(&ctx->uring_lock);
+ new_creds = idr_find(&ctx->personality_idr, req->work.personality);
+ if (!(issue_flags & IO_URING_F_NONBLOCK))
+ mutex_unlock(&ctx->uring_lock);
+ if (!new_creds)
+ return -EINVAL;
+ creds = override_creds(new_creds);
+ }
+
switch (req->opcode) {
case IORING_OP_NOP:
ret = io_nop(req, issue_flags);
@@ -6380,6 +6035,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
break;
}
+ if (creds)
+ revert_creds(creds);
+
if (ret)
return ret;
@@ -6427,29 +6085,11 @@ static void io_wq_submit_work(struct io_wq_work *work)
} while (1);
}
+ /* avoid locking problems by failing it from a clean context */
if (ret) {
- struct io_ring_ctx *lock_ctx = NULL;
-
- if (req->ctx->flags & IORING_SETUP_IOPOLL)
- lock_ctx = req->ctx;
-
- /*
- * io_iopoll_complete() does not hold completion_lock to
- * complete polled io, so here for polled io, we can not call
- * io_req_complete() directly, otherwise there maybe concurrent
- * access to cqring, defer_list, etc, which is not safe. Given
- * that io_iopoll_complete() is always called under uring_lock,
- * so here for polled io, we also get uring_lock to complete
- * it.
- */
- if (lock_ctx)
- mutex_lock(&lock_ctx->uring_lock);
-
- req_set_fail_links(req);
- io_req_complete(req, ret);
-
- if (lock_ctx)
- mutex_unlock(&lock_ctx->uring_lock);
+ /* io-wq is going to take one down */
+ refcount_inc(&req->refs);
+ io_req_task_queue_fail(req, ret);
}
}
@@ -6561,19 +6201,10 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
static void __io_queue_sqe(struct io_kiocb *req)
{
struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
- const struct cred *old_creds = NULL;
int ret;
- if ((req->flags & REQ_F_WORK_INITIALIZED) &&
- (req->work.flags & IO_WQ_WORK_CREDS) &&
- req->work.identity->creds != current_cred())
- old_creds = override_creds(req->work.identity->creds);
-
ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
- if (old_creds)
- revert_creds(old_creds);
-
/*
* We async punt it if the file wasn't marked NOWAIT, or if the file
* doesn't support non-blocking read/write attempts
@@ -6607,11 +6238,11 @@ static void __io_queue_sqe(struct io_kiocb *req)
io_queue_linked_timeout(linked_timeout);
}
-static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static void io_queue_sqe(struct io_kiocb *req)
{
int ret;
- ret = io_req_defer(req, sqe);
+ ret = io_req_defer(req);
if (ret) {
if (ret != -EIOCBQUEUED) {
fail_req:
@@ -6620,42 +6251,133 @@ fail_req:
io_req_complete(req, ret);
}
} else if (req->flags & REQ_F_FORCE_ASYNC) {
- if (!req->async_data) {
- ret = io_req_defer_prep(req, sqe);
- if (unlikely(ret))
- goto fail_req;
- }
+ ret = io_req_defer_prep(req);
+ if (unlikely(ret))
+ goto fail_req;
io_queue_async_work(req);
} else {
- if (sqe) {
- ret = io_req_prep(req, sqe);
- if (unlikely(ret))
- goto fail_req;
- }
__io_queue_sqe(req);
}
}
-static inline void io_queue_link_head(struct io_kiocb *req)
+/*
+ * Check SQE restrictions (opcode and flags).
+ *
+ * Returns 'true' if SQE is allowed, 'false' otherwise.
+ */
+static inline bool io_check_restriction(struct io_ring_ctx *ctx,
+ struct io_kiocb *req,
+ unsigned int sqe_flags)
{
- if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
- io_put_req(req);
- io_req_complete(req, -ECANCELED);
- } else
- io_queue_sqe(req, NULL);
+ if (!ctx->restricted)
+ return true;
+
+ if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
+ return false;
+
+ if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
+ ctx->restrictions.sqe_flags_required)
+ return false;
+
+ if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
+ ctx->restrictions.sqe_flags_required))
+ return false;
+
+ return true;
}
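A worked example of the bitmask check above, as a standalone user-space sketch with hypothetical flag values standing in for the IOSQE_* bits:

#include <assert.h>

enum { F_FIXED = 1, F_LINK = 2, F_ASYNC = 4 };	/* hypothetical stand-ins */

static int sqe_allowed(unsigned int required, unsigned int allowed,
		       unsigned int flags)
{
	if ((flags & required) != required)
		return 0;		/* a required bit is missing */
	if (flags & ~(allowed | required))
		return 0;		/* a bit outside the allowed set */
	return 1;
}

int main(void)
{
	assert(sqe_allowed(F_FIXED, F_ASYNC, F_FIXED));
	assert(sqe_allowed(F_FIXED, F_ASYNC, F_FIXED | F_ASYNC));
	assert(!sqe_allowed(F_FIXED, F_ASYNC, F_ASYNC));	  /* required missing */
	assert(!sqe_allowed(F_FIXED, F_ASYNC, F_FIXED | F_LINK)); /* LINK not allowed */
	return 0;
}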
-struct io_submit_link {
- struct io_kiocb *head;
- struct io_kiocb *last;
-};
+static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
+{
+ struct io_submit_state *state;
+ unsigned int sqe_flags;
+ int ret = 0;
+
+ req->opcode = READ_ONCE(sqe->opcode);
+ /* same numerical values with corresponding REQ_F_*, safe to copy */
+ req->flags = sqe_flags = READ_ONCE(sqe->flags);
+ req->user_data = READ_ONCE(sqe->user_data);
+ req->async_data = NULL;
+ req->file = NULL;
+ req->ctx = ctx;
+ req->link = NULL;
+ req->fixed_rsrc_refs = NULL;
+ /* one is dropped after submission, the other at completion */
+ refcount_set(&req->refs, 2);
+ req->task = current;
+ req->result = 0;
+
+ /* enforce forwards compatibility on users */
+ if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) {
+ req->flags = 0;
+ return -EINVAL;
+ }
+
+ if (unlikely(req->opcode >= IORING_OP_LAST))
+ return -EINVAL;
+
+ if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
+ return -EACCES;
+
+ if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
+ !io_op_defs[req->opcode].buffer_select)
+ return -EOPNOTSUPP;
+
+ req->work.list.next = NULL;
+ req->work.flags = 0;
+ req->work.personality = READ_ONCE(sqe->personality);
+ state = &ctx->submit_state;
+
+ /*
+ * Plug now if we have more than 1 IO left after this, and the target
+ * is potentially a read/write to block based storage.
+ */
+ if (!state->plug_started && state->ios_left > 1 &&
+ io_op_defs[req->opcode].plug) {
+ blk_start_plug(&state->plug);
+ state->plug_started = true;
+ }
+
+ if (io_op_defs[req->opcode].needs_file) {
+ bool fixed = req->flags & REQ_F_FIXED_FILE;
+
+ req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed);
+ if (unlikely(!req->file))
+ ret = -EBADF;
+ }
+
+ state->ios_left--;
+ return ret;
+}
-static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
- struct io_submit_link *link)
+static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+ const struct io_uring_sqe *sqe)
{
- struct io_ring_ctx *ctx = req->ctx;
+ struct io_submit_link *link = &ctx->submit_state.link;
int ret;
+ ret = io_init_req(ctx, req, sqe);
+ if (unlikely(ret)) {
+fail_req:
+ io_put_req(req);
+ io_req_complete(req, ret);
+ if (link->head) {
+ /* fail even hard links since we don't submit */
+ link->head->flags |= REQ_F_FAIL_LINK;
+ io_put_req(link->head);
+ io_req_complete(link->head, -ECANCELED);
+ link->head = NULL;
+ }
+ return ret;
+ }
+ ret = io_req_prep(req, sqe);
+ if (unlikely(ret))
+ goto fail_req;
+
+ /* don't need @sqe from now on */
+ trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
+ true, ctx->flags & IORING_SETUP_SQPOLL);
+
/*
* If we already have a head request, queue this one for async
* submittal once the head completes. If we don't have a head but
@@ -6677,19 +6399,16 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
head->flags |= REQ_F_IO_DRAIN;
ctx->drain_next = 1;
}
- ret = io_req_defer_prep(req, sqe);
- if (unlikely(ret)) {
- /* fail even hard links since we don't submit */
- head->flags |= REQ_F_FAIL_LINK;
- return ret;
- }
+ ret = io_req_defer_prep(req);
+ if (unlikely(ret))
+ goto fail_req;
trace_io_uring_link(ctx, req, head);
link->last->link = req;
link->last = req;
/* last request of a link, enqueue the link */
if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
- io_queue_link_head(head);
+ io_queue_sqe(head);
link->head = NULL;
}
} else {
@@ -6698,13 +6417,10 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
ctx->drain_next = 0;
}
if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
- ret = io_req_defer_prep(req, sqe);
- if (unlikely(ret))
- req->flags |= REQ_F_FAIL_LINK;
link->head = req;
link->last = req;
} else {
- io_queue_sqe(req, sqe);
+ io_queue_sqe(req);
}
}
@@ -6717,6 +6433,8 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
static void io_submit_state_end(struct io_submit_state *state,
struct io_ring_ctx *ctx)
{
+ if (state->link.head)
+ io_queue_sqe(state->link.head);
if (state->comp.nr)
io_submit_flush_completions(&state->comp, ctx);
if (state->plug_started)
@@ -6732,6 +6450,8 @@ static void io_submit_state_start(struct io_submit_state *state,
{
state->plug_started = false;
state->ios_left = max_ios;
+ /* set only head, no need to init link_last in advance */
+ state->link.head = NULL;
}
static void io_commit_sqring(struct io_ring_ctx *ctx)
@@ -6777,117 +6497,9 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
return NULL;
}
-/*
- * Check SQE restrictions (opcode and flags).
- *
- * Returns 'true' if SQE is allowed, 'false' otherwise.
- */
-static inline bool io_check_restriction(struct io_ring_ctx *ctx,
- struct io_kiocb *req,
- unsigned int sqe_flags)
-{
- if (!ctx->restricted)
- return true;
-
- if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
- return false;
-
- if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
- ctx->restrictions.sqe_flags_required)
- return false;
-
- if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
- ctx->restrictions.sqe_flags_required))
- return false;
-
- return true;
-}
-
-#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
- IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
- IOSQE_BUFFER_SELECT)
-
-static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
- const struct io_uring_sqe *sqe)
-{
- struct io_submit_state *state;
- unsigned int sqe_flags;
- int id, ret = 0;
-
- req->opcode = READ_ONCE(sqe->opcode);
- /* same numerical values with corresponding REQ_F_*, safe to copy */
- req->flags = sqe_flags = READ_ONCE(sqe->flags);
- req->user_data = READ_ONCE(sqe->user_data);
- req->async_data = NULL;
- req->file = NULL;
- req->ctx = ctx;
- req->link = NULL;
- req->fixed_rsrc_refs = NULL;
- /* one is dropped after submission, the other at completion */
- refcount_set(&req->refs, 2);
- req->task = current;
- req->result = 0;
-
- /* enforce forwards compatibility on users */
- if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
- return -EINVAL;
-
- if (unlikely(req->opcode >= IORING_OP_LAST))
- return -EINVAL;
-
- if (unlikely(io_sq_thread_acquire_mm_files(ctx, req)))
- return -EFAULT;
-
- if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
- return -EACCES;
-
- if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
- !io_op_defs[req->opcode].buffer_select)
- return -EOPNOTSUPP;
-
- id = READ_ONCE(sqe->personality);
- if (id) {
- struct io_identity *iod;
-
- iod = idr_find(&ctx->personality_idr, id);
- if (unlikely(!iod))
- return -EINVAL;
- refcount_inc(&iod->count);
-
- __io_req_init_async(req);
- get_cred(iod->creds);
- req->work.identity = iod;
- req->work.flags |= IO_WQ_WORK_CREDS;
- }
-
- state = &ctx->submit_state;
-
- /*
- * Plug now if we have more than 1 IO left after this, and the target
- * is potentially a read/write to block based storage.
- */
- if (!state->plug_started && state->ios_left > 1 &&
- io_op_defs[req->opcode].plug) {
- blk_start_plug(&state->plug);
- state->plug_started = true;
- }
-
- if (io_op_defs[req->opcode].needs_file) {
- bool fixed = req->flags & REQ_F_FIXED_FILE;
-
- req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed);
- if (unlikely(!req->file))
- ret = -EBADF;
- }
-
- state->ios_left--;
- return ret;
-}
-
static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
{
- struct io_submit_link link;
- int i, submitted = 0;
+ int submitted = 0;
/* if we have a backlog and couldn't flush it all, return BUSY */
if (test_bit(0, &ctx->sq_check_overflow)) {
@@ -6903,14 +6515,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
percpu_counter_add(&current->io_uring->inflight, nr);
refcount_add(nr, &current->usage);
-
io_submit_state_start(&ctx->submit_state, nr);
- link.head = NULL;
- for (i = 0; i < nr; i++) {
+ while (submitted < nr) {
const struct io_uring_sqe *sqe;
struct io_kiocb *req;
- int err;
req = io_alloc_req(ctx);
if (unlikely(!req)) {
@@ -6925,20 +6534,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
}
/* will complete beyond this point, count as submitted */
submitted++;
-
- err = io_init_req(ctx, req, sqe);
- if (unlikely(err)) {
-fail_req:
- io_put_req(req);
- io_req_complete(req, err);
+ if (io_submit_sqe(ctx, req, sqe))
break;
- }
-
- trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
- true, ctx->flags & IORING_SETUP_SQPOLL);
- err = io_submit_sqe(req, sqe, &link);
- if (err)
- goto fail_req;
}
if (unlikely(submitted != nr)) {
@@ -6950,10 +6547,8 @@ fail_req:
percpu_counter_sub(&tctx->inflight, unused);
put_task_struct_many(current, unused);
}
- if (link.head)
- io_queue_link_head(link.head);
- io_submit_state_end(&ctx->submit_state, ctx);
+ io_submit_state_end(&ctx->submit_state, ctx);
/* Commit SQ ring head once we've consumed and submitted all SQEs */
io_commit_sqring(ctx);
@@ -6992,8 +6587,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
if (!list_empty(&ctx->iopoll_list))
io_do_iopoll(ctx, &nr_events, 0);
- if (to_submit && !ctx->sqo_dead &&
- likely(!percpu_ref_is_dying(&ctx->refs)))
+ if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)))
ret = io_submit_sqes(ctx, to_submit);
mutex_unlock(&ctx->uring_lock);
}
@@ -7030,71 +6624,94 @@ static void io_sqd_init_new(struct io_sq_data *sqd)
io_sqd_update_thread_idle(sqd);
}
+static bool io_sq_thread_should_stop(struct io_sq_data *sqd)
+{
+ return test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+}
+
+static bool io_sq_thread_should_park(struct io_sq_data *sqd)
+{
+ return test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+}
+
+static void io_sq_thread_parkme(struct io_sq_data *sqd)
+{
+ for (;;) {
+ /*
+ * TASK_PARKED is a special state; we must serialize against
+ * possible pending wakeups to avoid store-store collisions on
+ * task->state.
+ *
+ * Such a collision could result in the task state changing
+ * from TASK_PARKED and us failing the
+ * wait_task_inactive() in kthread_park().
+ */
+ set_special_state(TASK_PARKED);
+ if (!test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state))
+ break;
+
+ /*
+ * Thread is going to call schedule(), do not preempt it,
+ * or the caller of kthread_park() may spend more time in
+ * wait_task_inactive().
+ */
+ preempt_disable();
+ complete(&sqd->parked);
+ schedule_preempt_disabled();
+ preempt_enable();
+ }
+ __set_current_state(TASK_RUNNING);
+}
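The handshake above is kernel-specific (set_special_state(), TASK_PARKED), but the protocol generalizes. A rough user-space analogue with pthreads — hypothetical names, a sketch rather than a faithful port:

#include <pthread.h>
#include <stdbool.h>

struct park_ctrl {
	pthread_mutex_t lock;
	pthread_cond_t  cv;
	bool should_park;	/* plays the role of IO_SQ_THREAD_SHOULD_PARK */
	bool parked;		/* plays the role of the 'parked' completion */
};

/* worker side, called from the worker's main loop: io_sq_thread_parkme() */
static void parkme(struct park_ctrl *pc)
{
	pthread_mutex_lock(&pc->lock);
	while (pc->should_park) {
		pc->parked = true;
		pthread_cond_broadcast(&pc->cv);	/* complete(&sqd->parked) */
		pthread_cond_wait(&pc->cv, &pc->lock);	/* schedule() while parked */
	}
	pc->parked = false;
	pthread_mutex_unlock(&pc->lock);
}

/* parker side: raise the bit, wake the worker, wait until it reports parked */
static void park(struct park_ctrl *pc)
{
	pthread_mutex_lock(&pc->lock);
	pc->should_park = true;
	pthread_cond_broadcast(&pc->cv);
	while (!pc->parked)
		pthread_cond_wait(&pc->cv, &pc->lock);	/* wait_for_completion() */
	pthread_mutex_unlock(&pc->lock);
}

static void unpark(struct park_ctrl *pc)
{
	pthread_mutex_lock(&pc->lock);
	pc->should_park = false;
	pthread_cond_broadcast(&pc->cv);	/* wake_up_state(t, TASK_PARKED) */
	pthread_mutex_unlock(&pc->lock);
}

The kernel cannot sleep under a mutex at this point, which is why the real code pairs set_special_state(TASK_PARKED) with a re-check of the park bit before calling schedule().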
+
static int io_sq_thread(void *data)
{
- struct cgroup_subsys_state *cur_css = NULL;
- struct files_struct *old_files = current->files;
- struct nsproxy *old_nsproxy = current->nsproxy;
- const struct cred *old_cred = NULL;
struct io_sq_data *sqd = data;
struct io_ring_ctx *ctx;
unsigned long timeout = 0;
+ char buf[TASK_COMM_LEN];
DEFINE_WAIT(wait);
- task_lock(current);
- current->files = NULL;
- current->nsproxy = NULL;
- task_unlock(current);
+ sprintf(buf, "iou-sqp-%d", sqd->task_pid);
+ set_task_comm(current, buf);
+ current->pf_io_worker = NULL;
+
+ if (sqd->sq_cpu != -1)
+ set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
+ else
+ set_cpus_allowed_ptr(current, cpu_online_mask);
+ current->flags |= PF_NO_SETAFFINITY;
- while (!kthread_should_stop()) {
+ wait_for_completion(&sqd->startup);
+
+ while (!io_sq_thread_should_stop(sqd)) {
int ret;
bool cap_entries, sqt_spin, needs_sched;
/*
* Any changes to the sqd lists are synchronized through the
- * kthread parking. This synchronizes the thread vs users,
+ * thread parking. This synchronizes the thread vs users,
* the users are synchronized on the sqd->ctx_lock.
*/
- if (kthread_should_park()) {
- kthread_parkme();
- /*
- * When sq thread is unparked, in case the previous park operation
- * comes from io_put_sq_data(), which means that sq thread is going
- * to be stopped, so here needs to have a check.
- */
- if (kthread_should_stop())
- break;
+ if (io_sq_thread_should_park(sqd)) {
+ io_sq_thread_parkme(sqd);
+ continue;
}
-
if (unlikely(!list_empty(&sqd->ctx_new_list))) {
io_sqd_init_new(sqd);
timeout = jiffies + sqd->sq_thread_idle;
}
-
+ if (fatal_signal_pending(current))
+ break;
sqt_spin = false;
cap_entries = !list_is_singular(&sqd->ctx_list);
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
- if (current->cred != ctx->creds) {
- if (old_cred)
- revert_creds(old_cred);
- old_cred = override_creds(ctx->creds);
- }
- io_sq_thread_associate_blkcg(ctx, &cur_css);
-#ifdef CONFIG_AUDIT
- current->loginuid = ctx->loginuid;
- current->sessionid = ctx->sessionid;
-#endif
-
ret = __io_sq_thread(ctx, cap_entries);
if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
sqt_spin = true;
-
- io_sq_thread_drop_mm_files();
}
if (sqt_spin || !time_after(jiffies, timeout)) {
io_run_task_work();
- io_sq_thread_drop_mm_files();
cond_resched();
if (sqt_spin)
timeout = jiffies + sqd->sq_thread_idle;
@@ -7115,11 +6732,12 @@ static int io_sq_thread(void *data)
}
}
- if (needs_sched && !kthread_should_park()) {
+ if (needs_sched && !io_sq_thread_should_park(sqd)) {
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_set_wakeup_flag(ctx);
schedule();
+ try_to_freeze();
list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
io_ring_clear_wakeup_flag(ctx);
}
@@ -7128,22 +6746,32 @@ static int io_sq_thread(void *data)
timeout = jiffies + sqd->sq_thread_idle;
}
- io_run_task_work();
- io_sq_thread_drop_mm_files();
+ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+ io_uring_cancel_sqpoll(ctx);
- if (cur_css)
- io_sq_thread_unassociate_blkcg();
- if (old_cred)
- revert_creds(old_cred);
+ io_run_task_work();
- task_lock(current);
- current->files = old_files;
- current->nsproxy = old_nsproxy;
- task_unlock(current);
+ /*
+ * Ensure that we park properly if racing with someone trying to park
+ * while we're exiting. If we fail to grab the lock, check the park
+ * bit and park if necessary. The ordering of the park bit and the lock
+ * ensures that we catch this reliably.
+ */
+ if (!mutex_trylock(&sqd->lock)) {
+ if (io_sq_thread_should_park(sqd))
+ io_sq_thread_parkme(sqd);
+ mutex_lock(&sqd->lock);
+ }
- kthread_parkme();
+ sqd->thread = NULL;
+ list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+ ctx->sqo_exec = 1;
+ io_ring_set_wakeup_flag(ctx);
+ }
- return 0;
+ complete(&sqd->exited);
+ mutex_unlock(&sqd->lock);
+ do_exit(0);
}
struct io_wait_queue {
@@ -7264,11 +6892,16 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
trace_io_uring_cqring_wait(ctx, min_events);
do {
- io_cqring_overflow_flush(ctx, false, NULL, NULL);
+ /* if we can't even flush overflow, don't wait for more */
+ if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) {
+ ret = -EBUSY;
+ break;
+ }
prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
TASK_INTERRUPTIBLE);
ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
finish_wait(&ctx->wait, &iowq.wq);
+ cond_resched();
} while (ret > 0);
restore_saved_sigmask_unless(ret == -EINTR);
@@ -7328,38 +6961,59 @@ static void io_sqe_rsrc_set_node(struct io_ring_ctx *ctx,
percpu_ref_get(&rsrc_data->refs);
}
-static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
- struct io_ring_ctx *ctx,
- struct fixed_rsrc_ref_node *backup_node)
+static void io_sqe_rsrc_kill_node(struct io_ring_ctx *ctx, struct fixed_rsrc_data *data)
{
- struct fixed_rsrc_ref_node *ref_node;
- int ret;
+ struct fixed_rsrc_ref_node *ref_node = NULL;
io_rsrc_ref_lock(ctx);
ref_node = data->node;
+ data->node = NULL;
io_rsrc_ref_unlock(ctx);
if (ref_node)
percpu_ref_kill(&ref_node->refs);
+}
+
+static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
+ struct io_ring_ctx *ctx,
+ void (*rsrc_put)(struct io_ring_ctx *ctx,
+ struct io_rsrc_put *prsrc))
+{
+ struct fixed_rsrc_ref_node *backup_node;
+ int ret;
- percpu_ref_kill(&data->refs);
+ if (data->quiesce)
+ return -ENXIO;
- /* wait for all refs nodes to complete */
- flush_delayed_work(&ctx->rsrc_put_work);
+ data->quiesce = true;
do {
+ ret = -ENOMEM;
+ backup_node = alloc_fixed_rsrc_ref_node(ctx);
+ if (!backup_node)
+ break;
+ backup_node->rsrc_data = data;
+ backup_node->rsrc_put = rsrc_put;
+
+ io_sqe_rsrc_kill_node(ctx, data);
+ percpu_ref_kill(&data->refs);
+ flush_delayed_work(&ctx->rsrc_put_work);
+
ret = wait_for_completion_interruptible(&data->done);
if (!ret)
break;
+
+ percpu_ref_resurrect(&data->refs);
+ io_sqe_rsrc_set_node(ctx, data, backup_node);
+ backup_node = NULL;
+ reinit_completion(&data->done);
+ mutex_unlock(&ctx->uring_lock);
ret = io_run_task_work_sig();
- if (ret < 0) {
- percpu_ref_resurrect(&data->refs);
- reinit_completion(&data->done);
- io_sqe_rsrc_set_node(ctx, data, backup_node);
- return ret;
- }
- } while (1);
+ mutex_lock(&ctx->uring_lock);
+ } while (ret >= 0);
+ data->quiesce = false;
- destroy_fixed_rsrc_ref_node(backup_node);
- return 0;
+ if (backup_node)
+ destroy_fixed_rsrc_ref_node(backup_node);
+ return ret;
}
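The reworked quiesce path retries instead of bailing out on a signal. A condensed restatement of the loop above, not new logic:

/*
 * 1. allocate a backup node so the table stays usable if we must abort;
 * 2. detach and kill the current node, kill data->refs, and flush any
 *    pending rsrc puts;
 * 3. sleep until every ref is gone (data->done) -- on success, stop;
 * 4. if interrupted by a signal, resurrect data->refs, reinstall the
 *    backup node, drop the uring_lock to run task work, then retry
 *    with a fresh backup node.
 *
 * data->quiesce guards against two tasks quiescing the same table at once.
 */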
static struct fixed_rsrc_data *alloc_fixed_rsrc_data(struct io_ring_ctx *ctx)
@@ -7390,18 +7044,17 @@ static void free_fixed_rsrc_data(struct fixed_rsrc_data *data)
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
struct fixed_rsrc_data *data = ctx->file_data;
- struct fixed_rsrc_ref_node *backup_node;
unsigned nr_tables, i;
int ret;
- if (!data)
+ /*
+ * The percpu_ref_is_dying() check stops parallel files unregister,
+ * since we may drop the uring lock later in this function to
+ * run task work.
+ */
+ if (!data || percpu_ref_is_dying(&data->refs))
return -ENXIO;
- backup_node = alloc_fixed_rsrc_ref_node(ctx);
- if (!backup_node)
- return -ENOMEM;
- init_fixed_file_ref_node(ctx, backup_node);
-
- ret = io_rsrc_ref_quiesce(data, ctx, backup_node);
+ ret = io_rsrc_ref_quiesce(data, ctx, io_ring_file_put);
if (ret)
return ret;
@@ -7415,20 +7068,76 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
return 0;
}
+static void io_sq_thread_unpark(struct io_sq_data *sqd)
+ __releases(&sqd->lock)
+{
+ if (sqd->thread == current)
+ return;
+ clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+ if (sqd->thread)
+ wake_up_state(sqd->thread, TASK_PARKED);
+ mutex_unlock(&sqd->lock);
+}
+
+static void io_sq_thread_park(struct io_sq_data *sqd)
+ __acquires(&sqd->lock)
+{
+ if (sqd->thread == current)
+ return;
+ set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+ mutex_lock(&sqd->lock);
+ if (sqd->thread) {
+ wake_up_process(sqd->thread);
+ wait_for_completion(&sqd->parked);
+ }
+}
+
+static void io_sq_thread_stop(struct io_sq_data *sqd)
+{
+ if (test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state))
+ return;
+ mutex_lock(&sqd->lock);
+ if (sqd->thread) {
+ set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+ WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state));
+ wake_up_process(sqd->thread);
+ mutex_unlock(&sqd->lock);
+ wait_for_completion(&sqd->exited);
+ WARN_ON_ONCE(sqd->thread);
+ } else {
+ mutex_unlock(&sqd->lock);
+ }
+}
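Callers bracket any mutation the SQPOLL thread must not observe mid-flight between park and unpark; io_sq_thread_finish() below follows exactly this shape. Roughly:

io_sq_thread_park(sqd);		/* thread now sleeps in TASK_PARKED (or is gone) */
mutex_lock(&sqd->ctx_lock);
list_del(&ctx->sqd_list);	/* safe: the thread cannot walk ctx_list here */
mutex_unlock(&sqd->ctx_lock);
io_sq_thread_unpark(sqd);	/* drops sqd->lock taken by park, wakes the thread */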
+
static void io_put_sq_data(struct io_sq_data *sqd)
{
if (refcount_dec_and_test(&sqd->refs)) {
- /*
- * The park is a bit of a work-around, without it we get
- * warning spews on shutdown with SQPOLL set and affinity
- * set to a single CPU.
- */
+ io_sq_thread_stop(sqd);
+ kfree(sqd);
+ }
+}
+
+static void io_sq_thread_finish(struct io_ring_ctx *ctx)
+{
+ struct io_sq_data *sqd = ctx->sq_data;
+
+ if (sqd) {
+ complete(&sqd->startup);
if (sqd->thread) {
- kthread_park(sqd->thread);
- kthread_stop(sqd->thread);
+ wait_for_completion(&ctx->sq_thread_comp);
+ io_sq_thread_park(sqd);
}
- kfree(sqd);
+ mutex_lock(&sqd->ctx_lock);
+ list_del(&ctx->sqd_list);
+ io_sqd_update_thread_idle(sqd);
+ mutex_unlock(&sqd->ctx_lock);
+
+ if (sqd->thread)
+ io_sq_thread_unpark(sqd);
+
+ io_put_sq_data(sqd);
+ ctx->sq_data = NULL;
}
}
@@ -7475,68 +7184,12 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
mutex_init(&sqd->ctx_lock);
mutex_init(&sqd->lock);
init_waitqueue_head(&sqd->wait);
+ init_completion(&sqd->startup);
+ init_completion(&sqd->parked);
+ init_completion(&sqd->exited);
return sqd;
}
-static void io_sq_thread_unpark(struct io_sq_data *sqd)
- __releases(&sqd->lock)
-{
- if (!sqd->thread)
- return;
- kthread_unpark(sqd->thread);
- mutex_unlock(&sqd->lock);
-}
-
-static void io_sq_thread_park(struct io_sq_data *sqd)
- __acquires(&sqd->lock)
-{
- if (!sqd->thread)
- return;
- mutex_lock(&sqd->lock);
- kthread_park(sqd->thread);
-}
-
-static void io_sq_thread_stop(struct io_ring_ctx *ctx)
-{
- struct io_sq_data *sqd = ctx->sq_data;
-
- if (sqd) {
- if (sqd->thread) {
- /*
- * We may arrive here from the error branch in
- * io_sq_offload_create() where the kthread is created
- * without being waked up, thus wake it up now to make
- * sure the wait will complete.
- */
- wake_up_process(sqd->thread);
- wait_for_completion(&ctx->sq_thread_comp);
-
- io_sq_thread_park(sqd);
- }
-
- mutex_lock(&sqd->ctx_lock);
- list_del(&ctx->sqd_list);
- io_sqd_update_thread_idle(sqd);
- mutex_unlock(&sqd->ctx_lock);
-
- if (sqd->thread)
- io_sq_thread_unpark(sqd);
-
- io_put_sq_data(sqd);
- ctx->sq_data = NULL;
- }
-}
-
-static void io_finish_async(struct io_ring_ctx *ctx)
-{
- io_sq_thread_stop(ctx);
-
- if (ctx->io_wq) {
- io_wq_destroy(ctx->io_wq);
- ctx->io_wq = NULL;
- }
-}
-
#if defined(CONFIG_UNIX)
/*
* Ensure the UNIX gc is aware of our file set, so we are certain that
@@ -7563,7 +7216,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
skb->sk = sk;
nr_files = 0;
- fpl->user = get_uid(ctx->user);
+ fpl->user = get_uid(current_user());
for (i = 0; i < nr; i++) {
struct file *file = io_file_from_index(ctx, i + offset);
@@ -8095,54 +7748,34 @@ static struct io_wq_work *io_free_work(struct io_wq_work *work)
return req ? &req->work : NULL;
}
-static int io_init_wq_offload(struct io_ring_ctx *ctx,
- struct io_uring_params *p)
+static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx)
{
+ struct io_wq_hash *hash;
struct io_wq_data data;
- struct fd f;
- struct io_ring_ctx *ctx_attach;
unsigned int concurrency;
- int ret = 0;
-
- data.user = ctx->user;
- data.free_work = io_free_work;
- data.do_work = io_wq_submit_work;
- if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
- /* Do QD, or 4 * CPUS, whatever is smallest */
- concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
-
- ctx->io_wq = io_wq_create(concurrency, &data);
- if (IS_ERR(ctx->io_wq)) {
- ret = PTR_ERR(ctx->io_wq);
- ctx->io_wq = NULL;
- }
- return ret;
+ hash = ctx->hash_map;
+ if (!hash) {
+ hash = kzalloc(sizeof(*hash), GFP_KERNEL);
+ if (!hash)
+ return ERR_PTR(-ENOMEM);
+ refcount_set(&hash->refs, 1);
+ init_waitqueue_head(&hash->wait);
+ ctx->hash_map = hash;
}
- f = fdget(p->wq_fd);
- if (!f.file)
- return -EBADF;
-
- if (f.file->f_op != &io_uring_fops) {
- ret = -EINVAL;
- goto out_fput;
- }
+ data.hash = hash;
+ data.free_work = io_free_work;
+ data.do_work = io_wq_submit_work;
- ctx_attach = f.file->private_data;
- /* @io_wq is protected by holding the fd */
- if (!io_wq_get(ctx_attach->io_wq, &data)) {
- ret = -EINVAL;
- goto out_fput;
- }
+ /* Do QD, or 4 * CPUS, whatever is smallest */
+ concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
- ctx->io_wq = ctx_attach->io_wq;
-out_fput:
- fdput(f);
- return ret;
+ return io_wq_create(concurrency, &data);
}
-static int io_uring_alloc_task_context(struct task_struct *task)
+static int io_uring_alloc_task_context(struct task_struct *task,
+ struct io_ring_ctx *ctx)
{
struct io_uring_task *tctx;
int ret;
@@ -8157,13 +7790,19 @@ static int io_uring_alloc_task_context(struct task_struct *task)
return ret;
}
+ tctx->io_wq = io_init_wq_offload(ctx);
+ if (IS_ERR(tctx->io_wq)) {
+ ret = PTR_ERR(tctx->io_wq);
+ percpu_counter_destroy(&tctx->inflight);
+ kfree(tctx);
+ return ret;
+ }
+
xa_init(&tctx->xa);
init_waitqueue_head(&tctx->wait);
tctx->last = NULL;
atomic_set(&tctx->in_idle, 0);
tctx->sqpoll = false;
- io_init_identity(&tctx->__identity);
- tctx->identity = &tctx->__identity;
task->io_uring = tctx;
spin_lock_init(&tctx->task_lock);
INIT_WQ_LIST(&tctx->task_list);
@@ -8177,20 +7816,54 @@ void __io_uring_free(struct task_struct *tsk)
struct io_uring_task *tctx = tsk->io_uring;
WARN_ON_ONCE(!xa_empty(&tctx->xa));
- WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1);
- if (tctx->identity != &tctx->__identity)
- kfree(tctx->identity);
+ WARN_ON_ONCE(tctx->io_wq);
+
percpu_counter_destroy(&tctx->inflight);
kfree(tctx);
tsk->io_uring = NULL;
}
+static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx)
+{
+ struct task_struct *tsk;
+ int ret;
+
+ clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+ reinit_completion(&sqd->parked);
+ ctx->sqo_exec = 0;
+ sqd->task_pid = current->pid;
+ tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+ if (IS_ERR(tsk))
+ return PTR_ERR(tsk);
+ ret = io_uring_alloc_task_context(tsk, ctx);
+ if (ret)
+ set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+ sqd->thread = tsk;
+ wake_up_new_task(tsk);
+ return ret;
+}
+
static int io_sq_offload_create(struct io_ring_ctx *ctx,
struct io_uring_params *p)
{
int ret;
+ /* Retain compatibility by still failing an invalid attach attempt */
+ if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) ==
+ IORING_SETUP_ATTACH_WQ) {
+ struct fd f;
+
+ f = fdget(p->wq_fd);
+ if (!f.file)
+ return -ENXIO;
+ if (f.file->f_op != &io_uring_fops) {
+ fdput(f);
+ return -EINVAL;
+ }
+ fdput(f);
+ }
if (ctx->flags & IORING_SETUP_SQPOLL) {
+ struct task_struct *tsk;
struct io_sq_data *sqd;
ret = -EPERM;
@@ -8215,7 +7888,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
ctx->sq_thread_idle = HZ;
if (sqd->thread)
- goto done;
+ return 0;
if (p->flags & IORING_SETUP_SQ_AFF) {
int cpu = p->sq_thread_cpu;
@@ -8226,18 +7899,22 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
if (!cpu_online(cpu))
goto err;
- sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd,
- cpu, "io_uring-sq");
+ sqd->sq_cpu = cpu;
} else {
- sqd->thread = kthread_create(io_sq_thread, sqd,
- "io_uring-sq");
+ sqd->sq_cpu = -1;
}
- if (IS_ERR(sqd->thread)) {
- ret = PTR_ERR(sqd->thread);
- sqd->thread = NULL;
+
+ sqd->task_pid = current->pid;
+ tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+ if (IS_ERR(tsk)) {
+ ret = PTR_ERR(tsk);
goto err;
}
- ret = io_uring_alloc_task_context(sqd->thread);
+ ret = io_uring_alloc_task_context(tsk, ctx);
+ if (ret)
+ set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+ sqd->thread = tsk;
+ wake_up_new_task(tsk);
if (ret)
goto err;
} else if (p->flags & IORING_SETUP_SQ_AFF) {
@@ -8246,14 +7923,9 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
goto err;
}
-done:
- ret = io_init_wq_offload(ctx, p);
- if (ret)
- goto err;
-
return 0;
err:
- io_finish_async(ctx);
+ io_sq_thread_finish(ctx);
return ret;
}
@@ -8261,8 +7933,9 @@ static void io_sq_offload_start(struct io_ring_ctx *ctx)
{
struct io_sq_data *sqd = ctx->sq_data;
- if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd->thread)
- wake_up_process(sqd->thread);
+ ctx->flags &= ~IORING_SETUP_R_DISABLED;
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ complete(&sqd->startup);
}
static inline void __io_unaccount_mem(struct user_struct *user,
@@ -8292,7 +7965,7 @@ static inline int __io_account_mem(struct user_struct *user,
static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
{
- if (ctx->limit_mem)
+ if (ctx->user)
__io_unaccount_mem(ctx->user, nr_pages);
if (ctx->mm_account)
@@ -8303,7 +7976,7 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
{
int ret;
- if (ctx->limit_mem) {
+ if (ctx->user) {
ret = __io_account_mem(ctx->user, nr_pages);
if (ret)
return ret;
@@ -8699,22 +8372,26 @@ static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
}
}
-static void io_req_caches_free(struct io_ring_ctx *ctx, struct task_struct *tsk)
+static void io_req_caches_free(struct io_ring_ctx *ctx)
{
struct io_submit_state *submit_state = &ctx->submit_state;
+ struct io_comp_state *cs = &ctx->submit_state.comp;
mutex_lock(&ctx->uring_lock);
- if (submit_state->free_reqs)
+ if (submit_state->free_reqs) {
kmem_cache_free_bulk(req_cachep, submit_state->free_reqs,
submit_state->reqs);
-
- io_req_cache_free(&submit_state->comp.free_list, NULL);
+ submit_state->free_reqs = 0;
+ }
spin_lock_irq(&ctx->completion_lock);
- io_req_cache_free(&submit_state->comp.locked_free_list, NULL);
+ list_splice_init(&cs->locked_free_list, &cs->free_list);
+ cs->locked_free_nr = 0;
spin_unlock_irq(&ctx->completion_lock);
+ io_req_cache_free(&cs->free_list, NULL);
+
mutex_unlock(&ctx->uring_lock);
}
@@ -8728,22 +8405,17 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
mutex_lock(&ctx->uring_lock);
mutex_unlock(&ctx->uring_lock);
- io_finish_async(ctx);
+ io_sq_thread_finish(ctx);
io_sqe_buffers_unregister(ctx);
- if (ctx->sqo_task) {
- put_task_struct(ctx->sqo_task);
- ctx->sqo_task = NULL;
+ if (ctx->mm_account) {
mmdrop(ctx->mm_account);
ctx->mm_account = NULL;
}
-#ifdef CONFIG_BLK_CGROUP
- if (ctx->sqo_blkcg_css)
- css_put(ctx->sqo_blkcg_css);
-#endif
-
+ mutex_lock(&ctx->uring_lock);
io_sqe_files_unregister(ctx);
+ mutex_unlock(&ctx->uring_lock);
io_eventfd_unregister(ctx);
io_destroy_buffers(ctx);
idr_destroy(&ctx->personality_idr);
@@ -8760,8 +8432,9 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
percpu_ref_exit(&ctx->refs);
free_uid(ctx->user);
- put_cred(ctx->creds);
- io_req_caches_free(ctx, NULL);
+ io_req_caches_free(ctx);
+ if (ctx->hash_map)
+ io_wq_put_hash(ctx->hash_map);
kfree(ctx->cancel_hash);
kfree(ctx);
}
@@ -8808,13 +8481,11 @@ static int io_uring_fasync(int fd, struct file *file, int on)
static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
{
- struct io_identity *iod;
+ const struct cred *creds;
- iod = idr_remove(&ctx->personality_idr, id);
- if (iod) {
- put_cred(iod->creds);
- if (refcount_dec_and_test(&iod->count))
- kfree(iod);
+ creds = idr_remove(&ctx->personality_idr, id);
+ if (creds) {
+ put_cred(creds);
return 0;
}
@@ -8829,6 +8500,28 @@ static int io_remove_personalities(int id, void *p, void *data)
return 0;
}
+static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
+{
+ struct callback_head *work, *next;
+ bool executed = false;
+
+ do {
+ work = xchg(&ctx->exit_task_work, NULL);
+ if (!work)
+ break;
+
+ do {
+ next = work->next;
+ work->func(work);
+ work = next;
+ cond_resched();
+ } while (work);
+ executed = true;
+ } while (1);
+
+ return executed;
+}
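The xchg() above detaches the whole pending list in one atomic step, so producers may keep pushing while the drainer runs; the outer loop re-checks for work published during that window. A user-space analogue with C11 atomics and hypothetical names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct callback {
	struct callback *next;
	void (*func)(struct callback *);
};

static _Atomic(struct callback *) exit_work;

/* producer: LIFO push with compare-exchange */
static void push_work(struct callback *cb)
{
	struct callback *head = atomic_load(&exit_work);

	do {
		cb->next = head;
	} while (!atomic_compare_exchange_weak(&exit_work, &head, cb));
}

/* drainer: mirrors io_run_ctx_fallback() */
static bool run_fallback(void)
{
	struct callback *work, *next;
	bool executed = false;

	/* take over everything published so far; new pushes start a fresh list */
	while ((work = atomic_exchange(&exit_work, NULL)) != NULL) {
		do {
			next = work->next;
			work->func(work);
			work = next;
		} while (work);
		executed = true;
	}
	return executed;
}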
+
static void io_ring_exit_work(struct work_struct *work)
{
struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
@@ -8846,21 +8539,10 @@ static void io_ring_exit_work(struct work_struct *work)
io_ring_ctx_free(ctx);
}
-static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
-{
- struct io_kiocb *req = container_of(work, struct io_kiocb, work);
-
- return req->ctx == data;
-}
-
static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
{
mutex_lock(&ctx->uring_lock);
percpu_ref_kill(&ctx->refs);
-
- if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
- ctx->sqo_dead = 1;
-
/* if force is set, the ring is going away. always drop after that */
ctx->cq_overflow_flushed = 1;
if (ctx->rings)
@@ -8871,9 +8553,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
io_kill_timeouts(ctx, NULL, NULL);
io_poll_remove_all(ctx, NULL, NULL);
- if (ctx->io_wq)
- io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
-
/* if we failed setting up the ctx, we might not have any rings */
io_iopoll_try_reap_events(ctx);
@@ -8952,13 +8631,15 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
struct files_struct *files)
{
struct io_task_cancel cancel = { .task = task, .files = files, };
+ struct task_struct *tctx_task = task ?: current;
+ struct io_uring_task *tctx = tctx_task->io_uring;
while (1) {
enum io_wq_cancel cret;
bool ret = false;
- if (ctx->io_wq) {
- cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb,
+ if (tctx && tctx->io_wq) {
+ cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
&cancel, true);
ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
}
@@ -8974,6 +8655,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
ret |= io_poll_remove_all(ctx, task, files);
ret |= io_kill_timeouts(ctx, task, files);
ret |= io_run_task_work();
+ ret |= io_run_ctx_fallback(ctx);
io_cqring_overflow_flush(ctx, true, task, files);
if (!ret)
break;
@@ -9021,17 +8703,6 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
}
}
-static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
-{
- mutex_lock(&ctx->uring_lock);
- ctx->sqo_dead = 1;
- mutex_unlock(&ctx->uring_lock);
-
- /* make sure callers enter the ring to get error */
- if (ctx->rings)
- io_ring_set_wakeup_flag(ctx);
-}
-
/*
* We need to iteratively cancel requests, in case a request has dependent
* hard links. These persist even for failure of cancelations, hence keep
@@ -9043,10 +8714,15 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
struct task_struct *task = current;
if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
- io_disable_sqo_submit(ctx);
- task = ctx->sq_data->thread;
- atomic_inc(&task->io_uring->in_idle);
+ /* never started, nothing to cancel */
+ if (ctx->flags & IORING_SETUP_R_DISABLED) {
+ io_sq_offload_start(ctx);
+ return;
+ }
io_sq_thread_park(ctx->sq_data);
+ task = ctx->sq_data->thread;
+ if (task)
+ atomic_inc(&task->io_uring->in_idle);
}
io_cancel_defer_files(ctx, task, files);
@@ -9055,10 +8731,10 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
if (!files)
io_uring_try_cancel_requests(ctx, task, NULL);
- if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
+ if (task)
atomic_dec(&task->io_uring->in_idle);
+ if (ctx->sq_data)
io_sq_thread_unpark(ctx->sq_data);
- }
}
/*
@@ -9070,7 +8746,7 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
int ret;
if (unlikely(!tctx)) {
- ret = io_uring_alloc_task_context(current);
+ ret = io_uring_alloc_task_context(current, ctx);
if (unlikely(ret))
return ret;
tctx = current->io_uring;
@@ -9086,10 +8762,6 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
fput(file);
return ret;
}
-
- /* one and only SQPOLL file note, held by sqo_task */
- WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) &&
- current != ctx->sqo_task);
}
tctx->last = file;
}
@@ -9119,13 +8791,17 @@ static void io_uring_del_task_file(struct file *file)
fput(file);
}
-static void io_uring_remove_task_files(struct io_uring_task *tctx)
+static void io_uring_clean_tctx(struct io_uring_task *tctx)
{
struct file *file;
unsigned long index;
xa_for_each(&tctx->xa, index, file)
io_uring_del_task_file(file);
+ if (tctx->io_wq) {
+ io_wq_put_and_exit(tctx->io_wq);
+ tctx->io_wq = NULL;
+ }
}
void __io_uring_files_cancel(struct files_struct *files)
@@ -9141,7 +8817,7 @@ void __io_uring_files_cancel(struct files_struct *files)
atomic_dec(&tctx->in_idle);
if (files)
- io_uring_remove_task_files(tctx);
+ io_uring_clean_tctx(tctx);
}
static s64 tctx_inflight(struct io_uring_task *tctx)
@@ -9151,15 +8827,19 @@ static s64 tctx_inflight(struct io_uring_task *tctx)
static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
{
+ struct io_sq_data *sqd = ctx->sq_data;
struct io_uring_task *tctx;
s64 inflight;
DEFINE_WAIT(wait);
- if (!ctx->sq_data)
+ if (!sqd)
+ return;
+ io_sq_thread_park(sqd);
+ if (!sqd->thread || !sqd->thread->io_uring) {
+ io_sq_thread_unpark(sqd);
return;
+ }
tctx = ctx->sq_data->thread->io_uring;
- io_disable_sqo_submit(ctx);
-
atomic_inc(&tctx->in_idle);
do {
/* read completions before cancelations */
@@ -9179,6 +8859,7 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
finish_wait(&tctx->wait, &wait);
} while (1);
atomic_dec(&tctx->in_idle);
+ io_sq_thread_unpark(sqd);
}
/*
@@ -9194,7 +8875,6 @@ void __io_uring_task_cancel(void)
/* make sure overflow events are dropped */
atomic_inc(&tctx->in_idle);
- /* trigger io_disable_sqo_submit() */
if (tctx->sqpoll) {
struct file *file;
unsigned long index;
@@ -9224,47 +8904,9 @@ void __io_uring_task_cancel(void)
atomic_dec(&tctx->in_idle);
- io_uring_remove_task_files(tctx);
-}
-
-static int io_uring_flush(struct file *file, void *data)
-{
- struct io_uring_task *tctx = current->io_uring;
- struct io_ring_ctx *ctx = file->private_data;
-
- if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
- io_uring_cancel_task_requests(ctx, NULL);
- io_req_caches_free(ctx, current);
- }
-
- if (!tctx)
- return 0;
-
- /* we should have cancelled and erased it before PF_EXITING */
- WARN_ON_ONCE((current->flags & PF_EXITING) &&
- xa_load(&tctx->xa, (unsigned long)file));
-
- /*
- * fput() is pending, will be 2 if the only other ref is our potential
- * task file note. If the task is exiting, drop regardless of count.
- */
- if (atomic_long_read(&file->f_count) != 2)
- return 0;
-
- if (ctx->flags & IORING_SETUP_SQPOLL) {
- /* there is only one file note, which is owned by sqo_task */
- WARN_ON_ONCE(ctx->sqo_task != current &&
- xa_load(&tctx->xa, (unsigned long)file));
- /* sqo_dead check is for when this happens after cancellation */
- WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead &&
- !xa_load(&tctx->xa, (unsigned long)file));
-
- io_disable_sqo_submit(ctx);
- }
-
- if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current)
- io_uring_del_task_file(file);
- return 0;
+ io_uring_clean_tctx(tctx);
+ /* all current's requests should be gone, we can kill tctx */
+ __io_uring_free(current);
}
static void *io_uring_validate_mmap_request(struct file *file,
@@ -9345,22 +8987,14 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
do {
if (!io_sqring_full(ctx))
break;
-
prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
- if (unlikely(ctx->sqo_dead)) {
- ret = -EOWNERDEAD;
- goto out;
- }
-
if (!io_sqring_full(ctx))
break;
-
schedule();
} while (!signal_pending(current));
finish_wait(&ctx->sqo_sq_wait, &wait);
-out:
return ret;
}
@@ -9435,9 +9069,13 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
if (ctx->flags & IORING_SETUP_SQPOLL) {
io_cqring_overflow_flush(ctx, false, NULL, NULL);
+ if (unlikely(ctx->sqo_exec)) {
+ ret = io_sq_thread_fork(ctx->sq_data, ctx);
+ if (ret)
+ goto out;
+ ctx->sqo_exec = 0;
+ }
ret = -EOWNERDEAD;
- if (unlikely(ctx->sqo_dead))
- goto out;
if (flags & IORING_ENTER_SQ_WAKEUP)
wake_up(&ctx->sq_data->wait);
if (flags & IORING_ENTER_SQ_WAIT) {
@@ -9491,8 +9129,7 @@ out_fput:
#ifdef CONFIG_PROC_FS
static int io_uring_show_cred(int id, void *p, void *data)
{
- struct io_identity *iod = p;
- const struct cred *cred = iod->creds;
+ const struct cred *cred = p;
struct seq_file *m = data;
struct user_namespace *uns = seq_user_ns(m);
struct group_info *gi;
@@ -9537,8 +9174,11 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
*/
has_lock = mutex_trylock(&ctx->uring_lock);
- if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL))
+ if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
sq = ctx->sq_data;
+ if (!sq->thread)
+ sq = NULL;
+ }
seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
@@ -9590,7 +9230,6 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
static const struct file_operations io_uring_fops = {
.release = io_uring_release,
- .flush = io_uring_flush,
.mmap = io_uring_mmap,
#ifndef CONFIG_MMU
.get_unmapped_area = io_uring_nommu_get_unmapped_area,
@@ -9698,7 +9337,6 @@ static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
static int io_uring_create(unsigned entries, struct io_uring_params *p,
struct io_uring_params __user *params)
{
- struct user_struct *user = NULL;
struct io_ring_ctx *ctx;
struct file *file;
int ret;
@@ -9740,22 +9378,12 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
p->cq_entries = 2 * p->sq_entries;
}
- user = get_uid(current_user());
-
ctx = io_ring_ctx_alloc(p);
- if (!ctx) {
- free_uid(user);
+ if (!ctx)
return -ENOMEM;
- }
ctx->compat = in_compat_syscall();
- ctx->limit_mem = !capable(CAP_IPC_LOCK);
- ctx->user = user;
- ctx->creds = get_current_cred();
-#ifdef CONFIG_AUDIT
- ctx->loginuid = current->loginuid;
- ctx->sessionid = current->sessionid;
-#endif
- ctx->sqo_task = get_task_struct(current);
+ if (!capable(CAP_IPC_LOCK))
+ ctx->user = get_uid(current_user());
/*
* This is just grabbed for accounting purposes. When a process exits,
@@ -9766,24 +9394,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
mmgrab(current->mm);
ctx->mm_account = current->mm;
-#ifdef CONFIG_BLK_CGROUP
- /*
- * The sq thread will belong to the original cgroup it was inited in.
- * If the cgroup goes offline (e.g. disabling the io controller), then
- * issued bios will be associated with the closest cgroup later in the
- * block layer.
- */
- rcu_read_lock();
- ctx->sqo_blkcg_css = blkcg_css();
- ret = css_tryget_online(ctx->sqo_blkcg_css);
- rcu_read_unlock();
- if (!ret) {
- /* don't init against a dying cgroup, have the user try again */
- ctx->sqo_blkcg_css = NULL;
- ret = -ENODEV;
- goto err;
- }
-#endif
ret = io_allocate_scq_urings(ctx, p);
if (ret)
goto err;
@@ -9817,7 +9427,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
- IORING_FEAT_EXT_ARG;
+ IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS;
if (copy_to_user(params, p, sizeof(*p))) {
ret = -EFAULT;
@@ -9836,7 +9446,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
*/
ret = io_uring_install_fd(ctx, file);
if (ret < 0) {
- io_disable_sqo_submit(ctx);
/* fput will clean it up */
fput(file);
return ret;
@@ -9845,7 +9454,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
return ret;
err:
- io_disable_sqo_submit(ctx);
io_ring_ctx_wait_and_kill(ctx);
return ret;
}
@@ -9923,21 +9531,15 @@ out:
static int io_register_personality(struct io_ring_ctx *ctx)
{
- struct io_identity *id;
+ const struct cred *creds;
int ret;
- id = kmalloc(sizeof(*id), GFP_KERNEL);
- if (unlikely(!id))
- return -ENOMEM;
-
- io_init_identity(id);
- id->creds = get_current_cred();
+ creds = get_current_cred();
- ret = idr_alloc_cyclic(&ctx->personality_idr, id, 1, USHRT_MAX, GFP_KERNEL);
- if (ret < 0) {
- put_cred(id->creds);
- kfree(id);
- }
+ ret = idr_alloc_cyclic(&ctx->personality_idr, (void *) creds, 1,
+ USHRT_MAX, GFP_KERNEL);
+ if (ret < 0)
+ put_cred(creds);
return ret;
}
@@ -10019,10 +9621,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
if (ctx->restrictions.registered)
ctx->restricted = 1;
- ctx->flags &= ~IORING_SETUP_R_DISABLED;
-
io_sq_offload_start(ctx);
-
return 0;
}
@@ -10196,6 +9795,8 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
ctx = f.file->private_data;
+ io_run_task_work();
+
mutex_lock(&ctx->uring_lock);
ret = __io_uring_register(ctx, opcode, arg, nr_args);
mutex_unlock(&ctx->uring_lock);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 16a1e82e3aeb..7ffcd7ef33d4 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -278,14 +278,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
if (!is_contig || bio_full(ctx->bio, plen)) {
gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
gfp_t orig_gfp = gfp;
- int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
if (ctx->bio)
submit_bio(ctx->bio);
if (ctx->rac) /* same as readahead_gfp_mask */
gfp |= __GFP_NORETRY | __GFP_NOWARN;
- ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+ ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs));
/*
* If the bio_alloc fails, try it again for a single page to
* avoid having to deal with partial page reads. This emulates
@@ -1459,13 +1459,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
goto redirty;
/*
- * Given that we do not allow direct reclaim to call us, we should
- * never be called in a recursive filesystem reclaim context.
- */
- if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
- goto redirty;
-
- /*
* Is this page beyond the end of the file?
*
* The page index is less than the end_index, adjust the end_offset
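
Two cleanups in the hunk above: the open-coded round-up shift becomes DIV_ROUND_UP(), and the min(BIO_MAX_PAGES, nr_vecs) clamp becomes the new bio_max_segs() helper. A runnable sketch of the arithmetic, with PAGE_SIZE and the 256-vector bio limit as assumed values:

#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define PAGE_SIZE		4096u	/* assumed */
#define BIO_MAX_PAGES		256u	/* assumed bio hard limit */

/* Model of bio_max_segs(): clamp a page count to what one bio can hold. */
static unsigned int bio_max_segs(unsigned int nr_segs)
{
	return nr_segs < BIO_MAX_PAGES ? nr_segs : BIO_MAX_PAGES;
}

int main(void)
{
	unsigned long long length = 5ULL * PAGE_SIZE + 123;	/* spans 6 pages */
	unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);

	/* 6 pages needed, well under the cap, so the bio gets 6 vecs */
	printf("nr_vecs=%u, capped=%u\n", nr_vecs, bio_max_segs(nr_vecs));
	return 0;
}
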
diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
index 107ee80c3568..dab1b02eba5b 100644
--- a/fs/iomap/seek.c
+++ b/fs/iomap/seek.c
@@ -10,122 +10,17 @@
#include <linux/pagemap.h>
#include <linux/pagevec.h>
-/*
- * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
- * Returns true if found and updates @lastoff to the offset in file.
- */
-static bool
-page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff,
- int whence)
-{
- const struct address_space_operations *ops = inode->i_mapping->a_ops;
- unsigned int bsize = i_blocksize(inode), off;
- bool seek_data = whence == SEEK_DATA;
- loff_t poff = page_offset(page);
-
- if (WARN_ON_ONCE(*lastoff >= poff + PAGE_SIZE))
- return false;
-
- if (*lastoff < poff) {
- /*
- * Last offset smaller than the start of the page means we found
- * a hole:
- */
- if (whence == SEEK_HOLE)
- return true;
- *lastoff = poff;
- }
-
- /*
- * Just check the page unless we can and should check block ranges:
- */
- if (bsize == PAGE_SIZE || !ops->is_partially_uptodate)
- return PageUptodate(page) == seek_data;
-
- lock_page(page);
- if (unlikely(page->mapping != inode->i_mapping))
- goto out_unlock_not_found;
-
- for (off = 0; off < PAGE_SIZE; off += bsize) {
- if (offset_in_page(*lastoff) >= off + bsize)
- continue;
- if (ops->is_partially_uptodate(page, off, bsize) == seek_data) {
- unlock_page(page);
- return true;
- }
- *lastoff = poff + off + bsize;
- }
-
-out_unlock_not_found:
- unlock_page(page);
- return false;
-}
-
-/*
- * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
- *
- * Within unwritten extents, the page cache determines which parts are holes
- * and which are data: uptodate buffer heads count as data; everything else
- * counts as a hole.
- *
- * Returns the resulting offset on success, and -ENOENT otherwise.
- */
static loff_t
-page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
- int whence)
-{
- pgoff_t index = offset >> PAGE_SHIFT;
- pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
- loff_t lastoff = offset;
- struct pagevec pvec;
-
- if (length <= 0)
- return -ENOENT;
-
- pagevec_init(&pvec);
-
- do {
- unsigned nr_pages, i;
-
- nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
- end - 1);
- if (nr_pages == 0)
- break;
-
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- if (page_seek_hole_data(inode, page, &lastoff, whence))
- goto check_range;
- lastoff = page_offset(page) + PAGE_SIZE;
- }
- pagevec_release(&pvec);
- } while (index < end);
-
- /* When no page at lastoff and we are not done, we found a hole. */
- if (whence != SEEK_HOLE)
- goto not_found;
-
-check_range:
- if (lastoff < offset + length)
- goto out;
-not_found:
- lastoff = -ENOENT;
-out:
- pagevec_release(&pvec);
- return lastoff;
-}
-
-
-static loff_t
-iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
+iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length,
void *data, struct iomap *iomap, struct iomap *srcmap)
{
+ loff_t offset = start;
+
switch (iomap->type) {
case IOMAP_UNWRITTEN:
- offset = page_cache_seek_hole_data(inode, offset, length,
- SEEK_HOLE);
- if (offset < 0)
+ offset = mapping_seek_hole_data(inode->i_mapping, start,
+ start + length, SEEK_HOLE);
+ if (offset == start + length)
return length;
fallthrough;
case IOMAP_HOLE:
@@ -164,15 +59,17 @@ iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
EXPORT_SYMBOL_GPL(iomap_seek_hole);
static loff_t
-iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
+iomap_seek_data_actor(struct inode *inode, loff_t start, loff_t length,
void *data, struct iomap *iomap, struct iomap *srcmap)
{
+ loff_t offset = start;
+
switch (iomap->type) {
case IOMAP_HOLE:
return length;
case IOMAP_UNWRITTEN:
- offset = page_cache_seek_hole_data(inode, offset, length,
- SEEK_DATA);
+ offset = mapping_seek_hole_data(inode->i_mapping, start,
+ start + length, SEEK_DATA);
if (offset < 0)
return length;
fallthrough;
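
Both seek actors now delegate unwritten-extent scanning to the generic mapping_seek_hole_data(), which per the calls above returns the end offset when no hole exists in the range and a negative errno when no data does. The user-visible contract is lseek(2) with SEEK_HOLE/SEEK_DATA; a runnable demonstration (the reported offsets depend on the filesystem's block granularity):

#define _GNU_SOURCE	/* for SEEK_DATA / SEEK_HOLE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int fd = open("sparse.tmp", O_CREAT | O_RDWR | O_TRUNC, 0600);

	if (fd < 0)
		return 1;
	if (pwrite(fd, "data", 4, 1 << 20) != 4)	/* data at 1 MiB */
		return 1;
	if (ftruncate(fd, 2 << 20))			/* hole up to 2 MiB */
		return 1;

	off_t data = lseek(fd, 0, SEEK_DATA);	/* first data at/after 0 */
	off_t hole = lseek(fd, 0, SEEK_HOLE);	/* first hole at/after 0 */

	/* on a typical extent-based fs: data near 1 MiB, hole at 0 */
	printf("first data at %lld, first hole at %lld\n",
	       (long long)data, (long long)hole);

	close(fd);
	unlink("sparse.tmp");
	return 0;
}
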
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index b2dc4d1f9dcc..1f0ffabbde56 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -551,7 +551,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
ret = -ENOMEM;
goto out_unload;
}
- inode->i_ino = 0;
inode->i_size = i_size_read(sb->s_bdev->bd_inode);
inode->i_mapping->a_ops = &jfs_metapage_aops;
inode_fake_hash(inode);
diff --git a/fs/mpage.c b/fs/mpage.c
index 830e6cc2a9e7..961234d68779 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -304,9 +304,7 @@ alloc_new:
goto out;
}
args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
- min_t(int, args->nr_pages,
- BIO_MAX_PAGES),
- gfp);
+ bio_max_segs(args->nr_pages), gfp);
if (args->bio == NULL)
goto confused;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 062fe984a697..56bb5a5fdc0d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -381,50 +381,36 @@ int mnt_want_write(struct vfsmount *m)
EXPORT_SYMBOL_GPL(mnt_want_write);
/**
- * mnt_clone_write - get write access to a mount
- * @mnt: the mount on which to take a write
- *
- * This is effectively like mnt_want_write, except
- * it must only be used to take an extra write reference
- * on a mountpoint that we already know has a write reference
- * on it. This allows some optimisation.
- *
- * After finished, mnt_drop_write must be called as usual to
- * drop the reference.
- */
-int mnt_clone_write(struct vfsmount *mnt)
-{
- /* superblock may be r/o */
- if (__mnt_is_readonly(mnt))
- return -EROFS;
- preempt_disable();
- mnt_inc_writers(real_mount(mnt));
- preempt_enable();
- return 0;
-}
-EXPORT_SYMBOL_GPL(mnt_clone_write);
-
-/**
* __mnt_want_write_file - get write access to a file's mount
* @file: the file whose mount on which to take a write
*
- * This is like __mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
+ * This is like __mnt_want_write, but if the file is already open for writing it
+ * skips incrementing mnt_writers (since the open file already has a reference)
+ * and instead only does the check for emergency r/o remounts. This must be
+ * paired with __mnt_drop_write_file.
*/
int __mnt_want_write_file(struct file *file)
{
- if (!(file->f_mode & FMODE_WRITER))
- return __mnt_want_write(file->f_path.mnt);
- else
- return mnt_clone_write(file->f_path.mnt);
+ if (file->f_mode & FMODE_WRITER) {
+ /*
+ * Superblock may have become readonly while there are still
+ * writable fd's, e.g. due to a fs error with errors=remount-ro
+ */
+ if (__mnt_is_readonly(file->f_path.mnt))
+ return -EROFS;
+ return 0;
+ }
+ return __mnt_want_write(file->f_path.mnt);
}
/**
* mnt_want_write_file - get write access to a file's mount
* @file: the file whose mount on which to take a write
*
- * This is like mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
+ * This is like mnt_want_write, but if the file is already open for writing it
+ * skips incrementing mnt_writers (since the open file already has a reference)
+ * and instead only does the freeze protection and the check for emergency r/o
+ * remounts. This must be paired with mnt_drop_write_file.
*/
int mnt_want_write_file(struct file *file)
{
@@ -470,7 +456,8 @@ EXPORT_SYMBOL_GPL(mnt_drop_write);
void __mnt_drop_write_file(struct file *file)
{
- __mnt_drop_write(file->f_path.mnt);
+ if (!(file->f_mode & FMODE_WRITER))
+ __mnt_drop_write(file->f_path.mnt);
}
void mnt_drop_write_file(struct file *file)
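
After this change __mnt_want_write_file() bumps the per-mount writer count only when the file was not opened for writing, since an FMODE_WRITER open already holds a write reference; __mnt_drop_write_file() must apply the same test to stay balanced. A user-space model of that pairing (all names modelled, not the kernel API):

#include <assert.h>
#include <stdio.h>

#define FMODE_WRITER 0x1	/* modelled flag: file opened for write */

static int mnt_writers;		/* models the per-mount write counter */

static int want_write_file(unsigned int f_mode)
{
	if (f_mode & FMODE_WRITER)
		return 0;	/* open holds a write ref; only the r/o check */
	mnt_writers++;		/* models __mnt_want_write() */
	return 0;
}

static void drop_write_file(unsigned int f_mode)
{
	if (!(f_mode & FMODE_WRITER))
		mnt_writers--;	/* must mirror the test in want_write_file() */
}

int main(void)
{
	want_write_file(FMODE_WRITER);	/* writable fd: counter untouched */
	want_write_file(0);		/* read-only fd: counter bumped */
	drop_write_file(FMODE_WRITER);
	drop_write_file(0);
	assert(mnt_writers == 0);	/* balanced either way */
	printf("balanced, mnt_writers=%d\n", mnt_writers);
	return 0;
}
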
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 1a96ce28efb0..fe860c538747 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -115,13 +115,13 @@ bl_submit_bio(struct bio *bio)
return NULL;
}
-static struct bio *
-bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
+static struct bio *bl_alloc_init_bio(unsigned int npg,
+ struct block_device *bdev, sector_t disk_sector,
bio_end_io_t end_io, struct parallel_io *par)
{
struct bio *bio;
- npg = min(npg, BIO_MAX_PAGES);
+ npg = bio_max_segs(npg);
bio = bio_alloc(GFP_NOIO, npg);
if (bio) {
bio->bi_iter.bi_sector = disk_sector;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 63940a7a70be..16ad5050e046 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -89,7 +89,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
EXPORT_SYMBOL_GPL(nfs_file_release);
/**
- * nfs_revalidate_size - Revalidate the file size
+ * nfs_revalidate_file_size - Revalidate the file size
* @inode: pointer to inode struct
* @filp: pointer to struct file
*
@@ -606,8 +606,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- unsigned long written = 0;
- ssize_t result;
+ unsigned int mntflags = NFS_SERVER(inode)->flags;
+ ssize_t result, written;
errseq_t since;
int error;
@@ -626,13 +626,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
/*
* O_APPEND implies that we must revalidate the file length.
*/
- if (iocb->ki_flags & IOCB_APPEND) {
+ if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
result = nfs_revalidate_file_size(inode, file);
if (result)
goto out;
}
- if (iocb->ki_pos > i_size_read(inode))
- nfs_revalidate_mapping(inode, file->f_mapping);
+
+ nfs_clear_invalid_mapping(file->f_mapping);
since = filemap_sample_wb_err(file->f_mapping);
nfs_start_io_write(inode);
@@ -648,6 +648,21 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
written = result;
iocb->ki_pos += written;
+
+ if (mntflags & NFS_MOUNT_WRITE_EAGER) {
+ result = filemap_fdatawrite_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
+ if (result < 0)
+ goto out;
+ }
+ if (mntflags & NFS_MOUNT_WRITE_WAIT) {
+ result = filemap_fdatawait_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
+ if (result < 0)
+ goto out;
+ }
result = generic_write_sync(iocb, written);
if (result < 0)
goto out;
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 06894bcdea2d..971a9251c1d9 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -82,6 +82,7 @@ enum nfs_param {
Opt_v,
Opt_vers,
Opt_wsize,
+ Opt_write,
};
enum {
@@ -113,6 +114,19 @@ static const struct constant_table nfs_param_enums_lookupcache[] = {
{}
};
+enum {
+ Opt_write_lazy,
+ Opt_write_eager,
+ Opt_write_wait,
+};
+
+static const struct constant_table nfs_param_enums_write[] = {
+ { "lazy", Opt_write_lazy },
+ { "eager", Opt_write_eager },
+ { "wait", Opt_write_wait },
+ {}
+};
+
static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_flag_no("ac", Opt_ac),
fsparam_u32 ("acdirmax", Opt_acdirmax),
@@ -171,6 +185,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_flag ("v4.1", Opt_v),
fsparam_flag ("v4.2", Opt_v),
fsparam_string("vers", Opt_vers),
+ fsparam_enum ("write", Opt_write, nfs_param_enums_write),
fsparam_u32 ("wsize", Opt_wsize),
{}
};
@@ -770,6 +785,24 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
goto out_invalid_value;
}
break;
+ case Opt_write:
+ switch (result.uint_32) {
+ case Opt_write_lazy:
+ ctx->flags &=
+ ~(NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT);
+ break;
+ case Opt_write_eager:
+ ctx->flags |= NFS_MOUNT_WRITE_EAGER;
+ ctx->flags &= ~NFS_MOUNT_WRITE_WAIT;
+ break;
+ case Opt_write_wait:
+ ctx->flags |=
+ NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT;
+ break;
+ default:
+ goto out_invalid_value;
+ }
+ break;
/*
* Special options
@@ -1479,6 +1512,8 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->selected_flavor = RPC_AUTH_MAXFLAVOR;
ctx->minorversion = 0;
ctx->need_mount = true;
+
+ fc->s_iflags |= SB_I_STABLE_WRITES;
}
fc->fs_private = ctx;
fc->ops = &nfs_fs_context_ops;
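
The new "write=" option maps three keywords onto two flag bits: lazy clears both, eager sets NFS_MOUNT_WRITE_EAGER, and wait sets both, so wait implies eager (which is why the show-options hunk later in this patch prints only one of the two). With this parser, something like mount -o vers=4.2,write=eager server:/export /mnt (an example invocation) selects eager writeback. A sketch of the mapping, with illustrative flag values rather than the kernel's:

#include <stdio.h>

/* Illustrative values; the kernel defines these in nfs_mount.h */
#define NFS_MOUNT_WRITE_EAGER	0x01
#define NFS_MOUNT_WRITE_WAIT	0x02

enum { WRITE_LAZY, WRITE_EAGER, WRITE_WAIT };

static unsigned int apply_write_opt(unsigned int flags, int opt)
{
	switch (opt) {
	case WRITE_LAZY:	/* default: batch and write back later */
		return flags & ~(NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT);
	case WRITE_EAGER:	/* kick off writeback right after write() */
		return (flags | NFS_MOUNT_WRITE_EAGER) & ~NFS_MOUNT_WRITE_WAIT;
	case WRITE_WAIT:	/* eager, plus wait for it to finish */
		return flags | NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT;
	}
	return flags;
}

int main(void)
{
	printf("lazy=%#x eager=%#x wait=%#x\n",
	       apply_write_opt(0, WRITE_LAZY),
	       apply_write_opt(0, WRITE_EAGER),
	       apply_write_opt(0, WRITE_WAIT));
	return 0;
}
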
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index a60df88efc40..c4c021c6ebbd 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -390,10 +390,6 @@ static void nfs_readpage_from_fscache_complete(struct page *page,
if (!error) {
SetPageUptodate(page);
unlock_page(page);
- } else {
- error = nfs_readpage_async(context, page->mapping->host, page);
- if (error)
- unlock_page(page);
}
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 447e95974386..749bbea14d99 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -195,6 +195,18 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
}
EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
+#ifdef CONFIG_NFS_V4_2
+static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
+{
+ return nfsi->xattr_cache != NULL;
+}
+#else
+static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
+{
+ return false;
+}
+#endif
+
static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
{
struct nfs_inode *nfsi = NFS_I(inode);
@@ -209,6 +221,8 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
| NFS_INO_INVALID_XATTR);
}
+ if (!nfs_has_xattr_cache(nfsi))
+ flags &= ~NFS_INO_INVALID_XATTR;
if (inode->i_mapping->nrpages == 0)
flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
nfsi->cache_validity |= flags;
@@ -1258,55 +1272,19 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
return 0;
}
-bool nfs_mapping_need_revalidate_inode(struct inode *inode)
-{
- return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
- NFS_STALE(inode);
-}
-
-int nfs_revalidate_mapping_rcu(struct inode *inode)
-{
- struct nfs_inode *nfsi = NFS_I(inode);
- unsigned long *bitlock = &nfsi->flags;
- int ret = 0;
-
- if (IS_SWAPFILE(inode))
- goto out;
- if (nfs_mapping_need_revalidate_inode(inode)) {
- ret = -ECHILD;
- goto out;
- }
- spin_lock(&inode->i_lock);
- if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
- (nfsi->cache_validity & NFS_INO_INVALID_DATA))
- ret = -ECHILD;
- spin_unlock(&inode->i_lock);
-out:
- return ret;
-}
-
/**
- * nfs_revalidate_mapping - Revalidate the pagecache
- * @inode: pointer to host inode
+ * nfs_clear_invalid_mapping - Conditionally clear a mapping
* @mapping: pointer to mapping
+ *
+ * If the NFS_INO_INVALID_DATA inode flag is set, clear the mapping.
*/
-int nfs_revalidate_mapping(struct inode *inode,
- struct address_space *mapping)
+int nfs_clear_invalid_mapping(struct address_space *mapping)
{
+ struct inode *inode = mapping->host;
struct nfs_inode *nfsi = NFS_I(inode);
unsigned long *bitlock = &nfsi->flags;
int ret = 0;
- /* swapfiles are not supposed to be shared. */
- if (IS_SWAPFILE(inode))
- goto out;
-
- if (nfs_mapping_need_revalidate_inode(inode)) {
- ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
- if (ret < 0)
- goto out;
- }
-
/*
* We must clear NFS_INO_INVALID_DATA first to ensure that
* invalidations that come in while we're shooting down the mappings
@@ -1337,8 +1315,8 @@ int nfs_revalidate_mapping(struct inode *inode,
set_bit(NFS_INO_INVALIDATING, bitlock);
smp_wmb();
- nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA|
- NFS_INO_DATA_INVAL_DEFER);
+ nfsi->cache_validity &=
+ ~(NFS_INO_INVALID_DATA | NFS_INO_DATA_INVAL_DEFER);
spin_unlock(&inode->i_lock);
trace_nfs_invalidate_mapping_enter(inode);
ret = nfs_invalidate_mapping(inode, mapping);
@@ -1351,6 +1329,53 @@ out:
return ret;
}
+bool nfs_mapping_need_revalidate_inode(struct inode *inode)
+{
+ return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+ NFS_STALE(inode);
+}
+
+int nfs_revalidate_mapping_rcu(struct inode *inode)
+{
+ struct nfs_inode *nfsi = NFS_I(inode);
+ unsigned long *bitlock = &nfsi->flags;
+ int ret = 0;
+
+ if (IS_SWAPFILE(inode))
+ goto out;
+ if (nfs_mapping_need_revalidate_inode(inode)) {
+ ret = -ECHILD;
+ goto out;
+ }
+ spin_lock(&inode->i_lock);
+ if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
+ (nfsi->cache_validity & NFS_INO_INVALID_DATA))
+ ret = -ECHILD;
+ spin_unlock(&inode->i_lock);
+out:
+ return ret;
+}
+
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode: pointer to host inode
+ * @mapping: pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+ /* swapfiles are not supposed to be shared. */
+ if (IS_SWAPFILE(inode))
+ return 0;
+
+ if (nfs_mapping_need_revalidate_inode(inode)) {
+ int ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (ret < 0)
+ return ret;
+ }
+
+ return nfs_clear_invalid_mapping(mapping);
+}
+
static bool nfs_file_has_writers(struct nfs_inode *nfsi)
{
struct inode *inode = &nfsi->vfs_inode;
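
The refactor splits the old nfs_revalidate_mapping() into two steps: revalidate the inode when its cached attributes are suspect, then nfs_clear_invalid_mapping(), which shoots down the page cache only while NFS_INO_INVALID_DATA is set. That is what lets nfs_file_write() earlier in this patch call the second step directly. A minimal control-flow model (stub names, not the kernel functions):

#include <stdio.h>

#define INO_INVALID_DATA 0x1	/* modelled validity bit */

static unsigned int cache_validity = INO_INVALID_DATA;

static int clear_invalid_mapping(void)
{
	if (!(cache_validity & INO_INVALID_DATA))
		return 0;	/* nothing to do: mapping still valid */
	cache_validity &= ~INO_INVALID_DATA;
	puts("invalidating page cache");	/* nfs_invalidate_mapping() */
	return 0;
}

static int need_revalidate_inode(void) { return 0; }	/* stub */
static int revalidate_inode(void) { return 0; }		/* stub */

/* Model of the refactored nfs_revalidate_mapping(): attrs first, then data */
static int revalidate_mapping(void)
{
	if (need_revalidate_inode()) {
		int ret = revalidate_inode();

		if (ret < 0)
			return ret;
	}
	return clear_invalid_mapping();
}

int main(void)
{
	revalidate_mapping();		/* invalidates once */
	return revalidate_mapping();	/* second call is a no-op */
}
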
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 5604e807fc01..bb386a691e69 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -111,6 +111,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
fallthrough;
case -ENOTSUPP:
status = -EOPNOTSUPP;
+ goto getout;
default:
goto getout;
}
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 86acffe7335c..889a9f4c0310 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -609,6 +609,7 @@ found:
* changed. Schedule recovery!
*/
nfs4_schedule_path_down_recovery(pos);
+ goto out;
default:
goto out;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a07530cf673d..74bc5120013d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -71,10 +71,6 @@
#include "nfs4trace.h"
-#ifdef CONFIG_NFS_V4_2
-#include "nfs42.h"
-#endif /* CONFIG_NFS_V4_2 */
-
#define NFSDBG_FACILITY NFSDBG_PROC
#define NFS4_BITMASK_SZ 3
@@ -2231,6 +2227,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
default:
printk(KERN_ERR "NFS: %s: unhandled error "
"%d.\n", __func__, err);
+ fallthrough;
case 0:
case -ENOENT:
case -EAGAIN:
@@ -5438,15 +5435,16 @@ static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
if (cache_validity & NFS_INO_INVALID_ATIME)
bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
- if (cache_validity & NFS_INO_INVALID_ACCESS)
- bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
- FATTR4_WORD1_OWNER_GROUP;
- if (cache_validity & NFS_INO_INVALID_ACL)
- bitmask[0] |= FATTR4_WORD0_ACL;
- if (cache_validity & NFS_INO_INVALID_LABEL)
+ if (cache_validity & NFS_INO_INVALID_OTHER)
+ bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
+ FATTR4_WORD1_OWNER_GROUP |
+ FATTR4_WORD1_NUMLINKS;
+ if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
- if (cache_validity & NFS_INO_INVALID_CTIME)
+ if (cache_validity & NFS_INO_INVALID_CHANGE)
bitmask[0] |= FATTR4_WORD0_CHANGE;
+ if (cache_validity & NFS_INO_INVALID_CTIME)
+ bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
if (cache_validity & NFS_INO_INVALID_MTIME)
bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
if (cache_validity & NFS_INO_INVALID_SIZE)
@@ -9708,6 +9706,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
case -NFS4ERR_BADLAYOUT: /* no layout */
case -NFS4ERR_GRACE: /* loca_reclaim always false */
task->tk_status = 0;
+ break;
case 0:
break;
default:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 4bf10792cb5b..3a51351bdc6a 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1125,6 +1125,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
" sequence-id error on an"
" unconfirmed sequence %p!\n",
seqid->sequence);
+ return;
case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_BAD_STATEID:
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index af64b4e6fd1f..102b66e0bdef 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2875,6 +2875,7 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
switch (trypnfs) {
case PNFS_NOT_ATTEMPTED:
pnfs_write_through_mds(desc, hdr);
+ break;
case PNFS_ATTEMPTED:
break;
case PNFS_TRY_AGAIN:
@@ -3019,6 +3020,7 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
switch (trypnfs) {
case PNFS_NOT_ATTEMPTED:
pnfs_read_through_mds(desc, hdr);
+ break;
case PNFS_ATTEMPTED:
break;
case PNFS_TRY_AGAIN:
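
The two hunks above, like the nfs3acl.c, nfs4client.c, nfs4proc.c, and nfs4state.c changes before them, terminate switch cases that previously fell through implicitly; with -Wimplicit-fallthrough enabled, every case must end in break, return, goto, or an explicit fallthrough marker. A small demo of the bug class this closes off (the compiler will flag the deliberate fallthrough below):

#include <stdio.h>

static const char *classify(int status, int fixed)
{
	const char *what = "unknown";

	switch (status) {
	case 0:
		what = "attempted";
		if (fixed)
			break;	/* the added terminator */
		/* without it, control falls into the next case ... */
	case 1:
		what = "try again";
		break;
	}
	return what;
}

int main(void)
{
	printf("buggy: %s\n", classify(0, 0));	/* "try again" (wrong) */
	printf("fixed: %s\n", classify(0, 1));	/* "attempted" */
	return 0;
}
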
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index eb854f1f86e2..d2b6dce1f99f 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -74,6 +74,24 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
+static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio,
+ struct inode *inode)
+{
+ struct nfs_pgio_mirror *pgm;
+ unsigned long npages;
+
+ nfs_pageio_complete(pgio);
+
+ /* It doesn't make sense to do mirrored reads! */
+ WARN_ON_ONCE(pgio->pg_mirror_count != 1);
+
+ pgm = &pgio->pg_mirrors[0];
+ NFS_I(inode)->read_io += pgm->pg_bytes_written;
+ npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+}
+
+
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
struct nfs_pgio_mirror *mirror;
@@ -114,41 +132,10 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
nfs_release_request(req);
}
-int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
- struct page *page)
-{
- struct nfs_page *new;
- unsigned int len;
+struct nfs_readdesc {
struct nfs_pageio_descriptor pgio;
- struct nfs_pgio_mirror *pgm;
-
- len = nfs_page_length(page);
- if (len == 0)
- return nfs_return_empty_page(page);
- new = nfs_create_request(ctx, page, 0, len);
- if (IS_ERR(new)) {
- unlock_page(page);
- return PTR_ERR(new);
- }
- if (len < PAGE_SIZE)
- zero_user_segment(page, len, PAGE_SIZE);
-
- nfs_pageio_init_read(&pgio, inode, false,
- &nfs_async_read_completion_ops);
- if (!nfs_pageio_add_request(&pgio, new)) {
- nfs_list_remove_request(new);
- nfs_readpage_release(new, pgio.pg_error);
- }
- nfs_pageio_complete(&pgio);
-
- /* It doesn't make sense to do mirrored reads! */
- WARN_ON_ONCE(pgio.pg_mirror_count != 1);
-
- pgm = &pgio.pg_mirrors[0];
- NFS_I(inode)->read_io += pgm->pg_bytes_written;
-
- return pgio.pg_error < 0 ? pgio.pg_error : 0;
-}
+ struct nfs_open_context *ctx;
+};
static void nfs_page_group_set_uptodate(struct nfs_page *req)
{
@@ -171,8 +158,7 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
/* note: regions of the page not covered by a
- * request are zeroed in nfs_readpage_async /
- * readpage_async_filler */
+ * request are zeroed in readpage_async_filler */
if (bytes > hdr->good_bytes) {
/* nothing in this request was good, so zero
* the full extent of the request */
@@ -304,6 +290,38 @@ static void nfs_readpage_result(struct rpc_task *task,
nfs_readpage_retry(task, hdr);
}
+static int
+readpage_async_filler(void *data, struct page *page)
+{
+ struct nfs_readdesc *desc = data;
+ struct nfs_page *new;
+ unsigned int len;
+ int error;
+
+ len = nfs_page_length(page);
+ if (len == 0)
+ return nfs_return_empty_page(page);
+
+ new = nfs_create_request(desc->ctx, page, 0, len);
+ if (IS_ERR(new))
+ goto out_error;
+
+ if (len < PAGE_SIZE)
+ zero_user_segment(page, len, PAGE_SIZE);
+ if (!nfs_pageio_add_request(&desc->pgio, new)) {
+ nfs_list_remove_request(new);
+ error = desc->pgio.pg_error;
+ nfs_readpage_release(new, error);
+ goto out;
+ }
+ return 0;
+out_error:
+ error = PTR_ERR(new);
+ unlock_page(page);
+out:
+ return error;
+}
+
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
@@ -312,14 +330,13 @@ static void nfs_readpage_result(struct rpc_task *task,
*/
int nfs_readpage(struct file *file, struct page *page)
{
- struct nfs_open_context *ctx;
+ struct nfs_readdesc desc;
struct inode *inode = page_file_mapping(page)->host;
- int error;
+ int ret;
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
page, PAGE_SIZE, page_index(page));
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
- nfs_add_stats(inode, NFSIOS_READPAGES, 1);
/*
* Try to flush any pending writes to the file.
@@ -328,93 +345,59 @@ int nfs_readpage(struct file *file, struct page *page)
* be any new pending writes generated at this point
* for this page (other pages can be written to).
*/
- error = nfs_wb_page(inode, page);
- if (error)
+ ret = nfs_wb_page(inode, page);
+ if (ret)
goto out_unlock;
if (PageUptodate(page))
goto out_unlock;
- error = -ESTALE;
+ ret = -ESTALE;
if (NFS_STALE(inode))
goto out_unlock;
if (file == NULL) {
- error = -EBADF;
- ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
- if (ctx == NULL)
+ ret = -EBADF;
+ desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+ if (desc.ctx == NULL)
goto out_unlock;
} else
- ctx = get_nfs_open_context(nfs_file_open_context(file));
+ desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
if (!IS_SYNC(inode)) {
- error = nfs_readpage_from_fscache(ctx, inode, page);
- if (error == 0)
+ ret = nfs_readpage_from_fscache(desc.ctx, inode, page);
+ if (ret == 0)
goto out;
}
- xchg(&ctx->error, 0);
- error = nfs_readpage_async(ctx, inode, page);
- if (!error) {
- error = wait_on_page_locked_killable(page);
- if (!PageUptodate(page) && !error)
- error = xchg(&ctx->error, 0);
- }
-out:
- put_nfs_open_context(ctx);
- return error;
-out_unlock:
- unlock_page(page);
- return error;
-}
-
-struct nfs_readdesc {
- struct nfs_pageio_descriptor *pgio;
- struct nfs_open_context *ctx;
-};
-
-static int
-readpage_async_filler(void *data, struct page *page)
-{
- struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
- struct nfs_page *new;
- unsigned int len;
- int error;
+ xchg(&desc.ctx->error, 0);
+ nfs_pageio_init_read(&desc.pgio, inode, false,
+ &nfs_async_read_completion_ops);
- len = nfs_page_length(page);
- if (len == 0)
- return nfs_return_empty_page(page);
+ ret = readpage_async_filler(&desc, page);
- new = nfs_create_request(desc->ctx, page, 0, len);
- if (IS_ERR(new))
- goto out_error;
+ if (!ret)
+ nfs_pageio_complete_read(&desc.pgio, inode);
- if (len < PAGE_SIZE)
- zero_user_segment(page, len, PAGE_SIZE);
- if (!nfs_pageio_add_request(desc->pgio, new)) {
- nfs_list_remove_request(new);
- error = desc->pgio->pg_error;
- nfs_readpage_release(new, error);
- goto out;
+ ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
+ if (!ret) {
+ ret = wait_on_page_locked_killable(page);
+ if (!PageUptodate(page) && !ret)
+ ret = xchg(&desc.ctx->error, 0);
}
- return 0;
-out_error:
- error = PTR_ERR(new);
- unlock_page(page);
out:
- return error;
+ put_nfs_open_context(desc.ctx);
+ return ret;
+out_unlock:
+ unlock_page(page);
+ return ret;
}
-int nfs_readpages(struct file *filp, struct address_space *mapping,
+int nfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
- struct nfs_pageio_descriptor pgio;
- struct nfs_pgio_mirror *pgm;
- struct nfs_readdesc desc = {
- .pgio = &pgio,
- };
+ struct nfs_readdesc desc;
struct inode *inode = mapping->host;
- unsigned long npages;
- int ret = -ESTALE;
+ int ret;
dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
inode->i_sb->s_id,
@@ -422,15 +405,17 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
nr_pages);
nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
+ ret = -ESTALE;
if (NFS_STALE(inode))
goto out;
- if (filp == NULL) {
+ if (file == NULL) {
+ ret = -EBADF;
desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
if (desc.ctx == NULL)
- return -EBADF;
+ goto out;
} else
- desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
+ desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
/* attempt to read as many of the pages as possible from the cache
* - this returns -ENOBUFS immediately if the cookie is negative
@@ -440,20 +425,13 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
- nfs_pageio_init_read(&pgio, inode, false,
+ nfs_pageio_init_read(&desc.pgio, inode, false,
&nfs_async_read_completion_ops);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
- nfs_pageio_complete(&pgio);
- /* It doesn't make sense to do mirrored reads! */
- WARN_ON_ONCE(pgio.pg_mirror_count != 1);
+ nfs_pageio_complete_read(&desc.pgio, inode);
- pgm = &pgio.pg_mirrors[0];
- NFS_I(inode)->read_io += pgm->pg_bytes_written;
- npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >>
- PAGE_SHIFT;
- nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
put_nfs_open_context(desc.ctx);
out:
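
The rework threads one struct nfs_readdesc (pageio descriptor plus open context) through readpage_async_filler(), so nfs_readpage() and read_cache_pages() drive the same filler with a single void-pointer context. The callback-with-context shape, modelled in user space:

#include <stdio.h>

/* Models struct nfs_readdesc: bundle everything the filler needs. */
struct readdesc {
	int bytes_read;		/* stands in for the pageio descriptor */
	const char *ctx;	/* stands in for the open context */
};

/* Same shape as readpage_async_filler(void *data, struct page *page). */
static int filler(void *data, int page)
{
	struct readdesc *desc = data;

	desc->bytes_read += 4096;
	printf("ctx=%s page=%d\n", desc->ctx, page);
	return 0;
}

/* Models read_cache_pages(): drive the filler over a batch of pages. */
static int for_each_page(int npages, int (*fn)(void *, int), void *data)
{
	for (int i = 0; i < npages; i++) {
		int err = fn(data, i);

		if (err)
			return err;
	}
	return 0;
}

int main(void)
{
	struct readdesc desc = { .bytes_read = 0, .ctx = "file" };

	for_each_page(3, filler, &desc);
	printf("total=%d\n", desc.bytes_read);
	return 0;
}
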
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c7a924580eec..94885c6f8f54 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -523,6 +523,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
seq_puts(m, ",local_lock=flock");
else
seq_puts(m, ",local_lock=posix");
+
+ if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
+ if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
+ seq_puts(m, ",write=wait");
+ else
+ seq_puts(m, ",write=eager");
+ }
}
/*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 639c34fec04a..82bdcb982186 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -712,16 +712,23 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
struct inode *inode = mapping->host;
struct nfs_pageio_descriptor pgio;
- struct nfs_io_completion *ioc;
+ struct nfs_io_completion *ioc = NULL;
+ unsigned int mntflags = NFS_SERVER(inode)->flags;
+ int priority = 0;
int err;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
- ioc = nfs_io_completion_alloc(GFP_KERNEL);
- if (ioc)
- nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
+ if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
+ wbc->for_background || wbc->for_sync || wbc->for_reclaim) {
+ ioc = nfs_io_completion_alloc(GFP_KERNEL);
+ if (ioc)
+ nfs_io_completion_init(ioc, nfs_io_completion_commit,
+ inode);
+ priority = wb_priority(wbc);
+ }
- nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+ nfs_pageio_init_write(&pgio, inode, priority, false,
&nfs_async_write_completion_ops);
pgio.pg_io_completion = ioc;
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
@@ -1278,19 +1285,21 @@ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
* the PageUptodate() flag. In this case, we will need to turn off
* write optimisations that depend on the page contents being correct.
*/
-static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
+static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
+ unsigned int pagelen)
{
struct nfs_inode *nfsi = NFS_I(inode);
if (nfs_have_delegated_attributes(inode))
goto out;
- if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+ if (nfsi->cache_validity &
+ (NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
return false;
smp_rmb();
- if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
+ if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
return false;
out:
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+ if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0)
return false;
return PageUptodate(page) != 0;
}
@@ -1310,7 +1319,8 @@ is_whole_file_wrlock(struct file_lock *fl)
* If the file is opened for synchronous writes then we can just skip the rest
* of the checks.
*/
-static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
+static int nfs_can_extend_write(struct file *file, struct page *page,
+ struct inode *inode, unsigned int pagelen)
{
int ret;
struct file_lock_context *flctx = inode->i_flctx;
@@ -1318,7 +1328,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino
if (file->f_flags & O_DSYNC)
return 0;
- if (!nfs_write_pageuptodate(page, inode))
+ if (!nfs_write_pageuptodate(page, inode, pagelen))
return 0;
if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return 1;
@@ -1356,6 +1366,7 @@ int nfs_updatepage(struct file *file, struct page *page,
struct nfs_open_context *ctx = nfs_file_open_context(file);
struct address_space *mapping = page_file_mapping(page);
struct inode *inode = mapping->host;
+ unsigned int pagelen = nfs_page_length(page);
int status = 0;
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -1366,8 +1377,8 @@ int nfs_updatepage(struct file *file, struct page *page,
if (!count)
goto out;
- if (nfs_can_extend_write(file, page, inode)) {
- count = max(count + offset, nfs_page_length(page));
+ if (nfs_can_extend_write(file, page, inode, pagelen)) {
+ count = max(count + offset, pagelen);
offset = 0;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 56bf14316122..3851bfcdba56 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -67,7 +67,6 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/rcupdate.h>
-#include <linux/kallsyms.h>
#include <linux/stacktrace.h>
#include <linux/resource.h>
#include <linux/module.h>
@@ -386,19 +385,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task)
{
unsigned long wchan;
- char symname[KSYM_NAME_LEN];
- if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
- goto print0;
+ if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+ wchan = get_wchan(task);
+ else
+ wchan = 0;
- wchan = get_wchan(task);
- if (wchan && !lookup_symbol_name(wchan, symname)) {
- seq_puts(m, symname);
- return 0;
- }
+ if (wchan)
+ seq_printf(m, "%ps", (void *) wchan);
+ else
+ seq_putc(m, '0');
-print0:
- seq_putc(m, '0');
return 0;
}
#endif /* CONFIG_KALLSYMS */
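
The rewrite drops the manual lookup_symbol_name() buffer in favour of the vsprintf "%ps" extension, which resolves a kernel text address to its symbol name at print time. That specifier is kernel-only; a user-space approximation with a hand-rolled symbol table:

#include <stdio.h>

/* User-space stand-in for "%ps": map an address to a symbol name. */
struct sym { unsigned long addr; const char *name; };

static const struct sym symtab[] = {
	{ 0x1000, "schedule" },
	{ 0x2000, "do_wait" },
};

static const char *sym_name(unsigned long addr)
{
	for (unsigned int i = 0; i < sizeof(symtab) / sizeof(symtab[0]); i++)
		if (symtab[i].addr == addr)
			return symtab[i].name;
	return "?";
}

int main(void)
{
	unsigned long wchan = 0x2000;	/* would come from get_wchan() */

	if (wchan)
		printf("%s\n", sym_name(wchan));	/* seq_printf("%ps") */
	else
		puts("0");				/* seq_putc(m, '0') */
	return 0;
}
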
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 656ba24c317d..984e42f8cb11 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -571,7 +571,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
error = -ENOMEM;
if (count >= KMALLOC_MAX_SIZE)
goto out;
- kbuf = kzalloc(count + 1, GFP_KERNEL);
+ kbuf = kvzalloc(count + 1, GFP_KERNEL);
if (!kbuf)
goto out;
@@ -600,7 +600,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
error = count;
out_free_buf:
- kfree(kbuf);
+ kvfree(kbuf);
out:
sysctl_head_finish(head);
diff --git a/fs/proc/self.c b/fs/proc/self.c
index a4012154e109..72cd69bcaf4a 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -16,13 +16,6 @@ static const char *proc_self_get_link(struct dentry *dentry,
pid_t tgid = task_tgid_nr_ns(current, ns);
char *name;
- /*
- * Not currently supported. Once we can inherit all of struct pid,
- * we can allow this.
- */
- if (current->flags & PF_IO_WORKER)
- return ERR_PTR(-EOPNOTSUPP);
-
if (!tgid)
return ERR_PTR(-ENOENT);
/* max length of unsigned int in decimal + NULL term */
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index d56681d86d28..a553273fbd41 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -17,13 +17,6 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
pid_t pid = task_pid_nr_ns(current, ns);
char *name;
- /*
- * Not currently supported. Once we can inherit all of struct pid,
- * we can allow this.
- */
- if (current->flags & PF_IO_WORKER)
- return ERR_PTR(-EOPNOTSUPP);
-
if (!pid)
return ERR_PTR(-ENOENT);
name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 93a217e4f563..14658b009f1b 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -467,7 +467,7 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type,
static void pstore_kill_sb(struct super_block *sb)
{
mutex_lock(&pstore_sb_lock);
- WARN_ON(pstore_sb != sb);
+ WARN_ON(pstore_sb && pstore_sb != sb);
kill_litter_super(sb);
pstore_sb = NULL;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index aa8e0b65ff1a..fff363bfd484 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -246,7 +246,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
pr_info("error in header, %d\n", numerr);
prz->corrected_bytes += numerr;
} else if (numerr < 0) {
- pr_info("uncorrectable error in header\n");
+ pr_info_ratelimited("uncorrectable error in header\n");
prz->bad_blocks++;
}
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index b56ff451adce..5b6fcb9b44e2 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -2805,7 +2805,7 @@ xfs_btree_split_worker(
struct xfs_btree_split_args *args = container_of(work,
struct xfs_btree_split_args, work);
unsigned long pflags;
- unsigned long new_pflags = PF_MEMALLOC_NOFS;
+ unsigned long new_pflags = 0;
/*
* we are in a transaction context here, but may also be doing work
@@ -2817,12 +2817,20 @@ xfs_btree_split_worker(
new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
current_set_flags_nested(&pflags, new_pflags);
+ xfs_trans_set_context(args->cur->bc_tp);
args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
args->key, args->curp, args->stat);
- complete(args->done);
+ xfs_trans_clear_context(args->cur->bc_tp);
current_restore_flags_nested(&pflags, new_pflags);
+
+ /*
+ * Do not access args after complete() has run here. We don't own args
+ * and the owner may run and free args before we return here.
+ */
+ complete(args->done);
+
}
/*
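
The reordering above makes complete() the worker's final touch of args, because the waiting thread owns args and may free it as soon as the completion fires; kernel completions are written so that this is safe. The pthread sketch below models the same rule, though with plain condvars the demo still joins before freeing:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct split_args {
	int result;
	pthread_mutex_t lock;
	pthread_cond_t cond;
	int done;
};

static void complete(struct split_args *a)
{
	pthread_mutex_lock(&a->lock);
	a->done = 1;
	pthread_cond_signal(&a->cond);
	pthread_mutex_unlock(&a->lock);
}

static void *split_worker(void *p)
{
	struct split_args *a = p;

	a->result = 42;		/* the actual split work */
	complete(a);		/* signal completion ... */
	return NULL;		/* ... and never touch 'a' again */
}

int main(void)
{
	struct split_args *a = calloc(1, sizeof(*a));
	pthread_t t;

	if (!a)
		return 1;
	pthread_mutex_init(&a->lock, NULL);
	pthread_cond_init(&a->cond, NULL);
	pthread_create(&t, NULL, split_worker, a);

	pthread_mutex_lock(&a->lock);
	while (!a->done)
		pthread_cond_wait(&a->cond, &a->lock);
	pthread_mutex_unlock(&a->lock);

	printf("result=%d\n", a->result);
	pthread_join(t, NULL);	/* joined only so the demo can free safely */
	free(a);
	return 0;
}
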
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4304c6416fbb..b4186d666157 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc(
* We hand off the transaction to the completion thread now, so
* clear the flag here.
*/
- current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+ xfs_trans_clear_context(tp);
return 0;
}
@@ -125,7 +125,7 @@ xfs_setfilesize_ioend(
* thus we need to mark ourselves as being in a transaction manually.
* Similarly for freeze protection.
*/
- current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+ xfs_trans_set_context(tp);
__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
/* we abort the update if there was an IO error */
@@ -568,6 +568,12 @@ xfs_vm_writepage(
{
struct xfs_writepage_ctx wpc = { };
+ if (WARN_ON_ONCE(current->journal_info)) {
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ return 0;
+ }
+
return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
}
@@ -578,6 +584,13 @@ xfs_vm_writepages(
{
struct xfs_writepage_ctx wpc = { };
+ /*
+ * Writing back data in a transaction context can result in recursive
+ * transactions. This is bad, so issue a warning and get out of here.
+ */
+ if (WARN_ON_ONCE(current->journal_info))
+ return 0;
+
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
}
diff --git a/fs/xfs/xfs_bio_io.c b/fs/xfs/xfs_bio_io.c
index e2148f2d5d6b..17f36db2f792 100644
--- a/fs/xfs/xfs_bio_io.c
+++ b/fs/xfs/xfs_bio_io.c
@@ -6,7 +6,7 @@
static inline unsigned int bio_max_vecs(unsigned int count)
{
- return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
+ return bio_max_segs(howmany(count, PAGE_SIZE));
}
int
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index f6e5235df7c9..37a1d12762d8 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1480,7 +1480,7 @@ xfs_buf_ioapply_map(
int op)
{
int page_index;
- int total_nr_pages = bp->b_page_count;
+ unsigned int total_nr_pages = bp->b_page_count;
int nr_pages;
struct bio *bio;
sector_t sector = bp->b_maps[map].bm_bn;
@@ -1505,7 +1505,7 @@ xfs_buf_ioapply_map(
next_chunk:
atomic_inc(&bp->b_io_remaining);
- nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
+ nr_pages = bio_max_segs(total_nr_pages);
bio = bio_alloc(GFP_NOIO, nr_pages);
bio_set_dev(bio, bp->b_target->bt_bdev);
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 3991e59cfd18..ef17c1f6db32 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -344,7 +344,6 @@ xfs_extent_busy_trim(
ASSERT(*len > 0);
spin_lock(&args->pag->pagb_lock);
-restart:
fbno = *bno;
flen = *len;
rbp = args->pag->pagb_tree.rb_node;
@@ -363,19 +362,6 @@ restart:
continue;
}
- /*
- * If this is a metadata allocation, try to reuse the busy
- * extent instead of trimming the allocation.
- */
- if (!(args->datatype & XFS_ALLOC_USERDATA) &&
- !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
- if (!xfs_extent_busy_update_extent(args->mp, args->pag,
- busyp, fbno, flen,
- false))
- goto restart;
- continue;
- }
-
if (bbno <= fbno) {
/* start overlap */
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 145e06c47744..546a6cd96729 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -51,7 +51,7 @@ xfs_panic_mask_proc_handler(
#endif /* CONFIG_PROC_FS */
STATIC int
-xfs_deprecate_irix_sgid_inherit_proc_handler(
+xfs_deprecated_dointvec_minmax(
struct ctl_table *ctl,
int write,
void *buffer,
@@ -59,24 +59,8 @@ xfs_deprecate_irix_sgid_inherit_proc_handler(
loff_t *ppos)
{
if (write) {
- printk_once(KERN_WARNING
- "XFS: " "%s sysctl option is deprecated.\n",
- ctl->procname);
- }
- return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-}
-
-STATIC int
-xfs_deprecate_irix_symlink_mode_proc_handler(
- struct ctl_table *ctl,
- int write,
- void *buffer,
- size_t *lenp,
- loff_t *ppos)
-{
- if (write) {
- printk_once(KERN_WARNING
- "XFS: " "%s sysctl option is deprecated.\n",
+ printk_ratelimited(KERN_WARNING
+ "XFS: %s sysctl option is deprecated.\n",
ctl->procname);
}
return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
@@ -88,7 +72,7 @@ static struct ctl_table xfs_table[] = {
.data = &xfs_params.sgid_inherit.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = xfs_deprecate_irix_sgid_inherit_proc_handler,
+ .proc_handler = xfs_deprecated_dointvec_minmax,
.extra1 = &xfs_params.sgid_inherit.min,
.extra2 = &xfs_params.sgid_inherit.max
},
@@ -97,7 +81,7 @@ static struct ctl_table xfs_table[] = {
.data = &xfs_params.symlink_mode.val,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = xfs_deprecate_irix_symlink_mode_proc_handler,
+ .proc_handler = xfs_deprecated_dointvec_minmax,
.extra1 = &xfs_params.symlink_mode.min,
.extra2 = &xfs_params.symlink_mode.max
},
@@ -201,6 +185,15 @@ static struct ctl_table xfs_table[] = {
.extra1 = &xfs_params.blockgc_timer.min,
.extra2 = &xfs_params.blockgc_timer.max,
},
+ {
+ .procname = "speculative_cow_prealloc_lifetime",
+ .data = &xfs_params.blockgc_timer.val,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = xfs_deprecated_dointvec_minmax,
+ .extra1 = &xfs_params.blockgc_timer.min,
+ .extra2 = &xfs_params.blockgc_timer.max,
+ },
/* please keep this the last entry */
#ifdef CONFIG_PROC_FS
{
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 44f72c09c203..b22a09e9daee 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -72,6 +72,7 @@ xfs_trans_free(
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
trace_xfs_trans_free(tp, _RET_IP_);
+ xfs_trans_clear_context(tp);
if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp);
@@ -123,7 +124,8 @@ xfs_trans_dup(
ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
tp->t_rtx_res = tp->t_rtx_res_used;
- ntp->t_pflags = tp->t_pflags;
+
+ xfs_trans_switch_context(tp, ntp);
/* move deferred ops over to the new tp */
xfs_defer_move(ntp, tp);
@@ -157,9 +159,6 @@ xfs_trans_reserve(
int error = 0;
bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
- /* Mark this thread as being in a transaction */
- current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
/*
* Attempt to reserve the needed disk blocks by decrementing
* the number needed from the number available. This will
@@ -167,10 +166,8 @@ xfs_trans_reserve(
*/
if (blocks > 0) {
error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
- if (error != 0) {
- current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+ if (error != 0)
return -ENOSPC;
- }
tp->t_blk_res += blocks;
}
@@ -244,9 +241,6 @@ undo_blocks:
xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
tp->t_blk_res = 0;
}
-
- current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
return error;
}
@@ -260,6 +254,7 @@ xfs_trans_alloc(
struct xfs_trans **tpp)
{
struct xfs_trans *tp;
+ bool want_retry = true;
int error;
/*
@@ -267,9 +262,11 @@ xfs_trans_alloc(
* GFP_NOFS allocation context so that we avoid lockdep false positives
* by doing GFP_KERNEL allocations inside sb_start_intwrite().
*/
+retry:
tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super);
+ xfs_trans_set_context(tp);
/*
* Zero-reservation ("empty") transactions can't modify anything, so
@@ -289,7 +286,9 @@ xfs_trans_alloc(
tp->t_firstblock = NULLFSBLOCK;
error = xfs_trans_reserve(tp, resp, blocks, rtextents);
- if (error == -ENOSPC) {
+ if (error == -ENOSPC && want_retry) {
+ xfs_trans_cancel(tp);
+
/*
* We weren't able to reserve enough space for the transaction.
* Flush the other speculative space allocations to free space.
@@ -297,8 +296,11 @@ xfs_trans_alloc(
* other locks.
*/
error = xfs_blockgc_free_space(mp, NULL);
- if (!error)
- error = xfs_trans_reserve(tp, resp, blocks, rtextents);
+ if (error)
+ return error;
+
+ want_retry = false;
+ goto retry;
}
if (error) {
xfs_trans_cancel(tp);
@@ -893,7 +895,6 @@ __xfs_trans_commit(
xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
- current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free(tp);
/*
@@ -925,7 +926,6 @@ out_unreserve:
xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
tp->t_ticket = NULL;
}
- current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
xfs_trans_free_items(tp, !!error);
xfs_trans_free(tp);
@@ -985,9 +985,6 @@ xfs_trans_cancel(
tp->t_ticket = NULL;
}
- /* mark this thread as no longer being in a transaction */
- current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
xfs_trans_free_items(tp, dirty);
xfs_trans_free(tp);
}
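
xfs_trans_alloc() now implements a retry-once pattern: on ENOSPC it cancels the half-built transaction, flushes speculative preallocations, and loops back exactly once under the want_retry guard. The skeleton of that pattern (names modelled, not the XFS internals):

#include <stdbool.h>
#include <stdio.h>

#define ENOSPC 28

static int space = 0;	/* free blocks; the flush below reclaims some */

static int try_reserve(int blocks)
{
	if (space < blocks)
		return -ENOSPC;
	space -= blocks;
	return 0;
}

static int flush_speculative_space(void)
{
	space += 100;	/* models xfs_blockgc_free_space() reclaiming blocks */
	return 0;
}

static int alloc_with_retry(int blocks)
{
	bool want_retry = true;
	int error;

retry:
	error = try_reserve(blocks);
	if (error == -ENOSPC && want_retry) {
		/* tear down, reclaim, and try exactly one more time */
		error = flush_speculative_space();
		if (error)
			return error;
		want_retry = false;
		goto retry;
	}
	return error;
}

int main(void)
{
	printf("reserve: %d\n", alloc_with_retry(10));	/* 0 after one retry */
	return 0;
}
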
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 8b03fbfe9a1b..9dd745cf77c9 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
struct xfs_trans **tpp);
+static inline void
+xfs_trans_set_context(
+ struct xfs_trans *tp)
+{
+ ASSERT(current->journal_info == NULL);
+ tp->t_pflags = memalloc_nofs_save();
+ current->journal_info = tp;
+}
+
+static inline void
+xfs_trans_clear_context(
+ struct xfs_trans *tp)
+{
+ if (current->journal_info == tp) {
+ memalloc_nofs_restore(tp->t_pflags);
+ current->journal_info = NULL;
+ }
+}
+
+static inline void
+xfs_trans_switch_context(
+ struct xfs_trans *old_tp,
+ struct xfs_trans *new_tp)
+{
+ ASSERT(current->journal_info == old_tp);
+ new_tp->t_pflags = old_tp->t_pflags;
+ old_tp->t_pflags = 0;
+ current->journal_info = new_tp;
+}
+
#endif /* __XFS_TRANS_H__ */
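
These helpers move the "in a transaction" marker from PF_MEMALLOC_NOFS juggling into current->journal_info, which is what the WARN_ON_ONCE(current->journal_info) guards added to fs/xfs/xfs_aops.c earlier in this patch key off. A user-space model using a thread-local pointer (all names modelled):

#include <assert.h>
#include <stdio.h>

struct trans { int id; };

/* Models current->journal_info as a thread-local marker. */
static _Thread_local struct trans *journal_info;

static void trans_set_context(struct trans *tp)
{
	assert(journal_info == NULL);	/* no nesting allowed */
	journal_info = tp;		/* plus memalloc_nofs_save() in XFS */
}

static void trans_clear_context(struct trans *tp)
{
	if (journal_info == tp)
		journal_info = NULL;
}

/* Models the xfs_vm_writepages() guard added earlier in this patch. */
static void writepages(void)
{
	if (journal_info) {
		fprintf(stderr, "recursion: writeback inside a transaction\n");
		return;		/* bail instead of recursing */
	}
	puts("writing back data");
}

int main(void)
{
	struct trans tp = { .id = 1 };

	writepages();			/* fine: no transaction context */
	trans_set_context(&tp);
	writepages();			/* caught by the guard */
	trans_clear_context(&tp);
	return 0;
}
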
diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h
new file mode 100644
index 000000000000..1a3ad6d29833
--- /dev/null
+++ b/include/asm-generic/numa.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_NUMA_H
+#define __ASM_GENERIC_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#define NR_NODE_MEMBLKS (MAX_NUMNODES * 2)
+
+int __node_distance(int from, int to);
+#define node_distance(a, b) __node_distance(a, b)
+
+extern nodemask_t numa_nodes_parsed __initdata;
+
+extern bool numa_off;
+
+/* Mappings between node number and cpus on that node. */
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+void numa_clear_node(unsigned int cpu);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+const struct cpumask *cpumask_of_node(int node);
+#else
+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+ if (node == NUMA_NO_NODE)
+ return cpu_all_mask;
+
+ return node_to_cpumask_map[node];
+}
+#endif
+
+void __init arch_numa_init(void);
+int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+void __init numa_set_distance(int from, int to, int distance);
+void __init numa_free_distance(void);
+void __init early_map_cpu_to_node(unsigned int cpu, int nid);
+void numa_store_cpu_info(unsigned int cpu);
+void numa_add_cpu(unsigned int cpu);
+void numa_remove_cpu(unsigned int cpu);
+
+#else /* CONFIG_NUMA */
+
+static inline void numa_store_cpu_info(unsigned int cpu) { }
+static inline void numa_add_cpu(unsigned int cpu) { }
+static inline void numa_remove_cpu(unsigned int cpu) { }
+static inline void arch_numa_init(void) { }
+static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
+
+#endif /* CONFIG_NUMA */
+
+#endif /* __ASM_GENERIC_NUMA_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 6786f8c0182f..0331d5d49551 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -968,12 +968,13 @@
#endif
/*
- * Clang's -fsanitize=kernel-address and -fsanitize=thread produce
- * unwanted sections (.eh_frame and .init_array.*), but
- * CONFIG_CONSTRUCTORS wants to keep any .init_array.* sections.
+ * Clang's -fprofile-arcs, -fsanitize=kernel-address, and
+ * -fsanitize=thread produce unwanted sections (.eh_frame
+ * and .init_array.*), but CONFIG_CONSTRUCTORS wants to
+ * keep any .init_array.* sections.
* https://bugs.llvm.org/show_bug.cgi?id=46478
*/
-#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN)
+#if defined(CONFIG_GCOV_KERNEL) || defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN)
# ifdef CONFIG_CONSTRUCTORS
# define SANITIZER_DISCARDS \
*(.eh_frame)
diff --git a/include/dt-bindings/clock/k210-clk.h b/include/dt-bindings/clock/k210-clk.h
index a48176ad3c23..b2de702cbf75 100644
--- a/include/dt-bindings/clock/k210-clk.h
+++ b/include/dt-bindings/clock/k210-clk.h
@@ -9,7 +9,6 @@
/*
* Kendryte K210 SoC clock identifiers (arbitrary values).
*/
-#define K210_CLK_ACLK 0
#define K210_CLK_CPU 0
#define K210_CLK_SRAM0 1
#define K210_CLK_SRAM1 2
diff --git a/include/dt-bindings/pinctrl/k210-fpioa.h b/include/dt-bindings/pinctrl/k210-fpioa.h
new file mode 100644
index 000000000000..314285eab3a1
--- /dev/null
+++ b/include/dt-bindings/pinctrl/k210-fpioa.h
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2020 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef PINCTRL_K210_FPIOA_H
+#define PINCTRL_K210_FPIOA_H
+
+/*
+ * Full list of FPIOA functions from
+ * kendryte-standalone-sdk/lib/drivers/include/fpioa.h
+ */
+#define K210_PCF_MASK GENMASK(7, 0)
+#define K210_PCF_JTAG_TCLK 0 /* JTAG Test Clock */
+#define K210_PCF_JTAG_TDI 1 /* JTAG Test Data In */
+#define K210_PCF_JTAG_TMS 2 /* JTAG Test Mode Select */
+#define K210_PCF_JTAG_TDO 3 /* JTAG Test Data Out */
+#define K210_PCF_SPI0_D0 4 /* SPI0 Data 0 */
+#define K210_PCF_SPI0_D1 5 /* SPI0 Data 1 */
+#define K210_PCF_SPI0_D2 6 /* SPI0 Data 2 */
+#define K210_PCF_SPI0_D3 7 /* SPI0 Data 3 */
+#define K210_PCF_SPI0_D4 8 /* SPI0 Data 4 */
+#define K210_PCF_SPI0_D5 9 /* SPI0 Data 5 */
+#define K210_PCF_SPI0_D6 10 /* SPI0 Data 6 */
+#define K210_PCF_SPI0_D7 11 /* SPI0 Data 7 */
+#define K210_PCF_SPI0_SS0 12 /* SPI0 Chip Select 0 */
+#define K210_PCF_SPI0_SS1 13 /* SPI0 Chip Select 1 */
+#define K210_PCF_SPI0_SS2 14 /* SPI0 Chip Select 2 */
+#define K210_PCF_SPI0_SS3 15 /* SPI0 Chip Select 3 */
+#define K210_PCF_SPI0_ARB 16 /* SPI0 Arbitration */
+#define K210_PCF_SPI0_SCLK 17 /* SPI0 Serial Clock */
+#define K210_PCF_UARTHS_RX 18 /* UART High speed Receiver */
+#define K210_PCF_UARTHS_TX 19 /* UART High speed Transmitter */
+#define K210_PCF_RESV6 20 /* Reserved function */
+#define K210_PCF_RESV7 21 /* Reserved function */
+#define K210_PCF_CLK_SPI1 22 /* Clock SPI1 */
+#define K210_PCF_CLK_I2C1 23 /* Clock I2C1 */
+#define K210_PCF_GPIOHS0 24 /* GPIO High speed 0 */
+#define K210_PCF_GPIOHS1 25 /* GPIO High speed 1 */
+#define K210_PCF_GPIOHS2 26 /* GPIO High speed 2 */
+#define K210_PCF_GPIOHS3 27 /* GPIO High speed 3 */
+#define K210_PCF_GPIOHS4 28 /* GPIO High speed 4 */
+#define K210_PCF_GPIOHS5 29 /* GPIO High speed 5 */
+#define K210_PCF_GPIOHS6 30 /* GPIO High speed 6 */
+#define K210_PCF_GPIOHS7 31 /* GPIO High speed 7 */
+#define K210_PCF_GPIOHS8 32 /* GPIO High speed 8 */
+#define K210_PCF_GPIOHS9 33 /* GPIO High speed 9 */
+#define K210_PCF_GPIOHS10 34 /* GPIO High speed 10 */
+#define K210_PCF_GPIOHS11 35 /* GPIO High speed 11 */
+#define K210_PCF_GPIOHS12 36 /* GPIO High speed 12 */
+#define K210_PCF_GPIOHS13 37 /* GPIO High speed 13 */
+#define K210_PCF_GPIOHS14 38 /* GPIO High speed 14 */
+#define K210_PCF_GPIOHS15 39 /* GPIO High speed 15 */
+#define K210_PCF_GPIOHS16 40 /* GPIO High speed 16 */
+#define K210_PCF_GPIOHS17 41 /* GPIO High speed 17 */
+#define K210_PCF_GPIOHS18 42 /* GPIO High speed 18 */
+#define K210_PCF_GPIOHS19 43 /* GPIO High speed 19 */
+#define K210_PCF_GPIOHS20 44 /* GPIO High speed 20 */
+#define K210_PCF_GPIOHS21 45 /* GPIO High speed 21 */
+#define K210_PCF_GPIOHS22 46 /* GPIO High speed 22 */
+#define K210_PCF_GPIOHS23 47 /* GPIO High speed 23 */
+#define K210_PCF_GPIOHS24 48 /* GPIO High speed 24 */
+#define K210_PCF_GPIOHS25 49 /* GPIO High speed 25 */
+#define K210_PCF_GPIOHS26 50 /* GPIO High speed 26 */
+#define K210_PCF_GPIOHS27 51 /* GPIO High speed 27 */
+#define K210_PCF_GPIOHS28 52 /* GPIO High speed 28 */
+#define K210_PCF_GPIOHS29 53 /* GPIO High speed 29 */
+#define K210_PCF_GPIOHS30 54 /* GPIO High speed 30 */
+#define K210_PCF_GPIOHS31 55 /* GPIO High speed 31 */
+#define K210_PCF_GPIO0 56 /* GPIO pin 0 */
+#define K210_PCF_GPIO1 57 /* GPIO pin 1 */
+#define K210_PCF_GPIO2 58 /* GPIO pin 2 */
+#define K210_PCF_GPIO3 59 /* GPIO pin 3 */
+#define K210_PCF_GPIO4 60 /* GPIO pin 4 */
+#define K210_PCF_GPIO5 61 /* GPIO pin 5 */
+#define K210_PCF_GPIO6 62 /* GPIO pin 6 */
+#define K210_PCF_GPIO7 63 /* GPIO pin 7 */
+#define K210_PCF_UART1_RX 64 /* UART1 Receiver */
+#define K210_PCF_UART1_TX 65 /* UART1 Transmitter */
+#define K210_PCF_UART2_RX 66 /* UART2 Receiver */
+#define K210_PCF_UART2_TX 67 /* UART2 Transmitter */
+#define K210_PCF_UART3_RX 68 /* UART3 Receiver */
+#define K210_PCF_UART3_TX 69 /* UART3 Transmitter */
+#define K210_PCF_SPI1_D0 70 /* SPI1 Data 0 */
+#define K210_PCF_SPI1_D1 71 /* SPI1 Data 1 */
+#define K210_PCF_SPI1_D2 72 /* SPI1 Data 2 */
+#define K210_PCF_SPI1_D3 73 /* SPI1 Data 3 */
+#define K210_PCF_SPI1_D4 74 /* SPI1 Data 4 */
+#define K210_PCF_SPI1_D5 75 /* SPI1 Data 5 */
+#define K210_PCF_SPI1_D6 76 /* SPI1 Data 6 */
+#define K210_PCF_SPI1_D7 77 /* SPI1 Data 7 */
+#define K210_PCF_SPI1_SS0 78 /* SPI1 Chip Select 0 */
+#define K210_PCF_SPI1_SS1 79 /* SPI1 Chip Select 1 */
+#define K210_PCF_SPI1_SS2 80 /* SPI1 Chip Select 2 */
+#define K210_PCF_SPI1_SS3 81 /* SPI1 Chip Select 3 */
+#define K210_PCF_SPI1_ARB 82 /* SPI1 Arbitration */
+#define K210_PCF_SPI1_SCLK 83 /* SPI1 Serial Clock */
+#define K210_PCF_SPI2_D0 84 /* SPI2 Data 0 */
+#define K210_PCF_SPI2_SS 85 /* SPI2 Select */
+#define K210_PCF_SPI2_SCLK 86 /* SPI2 Serial Clock */
+#define K210_PCF_I2S0_MCLK 87 /* I2S0 Master Clock */
+#define K210_PCF_I2S0_SCLK 88 /* I2S0 Serial Clock(BCLK) */
+#define K210_PCF_I2S0_WS 89 /* I2S0 Word Select(LRCLK) */
+#define K210_PCF_I2S0_IN_D0 90 /* I2S0 Serial Data Input 0 */
+#define K210_PCF_I2S0_IN_D1 91 /* I2S0 Serial Data Input 1 */
+#define K210_PCF_I2S0_IN_D2 92 /* I2S0 Serial Data Input 2 */
+#define K210_PCF_I2S0_IN_D3 93 /* I2S0 Serial Data Input 3 */
+#define K210_PCF_I2S0_OUT_D0 94 /* I2S0 Serial Data Output 0 */
+#define K210_PCF_I2S0_OUT_D1 95 /* I2S0 Serial Data Output 1 */
+#define K210_PCF_I2S0_OUT_D2 96 /* I2S0 Serial Data Output 2 */
+#define K210_PCF_I2S0_OUT_D3 97 /* I2S0 Serial Data Output 3 */
+#define K210_PCF_I2S1_MCLK 98 /* I2S1 Master Clock */
+#define K210_PCF_I2S1_SCLK 99 /* I2S1 Serial Clock(BCLK) */
+#define K210_PCF_I2S1_WS 100 /* I2S1 Word Select(LRCLK) */
+#define K210_PCF_I2S1_IN_D0 101 /* I2S1 Serial Data Input 0 */
+#define K210_PCF_I2S1_IN_D1 102 /* I2S1 Serial Data Input 1 */
+#define K210_PCF_I2S1_IN_D2 103 /* I2S1 Serial Data Input 2 */
+#define K210_PCF_I2S1_IN_D3 104 /* I2S1 Serial Data Input 3 */
+#define K210_PCF_I2S1_OUT_D0 105 /* I2S1 Serial Data Output 0 */
+#define K210_PCF_I2S1_OUT_D1 106 /* I2S1 Serial Data Output 1 */
+#define K210_PCF_I2S1_OUT_D2 107 /* I2S1 Serial Data Output 2 */
+#define K210_PCF_I2S1_OUT_D3 108 /* I2S1 Serial Data Output 3 */
+#define K210_PCF_I2S2_MCLK 109 /* I2S2 Master Clock */
+#define K210_PCF_I2S2_SCLK 110 /* I2S2 Serial Clock(BCLK) */
+#define K210_PCF_I2S2_WS 111 /* I2S2 Word Select(LRCLK) */
+#define K210_PCF_I2S2_IN_D0 112 /* I2S2 Serial Data Input 0 */
+#define K210_PCF_I2S2_IN_D1 113 /* I2S2 Serial Data Input 1 */
+#define K210_PCF_I2S2_IN_D2 114 /* I2S2 Serial Data Input 2 */
+#define K210_PCF_I2S2_IN_D3 115 /* I2S2 Serial Data Input 3 */
+#define K210_PCF_I2S2_OUT_D0 116 /* I2S2 Serial Data Output 0 */
+#define K210_PCF_I2S2_OUT_D1 117 /* I2S2 Serial Data Output 1 */
+#define K210_PCF_I2S2_OUT_D2 118 /* I2S2 Serial Data Output 2 */
+#define K210_PCF_I2S2_OUT_D3 119 /* I2S2 Serial Data Output 3 */
+#define K210_PCF_RESV0 120 /* Reserved function */
+#define K210_PCF_RESV1 121 /* Reserved function */
+#define K210_PCF_RESV2 122 /* Reserved function */
+#define K210_PCF_RESV3 123 /* Reserved function */
+#define K210_PCF_RESV4 124 /* Reserved function */
+#define K210_PCF_RESV5 125 /* Reserved function */
+#define K210_PCF_I2C0_SCLK 126 /* I2C0 Serial Clock */
+#define K210_PCF_I2C0_SDA 127 /* I2C0 Serial Data */
+#define K210_PCF_I2C1_SCLK 128 /* I2C1 Serial Clock */
+#define K210_PCF_I2C1_SDA 129 /* I2C1 Serial Data */
+#define K210_PCF_I2C2_SCLK 130 /* I2C2 Serial Clock */
+#define K210_PCF_I2C2_SDA 131 /* I2C2 Serial Data */
+#define K210_PCF_DVP_XCLK 132 /* DVP System Clock */
+#define K210_PCF_DVP_RST 133 /* DVP System Reset */
+#define K210_PCF_DVP_PWDN 134 /* DVP Power Down Mode */
+#define K210_PCF_DVP_VSYNC 135 /* DVP Vertical Sync */
+#define K210_PCF_DVP_HSYNC 136 /* DVP Horizontal Sync */
+#define K210_PCF_DVP_PCLK 137 /* DVP Pixel Clock */
+#define K210_PCF_DVP_D0 138 /* DVP Data Bit 0 */
+#define K210_PCF_DVP_D1 139 /* DVP Data Bit 1 */
+#define K210_PCF_DVP_D2 140 /* DVP Data Bit 2 */
+#define K210_PCF_DVP_D3 141 /* DVP Data Bit 3 */
+#define K210_PCF_DVP_D4 142 /* DVP Data Bit 4 */
+#define K210_PCF_DVP_D5 143 /* DVP Data Bit 5 */
+#define K210_PCF_DVP_D6 144 /* DVP Data Bit 6 */
+#define K210_PCF_DVP_D7 145 /* DVP Data Bit 7 */
+#define K210_PCF_SCCB_SCLK 146 /* Serial Camera Control Bus Clock */
+#define K210_PCF_SCCB_SDA 147 /* Serial Camera Control Bus Data */
+#define K210_PCF_UART1_CTS 148 /* UART1 Clear To Send */
+#define K210_PCF_UART1_DSR 149 /* UART1 Data Set Ready */
+#define K210_PCF_UART1_DCD 150 /* UART1 Data Carrier Detect */
+#define K210_PCF_UART1_RI 151 /* UART1 Ring Indicator */
+#define K210_PCF_UART1_SIR_IN 152 /* UART1 Serial Infrared Input */
+#define K210_PCF_UART1_DTR 153 /* UART1 Data Terminal Ready */
+#define K210_PCF_UART1_RTS 154 /* UART1 Request To Send */
+#define K210_PCF_UART1_OUT2 155 /* UART1 User-designated Output 2 */
+#define K210_PCF_UART1_OUT1 156 /* UART1 User-designated Output 1 */
+#define K210_PCF_UART1_SIR_OUT 157 /* UART1 Serial Infrared Output */
+#define K210_PCF_UART1_BAUD 158 /* UART1 Transmit Clock Output */
+#define K210_PCF_UART1_RE 159 /* UART1 Receiver Output Enable */
+#define K210_PCF_UART1_DE 160 /* UART1 Driver Output Enable */
+#define K210_PCF_UART1_RS485_EN 161 /* UART1 RS485 Enable */
+#define K210_PCF_UART2_CTS 162 /* UART2 Clear To Send */
+#define K210_PCF_UART2_DSR 163 /* UART2 Data Set Ready */
+#define K210_PCF_UART2_DCD 164 /* UART2 Data Carrier Detect */
+#define K210_PCF_UART2_RI 165 /* UART2 Ring Indicator */
+#define K210_PCF_UART2_SIR_IN 166 /* UART2 Serial Infrared Input */
+#define K210_PCF_UART2_DTR 167 /* UART2 Data Terminal Ready */
+#define K210_PCF_UART2_RTS 168 /* UART2 Request To Send */
+#define K210_PCF_UART2_OUT2 169 /* UART2 User-designated Output 2 */
+#define K210_PCF_UART2_OUT1 170 /* UART2 User-designated Output 1 */
+#define K210_PCF_UART2_SIR_OUT 171 /* UART2 Serial Infrared Output */
+#define K210_PCF_UART2_BAUD 172 /* UART2 Transmit Clock Output */
+#define K210_PCF_UART2_RE 173 /* UART2 Receiver Output Enable */
+#define K210_PCF_UART2_DE 174 /* UART2 Driver Output Enable */
+#define K210_PCF_UART2_RS485_EN 175 /* UART2 RS485 Enable */
+#define K210_PCF_UART3_CTS 176 /* UART3 Clear To Send */
+#define K210_PCF_UART3_DSR 177 /* UART3 Data Set Ready */
+#define K210_PCF_UART3_DCD 178 /* UART3 Data Carrier Detect */
+#define K210_PCF_UART3_RI 179 /* UART3 Ring Indicator */
+#define K210_PCF_UART3_SIR_IN 180 /* UART3 Serial Infrared Input */
+#define K210_PCF_UART3_DTR 181 /* UART3 Data Terminal Ready */
+#define K210_PCF_UART3_RTS 182 /* UART3 Request To Send */
+#define K210_PCF_UART3_OUT2 183 /* UART3 User-designated Output 2 */
+#define K210_PCF_UART3_OUT1 184 /* UART3 User-designated Output 1 */
+#define K210_PCF_UART3_SIR_OUT 185 /* UART3 Serial Infrared Output */
+#define K210_PCF_UART3_BAUD 186 /* UART3 Transmit Clock Output */
+#define K210_PCF_UART3_RE 187 /* UART3 Receiver Output Enable */
+#define K210_PCF_UART3_DE 188 /* UART3 Driver Output Enable */
+#define K210_PCF_UART3_RS485_EN 189 /* UART3 RS485 Enable */
+#define K210_PCF_TIMER0_TOGGLE1 190 /* TIMER0 Toggle Output 1 */
+#define K210_PCF_TIMER0_TOGGLE2 191 /* TIMER0 Toggle Output 2 */
+#define K210_PCF_TIMER0_TOGGLE3 192 /* TIMER0 Toggle Output 3 */
+#define K210_PCF_TIMER0_TOGGLE4 193 /* TIMER0 Toggle Output 4 */
+#define K210_PCF_TIMER1_TOGGLE1 194 /* TIMER1 Toggle Output 1 */
+#define K210_PCF_TIMER1_TOGGLE2 195 /* TIMER1 Toggle Output 2 */
+#define K210_PCF_TIMER1_TOGGLE3 196 /* TIMER1 Toggle Output 3 */
+#define K210_PCF_TIMER1_TOGGLE4 197 /* TIMER1 Toggle Output 4 */
+#define K210_PCF_TIMER2_TOGGLE1 198 /* TIMER2 Toggle Output 1 */
+#define K210_PCF_TIMER2_TOGGLE2 199 /* TIMER2 Toggle Output 2 */
+#define K210_PCF_TIMER2_TOGGLE3 200 /* TIMER2 Toggle Output 3 */
+#define K210_PCF_TIMER2_TOGGLE4 201 /* TIMER2 Toggle Output 4 */
+#define K210_PCF_CLK_SPI2 202 /* Clock SPI2 */
+#define K210_PCF_CLK_I2C2 203 /* Clock I2C2 */
+#define K210_PCF_INTERNAL0 204 /* Internal function signal 0 */
+#define K210_PCF_INTERNAL1 205 /* Internal function signal 1 */
+#define K210_PCF_INTERNAL2 206 /* Internal function signal 2 */
+#define K210_PCF_INTERNAL3 207 /* Internal function signal 3 */
+#define K210_PCF_INTERNAL4 208 /* Internal function signal 4 */
+#define K210_PCF_INTERNAL5 209 /* Internal function signal 5 */
+#define K210_PCF_INTERNAL6 210 /* Internal function signal 6 */
+#define K210_PCF_INTERNAL7 211 /* Internal function signal 7 */
+#define K210_PCF_INTERNAL8 212 /* Internal function signal 8 */
+#define K210_PCF_INTERNAL9 213 /* Internal function signal 9 */
+#define K210_PCF_INTERNAL10 214 /* Internal function signal 10 */
+#define K210_PCF_INTERNAL11 215 /* Internal function signal 11 */
+#define K210_PCF_INTERNAL12 216 /* Internal function signal 12 */
+#define K210_PCF_INTERNAL13 217 /* Internal function signal 13 */
+#define K210_PCF_INTERNAL14 218 /* Internal function signal 14 */
+#define K210_PCF_INTERNAL15 219 /* Internal function signal 15 */
+#define K210_PCF_INTERNAL16 220 /* Internal function signal 16 */
+#define K210_PCF_INTERNAL17 221 /* Internal function signal 17 */
+#define K210_PCF_CONSTANT 222 /* Constant function */
+#define K210_PCF_INTERNAL18 223 /* Internal function signal 18 */
+#define K210_PCF_DEBUG0 224 /* Debug function 0 */
+#define K210_PCF_DEBUG1 225 /* Debug function 1 */
+#define K210_PCF_DEBUG2 226 /* Debug function 2 */
+#define K210_PCF_DEBUG3 227 /* Debug function 3 */
+#define K210_PCF_DEBUG4 228 /* Debug function 4 */
+#define K210_PCF_DEBUG5 229 /* Debug function 5 */
+#define K210_PCF_DEBUG6 230 /* Debug function 6 */
+#define K210_PCF_DEBUG7 231 /* Debug function 7 */
+#define K210_PCF_DEBUG8 232 /* Debug function 8 */
+#define K210_PCF_DEBUG9 233 /* Debug function 9 */
+#define K210_PCF_DEBUG10 234 /* Debug function 10 */
+#define K210_PCF_DEBUG11 235 /* Debug function 11 */
+#define K210_PCF_DEBUG12 236 /* Debug function 12 */
+#define K210_PCF_DEBUG13 237 /* Debug function 13 */
+#define K210_PCF_DEBUG14 238 /* Debug function 14 */
+#define K210_PCF_DEBUG15 239 /* Debug function 15 */
+#define K210_PCF_DEBUG16 240 /* Debug function 16 */
+#define K210_PCF_DEBUG17 241 /* Debug function 17 */
+#define K210_PCF_DEBUG18 242 /* Debug function 18 */
+#define K210_PCF_DEBUG19 243 /* Debug function 19 */
+#define K210_PCF_DEBUG20 244 /* Debug function 20 */
+#define K210_PCF_DEBUG21 245 /* Debug function 21 */
+#define K210_PCF_DEBUG22 246 /* Debug function 22 */
+#define K210_PCF_DEBUG23 247 /* Debug function 23 */
+#define K210_PCF_DEBUG24 248 /* Debug function 24 */
+#define K210_PCF_DEBUG25 249 /* Debug function 25 */
+#define K210_PCF_DEBUG26 250 /* Debug function 26 */
+#define K210_PCF_DEBUG27 251 /* Debug function 27 */
+#define K210_PCF_DEBUG28 252 /* Debug function 28 */
+#define K210_PCF_DEBUG29 253 /* Debug function 29 */
+#define K210_PCF_DEBUG30 254 /* Debug function 30 */
+#define K210_PCF_DEBUG31 255 /* Debug function 31 */
+
+#define K210_FPIOA(pin, func) (((pin) << 16) | (func))
+
+#define K210_PC_POWER_3V3 0
+#define K210_PC_POWER_1V8 1
+
+#endif /* PINCTRL_K210_FPIOA_H */
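The K210_FPIOA() cell macro packs the pin number into bits 31:16 and the function code into bits 15:0. A minimal sketch, assuming a hypothetical board that routes UART1 TX to pin 5:

#include <dt-bindings/pinctrl/k210-fpioa.h>

/* K210_FPIOA(pin, func) == (pin << 16) | func */
#define UART1_TX_ON_PIN5	K210_FPIOA(5, K210_PCF_UART1_TX)
/* == (5 << 16) | 65 == 0x00050041 */

In a device tree source, the same value would sit in a pinmux property, e.g. pinmux = <K210_FPIOA(5, K210_PCF_UART1_TX)>;.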
diff --git a/include/dt-bindings/reset/k210-rst.h b/include/dt-bindings/reset/k210-rst.h
new file mode 100644
index 000000000000..883c1aed50e8
--- /dev/null
+++ b/include/dt-bindings/reset/k210-rst.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2019 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef RESET_K210_SYSCTL_H
+#define RESET_K210_SYSCTL_H
+
+/*
+ * Kendryte K210 SoC system controller K210_SYSCTL_SOFT_RESET register bits.
+ * Taken from Kendryte SDK (kendryte-standalone-sdk).
+ */
+#define K210_RST_ROM 0
+#define K210_RST_DMA 1
+#define K210_RST_AI 2
+#define K210_RST_DVP 3
+#define K210_RST_FFT 4
+#define K210_RST_GPIO 5
+#define K210_RST_SPI0 6
+#define K210_RST_SPI1 7
+#define K210_RST_SPI2 8
+#define K210_RST_SPI3 9
+#define K210_RST_I2S0 10
+#define K210_RST_I2S1 11
+#define K210_RST_I2S2 12
+#define K210_RST_I2C0 13
+#define K210_RST_I2C1 14
+#define K210_RST_I2C2 15
+#define K210_RST_UART1 16
+#define K210_RST_UART2 17
+#define K210_RST_UART3 18
+#define K210_RST_AES 19
+#define K210_RST_FPIOA 20
+#define K210_RST_TIMER0 21
+#define K210_RST_TIMER1 22
+#define K210_RST_TIMER2 23
+#define K210_RST_WDT0 24
+#define K210_RST_WDT1 25
+#define K210_RST_SHA 26
+#define K210_RST_RTC 29
+
+#endif /* RESET_K210_SYSCTL_H */
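These indices select the bit to pulse in the K210_SYSCTL_SOFT_RESET register and are consumed through the generic reset binding. A hedged consumer-side sketch (the node and variable names are assumptions):

/* DT: resets = <&sysctl K210_RST_UART1>; */
struct reset_control *rst;

rst = devm_reset_control_get_exclusive(dev, NULL);
if (IS_ERR(rst))
	return PTR_ERR(rst);
reset_control_reset(rst);	/* pulse the UART1 soft reset */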
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 3c5757d539ab..fcdaab723916 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -746,12 +746,12 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv)
static inline void acpi_dev_put(struct acpi_device *adev) {}
-static inline bool is_acpi_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_node(const struct fwnode_handle *fwnode)
{
return false;
}
-static inline bool is_acpi_device_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
{
return false;
}
@@ -761,7 +761,7 @@ static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwno
return NULL;
}
-static inline bool is_acpi_data_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode)
{
return false;
}
@@ -1079,19 +1079,25 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c
#if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB)
bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
struct acpi_resource_gpio **agpio);
-int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index);
+int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index);
#else
static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
struct acpi_resource_gpio **agpio)
{
return false;
}
-static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
+static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev,
+ const char *name, int index)
{
return -ENXIO;
}
#endif
+static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
+{
+ return acpi_dev_gpio_irq_get_by(adev, NULL, index);
+}
+
/* Device properties */
#ifdef CONFIG_ACPI
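acpi_dev_gpio_irq_get_by() adds a name argument for selecting the GpioInt() resource, and the old index-only call becomes a trivial wrapper passing a NULL name. A sketch of a caller preferring a named lookup; the "reset" string and its exact semantics are assumptions drawn only from the signature above:

int irq;

irq = acpi_dev_gpio_irq_get_by(adev, "reset", 0);
if (irq < 0)
	irq = acpi_dev_gpio_irq_get(adev, 0);	/* unnamed fallback */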
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 5b468f2242ff..983ed2fe7c85 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -20,7 +20,12 @@
#define BIO_BUG_ON
#endif
-#define BIO_MAX_PAGES 256
+#define BIO_MAX_PAGES 256U
+
+static inline unsigned int bio_max_segs(unsigned int nr_segs)
+{
+ return min(nr_segs, BIO_MAX_PAGES);
+}
#define bio_prio(bio) (bio)->bi_ioprio
#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio)
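bio_max_segs() replaces the min_t(unsigned int, ..., BIO_MAX_PAGES) clamping that callers previously open-coded; making BIO_MAX_PAGES unsigned (256U) lets plain min() pass its strict type check. A hedged usage sketch:

/* before: bio_alloc(GFP_KERNEL, min_t(unsigned int, nr_pages, BIO_MAX_PAGES)) */
struct bio *bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages));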
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index a61f192c096b..a5a48303b0f1 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -214,7 +214,7 @@ static inline int get_count_order_long(unsigned long l)
* __ffs64 - find first set bit in a 64 bit word
* @word: The 64 bit word
*
- * On 64 bit arches this is a synomyn for __ffs
+ * On 64 bit arches this is a synonym for __ffs
* The result is not defined if no bits are set, so check that @word
* is non-zero before calling this.
*/
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 69035e9f632b..bc6bc8383b43 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -65,8 +65,6 @@ typedef void (rq_end_io_fn)(struct request *, blk_status_t);
* request flags */
typedef __u32 __bitwise req_flags_t;
-/* elevator knows about this request */
-#define RQF_SORTED ((__force req_flags_t)(1 << 0))
/* drive already may have started this one */
#define RQF_STARTED ((__force req_flags_t)(1 << 1))
/* may not be passed by ioscheduler */
@@ -462,7 +460,6 @@ struct request_queue {
#ifdef CONFIG_PM
struct device *dev;
enum rpm_status rpm_status;
- unsigned int nr_pending;
#endif
/*
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 05556573b896..a083e15df608 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -23,8 +23,6 @@ struct blk_trace {
u32 pid;
u32 dev;
struct dentry *dir;
- struct dentry *dropped_file;
- struct dentry *msg_file;
struct list_head running_list;
atomic_t dropped;
};
@@ -119,7 +117,7 @@ struct compat_blk_user_trace_setup {
#endif
-extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
+void blk_fill_rwbs(char *rwbs, unsigned int op);
static inline sector_t blk_rq_trace_sector(struct request *rq)
{
diff --git a/include/linux/cfag12864b.h b/include/linux/cfag12864b.h
index 4060004968c8..6617d9c68d86 100644
--- a/include/linux/cfag12864b.h
+++ b/include/linux/cfag12864b.h
@@ -4,7 +4,7 @@
* Version: 0.1.0
* Description: cfag12864b LCD driver header
*
- * Author: Copyright (C) Miguel Ojeda Sandonis
+ * Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-12
*/
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index ee09a39627d6..f14adb882338 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -168,6 +168,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_X86_CQM_ONLINE,
CPUHP_AP_PERF_X86_CSTATE_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
+ CPUHP_AP_PERF_S390_CFD_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
CPUHP_AP_PERF_ARM_CCI_ONLINE,
CPUHP_AP_PERF_ARM_CCN_ONLINE,
@@ -185,6 +186,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
+ CPUHP_AP_PERF_CSKY_ONLINE,
CPUHP_AP_WATCHDOG_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
diff --git a/include/linux/cred.h b/include/linux/cred.h
index 18639c069263..4c6350503697 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -25,7 +25,7 @@ struct inode;
struct group_info {
atomic_t usage;
int ngroups;
- kgid_t gid[0];
+ kgid_t gid[];
} __randomize_layout;
/**
diff --git a/include/linux/device.h b/include/linux/device.h
index 7619a84f8ce4..ba660731bd25 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -291,6 +291,7 @@ struct device_dma_parameters {
* sg limitations.
*/
unsigned int max_segment_size;
+ unsigned int min_align_mask;
unsigned long segment_boundary_mask;
};
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index fbfa3f5abd94..2a984cb4d1e0 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -509,6 +509,22 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
return -EIO;
}
+static inline unsigned int dma_get_min_align_mask(struct device *dev)
+{
+ if (dev->dma_parms)
+ return dev->dma_parms->min_align_mask;
+ return 0;
+}
+
+static inline int dma_set_min_align_mask(struct device *dev,
+ unsigned int min_align_mask)
+{
+ if (WARN_ON_ONCE(!dev->dma_parms))
+ return -EIO;
+ dev->dma_parms->min_align_mask = min_align_mask;
+ return 0;
+}
+
static inline int dma_get_cache_alignment(void)
{
#ifdef ARCH_DMA_MINALIGN
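min_align_mask lets a driver tell the DMA layer which low address bits of a mapped buffer must be preserved, e.g. when swiotlb substitutes a bounce buffer. A sketch of a driver opting in at probe time; the 4 KiB granule is an illustrative assumption:

/* keep the offset within each 4 KiB granule stable across bouncing */
if (dma_set_min_align_mask(dev, SZ_4K - 1))
	dev_warn(dev, "failed to set DMA min align mask\n");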
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
new file mode 100644
index 000000000000..c1be37437e77
--- /dev/null
+++ b/include/linux/fortify-string.h
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FORTIFY_STRING_H_
+#define _LINUX_FORTIFY_STRING_H_
+
+
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
+extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp);
+extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy);
+extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove);
+extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset);
+extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat);
+extern char *__underlying_strcpy(char *p, const char *q) __RENAME(strcpy);
+extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
+extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
+extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
+#else
+#define __underlying_memchr __builtin_memchr
+#define __underlying_memcmp __builtin_memcmp
+#define __underlying_memcpy __builtin_memcpy
+#define __underlying_memmove __builtin_memmove
+#define __underlying_memset __builtin_memset
+#define __underlying_strcat __builtin_strcat
+#define __underlying_strcpy __builtin_strcpy
+#define __underlying_strlen __builtin_strlen
+#define __underlying_strncat __builtin_strncat
+#define __underlying_strncpy __builtin_strncpy
+#endif
+
+__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 1);
+
+ if (__builtin_constant_p(size) && p_size < size)
+ __write_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __underlying_strncpy(p, q, size);
+}
+
+__FORTIFY_INLINE char *strcat(char *p, const char *q)
+{
+ size_t p_size = __builtin_object_size(p, 1);
+
+ if (p_size == (size_t)-1)
+ return __underlying_strcat(p, q);
+ if (strlcat(p, q, p_size) >= p_size)
+ fortify_panic(__func__);
+ return p;
+}
+
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+{
+ __kernel_size_t ret;
+ size_t p_size = __builtin_object_size(p, 1);
+
+ /* Work around gcc excess stack consumption issue */
+ if (p_size == (size_t)-1 ||
+ (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
+ return __underlying_strlen(p);
+ ret = strnlen(p, p_size);
+ if (p_size <= ret)
+ fortify_panic(__func__);
+ return ret;
+}
+
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+ size_t p_size = __builtin_object_size(p, 1);
+ __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+
+ if (p_size <= ret && maxlen != ret)
+ fortify_panic(__func__);
+ return ret;
+}
+
+/* defined after fortified strlen to reuse it */
+extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
+{
+ size_t ret;
+ size_t p_size = __builtin_object_size(p, 1);
+ size_t q_size = __builtin_object_size(q, 1);
+
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __real_strlcpy(p, q, size);
+ ret = strlen(q);
+ if (size) {
+ size_t len = (ret >= size) ? size - 1 : ret;
+
+ if (__builtin_constant_p(len) && len >= p_size)
+ __write_overflow();
+ if (len >= p_size)
+ fortify_panic(__func__);
+ __underlying_memcpy(p, q, len);
+ p[len] = '\0';
+ }
+ return ret;
+}
+
+/* defined after fortified strnlen to reuse it */
+extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
+__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size)
+{
+ size_t len;
+ /* Use string size rather than possible enclosing struct size. */
+ size_t p_size = __builtin_object_size(p, 1);
+ size_t q_size = __builtin_object_size(q, 1);
+
+ /* If we cannot get the size of p and q, default to calling strscpy. */
+ if (p_size == (size_t) -1 && q_size == (size_t) -1)
+ return __real_strscpy(p, q, size);
+
+ /*
+ * If size can be known at compile time and is greater than
+ * p_size, generate a compile time write overflow error.
+ */
+ if (__builtin_constant_p(size) && size > p_size)
+ __write_overflow();
+
+ /*
+ * This call protects against read overflow, because len will default to
+ * the length of q if it is smaller than size.
+ */
+ len = strnlen(q, size);
+ /*
+ * If len equals size, we will copy only size bytes, which leads to
+ * -E2BIG being returned.
+ * Otherwise we will copy len + 1 bytes because of the final '\0'.
+ */
+ len = len == size ? size : len + 1;
+
+ /*
+ * Generate a runtime write overflow error if len is greater than
+ * p_size.
+ */
+ if (len > p_size)
+ fortify_panic(__func__);
+
+ /*
+ * We can now safely call vanilla strscpy because we are protected from:
+ * 1. Read overflow thanks to call to strnlen().
+ * 2. Write overflow thanks to above ifs.
+ */
+ return __real_strscpy(p, q, len);
+}
+
+/* defined after fortified strlen and strnlen to reuse them */
+__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
+{
+ size_t p_len, copy_len;
+ size_t p_size = __builtin_object_size(p, 1);
+ size_t q_size = __builtin_object_size(q, 1);
+
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __underlying_strncat(p, q, count);
+ p_len = strlen(p);
+ copy_len = strnlen(q, count);
+ if (p_size < p_len + copy_len + 1)
+ fortify_panic(__func__);
+ __underlying_memcpy(p + p_len, q, copy_len);
+ p[p_len + copy_len] = '\0';
+ return p;
+}
+
+__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+
+ if (__builtin_constant_p(size) && p_size < size)
+ __write_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __underlying_memset(p, c, size);
+}
+
+__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __write_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __underlying_memcpy(p, q, size);
+}
+
+__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __write_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __underlying_memmove(p, q, size);
+}
+
+extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
+__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_memscan(p, c, size);
+}
+
+__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+ size_t q_size = __builtin_object_size(q, 0);
+
+ if (__builtin_constant_p(size)) {
+ if (p_size < size)
+ __read_overflow();
+ if (q_size < size)
+ __read_overflow2();
+ }
+ if (p_size < size || q_size < size)
+ fortify_panic(__func__);
+ return __underlying_memcmp(p, q, size);
+}
+
+__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __underlying_memchr(p, c, size);
+}
+
+void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
+__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_memchr_inv(p, c, size);
+}
+
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
+{
+ size_t p_size = __builtin_object_size(p, 0);
+
+ if (__builtin_constant_p(size) && p_size < size)
+ __read_overflow();
+ if (p_size < size)
+ fortify_panic(__func__);
+ return __real_kmemdup(p, size, gfp);
+}
+
+/* defined after fortified strlen and memcpy to reuse them */
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+ size_t p_size = __builtin_object_size(p, 1);
+ size_t q_size = __builtin_object_size(q, 1);
+ size_t size;
+
+ if (p_size == (size_t)-1 && q_size == (size_t)-1)
+ return __underlying_strcpy(p, q);
+ size = strlen(q) + 1;
+ /* test here to use the more stringent object size */
+ if (p_size < size)
+ fortify_panic(__func__);
+ memcpy(p, q, size);
+ return p;
+}
+
+/* Don't use these outside the FORTIFY_SOURCE implementation */
+#undef __underlying_memchr
+#undef __underlying_memcmp
+#undef __underlying_memcpy
+#undef __underlying_memmove
+#undef __underlying_memset
+#undef __underlying_strcat
+#undef __underlying_strcpy
+#undef __underlying_strlen
+#undef __underlying_strncat
+#undef __underlying_strncpy
+
+#endif /* _LINUX_FORTIFY_STRING_H_ */
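Every helper above follows the same shape: __builtin_object_size() reports the compile-time size of the destination (or (size_t)-1 when unknown), a constant oversized length becomes a build error via __write_overflow()/__read_overflow(), and a variable length is checked at runtime by fortify_panic(). A minimal illustration of both paths (caller code, not part of this header):

char buf[8];

memcpy(buf, src, 16);	/* constant 16 > 8: __write_overflow(), build error */
memcpy(buf, src, n);	/* variable n: fortify_panic() at runtime if n > 8 */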
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 220cd553a9e7..8572a1474e16 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -634,6 +634,8 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
extern void pm_restrict_gfp_mask(void);
extern void pm_restore_gfp_mask(void);
+extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
+
#ifdef CONFIG_PM_SLEEP
extern bool pm_suspended_storage(void);
#else
diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index ef49307611d2..c73b25bc9213 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -674,6 +674,8 @@ struct acpi_gpio_mapping {
* get GpioIo type explicitly, this quirk may be used.
*/
#define ACPI_GPIO_QUIRK_ONLY_GPIOIO BIT(1)
+/* Use given pin as an absolute GPIO number in the system */
+#define ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER BIT(2)
unsigned int quirks;
};
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index d2c70d3772a3..44170f312ae7 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -276,4 +276,60 @@ static inline void copy_highpage(struct page *to, struct page *from)
#endif
+static inline void memcpy_page(struct page *dst_page, size_t dst_off,
+ struct page *src_page, size_t src_off,
+ size_t len)
+{
+ char *dst = kmap_local_page(dst_page);
+ char *src = kmap_local_page(src_page);
+
+ VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
+ memcpy(dst + dst_off, src + src_off, len);
+ kunmap_local(src);
+ kunmap_local(dst);
+}
+
+static inline void memmove_page(struct page *dst_page, size_t dst_off,
+ struct page *src_page, size_t src_off,
+ size_t len)
+{
+ char *dst = kmap_local_page(dst_page);
+ char *src = kmap_local_page(src_page);
+
+ VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
+ memmove(dst + dst_off, src + src_off, len);
+ kunmap_local(src);
+ kunmap_local(dst);
+}
+
+static inline void memset_page(struct page *page, size_t offset, int val,
+ size_t len)
+{
+ char *addr = kmap_local_page(page);
+
+ VM_BUG_ON(offset + len > PAGE_SIZE);
+ memset(addr + offset, val, len);
+ kunmap_local(addr);
+}
+
+static inline void memcpy_from_page(char *to, struct page *page,
+ size_t offset, size_t len)
+{
+ char *from = kmap_local_page(page);
+
+ VM_BUG_ON(offset + len > PAGE_SIZE);
+ memcpy(to, from + offset, len);
+ kunmap_local(from);
+}
+
+static inline void memcpy_to_page(struct page *page, size_t offset,
+ const char *from, size_t len)
+{
+ char *to = kmap_local_page(page);
+
+ VM_BUG_ON(offset + len > PAGE_SIZE);
+ memcpy(to + offset, from, len);
+ kunmap_local(to);
+}
+
#endif /* _LINUX_HIGHMEM_H */
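Each of these helpers wraps a single mem*() call in a kmap_local_page()/kunmap_local() pair, so callers no longer open-code the mapping. A before/after sketch for the copy-to-page case:

/* before */
char *to = kmap_local_page(page);
memcpy(to + offset, from, len);
kunmap_local(to);

/* after */
memcpy_to_page(page, offset, from, len);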
diff --git a/include/linux/init.h b/include/linux/init.h
index a01f01c1a5c5..31f54de58429 100644
--- a/include/linux/init.h
+++ b/include/linux/init.h
@@ -338,14 +338,14 @@ struct obs_kernel_param {
var = 1; \
return 0; \
} \
- __setup_param(str_on, parse_##var##_on, parse_##var##_on, 1); \
+ early_param(str_on, parse_##var##_on); \
\
static int __init parse_##var##_off(char *arg) \
{ \
var = 0; \
return 0; \
} \
- __setup_param(str_off, parse_##var##_off, parse_##var##_off, 1)
+ early_param(str_off, parse_##var##_off)
/* Relies on boot_command_line being set */
void __init parse_early_param(void);
diff --git a/include/linux/initrd.h b/include/linux/initrd.h
index 8db6f8c8030b..85c15717af34 100644
--- a/include/linux/initrd.h
+++ b/include/linux/initrd.h
@@ -1,5 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_INITRD_H
+#define __LINUX_INITRD_H
+
#define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
/* starting block # of image */
@@ -15,6 +18,12 @@ extern int initrd_below_start_ok;
extern unsigned long initrd_start, initrd_end;
extern void free_initrd_mem(unsigned long, unsigned long);
+#ifdef CONFIG_BLK_DEV_INITRD
+extern void __init reserve_initrd_mem(void);
+#else
+static inline void __init reserve_initrd_mem(void) {}
+#endif
+
extern phys_addr_t phys_initrd_start;
extern unsigned long phys_initrd_size;
@@ -24,3 +33,5 @@ extern char __initramfs_start[];
extern unsigned long __initramfs_size;
void console_on_rootfs(void);
+
+#endif /* __LINUX_INITRD_H */
diff --git a/include/linux/io_uring.h b/include/linux/io_uring.h
index 2eb6d19de336..7cb7bd0e334c 100644
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -5,23 +5,6 @@
#include <linux/sched.h>
#include <linux/xarray.h>
-struct io_identity {
- struct files_struct *files;
- struct mm_struct *mm;
-#ifdef CONFIG_BLK_CGROUP
- struct cgroup_subsys_state *blkcg_css;
-#endif
- const struct cred *creds;
- struct nsproxy *nsproxy;
- struct fs_struct *fs;
- unsigned long fsize;
-#ifdef CONFIG_AUDIT
- kuid_t loginuid;
- unsigned int sessionid;
-#endif
- refcount_t count;
-};
-
struct io_wq_work_node {
struct io_wq_work_node *next;
};
@@ -36,9 +19,8 @@ struct io_uring_task {
struct xarray xa;
struct wait_queue_head wait;
struct file *last;
+ void *io_wq;
struct percpu_counter inflight;
- struct io_identity __identity;
- struct io_identity *identity;
atomic_t in_idle;
bool sqpoll;
@@ -56,12 +38,12 @@ void __io_uring_free(struct task_struct *tsk);
static inline void io_uring_task_cancel(void)
{
- if (current->io_uring && !xa_empty(&current->io_uring->xa))
+ if (current->io_uring)
__io_uring_task_cancel();
}
static inline void io_uring_files_cancel(struct files_struct *files)
{
- if (current->io_uring && !xa_empty(&current->io_uring->xa))
+ if (current->io_uring)
__io_uring_files_cancel(files);
}
static inline void io_uring_free(struct task_struct *tsk)
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 7eaf2d9effb4..b91732bd05d7 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -83,6 +83,7 @@ static inline void kasan_disable_current(void) {}
struct kasan_cache {
int alloc_meta_offset;
int free_meta_offset;
+ bool is_kmalloc;
};
#ifdef CONFIG_KASAN_HW_TAGS
@@ -143,6 +144,13 @@ static __always_inline void kasan_cache_create(struct kmem_cache *cache,
__kasan_cache_create(cache, size, flags);
}
+void __kasan_cache_create_kmalloc(struct kmem_cache *cache);
+static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache)
+{
+ if (kasan_enabled())
+ __kasan_cache_create_kmalloc(cache);
+}
+
size_t __kasan_metadata_size(struct kmem_cache *cache);
static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
{
@@ -192,6 +200,13 @@ static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object)
return false;
}
+void __kasan_kfree_large(void *ptr, unsigned long ip);
+static __always_inline void kasan_kfree_large(void *ptr)
+{
+ if (kasan_enabled())
+ __kasan_kfree_large(ptr, _RET_IP_);
+}
+
void __kasan_slab_free_mempool(void *ptr, unsigned long ip);
static __always_inline void kasan_slab_free_mempool(void *ptr)
{
@@ -239,13 +254,6 @@ static __always_inline void * __must_check kasan_krealloc(const void *object,
return (void *)object;
}
-void __kasan_kfree_large(void *ptr, unsigned long ip);
-static __always_inline void kasan_kfree_large(void *ptr)
-{
- if (kasan_enabled())
- __kasan_kfree_large(ptr, _RET_IP_);
-}
-
/*
* Unlike kasan_check_read/write(), kasan_check_byte() is performed even for
* the hardware tag-based mode that doesn't rely on compiler instrumentation.
@@ -278,6 +286,7 @@ static inline void kasan_free_pages(struct page *page, unsigned int order) {}
static inline void kasan_cache_create(struct kmem_cache *cache,
unsigned int *size,
slab_flags_t *flags) {}
+static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
static inline void kasan_poison_slab(struct page *page) {}
static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
@@ -293,6 +302,7 @@ static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
{
return false;
}
+static inline void kasan_kfree_large(void *ptr) {}
static inline void kasan_slab_free_mempool(void *ptr) {}
static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object,
gfp_t flags)
@@ -313,7 +323,6 @@ static inline void *kasan_krealloc(const void *object, size_t new_size,
{
return (void *)object;
}
-static inline void kasan_kfree_large(void *ptr) {}
static inline bool kasan_check_byte(const void *address)
{
return true;
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 5f61389f5f36..8a7aa1d7e0e3 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -314,6 +314,8 @@ extern void machine_kexec_cleanup(struct kimage *image);
extern int kernel_kexec(void);
extern struct page *kimage_alloc_control_pages(struct kimage *image,
unsigned int order);
+int machine_kexec_post_load(struct kimage *image);
+
extern void __crash_kexec(struct pt_regs *);
extern void crash_kexec(struct pt_regs *);
int kexec_should_crash(struct task_struct *);
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
new file mode 100644
index 000000000000..a70d1ea03532
--- /dev/null
+++ b/include/linux/kfence.h
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel Electric-Fence (KFENCE). Public interface for allocator and fault
+ * handler integration. For more info see Documentation/dev-tools/kfence.rst.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef _LINUX_KFENCE_H
+#define _LINUX_KFENCE_H
+
+#include <linux/mm.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KFENCE
+
+/*
+ * We allocate an even number of pages, as it simplifies calculations to map
+ * address to metadata indices; effectively, the very first page serves as an
+ * extended guard page, but otherwise has no special purpose.
+ */
+#define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE)
+extern char *__kfence_pool;
+
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+#include <linux/static_key.h>
+DECLARE_STATIC_KEY_FALSE(kfence_allocation_key);
+#else
+#include <linux/atomic.h>
+extern atomic_t kfence_allocation_gate;
+#endif
+
+/**
+ * is_kfence_address() - check if an address belongs to KFENCE pool
+ * @addr: address to check
+ *
+ * Return: true or false depending on whether the address is within the KFENCE
+ * object range.
+ *
+ * KFENCE objects live in a separate page range and are not to be intermixed
+ * with regular heap objects (e.g. KFENCE objects must never be added to the
+ * allocator freelists). Failing to do so will result in heap
+ * corruption; therefore, is_kfence_address() must be used to check whether
+ * an object requires specific handling.
+ *
+ * Note: This function may be used in fast-paths, and is performance critical.
+ * Future changes should take this into account; for instance, we want to avoid
+ * introducing another load and therefore need to keep KFENCE_POOL_SIZE a
+ * constant (until immediate patching support is added to the kernel).
+ */
+static __always_inline bool is_kfence_address(const void *addr)
+{
+ /*
+ * The non-NULL check is required in case the __kfence_pool pointer was
+ * never initialized; keep it in the slow-path after the range-check.
+ */
+ return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr);
+}
+
+/**
+ * kfence_alloc_pool() - allocate the KFENCE pool via memblock
+ */
+void __init kfence_alloc_pool(void);
+
+/**
+ * kfence_init() - perform KFENCE initialization at boot time
+ *
+ * Requires that kfence_alloc_pool() was called before. This sets up the
+ * allocation gate timer, and requires that workqueues are available.
+ */
+void __init kfence_init(void);
+
+/**
+ * kfence_shutdown_cache() - handle shutdown_cache() for KFENCE objects
+ * @s: cache being shut down
+ *
+ * Before shutting down a cache, one must ensure there are no remaining objects
+ * allocated from it. Because KFENCE objects are not referenced from the cache
+ * directly, we need to check them here.
+ *
+ * Note that shutdown_cache() is internal to SL*B, and kmem_cache_destroy() does
+ * not fail if allocated objects still exist: it prints an error message and
+ * simply aborts destruction of the cache, leaking memory.
+ *
+ * If the only such objects are KFENCE objects, we will not leak the entire
+ * cache, but instead try to provide more useful debug info by making allocated
+ * objects "zombie allocations". Objects may then still be used or freed (which
+ * is handled gracefully), but any use will produce KFENCE error reports
+ * that include stack traces for the user of the object, the original
+ * allocation site, and the caller of shutdown_cache().
+ */
+void kfence_shutdown_cache(struct kmem_cache *s);
+
+/*
+ * Allocate a KFENCE object. Allocators must not call this function directly,
+ * use kfence_alloc() instead.
+ */
+void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags);
+
+/**
+ * kfence_alloc() - allocate a KFENCE object with a low probability
+ * @s: struct kmem_cache with object requirements
+ * @size: exact size of the object to allocate (can be less than @s->size
+ * e.g. for kmalloc caches)
+ * @flags: GFP flags
+ *
+ * Return:
+ * * NULL - must proceed with allocating as usual,
+ * * non-NULL - pointer to a KFENCE object.
+ *
+ * kfence_alloc() should be inserted into the heap allocation fast path,
+ * allowing it to transparently return KFENCE-allocated objects with a low
+ * probability using a static branch (the probability is controlled by the
+ * kfence.sample_interval boot parameter).
+ */
+static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+{
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+ if (static_branch_unlikely(&kfence_allocation_key))
+#else
+ if (unlikely(!atomic_read(&kfence_allocation_gate)))
+#endif
+ return __kfence_alloc(s, size, flags);
+ return NULL;
+}
+
+/**
+ * kfence_ksize() - get actual amount of memory allocated for a KFENCE object
+ * @addr: pointer to a heap object
+ *
+ * Return:
+ * * 0 - not a KFENCE object, must call __ksize() instead,
+ * * non-0 - this many bytes can be accessed without causing a memory error.
+ *
+ * kfence_ksize() returns the number of bytes requested for a KFENCE object at
+ * allocation time. This number may be less than the object size of the
+ * corresponding struct kmem_cache.
+ */
+size_t kfence_ksize(const void *addr);
+
+/**
+ * kfence_object_start() - find the beginning of a KFENCE object
+ * @addr: address within a KFENCE-allocated object
+ *
+ * Return: address of the beginning of the object.
+ *
+ * SL[AU]B-allocated objects are laid out within a page one by one, so it is
+ * easy to calculate the beginning of an object given a pointer inside it and
+ * the object size. The same is not true for KFENCE, which places a single
+ * object at either end of the page. This helper function is used to find the
+ * beginning of a KFENCE-allocated object.
+ */
+void *kfence_object_start(const void *addr);
+
+/**
+ * __kfence_free() - release a KFENCE heap object to KFENCE pool
+ * @addr: object to be freed
+ *
+ * Requires: is_kfence_address(addr)
+ *
+ * Release a KFENCE object and mark it as freed.
+ */
+void __kfence_free(void *addr);
+
+/**
+ * kfence_free() - try to release an arbitrary heap object to KFENCE pool
+ * @addr: object to be freed
+ *
+ * Return:
+ * * false - object doesn't belong to KFENCE pool and was ignored,
+ * * true - object was released to KFENCE pool.
+ *
+ * Release a KFENCE object and mark it as freed. May be called on any object,
+ * even non-KFENCE objects, to simplify integration of the hooks into the
+ * allocator's free codepath. The allocator must check the return value to
+ * determine if it was a KFENCE object or not.
+ */
+static __always_inline __must_check bool kfence_free(void *addr)
+{
+ if (!is_kfence_address(addr))
+ return false;
+ __kfence_free(addr);
+ return true;
+}
+
+/**
+ * kfence_handle_page_fault() - perform page fault handling for KFENCE pages
+ * @addr: faulting address
+ * @is_write: is access a write
+ * @regs: current struct pt_regs (can be NULL, but shows full stack trace)
+ *
+ * Return:
+ * * false - address outside KFENCE pool,
+ * * true - page fault handled by KFENCE, no additional handling required.
+ *
+ * A page fault inside KFENCE pool indicates a memory error, such as an
+ * out-of-bounds access, a use-after-free or an invalid memory access. In these
+ * cases KFENCE prints an error message and marks the offending page as
+ * present, so that the kernel can proceed.
+ */
+bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs);
+
+#else /* CONFIG_KFENCE */
+
+static inline bool is_kfence_address(const void *addr) { return false; }
+static inline void kfence_alloc_pool(void) { }
+static inline void kfence_init(void) { }
+static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
+static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
+static inline size_t kfence_ksize(const void *addr) { return 0; }
+static inline void *kfence_object_start(const void *addr) { return NULL; }
+static inline void __kfence_free(void *addr) { }
+static inline bool __must_check kfence_free(void *addr) { return false; }
+static inline bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write,
+ struct pt_regs *regs)
+{
+ return false;
+}
+
+#endif
+
+#endif /* _LINUX_KFENCE_H */
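The hooks are meant to vanish into the slab fast paths: kfence_alloc() is a static branch plus a rare call, kfence_free() a range check. A sketch of the intended integration, with the slab-internal control flow abbreviated:

/* allocation fast path */
void *obj = kfence_alloc(s, orig_size, flags);
if (unlikely(obj))
	return obj;		/* rare: KFENCE guarded object */
/* ... regular slab allocation ... */

/* free path */
if (kfence_free(ptr))
	return;			/* KFENCE object, fully handled */
/* ... regular slab free ... */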
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 0444b44bd156..392a3670944c 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -359,9 +359,11 @@ extern atomic_t kgdb_active;
extern bool dbg_is_early;
extern void __init dbg_late_init(void);
extern void kgdb_panic(const char *msg);
+extern void kgdb_free_init_mem(void);
#else /* ! CONFIG_KGDB */
#define in_dbg_master() (0)
#define dbg_late_init()
static inline void kgdb_panic(const char *msg) {}
+static inline void kgdb_free_init_mem(void) { }
#endif /* ! CONFIG_KGDB */
#endif /* _KGDB_H_ */
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index c941b7377321..2fcc01891b47 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -3,6 +3,7 @@
#define _LINUX_KHUGEPAGED_H
#include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
+#include <linux/shmem_fs.h>
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -57,6 +58,7 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
{
if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
if ((khugepaged_always() ||
+ (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) ||
(khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
!(vm_flags & VM_NOHUGEPAGE) &&
!test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
diff --git a/include/linux/ks0108.h b/include/linux/ks0108.h
index 0738389b42b6..1a37a664f915 100644
--- a/include/linux/ks0108.h
+++ b/include/linux/ks0108.h
@@ -4,7 +4,7 @@
* Version: 0.1.0
* Description: ks0108 LCD Controller driver header
*
- * Author: Copyright (C) Miguel Ojeda Sandonis
+ * Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-31
*/
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e126ebda36d0..1b65e7204344 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -11,6 +11,7 @@
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/bug.h>
+#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/preempt.h>
@@ -506,6 +507,8 @@ struct kvm {
struct mmu_notifier mmu_notifier;
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
+ unsigned long mmu_notifier_range_start;
+ unsigned long mmu_notifier_range_end;
#endif
long tlbs_dirty;
struct list_head devices;
@@ -733,7 +736,7 @@ kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
bool atomic, bool *async, bool write_fault,
- bool *writable);
+ bool *writable, hva_t *hva);
void kvm_release_pfn_clean(kvm_pfn_t pfn);
void kvm_release_pfn_dirty(kvm_pfn_t pfn);
@@ -1207,6 +1210,26 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
return 1;
return 0;
}
+
+static inline int mmu_notifier_retry_hva(struct kvm *kvm,
+ unsigned long mmu_seq,
+ unsigned long hva)
+{
+ lockdep_assert_held(&kvm->mmu_lock);
+ /*
+ * If mmu_notifier_count is non-zero, then the range maintained by
+ * kvm_mmu_notifier_invalidate_range_start contains all addresses that
+ * might be being invalidated. Note that it may include some false
+ * positives, due to shortcuts when handling concurrent invalidations.
+ */
+ if (unlikely(kvm->mmu_notifier_count) &&
+ hva >= kvm->mmu_notifier_range_start &&
+ hva < kvm->mmu_notifier_range_end)
+ return 1;
+ if (kvm->mmu_notifier_seq != mmu_seq)
+ return 1;
+ return 0;
+}
#endif
#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
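mmu_notifier_retry_hva() narrows the retry condition from "any invalidation in flight" to "an invalidation whose range covers this hva". A sketch of the intended fault-handler sequence, abbreviated and with kvm->mmu_lock held at the check:

mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write_fault,
			   &writable, &hva);

/* ... take kvm->mmu_lock ... */
if (mmu_notifier_retry_hva(kvm, mmu_seq, hva))
	goto retry;	/* a racing invalidation covered this hva */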
diff --git a/include/linux/led-class-flash.h b/include/linux/led-class-flash.h
index 21a3358a1731..612b4cab3819 100644
--- a/include/linux/led-class-flash.h
+++ b/include/linux/led-class-flash.h
@@ -85,6 +85,7 @@ static inline struct led_classdev_flash *lcdev_to_flcdev(
return container_of(lcdev, struct led_classdev_flash, led_cdev);
}
+#if IS_ENABLED(CONFIG_LEDS_CLASS_FLASH)
/**
* led_classdev_flash_register_ext - register a new object of LED class with
* init data and with support for flash LEDs
@@ -98,12 +99,6 @@ int led_classdev_flash_register_ext(struct device *parent,
struct led_classdev_flash *fled_cdev,
struct led_init_data *init_data);
-static inline int led_classdev_flash_register(struct device *parent,
- struct led_classdev_flash *fled_cdev)
-{
- return led_classdev_flash_register_ext(parent, fled_cdev, NULL);
-}
-
/**
* led_classdev_flash_unregister - unregisters an object of led_classdev class
* with support for flash LEDs
@@ -118,15 +113,44 @@ int devm_led_classdev_flash_register_ext(struct device *parent,
struct led_init_data *init_data);
+void devm_led_classdev_flash_unregister(struct device *parent,
+ struct led_classdev_flash *fled_cdev);
+
+#else
+
+static inline int led_classdev_flash_register_ext(struct device *parent,
+ struct led_classdev_flash *fled_cdev,
+ struct led_init_data *init_data)
+{
+ return 0;
+}
+
+static inline void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev) {};
+static inline int devm_led_classdev_flash_register_ext(struct device *parent,
+ struct led_classdev_flash *fled_cdev,
+ struct led_init_data *init_data)
+{
+ return 0;
+}
+
+static inline void devm_led_classdev_flash_unregister(struct device *parent,
+ struct led_classdev_flash *fled_cdev)
+{};
+
+#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) */
+
+static inline int led_classdev_flash_register(struct device *parent,
+ struct led_classdev_flash *fled_cdev)
+{
+ return led_classdev_flash_register_ext(parent, fled_cdev, NULL);
+}
+
static inline int devm_led_classdev_flash_register(struct device *parent,
struct led_classdev_flash *fled_cdev)
{
return devm_led_classdev_flash_register_ext(parent, fled_cdev, NULL);
}
-void devm_led_classdev_flash_unregister(struct device *parent,
- struct led_classdev_flash *fled_cdev);
-
/**
* led_set_flash_strobe - setup flash strobe
* @fled_cdev: the flash LED to set strobe on
diff --git a/include/linux/led-class-multicolor.h b/include/linux/led-class-multicolor.h
index 5116f9a866cc..210d57bcd767 100644
--- a/include/linux/led-class-multicolor.h
+++ b/include/linux/led-class-multicolor.h
@@ -44,12 +44,6 @@ int led_classdev_multicolor_register_ext(struct device *parent,
struct led_classdev_mc *mcled_cdev,
struct led_init_data *init_data);
-static inline int led_classdev_multicolor_register(struct device *parent,
- struct led_classdev_mc *mcled_cdev)
-{
- return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
-}
-
/**
* led_classdev_multicolor_unregister - unregisters an object of led_classdev
* class with support for multicolor LEDs
@@ -68,13 +62,6 @@ int devm_led_classdev_multicolor_register_ext(struct device *parent,
struct led_classdev_mc *mcled_cdev,
struct led_init_data *init_data);
-static inline int devm_led_classdev_multicolor_register(struct device *parent,
- struct led_classdev_mc *mcled_cdev)
-{
- return devm_led_classdev_multicolor_register_ext(parent, mcled_cdev,
- NULL);
-}
-
void devm_led_classdev_multicolor_unregister(struct device *parent,
struct led_classdev_mc *mcled_cdev);
#else
@@ -83,27 +70,33 @@ static inline int led_classdev_multicolor_register_ext(struct device *parent,
struct led_classdev_mc *mcled_cdev,
struct led_init_data *init_data)
{
- return -EINVAL;
-}
-
-static inline int led_classdev_multicolor_register(struct device *parent,
- struct led_classdev_mc *mcled_cdev)
-{
- return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
+ return 0;
}
static inline void led_classdev_multicolor_unregister(struct led_classdev_mc *mcled_cdev) {};
static inline int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
enum led_brightness brightness)
{
- return -EINVAL;
+ return 0;
}
static inline int devm_led_classdev_multicolor_register_ext(struct device *parent,
struct led_classdev_mc *mcled_cdev,
struct led_init_data *init_data)
{
- return -EINVAL;
+ return 0;
+}
+
+static inline void devm_led_classdev_multicolor_unregister(struct device *parent,
+ struct led_classdev_mc *mcled_cdev)
+{};
+
+#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */
+
+static inline int led_classdev_multicolor_register(struct device *parent,
+ struct led_classdev_mc *mcled_cdev)
+{
+ return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
}
static inline int devm_led_classdev_multicolor_register(struct device *parent,
@@ -113,9 +106,4 @@ static inline int devm_led_classdev_multicolor_register(struct device *parent,
NULL);
}
-static inline void devm_led_classdev_multicolor_unregister(struct device *parent,
- struct led_classdev_mc *mcled_cdev)
-{};
-
-#endif /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */
#endif /* _LINUX_MULTICOLOR_LEDS_H_INCLUDED */
diff --git a/include/linux/leds.h b/include/linux/leds.h
index 6a8d6409c993..329fd914cf24 100644
--- a/include/linux/leds.h
+++ b/include/linux/leds.h
@@ -63,8 +63,8 @@ struct led_hw_trigger_type {
struct led_classdev {
const char *name;
- enum led_brightness brightness;
- enum led_brightness max_brightness;
+ unsigned int brightness;
+ unsigned int max_brightness;
int flags;
/* Lower 16 bits reflect status */
@@ -253,8 +253,7 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev,
* software blink timer that implements blinking when the
* hardware doesn't. This function is guaranteed not to sleep.
*/
-void led_set_brightness(struct led_classdev *led_cdev,
- enum led_brightness brightness);
+void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness);
/**
* led_set_brightness_sync - set LED brightness synchronously
@@ -267,8 +266,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
*
* Returns: 0 on success or negative error value on failure
*/
-int led_set_brightness_sync(struct led_classdev *led_cdev,
- enum led_brightness value);
+int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value);
/**
* led_update_brightness - update LED brightness
@@ -565,7 +563,7 @@ static inline void ledtrig_cpu(enum cpu_led_event evt)
#ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED
void led_classdev_notify_brightness_hw_changed(
- struct led_classdev *led_cdev, enum led_brightness brightness);
+ struct led_classdev *led_cdev, unsigned int brightness);
#else
static inline void led_classdev_notify_brightness_hw_changed(
struct led_classdev *led_cdev, enum led_brightness brightness) { }
diff --git a/include/linux/litex.h b/include/linux/litex.h
index 40f5be503593..5ea9ccf5cce4 100644
--- a/include/linux/litex.h
+++ b/include/linux/litex.h
@@ -3,9 +3,6 @@
* Common LiteX header providing
* helper functions for accessing CSRs.
*
- * Implementation of the functions is provided by
- * the LiteX SoC Controller driver.
- *
* Copyright (C) 2019-2020 Antmicro <www.antmicro.com>
*/
@@ -13,90 +10,147 @@
#define _LINUX_LITEX_H
#include <linux/io.h>
-#include <linux/types.h>
-#include <linux/compiler_types.h>
+
+/* LiteX SoCs support 8- or 32-bit CSR Bus data width (i.e., subreg. size) */
+#if defined(CONFIG_LITEX_SUBREG_SIZE) && \
+ (CONFIG_LITEX_SUBREG_SIZE == 1 || CONFIG_LITEX_SUBREG_SIZE == 4)
+#define LITEX_SUBREG_SIZE CONFIG_LITEX_SUBREG_SIZE
+#else
+#error LiteX subregister size (LITEX_SUBREG_SIZE) must be 4 or 1!
+#endif
+#define LITEX_SUBREG_SIZE_BIT (LITEX_SUBREG_SIZE * 8)
+
+/* LiteX subregisters of any width are always aligned on a 4-byte boundary */
+#define LITEX_SUBREG_ALIGN 0x4
+
+static inline void _write_litex_subregister(u32 val, void __iomem *addr)
+{
+ writel((u32 __force)cpu_to_le32(val), addr);
+}
+
+static inline u32 _read_litex_subregister(void __iomem *addr)
+{
+ return le32_to_cpu((__le32 __force)readl(addr));
+}
/*
- * The parameters below are true for LiteX SoCs configured for 8-bit CSR Bus,
- * 32-bit aligned.
+ * LiteX SoC Generator, depending on the configuration, can split a single
+ * logical CSR (Control&Status Register) into a series of consecutive physical
+ * registers.
+ *
+ * For example, in the configuration with 8-bit CSR Bus, a 32-bit aligned,
+ * 32-bit wide logical CSR will be laid out as four 32-bit physical
+ * subregisters, each one containing one byte of meaningful data.
*
- * Supporting other configurations will require extending the logic in this
- * header and in the LiteX SoC controller driver.
+ * For details see: https://github.com/enjoy-digital/litex/wiki/CSR-Bus
*/
-#define LITEX_REG_SIZE 0x4
-#define LITEX_SUBREG_SIZE 0x1
-#define LITEX_SUBREG_SIZE_BIT (LITEX_SUBREG_SIZE * 8)
-#define WRITE_LITEX_SUBREGISTER(val, base_offset, subreg_id) \
- writel((u32 __force)cpu_to_le32(val), base_offset + (LITEX_REG_SIZE * subreg_id))
+/* number of LiteX subregisters needed to store a register of given reg_size */
+#define _litex_num_subregs(reg_size) \
+ (((reg_size) - 1) / LITEX_SUBREG_SIZE + 1)
-#define READ_LITEX_SUBREGISTER(base_offset, subreg_id) \
- le32_to_cpu((__le32 __force)readl(base_offset + (LITEX_REG_SIZE * subreg_id)))
+/*
+ * since the number of 4-byte aligned subregisters required to store a single
+ * LiteX CSR (MMIO) register varies with LITEX_SUBREG_SIZE, the offset of the
+ * next adjacent LiteX CSR register w.r.t. the offset of the current one also
+ * depends on how many subregisters the latter is spread across
+ */
+#define _next_reg_off(off, size) \
+ ((off) + _litex_num_subregs(size) * LITEX_SUBREG_ALIGN)
-void litex_set_reg(void __iomem *reg, unsigned long reg_sz, unsigned long val);
+/*
+ * The purpose of `_litex_[set|get]_reg()` is to implement the logic of
+ * writing to/reading from the LiteX CSR in a single place that can be then
+ * reused by all LiteX drivers via the `litex_[write|read][8|16|32|64]()`
+ * accessors for the appropriate data width.
+ * NOTE: direct use of `_litex_[set|get]_reg()` by LiteX drivers is strongly
+ * discouraged, as they perform no error checking on the requested data width!
+ */
-unsigned long litex_get_reg(void __iomem *reg, unsigned long reg_sz);
+/**
+ * _litex_set_reg() - Writes a value to the LiteX CSR (Control&Status Register)
+ * @reg: Address of the CSR
+ * @reg_size: The width of the CSR expressed in the number of bytes
+ * @val: Value to be written to the CSR
+ *
+ * This function splits a single (possibly multi-byte) LiteX CSR write into
+ * a series of subregister writes with a proper offset.
+ * NOTE: caller is responsible for ensuring (0 < reg_size <= sizeof(u64)).
+ */
+static inline void _litex_set_reg(void __iomem *reg, size_t reg_size, u64 val)
+{
+ u8 shift = _litex_num_subregs(reg_size) * LITEX_SUBREG_SIZE_BIT;
+
+ while (shift > 0) {
+ shift -= LITEX_SUBREG_SIZE_BIT;
+ _write_litex_subregister(val >> shift, reg);
+ reg += LITEX_SUBREG_ALIGN;
+ }
+}
+
+/**
+ * _litex_get_reg() - Reads a value of the LiteX CSR (Control&Status Register)
+ * @reg: Address of the CSR
+ * @reg_size: The width of the CSR expressed in the number of bytes
+ *
+ * Return: Value read from the CSR
+ *
+ * This function generates a series of subregister reads with a proper offset
+ * and joins their results into a single (possibly multi-byte) LiteX CSR value.
+ * NOTE: caller is responsible for ensuring (0 < reg_size <= sizeof(u64)).
+ */
+static inline u64 _litex_get_reg(void __iomem *reg, size_t reg_size)
+{
+ u64 r;
+ u8 i;
+
+ r = _read_litex_subregister(reg);
+ for (i = 1; i < _litex_num_subregs(reg_size); i++) {
+ r <<= LITEX_SUBREG_SIZE_BIT;
+ reg += LITEX_SUBREG_ALIGN;
+ r |= _read_litex_subregister(reg);
+ }
+ return r;
+}
static inline void litex_write8(void __iomem *reg, u8 val)
{
- WRITE_LITEX_SUBREGISTER(val, reg, 0);
+ _litex_set_reg(reg, sizeof(u8), val);
}
static inline void litex_write16(void __iomem *reg, u16 val)
{
- WRITE_LITEX_SUBREGISTER(val >> 8, reg, 0);
- WRITE_LITEX_SUBREGISTER(val, reg, 1);
+ _litex_set_reg(reg, sizeof(u16), val);
}
static inline void litex_write32(void __iomem *reg, u32 val)
{
- WRITE_LITEX_SUBREGISTER(val >> 24, reg, 0);
- WRITE_LITEX_SUBREGISTER(val >> 16, reg, 1);
- WRITE_LITEX_SUBREGISTER(val >> 8, reg, 2);
- WRITE_LITEX_SUBREGISTER(val, reg, 3);
+ _litex_set_reg(reg, sizeof(u32), val);
}
static inline void litex_write64(void __iomem *reg, u64 val)
{
- WRITE_LITEX_SUBREGISTER(val >> 56, reg, 0);
- WRITE_LITEX_SUBREGISTER(val >> 48, reg, 1);
- WRITE_LITEX_SUBREGISTER(val >> 40, reg, 2);
- WRITE_LITEX_SUBREGISTER(val >> 32, reg, 3);
- WRITE_LITEX_SUBREGISTER(val >> 24, reg, 4);
- WRITE_LITEX_SUBREGISTER(val >> 16, reg, 5);
- WRITE_LITEX_SUBREGISTER(val >> 8, reg, 6);
- WRITE_LITEX_SUBREGISTER(val, reg, 7);
+ _litex_set_reg(reg, sizeof(u64), val);
}
static inline u8 litex_read8(void __iomem *reg)
{
- return READ_LITEX_SUBREGISTER(reg, 0);
+ return _litex_get_reg(reg, sizeof(u8));
}
static inline u16 litex_read16(void __iomem *reg)
{
- return (READ_LITEX_SUBREGISTER(reg, 0) << 8)
- | (READ_LITEX_SUBREGISTER(reg, 1));
+ return _litex_get_reg(reg, sizeof(u16));
}
static inline u32 litex_read32(void __iomem *reg)
{
- return (READ_LITEX_SUBREGISTER(reg, 0) << 24)
- | (READ_LITEX_SUBREGISTER(reg, 1) << 16)
- | (READ_LITEX_SUBREGISTER(reg, 2) << 8)
- | (READ_LITEX_SUBREGISTER(reg, 3));
+ return _litex_get_reg(reg, sizeof(u32));
}
static inline u64 litex_read64(void __iomem *reg)
{
- return ((u64)READ_LITEX_SUBREGISTER(reg, 0) << 56)
- | ((u64)READ_LITEX_SUBREGISTER(reg, 1) << 48)
- | ((u64)READ_LITEX_SUBREGISTER(reg, 2) << 40)
- | ((u64)READ_LITEX_SUBREGISTER(reg, 3) << 32)
- | ((u64)READ_LITEX_SUBREGISTER(reg, 4) << 24)
- | ((u64)READ_LITEX_SUBREGISTER(reg, 5) << 16)
- | ((u64)READ_LITEX_SUBREGISTER(reg, 6) << 8)
- | ((u64)READ_LITEX_SUBREGISTER(reg, 7));
+ return _litex_get_reg(reg, sizeof(u64));
}
#endif /* _LINUX_LITEX_H */
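As a rough sketch of how a LiteX peripheral driver consumes these accessors (the UART register offsets and helper below are hypothetical, not part of this patch):

/* Hypothetical LiteX UART fragment built on the new accessors. */
#include <linux/litex.h>

#define UART_RXTX	0x00	/* assumed CSR offsets, illustration only */
#define UART_TXFULL	0x04

static void litex_uart_putchar(void __iomem *membase, u8 ch)
{
	while (litex_read8(membase + UART_TXFULL))
		cpu_relax();
	litex_write8(membase + UART_RXTX, ch);
}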
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index 9004375c462e..27eb383cb95d 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -42,7 +42,7 @@ struct device *mdev_get_iommu_device(struct device *dev);
* @mdev: mdev_device structure on of mediated device
* that is being created
* Returns integer: success (0) or error (< 0)
- * @remove: Called to free resources in parent device's driver for a
+ * @remove: Called to free resources in parent device's driver for
* a mediated device. It is mandatory to provide 'remove'
* ops.
* @mdev: mdev_device device structure which is being
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 439a89e758d8..4da95e684e20 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -27,9 +27,8 @@ struct memory_block {
unsigned long start_section_nr;
unsigned long state; /* serialized by the dev->lock */
int online_type; /* for passing data to online routine */
- int phys_device; /* to which fru does this belong? */
- struct device dev;
int nid; /* NID for this memory block */
+ struct device dev;
};
int arch_get_memory_phys_device(unsigned long start_pfn);
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 15acce5ab106..7288aa5ef73b 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -16,22 +16,7 @@ struct resource;
struct vmem_altmap;
#ifdef CONFIG_MEMORY_HOTPLUG
-/*
- * Return page for the valid pfn only if the page is online. All pfn
- * walkers which rely on the fully initialized page->flags and others
- * should use this rather than pfn_valid && pfn_to_page
- */
-#define pfn_to_online_page(pfn) \
-({ \
- struct page *___page = NULL; \
- unsigned long ___pfn = pfn; \
- unsigned long ___nr = pfn_to_section_nr(___pfn); \
- \
- if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
- pfn_valid_within(___pfn)) \
- ___page = pfn_to_page(___pfn); \
- ___page; \
-})
+struct page *pfn_to_online_page(unsigned long pfn);
/*
* Types for free bootmem stored in page->lru.next. These have to be in
@@ -68,7 +53,7 @@ typedef int __bitwise mhp_t;
* with this flag set, the resource pointer must no longer be used as it
* might be stale, or the resource might have changed.
*/
-#define MEMHP_MERGE_RESOURCE ((__force mhp_t)BIT(0))
+#define MHP_MERGE_RESOURCE ((__force mhp_t)BIT(0))
/*
* Extended parameters for memory hotplug:
@@ -81,6 +66,9 @@ struct mhp_params {
pgprot_t pgprot;
};
+bool mhp_range_allowed(u64 start, u64 size, bool need_mapping);
+struct range mhp_get_pluggable_range(bool need_mapping);
+
/*
* Zone resizing functions
*
@@ -131,10 +119,10 @@ extern int arch_add_memory(int nid, u64 start, u64 size,
struct mhp_params *params);
extern u64 max_mem_size;
-extern int memhp_online_type_from_str(const char *str);
+extern int mhp_online_type_from_str(const char *str);
/* Default online_type (MMOP_*) when new memory blocks are added. */
-extern int memhp_default_online_type;
+extern int mhp_default_online_type;
/* If movable_node boot option specified */
extern bool movable_node_enabled;
static inline bool movable_node_is_enabled(void)
@@ -281,6 +269,13 @@ static inline bool movable_node_is_enabled(void)
}
#endif /* ! CONFIG_MEMORY_HOTPLUG */
+/*
+ * Keep this declaration outside CONFIG_MEMORY_HOTPLUG as some
+ * platforms might override and use arch_get_mappable_range()
+ * for internal non-memory-hotplug purposes.
+ */
+struct range arch_get_mappable_range(void);
+
#if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
/*
* pgdat resizing functions
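With pfn_to_online_page() now out of line, a pfn walker that must only touch fully initialized pages looks like this minimal, illustrative sketch:

/* Illustrative walk: count online pages in a range. */
static unsigned long count_online_pages(unsigned long start_pfn,
					unsigned long nr_pages)
{
	unsigned long pfn, count = 0;

	for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
		if (pfn_to_online_page(pfn))	/* NULL if offline/invalid */
			count++;
	return count;
}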
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 79c49e7f5c30..f5b464daeeca 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -137,6 +137,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
+bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
@@ -165,6 +166,11 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
return NULL;
}
+static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
+{
+ return false;
+}
+
static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
{
return 0;
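A hedged sketch of how a caller combines the new pgmap_pfn_valid() with the existing pagemap lookup (the helper name is invented for illustration):

/* Illustrative: resolve a device-memory pfn to its page, or NULL. */
static struct page *devmem_pfn_to_page(unsigned long pfn)
{
	struct dev_pagemap *pgmap = get_dev_pagemap(pfn, NULL);
	struct page *page = NULL;

	if (pgmap && pgmap_pfn_valid(pgmap, pfn))
		page = pfn_to_page(pfn);
	put_dev_pagemap(pgmap);	/* NULL-safe */
	return page;
}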
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9198b7ade85f..47946cec7584 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -503,6 +503,9 @@ struct zone {
* bootmem allocator):
* managed_pages = present_pages - reserved_pages;
*
+ * cma_pages are present pages assigned for CMA use
+ * (MIGRATE_CMA).
+ *
* So present_pages may be used by memory hotplug or memory power
* management logic to figure out unmanaged pages by checking
* (present_pages - managed_pages). And managed_pages should be used
@@ -527,6 +530,9 @@ struct zone {
atomic_long_t managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
+#ifdef CONFIG_CMA
+ unsigned long cma_pages;
+#endif
const char *name;
@@ -624,6 +630,15 @@ static inline unsigned long zone_managed_pages(struct zone *zone)
return (unsigned long)atomic_long_read(&zone->managed_pages);
}
+static inline unsigned long zone_cma_pages(struct zone *zone)
+{
+#ifdef CONFIG_CMA
+ return zone->cma_pages;
+#else
+ return 0;
+#endif
+}
+
static inline unsigned long zone_end_pfn(const struct zone *zone)
{
return zone->zone_start_pfn + zone->spanned_pages;
@@ -903,6 +918,18 @@ static inline int local_memory_node(int node_id) { return node_id; };
*/
#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool zone_is_zone_device(struct zone *zone)
+{
+ return zone_idx(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool zone_is_zone_device(struct zone *zone)
+{
+ return false;
+}
+#endif
+
/*
* Returns true if a zone has pages managed by the buddy allocator.
* All the reclaim decisions have to use this function rather than
@@ -1291,13 +1318,14 @@ extern size_t mem_section_usage_size(void);
* which results in PFN_SECTION_SHIFT equal 6.
* To sum it up, at least 6 bits are available.
*/
-#define SECTION_MARKED_PRESENT (1UL<<0)
-#define SECTION_HAS_MEM_MAP (1UL<<1)
-#define SECTION_IS_ONLINE (1UL<<2)
-#define SECTION_IS_EARLY (1UL<<3)
-#define SECTION_MAP_LAST_BIT (1UL<<4)
-#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT 3
+#define SECTION_MARKED_PRESENT (1UL<<0)
+#define SECTION_HAS_MEM_MAP (1UL<<1)
+#define SECTION_IS_ONLINE (1UL<<2)
+#define SECTION_IS_EARLY (1UL<<3)
+#define SECTION_TAINT_ZONE_DEVICE (1UL<<4)
+#define SECTION_MAP_LAST_BIT (1UL<<5)
+#define SECTION_MAP_MASK (~(SECTION_MAP_LAST_BIT-1))
+#define SECTION_NID_SHIFT 3
static inline struct page *__section_mem_map_addr(struct mem_section *section)
{
@@ -1336,6 +1364,13 @@ static inline int online_section(struct mem_section *section)
return (section && (section->section_mem_map & SECTION_IS_ONLINE));
}
+static inline int online_device_section(struct mem_section *section)
+{
+ unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE;
+
+ return section && ((section->section_mem_map & flags) == flags);
+}
+
static inline int online_section_nr(unsigned long nr)
{
return online_section(__nr_to_section(nr));
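A minimal sketch (function name invented) of the new zone accessors in use; zone_cma_pages() compiles away to 0 without CONFIG_CMA:

/* Illustrative per-zone report using the new helpers. */
static void report_zone(struct zone *zone)
{
	if (zone_is_zone_device(zone))
		return;	/* ZONE_DEVICE pages are not buddy-managed */
	pr_info("%s: managed=%lu cma=%lu\n", zone->name,
		zone_managed_pages(zone), zone_cma_pages(zone));
}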
diff --git a/include/linux/mount.h b/include/linux/mount.h
index 161f4419db6c..5d92a7e1a742 100644
--- a/include/linux/mount.h
+++ b/include/linux/mount.h
@@ -86,7 +86,6 @@ struct path;
extern int mnt_want_write(struct vfsmount *mnt);
extern int mnt_want_write_file(struct file *file);
-extern int mnt_clone_write(struct vfsmount *mnt);
extern void mnt_drop_write(struct vfsmount *mnt);
extern void mnt_drop_write_file(struct file *file);
extern void mntput(struct vfsmount *mnt);
diff --git a/include/linux/net.h b/include/linux/net.h
index 9e2324efc26a..ba736b457a06 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -42,8 +42,6 @@ struct net;
#define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4
-#define PROTO_CMSG_DATA_ONLY 0x0001
-
#ifndef ARCH_HAS_SOCKET_TYPES
/**
* enum sock_type - Socket types
@@ -138,7 +136,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
struct proto_ops {
int family;
- unsigned int flags;
struct module *owner;
int (*release) (struct socket *sock);
int (*bind) (struct socket *sock,
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 8c6c4e32fc2f..eadaabd18dc7 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -388,6 +388,7 @@ extern int nfs_open(struct inode *, struct file *);
extern int nfs_attribute_cache_expired(struct inode *inode);
extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
+extern int nfs_clear_invalid_mapping(struct address_space *mapping);
extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
extern int nfs_revalidate_mapping_rcu(struct inode *inode);
@@ -571,8 +572,6 @@ nfs_have_writebacks(struct inode *inode)
extern int nfs_readpage(struct file *, struct page *);
extern int nfs_readpages(struct file *, struct address_space *,
struct list_head *, unsigned);
-extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
- struct page *);
/*
* inline functions
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 38e60ec742df..6f76b32a0238 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -142,7 +142,7 @@ struct nfs_server {
struct nlm_host *nlm_host; /* NLM client handle */
struct nfs_iostats __percpu *io_stats; /* I/O statistics */
atomic_long_t writeback; /* number of writeback pages */
- int flags; /* various flags */
+ unsigned int flags; /* various flags */
/* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
#define NFS_MOUNT_LOOKUP_CACHE_NONEG 0x10000
@@ -153,6 +153,8 @@ struct nfs_server {
#define NFS_MOUNT_LOCAL_FCNTL 0x200000
#define NFS_MOUNT_SOFTERR 0x400000
#define NFS_MOUNT_SOFTREVAL 0x800000
+#define NFS_MOUNT_WRITE_EAGER 0x01000000
+#define NFS_MOUNT_WRITE_WAIT 0x02000000
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index db914477057b..04a34c08e0a6 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -810,7 +810,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
/*
* Flags checked when a page is freed. Pages being freed should not have
- * these flags set. It they are, there is a problem.
+ * these flags set. If they are, there is a problem.
*/
#define PAGE_FLAGS_CHECK_AT_FREE \
(1UL << PG_lru | 1UL << PG_locked | \
@@ -821,7 +821,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
/*
* Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have these flags set. It they are set,
+ * Pages being prepped should not have these flags set. If they are set,
* there has been a kernel bug or struct page corruption.
*
* __PG_HWPOISON is exceptional because it needs to be kept beyond page's
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index bd629d676a27..20225b067583 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -315,6 +315,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
#define FGP_NOWAIT 0x00000020
#define FGP_FOR_MMAP 0x00000040
#define FGP_HEAD 0x00000080
+#define FGP_ENTRY 0x00000100
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
int fgp_flags, gfp_t cache_gfp_mask);
@@ -450,8 +451,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
}
unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
- unsigned int nr_entries, struct page **entries,
- pgoff_t *indices);
+ pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
pgoff_t end, unsigned int nr_pages,
struct page **pages);
@@ -759,6 +759,8 @@ extern void __delete_from_page_cache(struct page *page, void *shadow);
void replace_page_cache_page(struct page *old, struct page *new);
void delete_from_page_cache_batch(struct address_space *mapping,
struct pagevec *pvec);
+loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
+ int whence);
/*
* Like add_to_page_cache_locked, but used to add newly allocated pages:
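mapping_seek_hole_data() lets a page-cache-backed filesystem implement SEEK_HOLE/SEEK_DATA directly; a sketch under the assumption of a simple filemap-based file (foo_llseek is hypothetical):

static loff_t foo_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);

	if (whence == SEEK_HOLE || whence == SEEK_DATA)
		return mapping_seek_hole_data(inode->i_mapping, offset,
					      i_size_read(inode), whence);
	return generic_file_llseek(file, offset, whence);
}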
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index ad4ddc17d403..7f3f19065a9f 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -25,10 +25,6 @@ struct pagevec {
void __pagevec_release(struct pagevec *pvec);
void __pagevec_lru_add(struct pagevec *pvec);
-unsigned pagevec_lookup_entries(struct pagevec *pvec,
- struct address_space *mapping,
- pgoff_t start, unsigned nr_entries,
- pgoff_t *indices);
void pagevec_remove_exceptionals(struct pagevec *pvec);
unsigned pagevec_lookup_range(struct pagevec *pvec,
struct address_space *mapping,
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 36eb748f3c97..cdfc4e9f253e 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -432,14 +432,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
* To be differentiate with macro pte_mkyoung, this macro is used on platforms
* where software maintains page access bit.
*/
-#ifndef pte_sw_mkyoung
-static inline pte_t pte_sw_mkyoung(pte_t pte)
-{
- return pte;
-}
-#define pte_sw_mkyoung pte_sw_mkyoung
-#endif
-
#ifndef pte_savedwrite
#define pte_savedwrite pte_write
#endif
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index 2a9df80ea887..b5ebf6c01292 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -171,7 +171,7 @@ static inline void ptrace_event(int event, unsigned long message)
*
* Check whether @event is enabled and, if so, report @event and @pid
* to the ptrace parent. @pid is reported as the pid_t seen from the
- * the ptrace parent's pid namespace.
+ * ptrace parent's pid namespace.
*
* Called without locks.
*/
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 70085ca1a3fc..def5c62c93b3 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -213,7 +213,8 @@ struct page_vma_mapped_walk {
static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
{
- if (pvmw->pte)
+ /* HugeTLB pte is set to the relevant page table entry without pte_mapped. */
+ if (pvmw->pte && !PageHuge(pvmw->page))
pte_unmap(pvmw->pte);
if (pvmw->ptl)
spin_unlock(pvmw->ptl);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26f499810dfa..ef00bb22164c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -895,6 +895,9 @@ struct task_struct {
/* CLONE_CHILD_CLEARTID: */
int __user *clear_child_tid;
+ /* PF_IO_WORKER */
+ void *pf_io_worker;
+
u64 utime;
u64 stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index c0f71f2e7160..ef02be869cf2 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -31,6 +31,7 @@ struct kernel_clone_args {
/* Number of elements in *set_tid */
size_t set_tid_size;
int cgroup;
+ int io_thread;
struct cgroup *cgrp;
struct css_set *cset;
};
@@ -82,6 +83,7 @@ extern void exit_files(struct task_struct *);
extern void exit_itimers(struct signal_struct *);
extern pid_t kernel_clone(struct kernel_clone_args *kargs);
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
struct task_struct *fork_idle(int);
struct mm_struct *copy_init_mm(void);
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
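create_io_thread() hands back the new task without running it; the caller wakes it, as in this hedged sketch of an io_uring-style worker spawn (names invented):

static int my_worker_fn(void *data)
{
	/* ... service queued I/O until asked to exit ... */
	return 0;
}

static struct task_struct *spawn_io_worker(void *data, int node)
{
	struct task_struct *tsk = create_io_thread(my_worker_fn, data, node);

	if (!IS_ERR(tsk))
		wake_up_new_task(tsk);
	return tsk;
}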
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 9eb430c163c2..3aa5e1e73ab6 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_SLAB_DEF_H
#define _LINUX_SLAB_DEF_H
+#include <linux/kfence.h>
#include <linux/reciprocal_div.h>
/*
@@ -114,6 +115,8 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
static inline int objs_per_slab_page(const struct kmem_cache *cache,
const struct page *page)
{
+ if (is_kfence_address(page_address(page)))
+ return 1;
return cache->num;
}
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 1be0ed5befa1..dcde82a4434c 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -7,6 +7,7 @@
*
* (C) 2007 SGI, Christoph Lameter
*/
+#include <linux/kfence.h>
#include <linux/kobject.h>
#include <linux/reciprocal_div.h>
@@ -185,6 +186,8 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
const struct page *page, void *obj)
{
+ if (is_kfence_address(obj))
+ return 0;
return __obj_to_index(cache, page_address(page), obj);
}
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 120ffddc03d2..3a5446ac014a 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -187,4 +187,6 @@ void sdw_intel_enable_irq(void __iomem *mmio_base, bool enable);
irqreturn_t sdw_intel_thread(int irq, void *dev_id);
+#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE BIT(1)
+
#endif
diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h
index 24d49c732341..6bb4bc1a5f54 100644
--- a/include/linux/stackdepot.h
+++ b/include/linux/stackdepot.h
@@ -21,4 +21,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
+#ifdef CONFIG_STACKDEPOT
+int stack_depot_init(void);
+#else
+static inline int stack_depot_init(void)
+{
+ return 0;
+}
+#endif /* CONFIG_STACKDEPOT */
+
#endif
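stack_depot_init() gives users an explicit early-init hook (the init/main.c hunk below wires it into mm_init()); saving and deduplicating a trace then looks like this sketch:

#include <linux/stackdepot.h>
#include <linux/stacktrace.h>

static depot_stack_handle_t record_stack(void)
{
	unsigned long entries[16];
	unsigned int nr = stack_trace_save(entries, ARRAY_SIZE(entries), 0);

	return stack_depot_save(entries, nr, GFP_NOWAIT);
}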
diff --git a/include/linux/string.h b/include/linux/string.h
index 4fcfb56abcf5..9521d8cab18e 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -266,287 +266,7 @@ void __read_overflow3(void) __compiletime_error("detected read beyond size of ob
void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
#if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
-
-#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
-extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp);
-extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy);
-extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove);
-extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset);
-extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat);
-extern char *__underlying_strcpy(char *p, const char *q) __RENAME(strcpy);
-extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
-extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
-extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
-#else
-#define __underlying_memchr __builtin_memchr
-#define __underlying_memcmp __builtin_memcmp
-#define __underlying_memcpy __builtin_memcpy
-#define __underlying_memmove __builtin_memmove
-#define __underlying_memset __builtin_memset
-#define __underlying_strcat __builtin_strcat
-#define __underlying_strcpy __builtin_strcpy
-#define __underlying_strlen __builtin_strlen
-#define __underlying_strncat __builtin_strncat
-#define __underlying_strncpy __builtin_strncpy
-#endif
-
-__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
-{
- size_t p_size = __builtin_object_size(p, 1);
- if (__builtin_constant_p(size) && p_size < size)
- __write_overflow();
- if (p_size < size)
- fortify_panic(__func__);
- return __underlying_strncpy(p, q, size);
-}
-
-__FORTIFY_INLINE char *strcat(char *p, const char *q)
-{
- size_t p_size = __builtin_object_size(p, 1);
- if (p_size == (size_t)-1)
- return __underlying_strcat(p, q);
- if (strlcat(p, q, p_size) >= p_size)
- fortify_panic(__func__);
- return p;
-}
-
-__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
-{
- __kernel_size_t ret;
- size_t p_size = __builtin_object_size(p, 1);
-
- /* Work around gcc excess stack consumption issue */
- if (p_size == (size_t)-1 ||
- (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
- return __underlying_strlen(p);
- ret = strnlen(p, p_size);
- if (p_size <= ret)
- fortify_panic(__func__);
- return ret;
-}
-
-extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
-__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
-{
- size_t p_size = __builtin_object_size(p, 1);
- __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
- if (p_size <= ret && maxlen != ret)
- fortify_panic(__func__);
- return ret;
-}
-
-/* defined after fortified strlen to reuse it */
-extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
-__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
-{
- size_t ret;
- size_t p_size = __builtin_object_size(p, 1);
- size_t q_size = __builtin_object_size(q, 1);
- if (p_size == (size_t)-1 && q_size == (size_t)-1)
- return __real_strlcpy(p, q, size);
- ret = strlen(q);
- if (size) {
- size_t len = (ret >= size) ? size - 1 : ret;
- if (__builtin_constant_p(len) && len >= p_size)
- __write_overflow();
- if (len >= p_size)
- fortify_panic(__func__);
- __underlying_memcpy(p, q, len);
- p[len] = '\0';
- }
- return ret;
-}
-
-/* defined after fortified strnlen to reuse it */
-extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
-__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size)
-{
- size_t len;
- /* Use string size rather than possible enclosing struct size. */
- size_t p_size = __builtin_object_size(p, 1);
- size_t q_size = __builtin_object_size(q, 1);
-
- /* If we cannot get size of p and q default to call strscpy. */
- if (p_size == (size_t) -1 && q_size == (size_t) -1)
- return __real_strscpy(p, q, size);
-
- /*
- * If size can be known at compile time and is greater than
- * p_size, generate a compile time write overflow error.
- */
- if (__builtin_constant_p(size) && size > p_size)
- __write_overflow();
-
- /*
- * This call protects from read overflow, because len will default to q
- * length if it smaller than size.
- */
- len = strnlen(q, size);
- /*
- * If len equals size, we will copy only size bytes which leads to
- * -E2BIG being returned.
- * Otherwise we will copy len + 1 because of the final '\O'.
- */
- len = len == size ? size : len + 1;
-
- /*
- * Generate a runtime write overflow error if len is greater than
- * p_size.
- */
- if (len > p_size)
- fortify_panic(__func__);
-
- /*
- * We can now safely call vanilla strscpy because we are protected from:
- * 1. Read overflow thanks to call to strnlen().
- * 2. Write overflow thanks to above ifs.
- */
- return __real_strscpy(p, q, len);
-}
-
-/* defined after fortified strlen and strnlen to reuse them */
-__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
-{
- size_t p_len, copy_len;
- size_t p_size = __builtin_object_size(p, 1);
- size_t q_size = __builtin_object_size(q, 1);
- if (p_size == (size_t)-1 && q_size == (size_t)-1)
- return __underlying_strncat(p, q, count);
- p_len = strlen(p);
- copy_len = strnlen(q, count);
- if (p_size < p_len + copy_len + 1)
- fortify_panic(__func__);
- __underlying_memcpy(p + p_len, q, copy_len);
- p[p_len + copy_len] = '\0';
- return p;
-}
-
-__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
-{
- size_t p_size = __builtin_object_size(p, 0);
- if (__builtin_constant_p(size) && p_size < size)
- __write_overflow();
- if (p_size < size)
- fortify_panic(__func__);
- return __underlying_memset(p, c, size);
-}
-
-__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
-{
- size_t p_size = __builtin_object_size(p, 0);
- size_t q_size = __builtin_object_size(q, 0);
- if (__builtin_constant_p(size)) {
- if (p_size < size)
- __write_overflow();
- if (q_size < size)
- __read_overflow2();
- }
- if (p_size < size || q_size < size)
- fortify_panic(__func__);
- return __underlying_memcpy(p, q, size);
-}
-
-__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
-{
- size_t p_size = __builtin_object_size(p, 0);
- size_t q_size = __builtin_object_size(q, 0);
- if (__builtin_constant_p(size)) {
- if (p_size < size)
- __write_overflow();
- if (q_size < size)
- __read_overflow2();
- }
- if (p_size < size || q_size < size)
- fortify_panic(__func__);
- return __underlying_memmove(p, q, size);
-}
-
-extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
-__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
-{
- size_t p_size = __builtin_object_size(p, 0);
- if (__builtin_constant_p(size) && p_size < size)
- __read_overflow();
- if (p_size < size)
- fortify_panic(__func__);
- return __real_memscan(p, c, size);
-}
-
-__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
-{
- size_t p_size = __builtin_object_size(p, 0);
- size_t q_size = __builtin_object_size(q, 0);
- if (__builtin_constant_p(size)) {
- if (p_size < size)
- __read_overflow();
- if (q_size < size)
- __read_overflow2();
- }
- if (p_size < size || q_size < size)
- fortify_panic(__func__);
- return __underlying_memcmp(p, q, size);
-}
-
-__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
-{
- size_t p_size = __builtin_object_size(p, 0);
- if (__builtin_constant_p(size) && p_size < size)
- __read_overflow();
- if (p_size < size)
- fortify_panic(__func__);
- return __underlying_memchr(p, c, size);
-}
-
-void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
-__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
-{
- size_t p_size = __builtin_object_size(p, 0);
- if (__builtin_constant_p(size) && p_size < size)
- __read_overflow();
- if (p_size < size)
- fortify_panic(__func__);
- return __real_memchr_inv(p, c, size);
-}
-
-extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
-__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
-{
- size_t p_size = __builtin_object_size(p, 0);
- if (__builtin_constant_p(size) && p_size < size)
- __read_overflow();
- if (p_size < size)
- fortify_panic(__func__);
- return __real_kmemdup(p, size, gfp);
-}
-
-/* defined after fortified strlen and memcpy to reuse them */
-__FORTIFY_INLINE char *strcpy(char *p, const char *q)
-{
- size_t p_size = __builtin_object_size(p, 1);
- size_t q_size = __builtin_object_size(q, 1);
- size_t size;
- if (p_size == (size_t)-1 && q_size == (size_t)-1)
- return __underlying_strcpy(p, q);
- size = strlen(q) + 1;
- /* test here to use the more stringent object size */
- if (p_size < size)
- fortify_panic(__func__);
- memcpy(p, q, size);
- return p;
-}
-
-/* Don't use these outside the FORITFY_SOURCE implementation */
-#undef __underlying_memchr
-#undef __underlying_memcmp
-#undef __underlying_memcpy
-#undef __underlying_memmove
-#undef __underlying_memset
-#undef __underlying_strcat
-#undef __underlying_strcpy
-#undef __underlying_strlen
-#undef __underlying_strncat
-#undef __underlying_strncpy
+#include <linux/fortify-string.h>
#endif
/**
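The fortified helpers now live in <linux/fortify-string.h> but behave as before; as an illustration of what they catch when object sizes are known at compile time:

char dst[8];

void demo(const char *src)
{
	/* 16 > sizeof(dst): __write_overflow() fails the build */
	memcpy(dst, src, 16);
}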
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 32f665b1ee85..4cc6ec3bf0ab 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -485,6 +485,7 @@ struct backing_dev_info;
extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
extern void exit_swap_address_space(unsigned int type);
extern struct swap_info_struct *get_swap_device(swp_entry_t entry);
+sector_t swap_page_sector(struct page *page);
static inline void put_swap_device(struct swap_info_struct *si)
{
diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h
index d9c9fc9ca5d2..5857a937c637 100644
--- a/include/linux/swiotlb.h
+++ b/include/linux/swiotlb.h
@@ -29,6 +29,7 @@ enum swiotlb_force {
* controllable.
*/
#define IO_TLB_SHIFT 11
+#define IO_TLB_SIZE (1 << IO_TLB_SHIFT)
/* default to 64MB */
#define IO_TLB_DEFAULT_SIZE (64UL<<20)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 7077fec653bb..28e7af1406f2 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -349,15 +349,8 @@ struct trace_event_call {
struct event_filter *filter;
void *mod;
void *data;
- /*
- * bit 0: filter_active
- * bit 1: allow trace by non root (cap any)
- * bit 2: failed to apply filter
- * bit 3: trace internal event (do not enable)
- * bit 4: Event was enabled by module
- * bit 5: use call filter rather than file filter
- * bit 6: Event is a tracepoint
- */
+
+ /* See the TRACE_EVENT_FL_* flags above */
int flags; /* static flags of different events */
#ifdef CONFIG_PERF_EVENTS
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 773135fc6e19..506d625163a1 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -313,6 +313,12 @@ static inline void __mod_node_page_state(struct pglist_data *pgdat,
enum node_stat_item item, int delta)
{
if (vmstat_item_in_bytes(item)) {
+ /*
+ * Only cgroups use subpage accounting right now; at
+ * the global level, these items still change in
+ * multiples of whole pages. Store them as pages
+ * internally to keep the per-cpu counters compact.
+ */
VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
delta >>= PAGE_SHIFT;
}
diff --git a/include/linux/zpool.h b/include/linux/zpool.h
index 51bf43076165..e8997010612a 100644
--- a/include/linux/zpool.h
+++ b/include/linux/zpool.h
@@ -73,6 +73,7 @@ u64 zpool_get_total_size(struct zpool *pool);
* @malloc: allocate mem from a pool.
* @free: free mem from a pool.
* @shrink: shrink the pool.
+ * @sleep_mapped: whether zpool driver can sleep during map.
* @map: map a handle.
* @unmap: unmap a handle.
* @total_size: get total size of a pool.
@@ -100,6 +101,7 @@ struct zpool_driver {
int (*shrink)(void *pool, unsigned int pages,
unsigned int *reclaimed);
+ bool sleep_mapped;
void *(*map)(void *pool, unsigned long handle,
enum zpool_mapmode mm);
void (*unmap)(void *pool, unsigned long handle);
@@ -112,5 +114,6 @@ void zpool_register_driver(struct zpool_driver *driver);
int zpool_unregister_driver(struct zpool_driver *driver);
bool zpool_evictable(struct zpool *pool);
+bool zpool_can_sleep_mapped(struct zpool *pool);
#endif
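zpool_can_sleep_mapped() enables the pattern of bouncing data out of a mapping before sleeping; a sketch (bounce-buffer management is assumed to be the caller's problem):

static void use_handle(struct zpool *pool, unsigned long handle,
		       u8 *bounce, size_t len)
{
	u8 *src = zpool_map_handle(pool, handle, ZPOOL_MM_RO);

	if (!zpool_can_sleep_mapped(pool)) {
		/* mapping may be atomic: copy out and unmap first */
		memcpy(bounce, src, len);
		zpool_unmap_handle(pool, handle);
		src = bounce;
	}
	/* ... possibly sleeping work on src, e.g. decompression ... */
	if (zpool_can_sleep_mapped(pool))
		zpool_unmap_handle(pool, handle);
}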
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 4807ca4d52e0..2a430e713ce5 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -35,7 +35,7 @@ enum zs_mapmode {
struct zs_pool_stats {
/* How many pages were migrated (freed) */
- unsigned long pages_compacted;
+ atomic_long_t pages_compacted;
};
struct zs_pool;
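Readers of the now-atomic counter can sample it locklessly, e.g. (sketch):

static void show_compaction(struct zs_pool *pool)
{
	struct zs_pool_stats stats;

	zs_pool_stats(pool, &stats);
	pr_info("compacted: %ld pages\n",
		atomic_long_read(&stats.pages_compacted));
}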
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index b3bbd10eb3f0..02f966e9358f 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -187,7 +187,7 @@ struct iscsi_conn {
struct iscsi_task *task; /* xmit task in progress */
/* xmit */
- spinlock_t taskqueuelock; /* protects the next three lists */
+ /* items must be added/deleted under frwd lock */
struct list_head mgmtqueue; /* mgmt (control) xmit queue */
struct list_head cmdqueue; /* data-path cmd queue */
struct list_head requeue; /* tasks needing another run */
@@ -332,7 +332,7 @@ struct iscsi_session {
* cmdsn, queued_cmdsn *
* session resources: *
* - cmdpool kfifo_out , *
- * - mgmtpool, */
+ * - mgmtpool, queues */
spinlock_t back_lock; /* protects cmdsn_exp *
* cmdsn_max, *
* cmdpool kfifo_in */
@@ -395,6 +395,8 @@ extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht,
extern void iscsi_host_remove(struct Scsi_Host *shost);
extern void iscsi_host_free(struct Scsi_Host *shost);
extern int iscsi_target_alloc(struct scsi_target *starget);
+extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
+ uint16_t requested_cmds_max);
/*
* session management
diff --git a/include/soc/canaan/k210-sysctl.h b/include/soc/canaan/k210-sysctl.h
new file mode 100644
index 000000000000..0c2b2c2dabca
--- /dev/null
+++ b/include/soc/canaan/k210-sysctl.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef K210_SYSCTL_H
+#define K210_SYSCTL_H
+
+/*
+ * Kendryte K210 SoC system controller registers offsets.
+ * Taken from Kendryte SDK (kendryte-standalone-sdk).
+ */
+#define K210_SYSCTL_GIT_ID 0x00 /* Git short commit id */
+#define K210_SYSCTL_UART_BAUD 0x04 /* Default UARTHS baud rate */
+#define K210_SYSCTL_PLL0 0x08 /* PLL0 controller */
+#define K210_SYSCTL_PLL1 0x0C /* PLL1 controller */
+#define K210_SYSCTL_PLL2 0x10 /* PLL2 controller */
+#define K210_SYSCTL_PLL_LOCK 0x18 /* PLL lock tester */
+#define K210_SYSCTL_ROM_ERROR 0x1C /* AXI ROM detector */
+#define K210_SYSCTL_SEL0 0x20 /* Clock select controller 0 */
+#define K210_SYSCTL_SEL1 0x24 /* Clock select controller 1 */
+#define K210_SYSCTL_EN_CENT 0x28 /* Central clock enable */
+#define K210_SYSCTL_EN_PERI 0x2C /* Peripheral clock enable */
+#define K210_SYSCTL_SOFT_RESET 0x30 /* Soft reset ctrl */
+#define K210_SYSCTL_PERI_RESET 0x34 /* Peripheral reset controller */
+#define K210_SYSCTL_THR0 0x38 /* Clock threshold controller 0 */
+#define K210_SYSCTL_THR1 0x3C /* Clock threshold controller 1 */
+#define K210_SYSCTL_THR2 0x40 /* Clock threshold controller 2 */
+#define K210_SYSCTL_THR3 0x44 /* Clock threshold controller 3 */
+#define K210_SYSCTL_THR4 0x48 /* Clock threshold controller 4 */
+#define K210_SYSCTL_THR5 0x4C /* Clock threshold controller 5 */
+#define K210_SYSCTL_THR6 0x50 /* Clock threshold controller 6 */
+#define K210_SYSCTL_MISC 0x54 /* Miscellaneous controller */
+#define K210_SYSCTL_PERI 0x58 /* Peripheral controller */
+#define K210_SYSCTL_SPI_SLEEP 0x5C /* SPI sleep controller */
+#define K210_SYSCTL_RESET_STAT 0x60 /* Reset source status */
+#define K210_SYSCTL_DMA_SEL0 0x64 /* DMA handshake selector 0 */
+#define K210_SYSCTL_DMA_SEL1 0x68 /* DMA handshake selector 1 */
+#define K210_SYSCTL_POWER_SEL 0x6C /* IO Power Mode Select controller */
+
+void k210_clk_early_init(void __iomem *regs);
+
+#endif
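A hedged sketch of a driver peeking at one of these registers after mapping the controller (error handling omitted):

#include <linux/io.h>
#include <soc/canaan/k210-sysctl.h>

static void k210_report_git_id(void __iomem *regs)
{
	pr_info("K210 bootloader git id: %08x\n",
		readl(regs + K210_SYSCTL_GIT_ID));
}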
diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h
index 743c2f442280..d0574805865f 100644
--- a/include/sound/intel-nhlt.h
+++ b/include/sound/intel-nhlt.h
@@ -113,6 +113,11 @@ struct nhlt_vendor_dmic_array_config {
} __packed;
enum {
+ NHLT_CONFIG_TYPE_GENERIC = 0,
+ NHLT_CONFIG_TYPE_MIC_ARRAY = 1
+};
+
+enum {
NHLT_MIC_ARRAY_2CH_SMALL = 0xa,
NHLT_MIC_ARRAY_2CH_BIG = 0xb,
NHLT_MIC_ARRAY_4CH_1ST_GEOM = 0xc,
diff --git a/include/sound/soc-acpi.h b/include/sound/soc-acpi.h
index 9a43c44dcbbb..c45075024c30 100644
--- a/include/sound/soc-acpi.h
+++ b/include/sound/soc-acpi.h
@@ -174,7 +174,7 @@ struct snd_soc_acpi_codecs {
static inline bool snd_soc_acpi_sof_parent(struct device *dev)
{
return dev->parent && dev->parent->driver && dev->parent->driver->name &&
- !strcmp(dev->parent->driver->name, "sof-audio-acpi");
+ !strncmp(dev->parent->driver->name, "sof-audio-acpi", strlen("sof-audio-acpi"));
}
#endif
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 6336780d83a7..ce2fba49c95d 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -72,6 +72,7 @@ int transport_backend_register(const struct target_backend_ops *);
void target_backend_unregister(const struct target_backend_ops *);
void target_complete_cmd(struct se_cmd *, u8);
+void target_set_cmd_data_length(struct se_cmd *, int);
void target_complete_cmd_with_length(struct se_cmd *, u8, int);
void transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *);
diff --git a/include/trace/events/bcache.h b/include/trace/events/bcache.h
index e41c611d6d3b..899fdacf57b9 100644
--- a/include/trace/events/bcache.h
+++ b/include/trace/events/bcache.h
@@ -28,7 +28,7 @@ DECLARE_EVENT_CLASS(bcache_request,
__entry->sector = bio->bi_iter.bi_sector;
__entry->orig_sector = bio->bi_iter.bi_sector - 16;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
),
TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
@@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(bcache_bio,
__entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
),
TP_printk("%d,%d %s %llu + %u",
@@ -137,7 +137,7 @@ TRACE_EVENT(bcache_read,
__entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
__entry->cache_hit = hit;
__entry->bypass = bypass;
),
@@ -168,7 +168,7 @@ TRACE_EVENT(bcache_write,
__entry->inode = inode;
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
__entry->writeback = writeback;
__entry->bypass = bypass;
),
@@ -238,7 +238,7 @@ TRACE_EVENT(bcache_journal_write,
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio->bi_iter.bi_size >> 9;
__entry->nr_keys = keys;
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
),
TP_printk("%d,%d %s %llu + %u keys %u",
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 0d782663a005..cc5ab96a7471 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -89,7 +89,7 @@ TRACE_EVENT(block_rq_requeue,
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
- blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+ blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),
@@ -133,7 +133,7 @@ TRACE_EVENT(block_rq_complete,
__entry->nr_sector = nr_bytes >> 9;
__entry->error = error;
- blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
+ blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),
@@ -166,7 +166,7 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
__entry->bytes = blk_rq_bytes(rq);
- blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+ blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -196,7 +196,7 @@ DEFINE_EVENT(block_rq, block_rq_insert,
/**
* block_rq_issue - issue pending block IO request operation to device driver
- * @rq: block IO operation operation request
+ * @rq: block IO operation request
*
* Called when block operation request @rq from queue @q is sent to a
* device driver for processing.
@@ -210,7 +210,7 @@ DEFINE_EVENT(block_rq, block_rq_issue,
/**
* block_rq_merge - merge request with another one in the elevator
- * @rq: block IO operation operation request
+ * @rq: block IO operation request
*
* Called when block operation request @rq from queue @q is merged to another
* request queued in the elevator.
@@ -249,7 +249,7 @@ TRACE_EVENT(block_bio_complete,
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio_sectors(bio);
__entry->error = blk_status_to_errno(bio->bi_status);
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
),
TP_printk("%d,%d %s %llu + %u [%d]",
@@ -276,7 +276,7 @@ DECLARE_EVENT_CLASS(block_bio,
__entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->nr_sector = bio_sectors(bio);
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -433,7 +433,7 @@ TRACE_EVENT(block_split,
__entry->dev = bio_dev(bio);
__entry->sector = bio->bi_iter.bi_sector;
__entry->new_sector = new_sector;
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
@@ -474,7 +474,7 @@ TRACE_EVENT(block_bio_remap,
__entry->nr_sector = bio_sectors(bio);
__entry->old_dev = dev;
__entry->old_sector = from;
- blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+ blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
),
TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
@@ -518,7 +518,7 @@ TRACE_EVENT(block_rq_remap,
__entry->old_dev = dev;
__entry->old_sector = from;
__entry->nr_bios = blk_rq_count_bios(rq);
- blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+ blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
),
TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u",
diff --git a/include/trace/events/error_report.h b/include/trace/events/error_report.h
new file mode 100644
index 000000000000..96f64bf218b2
--- /dev/null
+++ b/include/trace/events/error_report.h
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Declarations for error reporting tracepoints.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM error_report
+
+#if !defined(_TRACE_ERROR_REPORT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ERROR_REPORT_H
+
+#include <linux/tracepoint.h>
+
+#ifndef __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+enum error_detector {
+ ERROR_DETECTOR_KFENCE,
+ ERROR_DETECTOR_KASAN
+};
+
+#endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */
+
+#define error_detector_list \
+ EM(ERROR_DETECTOR_KFENCE, "kfence") \
+ EMe(ERROR_DETECTOR_KASAN, "kasan")
+/* Always end the list with an EMe. */
+
+#undef EM
+#undef EMe
+
+#define EM(a, b) TRACE_DEFINE_ENUM(a);
+#define EMe(a, b) TRACE_DEFINE_ENUM(a);
+
+error_detector_list
+
+#undef EM
+#undef EMe
+
+#define EM(a, b) { a, b },
+#define EMe(a, b) { a, b }
+
+#define show_error_detector_list(val) \
+ __print_symbolic(val, error_detector_list)
+
+DECLARE_EVENT_CLASS(error_report_template,
+ TP_PROTO(enum error_detector error_detector, unsigned long id),
+ TP_ARGS(error_detector, id),
+ TP_STRUCT__entry(__field(enum error_detector, error_detector)
+ __field(unsigned long, id)),
+ TP_fast_assign(__entry->error_detector = error_detector;
+ __entry->id = id;),
+ TP_printk("[%s] %lx",
+ show_error_detector_list(__entry->error_detector),
+ __entry->id));
+
+/**
+ * error_report_end - called after printing the error report
+ * @error_detector: short string describing the error detection tool
+ * @id: pseudo-unique descriptor identifying the report
+ * (e.g. the memory access address)
+ *
+ * This event occurs right after a debugging tool finishes printing the error
+ * report.
+ */
+DEFINE_EVENT(error_report_template, error_report_end,
+ TP_PROTO(enum error_detector error_detector, unsigned long id),
+ TP_ARGS(error_detector, id));
+
+#endif /* _TRACE_ERROR_REPORT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
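A detector fires the event once its report is fully printed; minimal sketch (one .c file in the tree must define CREATE_TRACE_POINTS before including the header):

#include <trace/events/error_report.h>

static void my_report_done(unsigned long access_addr)
{
	trace_error_report_end(ERROR_DETECTOR_KFENCE, access_addr);
}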
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 40845b0d5dad..3a60b6b6db32 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -144,17 +144,17 @@ TRACE_EVENT(kmem_cache_free,
TP_STRUCT__entry(
__field( unsigned long, call_site )
__field( const void *, ptr )
- __field( const char *, name )
+ __string( name, name )
),
TP_fast_assign(
__entry->call_site = call_site;
__entry->ptr = ptr;
- __entry->name = name;
+ __assign_str(name, name);
),
TP_printk("call_site=%pS ptr=%p name=%s",
- (void *)__entry->call_site, __entry->ptr, __entry->name)
+ (void *)__entry->call_site, __entry->ptr, __get_str(name))
);
TRACE_EVENT(mm_page_free,
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 76e85e16854b..c838e7ac1c2d 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -60,6 +60,51 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class,
), \
TP_ARGS(wc, cid))
+DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class,
+ TP_PROTO(
+ const struct ib_wc *wc,
+ const struct rpc_rdma_cid *cid
+ ),
+
+ TP_ARGS(wc, cid),
+
+ TP_STRUCT__entry(
+ __field(u32, cq_id)
+ __field(int, completion_id)
+ __field(u32, received)
+ __field(unsigned long, status)
+ __field(unsigned int, vendor_err)
+ ),
+
+ TP_fast_assign(
+ __entry->cq_id = cid->ci_queue_id;
+ __entry->completion_id = cid->ci_completion_id;
+ __entry->status = wc->status;
+ if (wc->status) {
+ __entry->received = 0;
+ __entry->vendor_err = wc->vendor_err;
+ } else {
+ __entry->received = wc->byte_len;
+ __entry->vendor_err = 0;
+ }
+ ),
+
+ TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x) received=%u",
+ __entry->cq_id, __entry->completion_id,
+ rdma_show_wc_status(__entry->status),
+ __entry->status, __entry->vendor_err,
+ __entry->received
+ )
+);
+
+#define DEFINE_RECEIVE_COMPLETION_EVENT(name) \
+ DEFINE_EVENT(rpcrdma_receive_completion_class, name, \
+ TP_PROTO( \
+ const struct ib_wc *wc, \
+ const struct rpc_rdma_cid *cid \
+ ), \
+ TP_ARGS(wc, cid))
+
DECLARE_EVENT_CLASS(xprtrdma_reply_class,
TP_PROTO(
const struct rpcrdma_rep *rep
@@ -838,7 +883,8 @@ TRACE_EVENT(xprtrdma_post_linv_err,
** Completion events
**/
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_receive);
+DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive);
+
DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
@@ -1790,7 +1836,7 @@ TRACE_EVENT(svcrdma_post_recv,
)
);
-DEFINE_COMPLETION_EVENT(svcrdma_wc_receive);
+DEFINE_RECEIVE_COMPLETION_EVENT(svcrdma_wc_receive);
TRACE_EVENT(svcrdma_rq_post_err,
TP_PROTO(
diff --git a/include/uapi/linux/firewire-cdev.h b/include/uapi/linux/firewire-cdev.h
index 7e5b5c10a49c..5effa9832802 100644
--- a/include/uapi/linux/firewire-cdev.h
+++ b/include/uapi/linux/firewire-cdev.h
@@ -844,7 +844,7 @@ struct fw_cdev_queue_iso {
* struct fw_cdev_start_iso - Start an isochronous transmission or reception
* @cycle: Cycle in which to start I/O. If @cycle is greater than or
* equal to 0, the I/O will start on that cycle.
- * @sync: Determines the value to wait for for receive packets that have
+ * @sync: Determines the value to wait for receive packets that have
* the %FW_CDEV_ISO_SYNC bit set
* @tags: Tag filter bit mask. Only valid for isochronous reception.
* Determines the tag values for which packets will be accepted.
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index 9a61c28ed3ae..ee3127461ee0 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -84,7 +84,7 @@ struct input_id {
* in units per radian.
* When INPUT_PROP_ACCELEROMETER is set the resolution changes.
* The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in
- * in units per g (units/g) and in units per degree per second
+ * units per g (units/g) and in units per degree per second
* (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ).
*/
struct input_absinfo {
diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index ac4e1738a9af..2514eb6b1cf2 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -262,6 +262,7 @@ struct io_uring_params {
#define IORING_FEAT_POLL_32BITS (1U << 6)
#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
#define IORING_FEAT_EXT_ARG (1U << 8)
+#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
/*
* io_uring_register(2) opcodes and arguments
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 8b281f722e5b..f6afee209620 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1154,6 +1154,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1621,12 +1622,24 @@ struct kvm_xen_vcpu_attr {
union {
__u64 gpa;
__u64 pad[8];
+ struct {
+ __u64 state;
+ __u64 state_entry_time;
+ __u64 time_running;
+ __u64 time_runnable;
+ __u64 time_blocked;
+ __u64 time_offline;
+ } runstate;
} u;
};
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
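From userspace, the runstate area would be registered roughly like this (KVM_XEN_VCPU_SET_ATTR is assumed from the same series; it is not shown in this hunk):

#include <err.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static void set_runstate_area(int vcpu_fd, __u64 runstate_gpa)
{
	struct kvm_xen_vcpu_attr attr = {
		.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
		.u.gpa = runstate_gpa,
	};

	if (ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &attr))
		err(1, "KVM_XEN_VCPU_SET_ATTR");
}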
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index bf3cfc7c35d0..0b1386073d49 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -88,6 +88,13 @@ struct xenbus_device {
struct completion down;
struct work_struct work;
struct semaphore reclaim_sem;
+
+ /* Event channel based statistics and settings. */
+ atomic_t event_channels;
+ atomic_t events;
+ atomic_t spurious_events;
+ atomic_t jiffies_eoi_delayed;
+ unsigned int spurious_threshold;
};
static inline struct xenbus_device *to_xenbus_device(struct device *dev)
diff --git a/init/Kconfig b/init/Kconfig
index 719871f8727c..22946fe5ded9 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -19,7 +19,7 @@ config CC_VERSION_TEXT
CC_VERSION_TEXT so it is recorded in include/config/auto.conf.cmd.
When the compiler is updated, Kconfig will be invoked.
- - Ensure full rebuild when the compier is updated
+ - Ensure full rebuild when the compiler is updated
include/linux/kconfig.h contains this option in the comment line so
fixdep adds include/config/cc/version/text.h into the auto-generated
dependency. When the compiler is updated, syncconfig will touch it
@@ -2283,7 +2283,6 @@ config TRIM_UNUSED_KSYMS
config UNUSED_KSYMS_WHITELIST
string "Whitelist of symbols to keep in ksymtab"
depends on TRIM_UNUSED_KSYMS
- default "scripts/lto-used-symbollist.txt" if LTO_CLANG
help
By default, all unused exported symbols will be un-exported from the
build when TRIM_UNUSED_KSYMS is selected.
diff --git a/init/initramfs.c b/init/initramfs.c
index 55b74d7e5260..d677e8e717f1 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -11,6 +11,7 @@
#include <linux/utime.h>
#include <linux/file.h>
#include <linux/memblock.h>
+#include <linux/mm.h>
#include <linux/namei.h>
#include <linux/init_syscalls.h>
@@ -45,6 +46,16 @@ static void __init error(char *x)
message = x;
}
+static void panic_show_mem(const char *fmt, ...)
+{
+ va_list args;
+ char buf[128];
+
+ show_mem(0, NULL);
+ /* panic() is variadic and cannot consume a va_list; format first */
+ va_start(args, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, args);
+ va_end(args);
+ panic("%s", buf);
+}
+
/* link hash */
#define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
@@ -80,7 +91,7 @@ static char __init *find_link(int major, int minor, int ino,
}
q = kmalloc(sizeof(struct hash), GFP_KERNEL);
if (!q)
- panic("can't allocate link hash entry");
+ panic_show_mem("can't allocate link hash entry");
q->major = major;
q->minor = minor;
q->ino = ino;
@@ -125,7 +136,7 @@ static void __init dir_add(const char *name, time64_t mtime)
{
struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL);
if (!de)
- panic("can't allocate dir_entry buffer");
+ panic_show_mem("can't allocate dir_entry buffer");
INIT_LIST_HEAD(&de->list);
de->name = kstrdup(name, GFP_KERNEL);
de->mtime = mtime;
@@ -460,7 +471,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len)
name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);
if (!header_buf || !symlink_buf || !name_buf)
- panic("can't allocate buffers");
+ panic_show_mem("can't allocate buffers");
state = Start;
this_header = 0;
@@ -535,6 +546,51 @@ extern unsigned long __initramfs_size;
#include <linux/initrd.h>
#include <linux/kexec.h>
+void __init reserve_initrd_mem(void)
+{
+ phys_addr_t start;
+ unsigned long size;
+
+ /* Ignore the virtual address computed during device tree parsing */
+ initrd_start = initrd_end = 0;
+
+ if (!phys_initrd_size)
+ return;
+ /*
+ * Round the memory region to page boundaries as per free_initrd_mem()
+ * This allows us to detect whether the pages overlapping the initrd
+ * are in use, but more importantly, reserves the entire set of pages
+ * as we don't want these pages allocated for other purposes.
+ */
+ start = round_down(phys_initrd_start, PAGE_SIZE);
+ size = phys_initrd_size + (phys_initrd_start - start);
+ size = round_up(size, PAGE_SIZE);
+
+ if (!memblock_is_region_memory(start, size)) {
+ pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
+ (u64)start, size);
+ goto disable;
+ }
+
+ if (memblock_is_region_reserved(start, size)) {
+ pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
+ (u64)start, size);
+ goto disable;
+ }
+
+ memblock_reserve(start, size);
+ /* Now convert initrd to virtual addresses */
+ initrd_start = (unsigned long)__va(phys_initrd_start);
+ initrd_end = initrd_start + phys_initrd_size;
+ initrd_below_start_ok = 1;
+
+ return;
+disable:
+ pr_cont(" - disabling initrd\n");
+ initrd_start = 0;
+ initrd_end = 0;
+}
+
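
The rounding in reserve_initrd_mem() widens the reservation to whole pages so that every page the initrd touches is covered. A stand-alone sketch of that arithmetic, assuming 4 KiB pages and a made-up load address:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL	/* assumed for the example */
#define round_down(x, a)	((x) & ~((a) - 1))
#define round_up(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	uint64_t phys_initrd_start = 0x80201234;	/* made-up address */
	unsigned long phys_initrd_size = 10000;

	uint64_t start = round_down(phys_initrd_start, PAGE_SIZE);
	unsigned long size = phys_initrd_size + (phys_initrd_start - start);

	size = round_up(size, PAGE_SIZE);
	/* 0x80201000 + 0x3000: covers every page the initrd touches */
	printf("reserve 0x%llx + 0x%lx\n", (unsigned long long)start, size);
	return 0;
}
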
void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
{
#ifdef CONFIG_ARCH_KEEP_MEMBLOCK
@@ -607,7 +663,7 @@ static int __init populate_rootfs(void)
/* Load the built in initramfs */
char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
if (err)
- panic("%s", err); /* Failed to decompress INTERNAL initramfs */
+ panic_show_mem("%s", err); /* Failed to decompress INTERNAL initramfs */
if (!initrd_start || IS_ENABLED(CONFIG_INITRAMFS_FORCE))
goto done;
diff --git a/init/main.c b/init/main.c
index e9933cbf60d4..53b278845b88 100644
--- a/init/main.c
+++ b/init/main.c
@@ -40,6 +40,7 @@
#include <linux/security.h>
#include <linux/smp.h>
#include <linux/profile.h>
+#include <linux/kfence.h>
#include <linux/rcupdate.h>
#include <linux/moduleparam.h>
#include <linux/kallsyms.h>
@@ -96,6 +97,7 @@
#include <linux/mem_encrypt.h>
#include <linux/kcsan.h>
#include <linux/init_syscalls.h>
+#include <linux/stackdepot.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -824,7 +826,9 @@ static void __init mm_init(void)
*/
page_ext_init_flatmem();
init_mem_debugging_and_hardening();
+ kfence_alloc_pool();
report_meminit();
+ stack_depot_init();
mem_init();
/* page_owner must be initialized after buddy is ready */
page_ext_init_flatmem_late();
@@ -955,6 +959,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
hrtimers_init();
softirq_init();
timekeeping_init();
+ kfence_init();
/*
* For best initial stack canary entropy, prepare it after:
@@ -1421,6 +1426,7 @@ static int __ref kernel_init(void *unused)
async_synchronize_full();
kprobe_free_init_mem();
ftrace_free_init_mem();
+ kgdb_free_init_mem();
free_initmem();
mark_readonly();
diff --git a/init/version.c b/init/version.c
index 80d2b7566b39..92afc782b043 100644
--- a/init/version.c
+++ b/init/version.c
@@ -16,14 +16,6 @@
#include <linux/version.h>
#include <linux/proc_ns.h>
-#ifndef CONFIG_KALLSYMS
-#define version(a) Version_ ## a
-#define version_string(a) version(a)
-
-extern int version_string(LINUX_VERSION_CODE);
-int version_string(LINUX_VERSION_CODE);
-#endif
-
struct uts_namespace init_uts_ns = {
.ns.count = REFCOUNT_INIT(2),
.name = {
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 5b3f01da172b..60739d5e3373 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -84,7 +84,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
dentry = kern_path_locked(pathname, &path);
if (IS_ERR(dentry))
- return (void *)dentry; /* returning an error */
+ return ERR_CAST(dentry); /* returning an error */
inode = path.dentry->d_inode;
inode_unlock(inode);
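
ERR_CAST() documents that an error-encoded pointer is being re-typed without losing the errno, which is what the bare (void *) cast obscured. A user-space mock of the err.h pattern (the helpers below are simplified re-declarations for illustration only):

#include <stdio.h>

/* Simplified versions of the kernel's err.h helpers. */
#define MAX_ERRNO	4095
#define IS_ERR_VALUE(x)	((unsigned long)(x) >= (unsigned long)-MAX_ERRNO)

static inline void *ERR_PTR(long error)	{ return (void *)error; }
static inline long PTR_ERR(const void *ptr)	{ return (long)ptr; }
static inline int IS_ERR(const void *ptr)	{ return IS_ERR_VALUE((unsigned long)ptr); }
/* ERR_CAST: "this error-carrying pointer has a new type, same errno". */
static inline void *ERR_CAST(const void *ptr)	{ return (void *)ptr; }

struct dentry { int dummy; };
struct audit_fsnotify_mark { int dummy; };

static struct audit_fsnotify_mark *alloc_mark(struct dentry *dentry)
{
	if (IS_ERR(dentry))
		return ERR_CAST(dentry);	/* propagate -ENOENT etc. */
	return NULL;
}

int main(void)
{
	struct audit_fsnotify_mark *m = alloc_mark(ERR_PTR(-2 /* ENOENT */));

	printf("propagated errno: %ld\n", IS_ERR(m) ? PTR_ERR(m) : 0L);
	return 0;
}
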
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index b636d517c02c..4708aec492df 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -455,6 +455,17 @@ setundefined:
return 0;
}
+void kgdb_free_init_mem(void)
+{
+ int i;
+
+ /* Clear init memory breakpoints. */
+ for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+ if (init_section_contains((void *)kgdb_break[i].bpt_addr, 0))
+ kgdb_break[i].state = BP_UNDEFINED;
+ }
+}
+
#ifdef CONFIG_KGDB_KDB
void kdb_dump_stack_on_cpu(int cpu)
{
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index 7c42df6e6100..c10e855a03bc 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -50,9 +50,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/swiotlb.h>
-#define OFFSET(val,align) ((unsigned long) \
- ( (val) & ( (align) - 1)))
-
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
/*
@@ -103,6 +100,11 @@ static unsigned int max_segment;
static phys_addr_t *io_tlb_orig_addr;
/*
+ * The mapped buffer's size should be validated during a sync operation.
+ */
+static size_t *io_tlb_orig_size;
+
+/*
* Protect the above data structures in the map and unmap calls
*/
static DEFINE_SPINLOCK(io_tlb_lock);
@@ -171,7 +173,7 @@ void __init swiotlb_adjust_size(unsigned long new_size)
* adjust/expand SWIOTLB size for their use.
*/
if (!io_tlb_nslabs) {
- size = ALIGN(new_size, 1 << IO_TLB_SHIFT);
+ size = ALIGN(new_size, IO_TLB_SIZE);
io_tlb_nslabs = size >> IO_TLB_SHIFT;
io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
@@ -192,6 +194,16 @@ void swiotlb_print_info(void)
bytes >> 20);
}
+static inline unsigned long io_tlb_offset(unsigned long val)
+{
+ return val & (IO_TLB_SEGSIZE - 1);
+}
+
+static inline unsigned long nr_slots(u64 val)
+{
+ return DIV_ROUND_UP(val, IO_TLB_SIZE);
+}
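
These helpers replace the old OFFSET() macro with named arithmetic. A quick sanity check of their behaviour, assuming the kernel's IO_TLB_SHIFT of 11 (2 KiB slots) and IO_TLB_SEGSIZE of 128, both defined outside this hunk:

#include <stdint.h>
#include <stdio.h>

#define IO_TLB_SHIFT	11			/* assumed: 2 KiB slots */
#define IO_TLB_SIZE	(1UL << IO_TLB_SHIFT)
#define IO_TLB_SEGSIZE	128			/* assumed segment size */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static unsigned long io_tlb_offset(unsigned long val)
{
	return val & (IO_TLB_SEGSIZE - 1);
}

static unsigned long nr_slots(uint64_t val)
{
	return DIV_ROUND_UP(val, IO_TLB_SIZE);
}

int main(void)
{
	printf("%lu\n", nr_slots(1));		/* 1: any nonzero size needs a slot */
	printf("%lu\n", nr_slots(2048));	/* 1: exactly one slot */
	printf("%lu\n", nr_slots(2049));	/* 2: spills into a second slot */
	printf("%lu\n", io_tlb_offset(130));	/* 2: position within a segment */
	return 0;
}
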
+
/*
* Early SWIOTLB allocation may be too early to allow an architecture to
* perform the desired operations. This function allows the architecture to
@@ -240,9 +252,16 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
__func__, alloc_size, PAGE_SIZE);
+ alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
+ io_tlb_orig_size = memblock_alloc(alloc_size, PAGE_SIZE);
+ if (!io_tlb_orig_size)
+ panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+
for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ io_tlb_orig_size[i] = 0;
}
io_tlb_index = 0;
no_iotlb_memory = false;
@@ -363,7 +382,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
* between io_tlb_start and io_tlb_end.
*/
io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(int)));
+ get_order(io_tlb_nslabs * sizeof(int)));
if (!io_tlb_list)
goto cleanup3;
@@ -374,9 +393,18 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
if (!io_tlb_orig_addr)
goto cleanup4;
+ io_tlb_orig_size = (size_t *)
+ __get_free_pages(GFP_KERNEL,
+ get_order(io_tlb_nslabs *
+ sizeof(size_t)));
+ if (!io_tlb_orig_size)
+ goto cleanup5;
+
for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+ io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ io_tlb_orig_size[i] = 0;
}
io_tlb_index = 0;
no_iotlb_memory = false;
@@ -389,6 +417,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
return 0;
+cleanup5:
+ free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+ sizeof(phys_addr_t)));
+
cleanup4:
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
sizeof(int)));
@@ -404,6 +436,8 @@ void __init swiotlb_exit(void)
return;
if (late_alloc) {
+ free_pages((unsigned long)io_tlb_orig_size,
+ get_order(io_tlb_nslabs * sizeof(size_t)));
free_pages((unsigned long)io_tlb_orig_addr,
get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -413,6 +447,8 @@ void __init swiotlb_exit(void)
} else {
memblock_free_late(__pa(io_tlb_orig_addr),
PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+ memblock_free_late(__pa(io_tlb_orig_size),
+ PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t)));
memblock_free_late(__pa(io_tlb_list),
PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
memblock_free_late(io_tlb_start,
@@ -461,79 +497,71 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
}
}
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
- size_t mapping_size, size_t alloc_size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
- unsigned long flags;
- phys_addr_t tlb_addr;
- unsigned int nslots, stride, index, wrap;
- int i;
- unsigned long mask;
- unsigned long offset_slots;
- unsigned long max_slots;
- unsigned long tmp_io_tlb_used;
-
- if (no_iotlb_memory)
- panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
-
- if (mem_encrypt_active())
- pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+#define slot_addr(start, idx) ((start) + ((idx) << IO_TLB_SHIFT))
- if (mapping_size > alloc_size) {
- dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
- mapping_size, alloc_size);
- return (phys_addr_t)DMA_MAPPING_ERROR;
- }
+/*
+ * Return the offset into an iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+ return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
- mask = dma_get_seg_boundary(hwdev);
+/*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+ */
+static inline unsigned long get_max_slots(unsigned long boundary_mask)
+{
+ if (boundary_mask == ~0UL)
+ return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+ return nr_slots(boundary_mask + 1);
+}
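
The special case matters because boundary_mask + 1 wraps to zero when the mask is all-ones, and nr_slots(0) would then report no usable slots. A sketch of the two paths, assuming a 64-bit long and IO_TLB_SHIFT of 11:

#include <stdio.h>

#define BITS_PER_LONG	64	/* assumed: 64-bit build */
#define IO_TLB_SHIFT	11	/* assumed */
#define IO_TLB_SIZE	(1UL << IO_TLB_SHIFT)
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static unsigned long nr_slots(unsigned long long val)
{
	return DIV_ROUND_UP(val, IO_TLB_SIZE);
}

static unsigned long get_max_slots(unsigned long boundary_mask)
{
	if (boundary_mask == ~0UL)	/* ~0UL + 1 would wrap to 0 */
		return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
	return nr_slots(boundary_mask + 1);
}

int main(void)
{
	printf("%lu\n", get_max_slots(0xffffffffUL));	/* 32-bit boundary: 2^21 slots */
	printf("%lu\n", get_max_slots(~0UL));		/* no boundary: 2^53 slots */
	return 0;
}
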
- tbl_dma_addr &= mask;
+static unsigned int wrap_index(unsigned int index)
+{
+ if (index >= io_tlb_nslabs)
+ return 0;
+ return index;
+}
- offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+/*
+ * Find a suitable number of IO TLB entries that will fit this request and
+ * allocate a buffer from that IO TLB pool.
+ */
+static int find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size)
+{
+ unsigned long boundary_mask = dma_get_seg_boundary(dev);
+ dma_addr_t tbl_dma_addr =
+ phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
+ unsigned long max_slots = get_max_slots(boundary_mask);
+ unsigned int iotlb_align_mask =
+ dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+ unsigned int nslots = nr_slots(alloc_size), stride;
+ unsigned int index, wrap, count = 0, i;
+ unsigned long flags;
- /*
- * Carefully handle integer overflow which can occur when mask == ~0UL.
- */
- max_slots = mask + 1
- ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
- : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+ BUG_ON(!nslots);
/*
- * For mappings greater than or equal to a page, we limit the stride
- * (and hence alignment) to a page size.
+ * For mappings with an alignment requirement don't bother looping to
+ * unaligned slots once we have found an aligned one. For allocations of
+ * PAGE_SIZE or larger only look for page aligned allocations.
*/
- nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+ stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
if (alloc_size >= PAGE_SIZE)
- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
- else
- stride = 1;
-
- BUG_ON(!nslots);
+ stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
- /*
- * Find suitable number of IO TLB entries size that will fit this
- * request and allocate a buffer from that IO TLB pool.
- */
spin_lock_irqsave(&io_tlb_lock, flags);
-
if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
goto not_found;
- index = ALIGN(io_tlb_index, stride);
- if (index >= io_tlb_nslabs)
- index = 0;
- wrap = index;
-
+ index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
do {
- while (iommu_is_span_boundary(index, nslots, offset_slots,
- max_slots)) {
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
- if (index == wrap)
- goto not_found;
+ if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+ (orig_addr & iotlb_align_mask)) {
+ index = wrap_index(index + 1);
+ continue;
}
/*
@@ -541,55 +569,96 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
* contiguous buffers, we allocate the buffers from that slot
* and mark the entries as '0' indicating unavailable.
*/
- if (io_tlb_list[index] >= nslots) {
- int count = 0;
-
- for (i = index; i < (int) (index + nslots); i++)
- io_tlb_list[i] = 0;
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
- tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
- /*
- * Update the indices to avoid searching in the next
- * round.
- */
- io_tlb_index = ((index + nslots) < io_tlb_nslabs
- ? (index + nslots) : 0);
-
- goto found;
+ if (!iommu_is_span_boundary(index, nslots,
+ nr_slots(tbl_dma_addr),
+ max_slots)) {
+ if (io_tlb_list[index] >= nslots)
+ goto found;
}
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
+ index = wrap_index(index + stride);
} while (index != wrap);
not_found:
- tmp_io_tlb_used = io_tlb_used;
-
spin_unlock_irqrestore(&io_tlb_lock, flags);
- if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
- dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
- return (phys_addr_t)DMA_MAPPING_ERROR;
+ return -1;
+
found:
+ for (i = index; i < index + nslots; i++)
+ io_tlb_list[i] = 0;
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+ io_tlb_list[i]; i--)
+ io_tlb_list[i] = ++count;
+
+ /*
+ * Update the indices to avoid searching in the next round.
+ */
+ if (index + nslots < io_tlb_nslabs)
+ io_tlb_index = index + nslots;
+ else
+ io_tlb_index = 0;
io_tlb_used += nslots;
+
spin_unlock_irqrestore(&io_tlb_lock, flags);
+ return index;
+}
+
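
The skip condition at the top of the loop keeps the low address bits the device cares about identical between the original buffer and the bounce slot. A toy search over a made-up pool showing which slot satisfies that predicate (all addresses and masks here are invented):

#include <stdint.h>
#include <stdio.h>

#define IO_TLB_SHIFT	11	/* assumed: 2 KiB slots */
#define IO_TLB_SIZE	(1UL << IO_TLB_SHIFT)
#define slot_addr(start, idx)	((start) + ((uint64_t)(idx) << IO_TLB_SHIFT))

int main(void)
{
	uint64_t tbl_dma_addr = 0x40000000;	/* made-up bounce pool base */
	uint64_t orig_addr = 0x8000f800;	/* made-up buffer address */
	/* e.g. a device that needs its 64 KiB alignment bits preserved */
	unsigned long min_align_mask = 0xffff;
	unsigned long iotlb_align_mask = min_align_mask & ~(IO_TLB_SIZE - 1);

	for (unsigned int index = 0; index < 64; index++) {
		if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) ==
		    (orig_addr & iotlb_align_mask)) {
			printf("slot %u preserves the address bits\n", index);
			break;	/* prints slot 31 for these values */
		}
	}
	return 0;
}
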
+phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ size_t mapping_size, size_t alloc_size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int index, i;
+ phys_addr_t tlb_addr;
+
+ if (no_iotlb_memory)
+ panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
+ if (mem_encrypt_active())
+ pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+ if (mapping_size > alloc_size) {
+ dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+ mapping_size, alloc_size);
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+ index = find_slots(dev, orig_addr, alloc_size + offset);
+ if (index == -1) {
+ if (!(attrs & DMA_ATTR_NO_WARN))
+ dev_warn_ratelimited(dev,
+ "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+ alloc_size, io_tlb_nslabs, io_tlb_used);
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
/*
* Save away the mapping from the original address to the DMA address.
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
- for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+ for (i = 0; i < nr_slots(alloc_size + offset); i++) {
+ io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
+ io_tlb_orig_size[index + i] = alloc_size - (i << IO_TLB_SHIFT);
+ }
+ tlb_addr = slot_addr(io_tlb_start, index) + offset;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
return tlb_addr;
}
+static void validate_sync_size_and_truncate(struct device *hwdev, size_t orig_size, size_t *size)
+{
+ if (*size > orig_size) {
+ /* Warn and truncate mapping_size */
+ dev_WARN_ONCE(hwdev, 1,
+ "Attempt for buffer overflow. Original size: %zu. Mapping size: %zu.\n",
+ orig_size, *size);
+ *size = orig_size;
+ }
+}
+
/*
* tlb_addr is the physical address of the bounce buffer to unmap.
*/
@@ -598,10 +667,13 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
enum dma_data_direction dir, unsigned long attrs)
{
unsigned long flags;
- int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+ int i, count, nslots = nr_slots(alloc_size + offset);
+ int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ validate_sync_size_and_truncate(hwdev, io_tlb_orig_size[index], &mapping_size);
+
/*
* First, sync the memory before unmapping the entry
*/
@@ -617,26 +689,30 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
* with slots below and above the pool being returned.
*/
spin_lock_irqsave(&io_tlb_lock, flags);
- {
- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
- io_tlb_list[index + nslots] : 0);
- /*
- * Step 1: return the slots to the free list, merging the
- * slots with superceeding slots
- */
- for (i = index + nslots - 1; i >= index; i--) {
- io_tlb_list[i] = ++count;
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
- }
- /*
- * Step 2: merge the returned slots with the preceding slots,
- * if available (non zero)
- */
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
+ if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+ count = io_tlb_list[index + nslots];
+ else
+ count = 0;
- io_tlb_used -= nslots;
+ /*
+ * Step 1: return the slots to the free list, merging the slots with
+ * superseding slots
+ */
+ for (i = index + nslots - 1; i >= index; i--) {
+ io_tlb_list[i] = ++count;
+ io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ io_tlb_orig_size[i] = 0;
}
+
+ /*
+ * Step 2: merge the returned slots with the preceding slots, if
+ * available (non zero)
+ */
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
+ i--)
+ io_tlb_list[i] = ++count;
+ io_tlb_used -= nslots;
spin_unlock_irqrestore(&io_tlb_lock, flags);
}
@@ -645,11 +721,13 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
enum dma_sync_target target)
{
int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+ size_t orig_size = io_tlb_orig_size[index];
phys_addr_t orig_addr = io_tlb_orig_addr[index];
if (orig_addr == INVALID_PHYS_ADDR)
return;
- orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
+
+ validate_sync_size_and_truncate(hwdev, orig_size, &size);
switch (target) {
case SYNC_FOR_CPU:
@@ -707,7 +785,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
size_t swiotlb_max_mapping_size(struct device *dev)
{
- return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
+ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
}
bool is_swiotlb_active(void)
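
With the assumed 2 KiB slots and 128-slot segments, the rewritten expression still caps a single mapping at one segment; the arithmetic checks out to 256 KiB:

#include <stdio.h>

#define IO_TLB_SHIFT	11			/* assumed */
#define IO_TLB_SIZE	((size_t)1 << IO_TLB_SHIFT)
#define IO_TLB_SEGSIZE	128			/* assumed */

int main(void)
{
	/* 2048 * 128 = 262144 bytes = 256 KiB per single mapping */
	printf("%zu\n", IO_TLB_SIZE * IO_TLB_SEGSIZE);
	return 0;
}
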
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 129dee540a8b..0aeca5f3c0ac 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -269,7 +269,7 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
if (!event->parent) {
/*
* If this is a !child event, we must hold ctx::mutex to
- * stabilize the the event->ctx relation. See
+ * stabilize the event->ctx relation. See
* perf_event_ctx_lock().
*/
lockdep_assert_held(&ctx->mutex);
@@ -1303,7 +1303,7 @@ static void put_ctx(struct perf_event_context *ctx)
* life-time rules separate them. That is an exiting task cannot fork, and a
* spawning task cannot (yet) exit.
*
- * But remember that that these are parent<->child context relations, and
+ * But remember that these are parent<->child context relations, and
* migration does not affect children, therefore these two orderings should not
* interact.
*
@@ -1442,7 +1442,7 @@ static u64 primary_event_id(struct perf_event *event)
/*
* Get the perf_event_context for a task and lock it.
*
- * This has to cope with with the fact that until it is locked,
+ * This has to cope with the fact that until it is locked,
* the context could get moved to another task.
*/
static struct perf_event_context *
@@ -2486,7 +2486,7 @@ static void perf_set_shadow_time(struct perf_event *event,
* But this is a bit hairy.
*
* So instead, we have an explicit cgroup call to remain
- * within the time time source all along. We believe it
+ * within the time source all along. We believe it
* is cleaner and simpler to understand.
*/
if (is_cgroup_event(event))
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 3ea7f8f92f1d..6addc9780319 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1733,7 +1733,7 @@ void uprobe_free_utask(struct task_struct *t)
}
/*
- * Allocate a uprobe_task object for the task if if necessary.
+ * Allocate a uprobe_task object for the task if necessary.
* Called when the thread hits a breakpoint.
*
* Returns:
diff --git a/kernel/fork.c b/kernel/fork.c
index d66cd1014211..d3171e8e88e5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1940,6 +1940,8 @@ static __latent_entropy struct task_struct *copy_process(
p = dup_task_struct(current, node);
if (!p)
goto fork_out;
+ if (args->io_thread)
+ p->flags |= PF_IO_WORKER;
/*
* This _must_ happen before we call free_task(), i.e. before we jump
@@ -2411,6 +2413,34 @@ struct mm_struct *copy_init_mm(void)
}
/*
+ * This is like kernel_clone(), but shaved down and tailored to just
+ * creating io_uring workers. It returns a created task, or an error pointer.
+ * The returned task is inactive, and the caller must fire it up through
+ * wake_up_new_task(p). All signals are blocked in the created task.
+ */
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
+{
+ unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
+ CLONE_IO;
+ struct kernel_clone_args args = {
+ .flags = ((lower_32_bits(flags) | CLONE_VM |
+ CLONE_UNTRACED) & ~CSIGNAL),
+ .exit_signal = (lower_32_bits(flags) & CSIGNAL),
+ .stack = (unsigned long)fn,
+ .stack_size = (unsigned long)arg,
+ .io_thread = 1,
+ };
+ struct task_struct *tsk;
+
+ tsk = copy_process(NULL, 0, node, &args);
+ if (!IS_ERR(tsk)) {
+ sigfillset(&tsk->blocked);
+ sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+ }
+ return tsk;
+}
+
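
A hedged kernel-side sketch of the call pattern the comment describes; worker_fn, start_one_worker, and the surrounding plumbing are illustrative, not io_uring's actual call site:

#include <linux/err.h>
#include <linux/sched/task.h>

/* Illustrative worker body; a real io_uring worker loops on queued work. */
static int worker_fn(void *arg)
{
	return 0;
}

static int start_one_worker(void *data, int node)
{
	struct task_struct *tsk = create_io_thread(worker_fn, data, node);

	if (IS_ERR(tsk))
		return PTR_ERR(tsk);
	wake_up_new_task(tsk);	/* the returned task is inactive until here */
	return 0;
}
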
+/*
* Ok, this is the main fork-routine.
*
* It copies the process, and if successful kick-starts
diff --git a/kernel/groups.c b/kernel/groups.c
index fe7e6385530e..787b381c7c00 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -15,12 +15,7 @@
struct group_info *groups_alloc(int gidsetsize)
{
struct group_info *gi;
- unsigned int len;
-
- len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
- gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
- if (!gi)
- gi = __vmalloc(len, GFP_KERNEL_ACCOUNT);
+ gi = kvmalloc(struct_size(gi, gid, gidsetsize), GFP_KERNEL_ACCOUNT);
if (!gi)
return NULL;
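
struct_size() folds the header-plus-flexible-array computation that the removed lines spelled out by hand; the kernel version additionally saturates on multiplication overflow. A user-space approximation (the simplified macro below drops that overflow guard):

#include <stdio.h>
#include <stdlib.h>

struct group_info {
	int usage;
	int ngroups;
	unsigned int gid[];	/* flexible array member */
};

/* Simplified struct_size(): the kernel version also checks for overflow. */
#define struct_size(p, member, n) \
	(sizeof(*(p)) + sizeof((p)->member[0]) * (size_t)(n))

int main(void)
{
	struct group_info *gi;
	int gidsetsize = 8;

	gi = malloc(struct_size(gi, gid, gidsetsize));	/* sizeof: not evaluated */
	if (!gi)
		return 1;
	gi->ngroups = gidsetsize;
	printf("allocated %zu bytes\n", struct_size(gi, gid, gidsetsize));
	free(gi);
	return 0;
}
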
diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h
index 39d30ccf8d87..48aaf2ac0d0d 100644
--- a/kernel/kexec_internal.h
+++ b/kernel/kexec_internal.h
@@ -13,8 +13,6 @@ void kimage_terminate(struct kimage *image);
int kimage_is_destination_range(struct kimage *image,
unsigned long start, unsigned long end);
-int machine_kexec_post_load(struct kimage *image);
-
extern struct mutex kexec_mutex;
#ifdef CONFIG_KEXEC_FILE
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 03b21135313c..48fff6437901 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1420,7 +1420,7 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
}
/*
- * Performs the wakeup of the the top-waiter and re-enables preemption.
+ * Performs the wakeup of the top-waiter and re-enables preemption.
*/
void rt_mutex_postunlock(struct wake_q_head *wake_q)
{
@@ -1819,7 +1819,7 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
* been started.
* @waiter: the pre-initialized rt_mutex_waiter
*
- * Wait for the the lock acquisition started on our behalf by
+ * Wait for the lock acquisition started on our behalf by
* rt_mutex_start_proxy_lock(). Upon failure, the caller must call
* rt_mutex_cleanup_proxy_lock().
*
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index ba67600c7b2c..abba5df50006 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -1048,7 +1048,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
/*
* If there were already threads queued before us and:
- * 1) there are no no active locks, wake the front
+ * 1) there are no active locks, wake the front
* queued process(es) as the handoff bit might be set.
* 2) there are no active writers and some readers, the lock
* must be read owned; so we try to wake any read lock
diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c
index d9dd94defc0a..9aa855a96c4a 100644
--- a/kernel/locking/semaphore.c
+++ b/kernel/locking/semaphore.c
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(down_killable);
* @sem: the semaphore to be acquired
*
* Try to acquire the semaphore atomically. Returns 0 if the semaphore has
- * been acquired successfully or 1 if it it cannot be acquired.
+ * been acquired successfully or 1 if it cannot be acquired.
*
* NOTE: This return value is inverted from both spin_trylock and
* mutex_trylock! Be careful about this when converting code.
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 61db50f7ca86..821cf1723814 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -375,7 +375,7 @@ static int ptrace_attach(struct task_struct *task, long request,
audit_ptrace(task);
retval = -EPERM;
- if (unlikely(task->flags & PF_KTHREAD))
+ if (unlikely(task->flags & (PF_KTHREAD | PF_IO_WORKER)))
goto out;
if (same_thread_group(task, current))
goto out;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 8a8bd7b13634..794c2cb945f8 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5126,7 +5126,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
/*
* When a group wakes up we want to make sure that its quota is not already
* expired/exceeded, otherwise it may be allowed to steal additional ticks of
- * runtime as update_curr() throttling can not not trigger until it's on-rq.
+ * runtime as update_curr() throttling can not trigger until it's on-rq.
*/
static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
{
diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c
index 08ae45ad9261..acdae625c636 100644
--- a/kernel/sched/membarrier.c
+++ b/kernel/sched/membarrier.c
@@ -454,7 +454,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
/*
* For each cpu runqueue, if the task's mm match @mm, ensure that all
- * @mm's membarrier state set bits are also set in in the runqueue's
+ * @mm's membarrier state set bits are also set in the runqueue's
* membarrier state. This ensures that a runqueue scheduling
* between threads which are users of @mm has its membarrier state
* updated.
diff --git a/kernel/signal.c b/kernel/signal.c
index 5ad8566534e7..ba4d1ef39a9e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -91,7 +91,7 @@ static bool sig_task_ignored(struct task_struct *t, int sig, bool force)
return true;
/* Only allow kernel generated signals to this kthread */
- if (unlikely((t->flags & PF_KTHREAD) &&
+ if (unlikely((t->flags & (PF_KTHREAD | PF_IO_WORKER)) &&
(handler == SIG_KTHREAD_KERNEL) && !force))
return true;
@@ -1096,7 +1096,7 @@ static int __send_signal(int sig, struct kernel_siginfo *info, struct task_struc
/*
* Skip useless siginfo allocation for SIGKILL and kernel threads.
*/
- if ((sig == SIGKILL) || (t->flags & PF_KTHREAD))
+ if ((sig == SIGKILL) || (t->flags & (PF_KTHREAD | PF_IO_WORKER)))
goto out_set;
/*
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c9fbdd848138..62fbd09b5dc1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2962,7 +2962,7 @@ static struct ctl_table vm_table[] = {
.data = &block_dump,
.maxlen = sizeof(block_dump),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
@@ -2970,7 +2970,7 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_vfs_cache_pressure,
.maxlen = sizeof(sysctl_vfs_cache_pressure),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
@@ -2980,7 +2980,7 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_legacy_va_layout,
.maxlen = sizeof(sysctl_legacy_va_layout),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
#endif
@@ -2990,7 +2990,7 @@ static struct ctl_table vm_table[] = {
.data = &node_reclaim_mode,
.maxlen = sizeof(node_reclaim_mode),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
},
{
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 9c266b93cbc0..7fa82778c3e6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -694,7 +694,7 @@ config TRACEPOINT_BENCHMARK
help
This option creates the tracepoint "benchmark:benchmark_event".
When the tracepoint is enabled, it kicks off a kernel thread that
- goes into an infinite loop (calling cond_sched() to let other tasks
+ goes into an infinite loop (calling cond_resched() to let other tasks
run), and calls the tracepoint. Each iteration will record the time
it took to write to the tracepoint and the next iteration that
data will be passed to the tracepoint itself. That is, the tracepoint
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 7e44cea89fdc..b28d3e5013cd 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_SYNTH_EVENTS) += trace_events_synth.o
obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
+obj-$(CONFIG_TRACEPOINTS) += error_report-traces.o
obj-$(CONFIG_TRACEPOINTS) += power-traces.o
ifeq ($(CONFIG_PM),y)
obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index c286c13bd31a..c221e4c3f625 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -312,8 +312,6 @@ record_it:
static void blk_trace_free(struct blk_trace *bt)
{
- debugfs_remove(bt->msg_file);
- debugfs_remove(bt->dropped_file);
relay_close(bt->rchan);
debugfs_remove(bt->dir);
free_percpu(bt->sequence);
@@ -545,10 +543,8 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
INIT_LIST_HEAD(&bt->running_list);
ret = -EIO;
- bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
- &blk_dropped_fops);
-
- bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
+ debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
+ debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
bt->rchan = relay_open("trace", dir, buts->buf_size,
buts->buf_nr, &blk_relay_callbacks, bt);
@@ -1868,7 +1864,17 @@ void blk_trace_remove_sysfs(struct device *dev)
#ifdef CONFIG_EVENT_TRACING
-void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes)
+/**
+ * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string.
+ * @rwbs: buffer to be filled
+ * @op: REQ_OP_XXX for the tracepoint
+ *
+ * Description:
+ * Maps the REQ_OP_XXX to a character and fills the buffer provided by the
+ * caller with the resulting string.
+ */
+void blk_fill_rwbs(char *rwbs, unsigned int op)
{
int i = 0;
diff --git a/kernel/trace/error_report-traces.c b/kernel/trace/error_report-traces.c
new file mode 100644
index 000000000000..f89792c25b11
--- /dev/null
+++ b/kernel/trace/error_report-traces.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Error reporting trace points.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/error_report.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(error_report_end);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b9dad3500041..68744c51517e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2815,6 +2815,17 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
return 0;
/*
+ * It's possible that the event time delta is zero
+ * (has the same time stamp as the previous event)
+ * in which case write_stamp and before_stamp could
+ * be the same. In such a case, force before_stamp
+ * to be different than write_stamp. It doesn't
+ * matter what it is, as long as it's different.
+ */
+ if (!delta)
+ rb_time_set(&cpu_buffer->before_stamp, 0);
+
+ /*
* If an event were to come in now, it would see that the
* write_stamp and the before_stamp are different, and assume
* that this event just added itself before updating
@@ -3307,9 +3318,13 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
goto out;
}
atomic_inc(&cpu_buffer->record_disabled);
- pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld after:%lld\n",
- cpu_buffer->cpu,
- ts + info->delta, info->ts, info->delta, info->after);
+ /* There are some cases during boot up when this can happen */
+ WARN_ON_ONCE(system_state != SYSTEM_BOOTING);
+ pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s\n",
+ cpu_buffer->cpu,
+ ts + info->delta, info->ts, info->delta,
+ info->before, info->after,
+ full ? " (full)" : "");
dump_buffer_page(bpage, info, tail);
atomic_dec(&ts_dump);
/* Do not re-enable checking */
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e295c413580e..eccb4e1187cc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1929,6 +1929,12 @@ static int run_tracer_selftest(struct tracer *type)
if (!selftests_can_run)
return save_selftest(type);
+ if (!tracing_is_on()) {
+ pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
+ type->name);
+ return 0;
+ }
+
/*
* Run a selftest on this tracer.
* Here we reset the trace buffer, and set the current
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dec13ff66077..a6446c03cfbc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -605,7 +605,6 @@ void trace_graph_function(struct trace_array *tr,
void trace_latency_header(struct seq_file *m);
void trace_default_header(struct seq_file *m);
void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
-int trace_empty(struct trace_iterator *iter);
void trace_graph_return(struct ftrace_graph_ret *trace);
int trace_graph_entry(struct ftrace_graph_ent *trace);
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index 2979a96595b4..8d71e6c83f10 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1225,8 +1225,10 @@ static int __create_synth_event(const char *name, const char *raw_fields)
goto err;
}
- if (!argc)
+ if (!argc) {
+ argv_free(argv);
continue;
+ }
n_fields_this_loop = 0;
consumed = 0;
diff --git a/lib/Kconfig b/lib/Kconfig
index 46806332a8cc..a38cc61256f1 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -651,6 +651,15 @@ config STACKDEPOT
bool
select STACKTRACE
+config STACK_HASH_ORDER
+ int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
+ range 12 20
+ default 20
+ depends on STACKDEPOT
+ help
+ Select the hash size as a power of 2 for the stackdepot hash table.
+ Choose a lower value to reduce the memory impact.
+
config SBITMAP
bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index f9febffffc21..2779c29d9981 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -938,6 +938,7 @@ config DEBUG_STACKOVERFLOW
If in doubt, say "N".
source "lib/Kconfig.kasan"
+source "lib/Kconfig.kfence"
endmenu # "Memory Debugging"
diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence
new file mode 100644
index 000000000000..78f50ccb3b45
--- /dev/null
+++ b/lib/Kconfig.kfence
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config HAVE_ARCH_KFENCE
+ bool
+
+menuconfig KFENCE
+ bool "KFENCE: low-overhead sampling-based memory safety error detector"
+ depends on HAVE_ARCH_KFENCE && (SLAB || SLUB)
+ select STACKTRACE
+ help
+ KFENCE is a low-overhead sampling-based detector of heap out-of-bounds
+ access, use-after-free, and invalid-free errors. KFENCE is designed
+ to have negligible cost to permit enabling it in production
+ environments.
+
+ See <file:Documentation/dev-tools/kfence.rst> for more details.
+
+ Note that KFENCE is not a substitute for explicit testing with tools
+ such as KASAN. KFENCE can detect a subset of bugs that KASAN can
+ detect, albeit at very different performance profiles. If you can
+ afford to use KASAN, continue using KASAN, for example in test
+ environments. If your kernel targets production use, and cannot
+ enable KASAN due to its cost, consider using KFENCE.
+
+if KFENCE
+
+config KFENCE_STATIC_KEYS
+ bool "Use static keys to set up allocations"
+ default y
+ depends on JUMP_LABEL # To ensure performance, require jump labels
+ help
+ Use static keys (static branches) to set up KFENCE allocations. Using
+ static keys is normally recommended, because it avoids a dynamic
+ branch in the allocator's fast path. However, with very low sample
+ intervals, or on systems that do not support jump labels, a dynamic
+ branch may still be an acceptable performance trade-off.
+
+config KFENCE_SAMPLE_INTERVAL
+ int "Default sample interval in milliseconds"
+ default 100
+ help
+ The KFENCE sample interval determines the frequency with which heap
+ allocations will be guarded by KFENCE. May be overridden via boot
+ parameter "kfence.sample_interval".
+
+ Set this to 0 to disable KFENCE by default, in which case only
+ setting "kfence.sample_interval" to a non-zero value enables KFENCE.
+
+config KFENCE_NUM_OBJECTS
+ int "Number of guarded objects available"
+ range 1 65535
+ default 255
+ help
+ The number of guarded objects available. For each KFENCE object, 2
+ pages are required: one containing the object, and two adjacent
+ ones used as guard pages.
+
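
Back-of-envelope cost of the default, assuming 4 KiB pages and the pool layout the KFENCE sources use ((objects + 1) * 2 pages — that layout is defined outside this hunk):

#include <stdio.h>

int main(void)
{
	/* Assumed layout: each object page is fenced by guard pages that
	 * neighbouring objects share, giving (objects + 1) * 2 pages. */
	unsigned long objects = 255;	/* CONFIG_KFENCE_NUM_OBJECTS default */
	unsigned long page_size = 4096;	/* assumed */

	printf("pool = %lu KiB\n", (objects + 1) * 2 * page_size / 1024);
	return 0;	/* prints 2048 KiB, i.e. 2 MiB */
}
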
+config KFENCE_STRESS_TEST_FAULTS
+ int "Stress testing of fault handling and error reporting" if EXPERT
+ default 0
+ help
+ The inverse probability with which to randomly protect KFENCE object
+ pages, resulting in spurious use-after-frees. The main purpose of
+ this option is to stress test KFENCE with concurrent error reports
+ and allocations/frees. A value of 0 disables stress testing logic.
+
+ Only for KFENCE testing; set to 0 if you are not a KFENCE developer.
+
+config KFENCE_KUNIT_TEST
+ tristate "KFENCE integration test suite" if !KUNIT_ALL_TESTS
+ default KUNIT_ALL_TESTS
+ depends on TRACEPOINTS && KUNIT
+ help
+ Test suite for KFENCE, testing various error detection scenarios with
+ various allocation types, and checking that reports are correctly
+ output to console.
+
+ Say Y here if you want the test to be built into the kernel and run
+ during boot; say M if you want the test to build as a module; say N
+ if you are unsure.
+
+endif # KFENCE
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 3a0b1c930733..e5372a13511d 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -112,23 +112,6 @@ config UBSAN_UNREACHABLE
This option enables -fsanitize=unreachable which checks for control
flow reaching an expected-to-be-unreachable position.
-config UBSAN_SIGNED_OVERFLOW
- bool "Perform checking for signed arithmetic overflow"
- default UBSAN
- depends on $(cc-option,-fsanitize=signed-integer-overflow)
- help
- This option enables -fsanitize=signed-integer-overflow which checks
- for overflow of any arithmetic operations with signed integers.
-
-config UBSAN_UNSIGNED_OVERFLOW
- bool "Perform checking for unsigned arithmetic overflow"
- depends on $(cc-option,-fsanitize=unsigned-integer-overflow)
- depends on !X86_32 # avoid excessive stack usage on x86-32/clang
- help
- This option enables -fsanitize=unsigned-integer-overflow which checks
- for overflow of any arithmetic operations with unsigned integers. This
- currently causes x86 to fail to boot.
-
config UBSAN_OBJECT_SIZE
bool "Perform checking for accesses beyond the end of objects"
default UBSAN
diff --git a/lib/cmdline.c b/lib/cmdline.c
index dfd4c4423f9a..5d474c626e24 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -228,7 +228,6 @@ char *next_arg(char *args, char **param, char **val)
{
unsigned int i, equals = 0;
int in_quote = 0, quoted = 0;
- char *next;
if (*args == '"') {
args++;
@@ -266,10 +265,10 @@ char *next_arg(char *args, char **param, char **val)
if (args[i]) {
args[i] = '\0';
- next = args + i + 1;
+ args += i + 1;
} else
- next = args + i;
+ args += i;
/* Chew up trailing spaces. */
- return skip_spaces(next);
+ return skip_spaces(args);
}
diff --git a/lib/extable.c b/lib/extable.c
index c3e59caf7ffa..9c9f40bd2b3d 100644
--- a/lib/extable.c
+++ b/lib/extable.c
@@ -21,7 +21,6 @@ static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
}
#endif
-#ifndef ARCH_HAS_SORT_EXTABLE
#ifndef ARCH_HAS_RELATIVE_EXTABLE
#define swap_ex NULL
#else
@@ -88,9 +87,6 @@ void trim_init_extable(struct module *m)
m->num_exentries--;
}
#endif /* CONFIG_MODULES */
-#endif /* !ARCH_HAS_SORT_EXTABLE */
-
-#ifndef ARCH_HAS_SEARCH_EXTABLE
static int cmp_ex_search(const void *key, const void *elt)
{
@@ -120,4 +116,3 @@ search_extable(const struct exception_table_entry *base,
return bsearch(&value, base, num,
sizeof(struct exception_table_entry), cmp_ex_search);
}
-#endif
diff --git a/lib/genalloc.c b/lib/genalloc.c
index dab97bb69df6..5dcf9cdcbc46 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -81,7 +81,8 @@ static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
* users set the same bit, one user will return remain bits, otherwise
* return 0.
*/
-static int bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
+static unsigned long
+bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
{
unsigned long *p = map + BIT_WORD(start);
const unsigned long size = start + nr;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index d8ca336ff9cd..f66c62aa7154 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -464,20 +464,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
}
EXPORT_SYMBOL(iov_iter_init);
-static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
-{
- char *from = kmap_atomic(page);
- memcpy(to, from + offset, len);
- kunmap_atomic(from);
-}
-
-static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
-{
- char *to = kmap_atomic(page);
- memcpy(to + offset, from, len);
- kunmap_atomic(to);
-}
-
static void memzero_page(struct page *page, size_t offset, size_t len)
{
char *addr = kmap_atomic(page);
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index 890dcc2e984e..49f67a0c6e5d 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -31,6 +31,7 @@
#include <linux/stackdepot.h>
#include <linux/string.h>
#include <linux/types.h>
+#include <linux/memblock.h>
#define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
@@ -141,14 +142,38 @@ static struct stack_record *depot_alloc_stack(unsigned long *entries, int size,
return stack;
}
-#define STACK_HASH_ORDER 20
-#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
#define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
#define STACK_HASH_SEED 0x9747b28c
-static struct stack_record *stack_table[STACK_HASH_SIZE] = {
- [0 ... STACK_HASH_SIZE - 1] = NULL
-};
+static bool stack_depot_disable;
+static struct stack_record **stack_table;
+
+static int __init is_stack_depot_disabled(char *str)
+{
+ int ret;
+
+ ret = kstrtobool(str, &stack_depot_disable);
+ if (!ret && stack_depot_disable) {
+ pr_info("Stack Depot is disabled\n");
+ stack_table = NULL;
+ }
+ return 0;
+}
+early_param("stack_depot_disable", is_stack_depot_disabled);
+
+int __init stack_depot_init(void)
+{
+ if (!stack_depot_disable) {
+ size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+ int i;
+
+ stack_table = memblock_alloc(size, size);
+ for (i = 0; i < STACK_HASH_SIZE; i++)
+ stack_table[i] = NULL;
+ }
+ return 0;
+}
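
The table is an array of STACK_HASH_SIZE bucket pointers, so the byte cost scales directly with CONFIG_STACK_HASH_ORDER. A quick tally, assuming 8-byte pointers:

#include <stdio.h>

int main(void)
{
	/* Table cost: (1 << order) pointers, 8 bytes each on 64-bit. */
	for (unsigned int order = 12; order <= 20; order += 4) {
		unsigned long slots = 1UL << order;

		printf("order %2u: %8lu slots, %5lu KiB of pointers\n",
		       order, slots,
		       (unsigned long)(slots * sizeof(void *) / 1024));
	}
	return 0;
}
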
/* Calculate hash for a stack */
static inline u32 hash_stack(unsigned long *entries, unsigned int size)
@@ -242,7 +267,7 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
unsigned long flags;
u32 hash;
- if (unlikely(nr_entries == 0))
+ if (unlikely(nr_entries == 0) || stack_depot_disable)
goto fast_exit;
hash = hash_stack(entries, nr_entries);
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 25576303897b..e5647d147b35 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -252,11 +252,14 @@ static void kmalloc_large_oob_right(struct kunit *test)
kfree(ptr);
}
-static void kmalloc_oob_krealloc_more(struct kunit *test)
+static void krealloc_more_oob_helper(struct kunit *test,
+ size_t size1, size_t size2)
{
char *ptr1, *ptr2;
- size_t size1 = 17;
- size_t size2 = 19;
+ size_t middle;
+
+ KUNIT_ASSERT_LT(test, size1, size2);
+ middle = size1 + (size2 - size1) / 2;
ptr1 = kmalloc(size1, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
@@ -264,15 +267,31 @@ static void kmalloc_oob_krealloc_more(struct kunit *test)
ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
- KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x');
+ /* All offsets up to size2 must be accessible. */
+ ptr2[size1 - 1] = 'x';
+ ptr2[size1] = 'x';
+ ptr2[middle] = 'x';
+ ptr2[size2 - 1] = 'x';
+
+ /* Generic mode is precise, so unaligned size2 must be inaccessible. */
+ if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x');
+
+ /* For all modes, the first aligned offset after size2 must be inaccessible. */
+ KUNIT_EXPECT_KASAN_FAIL(test,
+ ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x');
+
kfree(ptr2);
}
-static void kmalloc_oob_krealloc_less(struct kunit *test)
+static void krealloc_less_oob_helper(struct kunit *test,
+ size_t size1, size_t size2)
{
char *ptr1, *ptr2;
- size_t size1 = 17;
- size_t size2 = 15;
+ size_t middle;
+
+ KUNIT_ASSERT_LT(test, size2, size1);
+ middle = size2 + (size1 - size2) / 2;
ptr1 = kmalloc(size1, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
@@ -280,10 +299,79 @@ static void kmalloc_oob_krealloc_less(struct kunit *test)
ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
- KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x');
+ /* Must be accessible for all modes. */
+ ptr2[size2 - 1] = 'x';
+
+ /* Generic mode is precise, so unaligned size2 must be inaccessible. */
+ if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x');
+
+ /* For all modes, the first aligned offset after size2 must be inaccessible. */
+ KUNIT_EXPECT_KASAN_FAIL(test,
+ ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x');
+
+ /*
+ * For all modes, size2, middle, and size1 should land in separate
+ * granules and thus the latter two offsets should be inaccessible.
+ */
+ KUNIT_EXPECT_LE(test, round_up(size2, KASAN_GRANULE_SIZE),
+ round_down(middle, KASAN_GRANULE_SIZE));
+ KUNIT_EXPECT_LE(test, round_up(middle, KASAN_GRANULE_SIZE),
+ round_down(size1, KASAN_GRANULE_SIZE));
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr2[middle] = 'x');
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1 - 1] = 'x');
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1] = 'x');
+
kfree(ptr2);
}
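
The helpers lean on granule rounding: offsets that share a granule with a valid byte cannot be expected to fault. A stand-alone check, assuming generic KASAN's 8-byte granule:

#include <stdio.h>

#define KASAN_GRANULE_SIZE	8UL	/* assumed: generic KASAN granule */
#define round_up(x, a)		(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long size2 = 201;

	/* Bytes 201..207 share a granule with valid byte 200, so only
	 * offsets from the next aligned boundary on are guaranteed bad. */
	printf("first guaranteed-bad offset: %lu\n",
	       round_up(size2, KASAN_GRANULE_SIZE));	/* prints 208 */
	return 0;
}
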
+static void krealloc_more_oob(struct kunit *test)
+{
+ krealloc_more_oob_helper(test, 201, 235);
+}
+
+static void krealloc_less_oob(struct kunit *test)
+{
+ krealloc_less_oob_helper(test, 235, 201);
+}
+
+static void krealloc_pagealloc_more_oob(struct kunit *test)
+{
+ /* page_alloc fallback is only implemented for SLUB. */
+ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
+
+ krealloc_more_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 201,
+ KMALLOC_MAX_CACHE_SIZE + 235);
+}
+
+static void krealloc_pagealloc_less_oob(struct kunit *test)
+{
+ /* page_alloc fallback is only implemented for SLUB. */
+ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
+
+ krealloc_less_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 235,
+ KMALLOC_MAX_CACHE_SIZE + 201);
+}
+
+/*
+ * Check that krealloc() detects a use-after-free, returns NULL,
+ * and doesn't unpoison the freed object.
+ */
+static void krealloc_uaf(struct kunit *test)
+{
+ char *ptr1, *ptr2;
+ int size1 = 201;
+ int size2 = 235;
+
+ ptr1 = kmalloc(size1, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
+ kfree(ptr1);
+
+ KUNIT_EXPECT_KASAN_FAIL(test, ptr2 = krealloc(ptr1, size2, GFP_KERNEL));
+ KUNIT_ASSERT_PTR_EQ(test, (void *)ptr2, NULL);
+ KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)ptr1);
+}
+
static void kmalloc_oob_16(struct kunit *test)
{
struct {
@@ -977,8 +1065,11 @@ static struct kunit_case kasan_kunit_test_cases[] = {
KUNIT_CASE(pagealloc_oob_right),
KUNIT_CASE(pagealloc_uaf),
KUNIT_CASE(kmalloc_large_oob_right),
- KUNIT_CASE(kmalloc_oob_krealloc_more),
- KUNIT_CASE(kmalloc_oob_krealloc_less),
+ KUNIT_CASE(krealloc_more_oob),
+ KUNIT_CASE(krealloc_less_oob),
+ KUNIT_CASE(krealloc_pagealloc_more_oob),
+ KUNIT_CASE(krealloc_pagealloc_less_oob),
+ KUNIT_CASE(krealloc_uaf),
KUNIT_CASE(kmalloc_oob_16),
KUNIT_CASE(kmalloc_uaf_16),
KUNIT_CASE(kmalloc_oob_in_memset),
diff --git a/lib/test_ubsan.c b/lib/test_ubsan.c
index 5e5d9355ef49..7e7bbd0f3fd2 100644
--- a/lib/test_ubsan.c
+++ b/lib/test_ubsan.c
@@ -11,51 +11,6 @@ typedef void(*test_ubsan_fp)(void);
#config, IS_ENABLED(config) ? "y" : "n"); \
} while (0)
-static void test_ubsan_add_overflow(void)
-{
- volatile int val = INT_MAX;
- volatile unsigned int uval = UINT_MAX;
-
- UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
- val += 2;
-
- UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
- uval += 2;
-}
-
-static void test_ubsan_sub_overflow(void)
-{
- volatile int val = INT_MIN;
- volatile unsigned int uval = 0;
- volatile int val2 = 2;
-
- UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
- val -= val2;
-
- UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
- uval -= val2;
-}
-
-static void test_ubsan_mul_overflow(void)
-{
- volatile int val = INT_MAX / 2;
- volatile unsigned int uval = UINT_MAX / 2;
-
- UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
- val *= 3;
-
- UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
- uval *= 3;
-}
-
-static void test_ubsan_negate_overflow(void)
-{
- volatile int val = INT_MIN;
-
- UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
- val = -val;
-}
-
static void test_ubsan_divrem_overflow(void)
{
volatile int val = 16;
@@ -155,10 +110,6 @@ static void test_ubsan_object_size_mismatch(void)
}
static const test_ubsan_fp test_ubsan_array[] = {
- test_ubsan_add_overflow,
- test_ubsan_sub_overflow,
- test_ubsan_mul_overflow,
- test_ubsan_negate_overflow,
test_ubsan_shift_out_of_bounds,
test_ubsan_out_of_bounds,
test_ubsan_load_invalid_value,
diff --git a/lib/ubsan.c b/lib/ubsan.c
index bec38c64d6a6..26229973049d 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -163,74 +163,6 @@ static void ubsan_epilogue(void)
}
}
-static void handle_overflow(struct overflow_data *data, void *lhs,
- void *rhs, char op)
-{
-
- struct type_descriptor *type = data->type;
- char lhs_val_str[VALUE_LENGTH];
- char rhs_val_str[VALUE_LENGTH];
-
- if (suppress_report(&data->location))
- return;
-
- ubsan_prologue(&data->location, type_is_signed(type) ?
- "signed-integer-overflow" :
- "unsigned-integer-overflow");
-
- val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs);
- val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs);
- pr_err("%s %c %s cannot be represented in type %s\n",
- lhs_val_str,
- op,
- rhs_val_str,
- type->type_name);
-
- ubsan_epilogue();
-}
-
-void __ubsan_handle_add_overflow(void *data,
- void *lhs, void *rhs)
-{
-
- handle_overflow(data, lhs, rhs, '+');
-}
-EXPORT_SYMBOL(__ubsan_handle_add_overflow);
-
-void __ubsan_handle_sub_overflow(void *data,
- void *lhs, void *rhs)
-{
- handle_overflow(data, lhs, rhs, '-');
-}
-EXPORT_SYMBOL(__ubsan_handle_sub_overflow);
-
-void __ubsan_handle_mul_overflow(void *data,
- void *lhs, void *rhs)
-{
- handle_overflow(data, lhs, rhs, '*');
-}
-EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
-
-void __ubsan_handle_negate_overflow(void *_data, void *old_val)
-{
- struct overflow_data *data = _data;
- char old_val_str[VALUE_LENGTH];
-
- if (suppress_report(&data->location))
- return;
-
- ubsan_prologue(&data->location, "negation-overflow");
-
- val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val);
-
- pr_err("negation of %s cannot be represented in type %s:\n",
- old_val_str, data->type->type_name);
-
- ubsan_epilogue();
-}
-EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
-
-
void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs)
{
struct overflow_data *data = _data;
diff --git a/mm/Makefile b/mm/Makefile
index 135bbb65511a..72227b24a616 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_PAGE_POISONING) += page_poison.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_KASAN) += kasan/
+obj-$(CONFIG_KFENCE) += kfence/
obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_MEMTEST) += memtest.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index eca555f658d9..576220acd686 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -8,6 +8,7 @@
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/mm.h>
+#include <linux/sched/mm.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/writeback.h>
@@ -578,7 +579,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
{
struct bdi_writeback *wb;
- might_sleep_if(gfpflags_allow_blocking(gfp));
+ might_alloc(gfp);
if (!memcg_css->parent)
return &bdi->wb;
diff --git a/mm/cma.c b/mm/cma.c
index 20c4f6f40037..54eee2119822 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -94,34 +94,29 @@ static void cma_clear_bitmap(struct cma *cma, unsigned long pfn,
static void __init cma_activate_area(struct cma *cma)
{
- unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
- unsigned i = cma->count >> pageblock_order;
+ unsigned long base_pfn = cma->base_pfn, pfn;
struct zone *zone;
cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL);
if (!cma->bitmap)
goto out_error;
- WARN_ON_ONCE(!pfn_valid(pfn));
- zone = page_zone(pfn_to_page(pfn));
-
- do {
- unsigned j;
-
- base_pfn = pfn;
- for (j = pageblock_nr_pages; j; --j, pfn++) {
- WARN_ON_ONCE(!pfn_valid(pfn));
- /*
- * alloc_contig_range requires the pfn range
- * specified to be in the same zone. Make this
- * simple by forcing the entire CMA resv range
- * to be in the same zone.
- */
- if (page_zone(pfn_to_page(pfn)) != zone)
- goto not_in_zone;
- }
- init_cma_reserved_pageblock(pfn_to_page(base_pfn));
- } while (--i);
+ /*
+ * alloc_contig_range() requires the pfn range specified to be in the
+ * same zone. Simplify by forcing the entire CMA resv range to be in the
+ * same zone.
+ */
+ WARN_ON_ONCE(!pfn_valid(base_pfn));
+ zone = page_zone(pfn_to_page(base_pfn));
+ for (pfn = base_pfn + 1; pfn < base_pfn + cma->count; pfn++) {
+ WARN_ON_ONCE(!pfn_valid(pfn));
+ if (page_zone(pfn_to_page(pfn)) != zone)
+ goto not_in_zone;
+ }
+
+ for (pfn = base_pfn; pfn < base_pfn + cma->count;
+ pfn += pageblock_nr_pages)
+ init_cma_reserved_pageblock(pfn_to_page(pfn));
mutex_init(&cma->lock);
@@ -135,6 +130,10 @@ static void __init cma_activate_area(struct cma *cma)
not_in_zone:
bitmap_free(cma->bitmap);
out_error:
+ /* Expose all pages to the buddy, they are useless for CMA. */
+ for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn++)
+ free_reserved_page(pfn_to_page(pfn));
+ totalcma_pages -= cma->count;
cma->count = 0;
pr_err("CMA area %s could not be activated\n", cma->name);
return;
@@ -336,6 +335,23 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
limit = highmem_start;
}
+ /*
+ * If there is enough memory, try a bottom-up allocation first.
+ * It will place the new cma area close to the start of the node
+ * and guarantee that the compaction is moving pages out of the
+ * cma area and not into it.
+ * Avoid using first 4GB to not interfere with constrained zones
+ * like DMA/DMA32.
+ */
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+ if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) {
+ memblock_set_bottom_up(true);
+ addr = memblock_alloc_range_nid(size, alignment, SZ_4G,
+ limit, nid, true);
+ memblock_set_bottom_up(false);
+ }
+#endif
+
if (!addr) {
addr = memblock_alloc_range_nid(size, alignment, base,
limit, nid, true);
@@ -484,8 +500,8 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
}
if (ret && !no_warn) {
- pr_err("%s: alloc failed, req-size: %zu pages, ret: %d\n",
- __func__, count, ret);
+ pr_err("%s: %s: alloc failed, req-size: %zu pages, ret: %d\n",
+ __func__, cma->name, count, ret);
cma_debug_show_areas(cma);
}
diff --git a/mm/dmapool.c b/mm/dmapool.c
index a97c97232337..f3791532fef2 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -28,6 +28,7 @@
#include <linux/mutex.h>
#include <linux/poison.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/spinlock.h>
@@ -319,7 +320,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
size_t offset;
void *retval;
- might_sleep_if(gfpflags_allow_blocking(mem_flags));
+ might_alloc(mem_flags);
spin_lock_irqsave(&pool->lock, flags);
list_for_each_entry(page, &pool->page_list, page_list) {
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index a0018ad1a1f6..164607c7cdf1 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -181,17 +181,17 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
}
}
- if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n",
- addr, size))
+ if (WARN(slot < 0, "%s(%p, %08lx) not found slot\n",
+ __func__, addr, size))
return;
if (WARN(prev_size[slot] != size,
- "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
- addr, size, slot, prev_size[slot]))
+ "%s(%p, %08lx) [%d] size not consistent %08lx\n",
+ __func__, addr, size, slot, prev_size[slot]))
return;
- WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n",
- addr, size, slot);
+ WARN(early_ioremap_debug, "%s(%p, %08lx) [%d]\n",
+ __func__, addr, size, slot);
virt_addr = (unsigned long)addr;
if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)))
diff --git a/mm/filemap.c b/mm/filemap.c
index 46a8b9e82434..43700480d897 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1658,8 +1658,8 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
}
EXPORT_SYMBOL(page_cache_prev_miss);
-/**
- * find_get_entry - find and get a page cache entry
+/*
+ * mapping_get_entry - Get a page cache entry.
* @mapping: the address_space to search
* @index: The page cache index.
*
@@ -1671,7 +1671,8 @@ EXPORT_SYMBOL(page_cache_prev_miss);
*
* Return: The head page or shadow entry, %NULL if nothing is found.
*/
-struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
+static struct page *mapping_get_entry(struct address_space *mapping,
+ pgoff_t index)
{
XA_STATE(xas, &mapping->i_pages, index);
struct page *page;
@@ -1708,39 +1709,6 @@ out:
}
/**
- * find_lock_entry - Locate and lock a page cache entry.
- * @mapping: The address_space to search.
- * @index: The page cache index.
- *
- * Looks up the page at @mapping & @index. If there is a page in the
- * cache, the head page is returned locked and with an increased refcount.
- *
- * If the slot holds a shadow entry of a previously evicted page, or a
- * swap entry from shmem/tmpfs, it is returned.
- *
- * Context: May sleep.
- * Return: The head page or shadow entry, %NULL if nothing is found.
- */
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
-{
- struct page *page;
-
-repeat:
- page = find_get_entry(mapping, index);
- if (page && !xa_is_value(page)) {
- lock_page(page);
- /* Has the page been truncated? */
- if (unlikely(page->mapping != mapping)) {
- unlock_page(page);
- put_page(page);
- goto repeat;
- }
- VM_BUG_ON_PAGE(!thp_contains(page, index), page);
- }
- return page;
-}
-
-/**
* pagecache_get_page - Find and get a reference to a page.
* @mapping: The address_space to search.
* @index: The page index.
@@ -1755,6 +1723,8 @@ repeat:
* * %FGP_LOCK - The page is returned locked.
* * %FGP_HEAD - If the page is present and a THP, return the head page
* rather than the exact page specified by the index.
+ * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it
+ * instead of allocating a new page to replace it.
* * %FGP_CREAT - If no page is present then a new page is allocated using
* @gfp_mask and added to the page cache and the VM's LRU list.
* The page is returned locked and with an increased refcount.
@@ -1778,9 +1748,12 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
struct page *page;
repeat:
- page = find_get_entry(mapping, index);
- if (xa_is_value(page))
+ page = mapping_get_entry(mapping, index);
+ if (xa_is_value(page)) {
+ if (fgp_flags & FGP_ENTRY)
+ return page;
page = NULL;
+ }
if (!page)
goto no_page;
@@ -1852,18 +1825,53 @@ no_page:
}
EXPORT_SYMBOL(pagecache_get_page);
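A minimal sketch of a caller using the new FGP_ENTRY flag (handle_value_entry() is a hypothetical helper; shmem's lookup in this series uses the same flag combination):

	struct page *page;

	page = pagecache_get_page(mapping, index,
				  FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
	if (xa_is_value(page)) {
		/* Not a page: a shadow, swap or DAX entry. */
		handle_value_entry(mapping, index, page);
	} else if (page) {
		/* A locked head page with an elevated refcount. */
		unlock_page(page);
		put_page(page);
	}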
+static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
+ xa_mark_t mark)
+{
+ struct page *page;
+
+retry:
+ if (mark == XA_PRESENT)
+ page = xas_find(xas, max);
+ else
+ page = xas_find_marked(xas, max, mark);
+
+ if (xas_retry(xas, page))
+ goto retry;
+ /*
+ * A shadow entry of a recently evicted page, a swap
+ * entry from shmem/tmpfs or a DAX entry. Return it
+ * without attempting to raise page count.
+ */
+ if (!page || xa_is_value(page))
+ return page;
+
+ if (!page_cache_get_speculative(page))
+ goto reset;
+
+ /* Has the page moved or been split? */
+ if (unlikely(page != xas_reload(xas))) {
+ put_page(page);
+ goto reset;
+ }
+
+ return page;
+reset:
+ xas_reset(xas);
+ goto retry;
+}
+
/**
* find_get_entries - gang pagecache lookup
* @mapping: The address_space to search
* @start: The starting page cache index
- * @nr_entries: The maximum number of entries
- * @entries: Where the resulting entries are placed
+ * @end: The final page index (inclusive).
+ * @pvec: Where the resulting entries are placed.
* @indices: The cache indices corresponding to the entries in @pvec
*
- * find_get_entries() will search for and return a group of up to
- * @nr_entries entries in the mapping. The entries are placed at
- * @entries. find_get_entries() takes a reference against any actual
- * pages it returns.
+ * find_get_entries() will search for and return a batch of entries in
+ * the mapping. The entries are placed in @pvec. find_get_entries()
+ * takes a reference on any actual pages it returns.
*
* The search returns a group of mapping-contiguous page cache entries
* with ascending indexes. There may be holes in the indices due to
@@ -1879,60 +1887,97 @@ EXPORT_SYMBOL(pagecache_get_page);
*
* Return: the number of pages and shadow entries which were found.
*/
-unsigned find_get_entries(struct address_space *mapping,
- pgoff_t start, unsigned int nr_entries,
- struct page **entries, pgoff_t *indices)
+unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+ pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
{
XA_STATE(xas, &mapping->i_pages, start);
struct page *page;
unsigned int ret = 0;
-
- if (!nr_entries)
- return 0;
+ unsigned nr_entries = PAGEVEC_SIZE;
rcu_read_lock();
- xas_for_each(&xas, page, ULONG_MAX) {
- if (xas_retry(&xas, page))
- continue;
- /*
- * A shadow entry of a recently evicted page, a swap
- * entry from shmem/tmpfs or a DAX entry. Return it
- * without attempting to raise page count.
- */
- if (xa_is_value(page))
- goto export;
-
- if (!page_cache_get_speculative(page))
- goto retry;
-
- /* Has the page moved or been split? */
- if (unlikely(page != xas_reload(&xas)))
- goto put_page;
-
+ while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
/*
* Terminate early on finding a THP, to allow the caller to
* handle it all at once; but continue if this is hugetlbfs.
*/
- if (PageTransHuge(page) && !PageHuge(page)) {
+ if (!xa_is_value(page) && PageTransHuge(page) &&
+ !PageHuge(page)) {
page = find_subpage(page, xas.xa_index);
nr_entries = ret + 1;
}
-export:
+
indices[ret] = xas.xa_index;
- entries[ret] = page;
+ pvec->pages[ret] = page;
if (++ret == nr_entries)
break;
- continue;
-put_page:
- put_page(page);
-retry:
- xas_reset(&xas);
}
rcu_read_unlock();
+
+ pvec->nr = ret;
return ret;
}
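A sketch of the intended calling pattern for the pagevec-based interface, loosely modelled on the truncation loops this series converts (per-page work elided):

	struct pagevec pvec;
	pgoff_t indices[PAGEVEC_SIZE];
	pgoff_t index = start;
	int i;

	pagevec_init(&pvec);
	while (find_get_entries(mapping, index, end, &pvec, indices)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];

			if (xa_is_value(page))
				continue;	/* shadow/swap/DAX entry */
			/* ... operate on the page ... */
		}
		index = indices[pagevec_count(&pvec) - 1] + 1;
		pagevec_remove_exceptionals(&pvec);
		pagevec_release(&pvec);
		cond_resched();
	}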
/**
+ * find_lock_entries - Find a batch of pagecache entries.
+ * @mapping: The address_space to search.
+ * @start: The starting page cache index.
+ * @end: The final page index (inclusive).
+ * @pvec: Where the resulting entries are placed.
+ * @indices: The cache indices of the entries in @pvec.
+ *
+ * find_lock_entries() will return a batch of entries from @mapping.
+ * Swap, shadow and DAX entries are included. Pages are returned
+ * locked and with an incremented refcount. Pages which are locked by
+ * somebody else or under writeback are skipped. Only the head page of
+ * a THP is returned. Pages which are partially outside the range are
+ * not returned.
+ *
+ * The entries have ascending indexes. The indices may not be consecutive
+ * due to not-present entries, THP pages, pages which could not be locked
+ * or pages under writeback.
+ *
+ * Return: The number of entries which were found.
+ */
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+ pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+{
+ XA_STATE(xas, &mapping->i_pages, start);
+ struct page *page;
+
+ rcu_read_lock();
+ while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
+ if (!xa_is_value(page)) {
+ if (page->index < start)
+ goto put;
+ VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
+ if (page->index + thp_nr_pages(page) - 1 > end)
+ goto put;
+ if (!trylock_page(page))
+ goto put;
+ if (page->mapping != mapping || PageWriteback(page))
+ goto unlock;
+ VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index),
+ page);
+ }
+ indices[pvec->nr] = xas.xa_index;
+ if (!pagevec_add(pvec, page))
+ break;
+ goto next;
+unlock:
+ unlock_page(page);
+put:
+ put_page(page);
+next:
+ if (!xa_is_value(page) && PageTransHuge(page))
+ xas_set(&xas, page->index + thp_nr_pages(page));
+ }
+ rcu_read_unlock();
+
+ return pagevec_count(pvec);
+}
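Because pages come back already locked, callers can act on them directly; a condensed sketch of the loop shape truncate_inode_pages_range() adopts with this helper (simplified):

	pagevec_init(&pvec);
	index = start;
	while (index < end && find_lock_entries(mapping, index, end - 1,
			&pvec, indices)) {
		index = indices[pagevec_count(&pvec) - 1] + 1;
		truncate_exceptional_pvec_entries(mapping, &pvec, indices);
		for (i = 0; i < pagevec_count(&pvec); i++)
			truncate_cleanup_page(mapping, pvec.pages[i]);
		delete_from_page_cache_batch(mapping, &pvec);
		for (i = 0; i < pagevec_count(&pvec); i++)
			unlock_page(pvec.pages[i]);
		pagevec_release(&pvec);
		cond_resched();
	}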
+
+/**
* find_get_pages_range - gang pagecache lookup
* @mapping: The address_space to search
* @start: The starting page index
@@ -1965,30 +2010,16 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
return 0;
rcu_read_lock();
- xas_for_each(&xas, page, end) {
- if (xas_retry(&xas, page))
- continue;
+ while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
/* Skip over shadow, swap and DAX entries */
if (xa_is_value(page))
continue;
- if (!page_cache_get_speculative(page))
- goto retry;
-
- /* Has the page moved or been split? */
- if (unlikely(page != xas_reload(&xas)))
- goto put_page;
-
pages[ret] = find_subpage(page, xas.xa_index);
if (++ret == nr_pages) {
*start = xas.xa_index + 1;
goto out;
}
- continue;
-put_page:
- put_page(page);
-retry:
- xas_reset(&xas);
}
/*
@@ -2062,7 +2093,7 @@ retry:
EXPORT_SYMBOL(find_get_pages_contig);
/**
- * find_get_pages_range_tag - find and return pages in given range matching @tag
+ * find_get_pages_range_tag - Find and return head pages matching @tag.
* @mapping: the address_space to search
* @index: the starting page index
* @end: The final page index (inclusive)
@@ -2070,8 +2101,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
* @nr_pages: the maximum number of pages
* @pages: where the resulting pages are placed
*
- * Like find_get_pages, except we only return pages which are tagged with
- * @tag. We update @index to index the next page for the traversal.
+ * Like find_get_pages(), except we only return head pages which are tagged
+ * with @tag. @index is updated to the index immediately after the last
+ * page we return, ready for the next iteration.
*
* Return: the number of pages which were found.
*/
@@ -2087,9 +2119,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
return 0;
rcu_read_lock();
- xas_for_each_marked(&xas, page, end, tag) {
- if (xas_retry(&xas, page))
- continue;
+ while ((page = find_get_entry(&xas, end, tag))) {
/*
* Shadow entries should never be tagged, but this iteration
* is lockless so there is a window for page reclaim to evict
@@ -2098,23 +2128,11 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
if (xa_is_value(page))
continue;
- if (!page_cache_get_speculative(page))
- goto retry;
-
- /* Has the page moved or been split? */
- if (unlikely(page != xas_reload(&xas)))
- goto put_page;
-
- pages[ret] = find_subpage(page, xas.xa_index);
+ pages[ret] = page;
if (++ret == nr_pages) {
- *index = xas.xa_index + 1;
+ *index = page->index + thp_nr_pages(page);
goto out;
}
- continue;
-put_page:
- put_page(page);
-retry:
- xas_reset(&xas);
}
/*
@@ -2592,6 +2610,109 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
}
EXPORT_SYMBOL(generic_file_read_iter);
+static inline loff_t page_seek_hole_data(struct xa_state *xas,
+ struct address_space *mapping, struct page *page,
+ loff_t start, loff_t end, bool seek_data)
+{
+ const struct address_space_operations *ops = mapping->a_ops;
+ size_t offset, bsz = i_blocksize(mapping->host);
+
+ if (xa_is_value(page) || PageUptodate(page))
+ return seek_data ? start : end;
+ if (!ops->is_partially_uptodate)
+ return seek_data ? end : start;
+
+ xas_pause(xas);
+ rcu_read_unlock();
+ lock_page(page);
+ if (unlikely(page->mapping != mapping))
+ goto unlock;
+
+ offset = offset_in_thp(page, start) & ~(bsz - 1);
+
+ do {
+ if (ops->is_partially_uptodate(page, offset, bsz) == seek_data)
+ break;
+ start = (start + bsz) & ~(bsz - 1);
+ offset += bsz;
+ } while (offset < thp_size(page));
+unlock:
+ unlock_page(page);
+ rcu_read_lock();
+ return start;
+}
+
+static inline
+unsigned int seek_page_size(struct xa_state *xas, struct page *page)
+{
+ if (xa_is_value(page))
+ return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index);
+ return thp_size(page);
+}
+
+/**
+ * mapping_seek_hole_data - Seek for SEEK_DATA / SEEK_HOLE in the page cache.
+ * @mapping: Address space to search.
+ * @start: First byte to consider.
+ * @end: Limit of search (exclusive).
+ * @whence: Either SEEK_HOLE or SEEK_DATA.
+ *
+ * If the page cache knows which blocks contain holes and which blocks
+ * contain data, your filesystem can use this function to implement
+ * SEEK_HOLE and SEEK_DATA. This is useful for filesystems which are
+ * entirely memory-based such as tmpfs, and filesystems which support
+ * unwritten extents.
+ *
+ * Return: The requested offset on success, or -ENXIO if @whence specifies
+ * SEEK_DATA and there is no data after @start. There is an implicit hole
+ * after @end - 1, so SEEK_HOLE returns @end if all the bytes between @start
+ * and @end contain data.
+ */
+loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
+ loff_t end, int whence)
+{
+ XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);
+ pgoff_t max = (end - 1) / PAGE_SIZE;
+ bool seek_data = (whence == SEEK_DATA);
+ struct page *page;
+
+ if (end <= start)
+ return -ENXIO;
+
+ rcu_read_lock();
+ while ((page = find_get_entry(&xas, max, XA_PRESENT))) {
+ loff_t pos = xas.xa_index * PAGE_SIZE;
+
+ if (start < pos) {
+ if (!seek_data)
+ goto unlock;
+ start = pos;
+ }
+
+ pos += seek_page_size(&xas, page);
+ start = page_seek_hole_data(&xas, mapping, page, start, pos,
+ seek_data);
+ if (start < pos)
+ goto unlock;
+ if (!xa_is_value(page))
+ put_page(page);
+ }
+ rcu_read_unlock();
+
+ if (seek_data)
+ return -ENXIO;
+ goto out;
+
+unlock:
+ rcu_read_unlock();
+ if (!xa_is_value(page))
+ put_page(page);
+out:
+ if (start > end)
+ return end;
+ return start;
+}
+
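A filesystem can wire this up from its llseek method; a minimal sketch along the lines of the shmem conversion in this series (the function name is illustrative, error handling condensed):

	static loff_t example_llseek(struct file *file, loff_t offset, int whence)
	{
		struct inode *inode = file_inode(file);

		if (whence != SEEK_DATA && whence != SEEK_HOLE)
			return generic_file_llseek(file, offset, whence);
		if (offset < 0)
			return -ENXIO;

		inode_lock(inode);
		/* Holding i_rwsem, so i_size is stable. */
		offset = mapping_seek_hole_data(file->f_mapping, offset,
						i_size_read(inode), whence);
		if (offset >= 0)
			offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
		inode_unlock(inode);
		return offset;
	}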
#ifdef CONFIG_MMU
#define MMAP_LOTSAMISS (100)
/*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d77605c30f2e..395c75111d33 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -668,9 +668,9 @@ release:
* available
* never: never stall for any thp allocation
*/
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma)
{
- const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+ const bool vma_madvised = vma && (vma->vm_flags & VM_HUGEPAGE);
/* Always do synchronous compaction */
if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
@@ -762,7 +762,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
}
return ret;
}
- gfp = alloc_hugepage_direct_gfpmask(vma);
+ gfp = vma_thp_gfp_mask(vma);
page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
if (unlikely(!page)) {
count_vm_event(THP_FAULT_FALLBACK);
diff --git a/mm/internal.h b/mm/internal.h
index 25d2b2439f19..9902648f2206 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -60,8 +60,8 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
force_page_cache_ra(&ractl, &file->f_ra, nr_to_read);
}
-struct page *find_get_entry(struct address_space *mapping, pgoff_t index);
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t index);
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+ pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
/**
* page_evictable - test whether a page is evictable
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index b18189ef3a92..b5e08d4cefec 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -210,6 +210,11 @@ void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
*size = optimal_size;
}
+void __kasan_cache_create_kmalloc(struct kmem_cache *cache)
+{
+ cache->kasan_info.is_kmalloc = true;
+}
+
size_t __kasan_metadata_size(struct kmem_cache *cache)
{
if (!kasan_stack_collection_enabled())
@@ -256,7 +261,8 @@ void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object)
void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
{
- kasan_poison(object, cache->object_size, KASAN_KMALLOC_REDZONE);
+ kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE),
+ KASAN_KMALLOC_REDZONE);
}
/*
@@ -273,22 +279,13 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
* based on objects indexes, so that objects that are next to each other
* get different tags.
*/
-static u8 assign_tag(struct kmem_cache *cache, const void *object,
- bool init, bool keep_tag)
+static inline u8 assign_tag(struct kmem_cache *cache,
+ const void *object, bool init)
{
if (IS_ENABLED(CONFIG_KASAN_GENERIC))
return 0xff;
/*
- * 1. When an object is kmalloc()'ed, two hooks are called:
- * kasan_slab_alloc() and kasan_kmalloc(). We assign the
- * tag only in the first one.
- * 2. We reuse the same tag for krealloc'ed objects.
- */
- if (keep_tag)
- return get_tag(object);
-
- /*
* If the cache neither has a constructor nor has SLAB_TYPESAFE_BY_RCU
* set, assign a tag when the object is being allocated (init == false).
*/
@@ -320,13 +317,13 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
}
/* Tag is ignored in set_tag() without CONFIG_KASAN_SW/HW_TAGS */
- object = set_tag(object, assign_tag(cache, object, true, false));
+ object = set_tag(object, assign_tag(cache, object, true));
return (void *)object;
}
-static bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
- unsigned long ip, bool quarantine)
+static inline bool ____kasan_slab_free(struct kmem_cache *cache,
+ void *object, unsigned long ip, bool quarantine)
{
u8 tag;
void *tagged_object;
@@ -335,6 +332,9 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
tagged_object = object;
object = kasan_reset_tag(object);
+ if (is_kfence_address(object))
+ return false;
+
if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
object)) {
kasan_report_invalid_free(tagged_object, ip);
@@ -350,15 +350,14 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
return true;
}
- kasan_poison(object, cache->object_size, KASAN_KMALLOC_FREE);
-
- if (!kasan_stack_collection_enabled())
- return false;
+ kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE),
+ KASAN_KMALLOC_FREE);
if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine))
return false;
- kasan_set_free_info(cache, object, tag);
+ if (kasan_stack_collection_enabled())
+ kasan_set_free_info(cache, object, tag);
return kasan_quarantine_put(cache, object);
}
@@ -368,6 +367,31 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
return ____kasan_slab_free(cache, object, ip, true);
}
+static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip)
+{
+ if (ptr != page_address(virt_to_head_page(ptr))) {
+ kasan_report_invalid_free(ptr, ip);
+ return true;
+ }
+
+ if (!kasan_byte_accessible(ptr)) {
+ kasan_report_invalid_free(ptr, ip);
+ return true;
+ }
+
+ /*
+ * The object will be poisoned by kasan_free_pages() or
+ * kasan_slab_free_mempool().
+ */
+
+ return false;
+}
+
+void __kasan_kfree_large(void *ptr, unsigned long ip)
+{
+ ____kasan_kfree_large(ptr, ip);
+}
+
void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
{
struct page *page;
@@ -381,31 +405,68 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
* KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc.
*/
if (unlikely(!PageSlab(page))) {
- if (ptr != page_address(page)) {
- kasan_report_invalid_free(ptr, ip);
+ if (____kasan_kfree_large(ptr, ip))
return;
- }
kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE);
} else {
____kasan_slab_free(page->slab_cache, ptr, ip, false);
}
}
-static void set_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
+static void set_alloc_info(struct kmem_cache *cache, void *object,
+ gfp_t flags, bool is_kmalloc)
{
struct kasan_alloc_meta *alloc_meta;
+ /* Don't save alloc info for kmalloc caches in kasan_slab_alloc(). */
+ if (cache->kasan_info.is_kmalloc && !is_kmalloc)
+ return;
+
alloc_meta = kasan_get_alloc_meta(cache, object);
if (alloc_meta)
kasan_set_track(&alloc_meta->alloc_track, flags);
}
-static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object,
- size_t size, gfp_t flags, bool keep_tag)
+void * __must_check __kasan_slab_alloc(struct kmem_cache *cache,
+ void *object, gfp_t flags)
+{
+ u8 tag;
+ void *tagged_object;
+
+ if (gfpflags_allow_blocking(flags))
+ kasan_quarantine_reduce();
+
+ if (unlikely(object == NULL))
+ return NULL;
+
+ if (is_kfence_address(object))
+ return (void *)object;
+
+ /*
+ * Generate and assign random tag for tag-based modes.
+ * Tag is ignored in set_tag() for the generic mode.
+ */
+ tag = assign_tag(cache, object, false);
+ tagged_object = set_tag(object, tag);
+
+ /*
+ * Unpoison the whole object.
+ * For kmalloc() allocations, kasan_kmalloc() will do precise poisoning.
+ */
+ kasan_unpoison(tagged_object, cache->object_size);
+
+ /* Save alloc info (if possible) for non-kmalloc() allocations. */
+ if (kasan_stack_collection_enabled())
+ set_alloc_info(cache, (void *)object, flags, false);
+
+ return tagged_object;
+}
+
+static inline void *____kasan_kmalloc(struct kmem_cache *cache,
+ const void *object, size_t size, gfp_t flags)
{
unsigned long redzone_start;
unsigned long redzone_end;
- u8 tag;
if (gfpflags_allow_blocking(flags))
kasan_quarantine_reduce();
@@ -413,40 +474,51 @@ static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object,
if (unlikely(object == NULL))
return NULL;
+ if (is_kfence_address(kasan_reset_tag(object)))
+ return (void *)object;
+
+ /*
+ * The object has already been unpoisoned by kasan_slab_alloc() for
+ * kmalloc() or by kasan_krealloc() for krealloc().
+ */
+
+ /*
+ * The redzone has byte-level precision for the generic mode.
+ * Partially poison the last object granule to cover the unaligned
+ * part of the redzone.
+ */
+ if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+ kasan_poison_last_granule((void *)object, size);
+
+ /* Poison the aligned part of the redzone. */
redzone_start = round_up((unsigned long)(object + size),
KASAN_GRANULE_SIZE);
- redzone_end = round_up((unsigned long)object + cache->object_size,
+ redzone_end = round_up((unsigned long)(object + cache->object_size),
KASAN_GRANULE_SIZE);
- tag = assign_tag(cache, object, false, keep_tag);
-
- /* Tag is ignored in set_tag without CONFIG_KASAN_SW/HW_TAGS */
- kasan_unpoison(set_tag(object, tag), size);
kasan_poison((void *)redzone_start, redzone_end - redzone_start,
KASAN_KMALLOC_REDZONE);
+ /*
+ * Save alloc info (if possible) for kmalloc() allocations.
+ * This also rewrites the alloc info when called from kasan_krealloc().
+ */
if (kasan_stack_collection_enabled())
- set_alloc_info(cache, (void *)object, flags);
+ set_alloc_info(cache, (void *)object, flags, true);
- return set_tag(object, tag);
-}
-
-void * __must_check __kasan_slab_alloc(struct kmem_cache *cache,
- void *object, gfp_t flags)
-{
- return ____kasan_kmalloc(cache, object, cache->object_size, flags, false);
+ /* Keep the tag that was set by kasan_slab_alloc(). */
+ return (void *)object;
}
void * __must_check __kasan_kmalloc(struct kmem_cache *cache, const void *object,
size_t size, gfp_t flags)
{
- return ____kasan_kmalloc(cache, object, size, flags, true);
+ return ____kasan_kmalloc(cache, object, size, flags);
}
EXPORT_SYMBOL(__kasan_kmalloc);
void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
gfp_t flags)
{
- struct page *page;
unsigned long redzone_start;
unsigned long redzone_end;
@@ -456,12 +528,23 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
if (unlikely(ptr == NULL))
return NULL;
- page = virt_to_page(ptr);
+ /*
+ * The object has already been unpoisoned by kasan_alloc_pages() for
+ * alloc_pages() or by kasan_krealloc() for krealloc().
+ */
+
+ /*
+ * The redzone has byte-level precision for the generic mode.
+ * Partially poison the last object granule to cover the unaligned
+ * part of the redzone.
+ */
+ if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+ kasan_poison_last_granule(ptr, size);
+
+ /* Poison the aligned part of the redzone. */
redzone_start = round_up((unsigned long)(ptr + size),
KASAN_GRANULE_SIZE);
- redzone_end = (unsigned long)ptr + page_size(page);
-
- kasan_unpoison(ptr, size);
+ redzone_end = (unsigned long)ptr + page_size(virt_to_page(ptr));
kasan_poison((void *)redzone_start, redzone_end - redzone_start,
KASAN_PAGE_REDZONE);
@@ -475,20 +558,20 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag
if (unlikely(object == ZERO_SIZE_PTR))
return (void *)object;
+ /*
+ * Unpoison the object's data.
+ * Part of it might already have been unpoisoned, but it's unknown
+ * how big that part is.
+ */
+ kasan_unpoison(object, size);
+
page = virt_to_head_page(object);
+ /* Piggy-back on kmalloc() instrumentation to poison the redzone. */
if (unlikely(!PageSlab(page)))
return __kasan_kmalloc_large(object, size, flags);
else
- return ____kasan_kmalloc(page->slab_cache, object, size,
- flags, true);
-}
-
-void __kasan_kfree_large(void *ptr, unsigned long ip)
-{
- if (ptr != page_address(virt_to_head_page(ptr)))
- kasan_report_invalid_free(ptr, ip);
- /* The object will be poisoned by kasan_free_pages(). */
+ return ____kasan_kmalloc(page->slab_cache, object, size, flags);
}
bool __kasan_check_byte(const void *address, unsigned long ip)
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 3f17a1218055..2e55e0f82f39 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -14,6 +14,7 @@
#include <linux/init.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
+#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/linkage.h>
#include <linux/memblock.h>
@@ -331,7 +332,7 @@ void kasan_record_aux_stack(void *addr)
struct kasan_alloc_meta *alloc_meta;
void *object;
- if (!(page && PageSlab(page)))
+ if (is_kfence_address(addr) || !(page && PageSlab(page)))
return;
cache = page->slab_cache;
diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
index b31aeef505dd..2aad21fda156 100644
--- a/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@ -48,7 +48,7 @@ EXPORT_SYMBOL(kasan_flag_enabled);
/* Whether to collect alloc/free stack traces. */
DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
-/* Whether panic or disable tag checking on fault. */
+/* Whether to panic or print a report and disable tag checking on fault. */
bool kasan_flag_panic __ro_after_init;
/* kasan=off/on */
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index cc14b6e6c14c..8c55634d6edd 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -3,6 +3,7 @@
#define __MM_KASAN_KASAN_H
#include <linux/kasan.h>
+#include <linux/kfence.h>
#include <linux/stackdepot.h>
#ifdef CONFIG_KASAN_HW_TAGS
@@ -329,16 +330,37 @@ static inline u8 kasan_random_tag(void) { return 0; }
#ifdef CONFIG_KASAN_HW_TAGS
-static inline void kasan_poison(const void *address, size_t size, u8 value)
+static inline void kasan_poison(const void *addr, size_t size, u8 value)
{
- hw_set_mem_tag_range(kasan_reset_tag(address),
- round_up(size, KASAN_GRANULE_SIZE), value);
+ addr = kasan_reset_tag(addr);
+
+ /* Skip KFENCE memory if called explicitly outside of sl*b. */
+ if (is_kfence_address(addr))
+ return;
+
+ if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
+ return;
+ if (WARN_ON(size & KASAN_GRANULE_MASK))
+ return;
+
+ hw_set_mem_tag_range((void *)addr, size, value);
}
-static inline void kasan_unpoison(const void *address, size_t size)
+static inline void kasan_unpoison(const void *addr, size_t size)
{
- hw_set_mem_tag_range(kasan_reset_tag(address),
- round_up(size, KASAN_GRANULE_SIZE), get_tag(address));
+ u8 tag = get_tag(addr);
+
+ addr = kasan_reset_tag(addr);
+
+ /* Skip KFENCE memory if called explicitly outside of sl*b. */
+ if (is_kfence_address(addr))
+ return;
+
+ if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
+ return;
+ size = round_up(size, KASAN_GRANULE_SIZE);
+
+ hw_set_mem_tag_range((void *)addr, size, tag);
}
static inline bool kasan_byte_accessible(const void *addr)
@@ -352,12 +374,51 @@ static inline bool kasan_byte_accessible(const void *addr)
#else /* CONFIG_KASAN_HW_TAGS */
-void kasan_poison(const void *address, size_t size, u8 value);
-void kasan_unpoison(const void *address, size_t size);
+/**
+ * kasan_poison - mark the memory range as inaccessible
+ * @addr: range start address, must be aligned to KASAN_GRANULE_SIZE
+ * @size: range size, must be aligned to KASAN_GRANULE_SIZE
+ * @value: value that's written to metadata for the range
+ *
+ * The size gets aligned to KASAN_GRANULE_SIZE before marking the range.
+ */
+void kasan_poison(const void *addr, size_t size, u8 value);
+
+/**
+ * kasan_unpoison - mark the memory range as accessible
+ * @addr: range start address, must be aligned to KASAN_GRANULE_SIZE
+ * @size: range size, can be unaligned
+ *
+ * For the tag-based modes, the @size gets aligned to KASAN_GRANULE_SIZE before
+ * marking the range.
+ * For the generic mode, the last granule of the memory range gets partially
+ * unpoisoned based on the @size.
+ */
+void kasan_unpoison(const void *addr, size_t size);
+
bool kasan_byte_accessible(const void *addr);
#endif /* CONFIG_KASAN_HW_TAGS */
+#ifdef CONFIG_KASAN_GENERIC
+
+/**
+ * kasan_poison_last_granule - mark the last granule of the memory range as
+ * inaccessible
+ * @addr: range start address, must be aligned to KASAN_GRANULE_SIZE
+ * @size: range size
+ *
+ * This function is only available for the generic mode, as it's the only mode
+ * that has partially poisoned memory granules.
+ */
+void kasan_poison_last_granule(const void *addr, size_t size);
+
+#else /* CONFIG_KASAN_GENERIC */
+
+static inline void kasan_poison_last_granule(const void *addr, size_t size) { }
+
+#endif /* CONFIG_KASAN_GENERIC */
+
/*
* Exported functions for interfaces called from assembly or from generated
* code. Declarations here to avoid warning about missing declarations.
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 234f35a84f19..87b271206163 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -25,6 +25,7 @@
#include <linux/module.h>
#include <linux/sched/task_stack.h>
#include <linux/uaccess.h>
+#include <trace/events/error_report.h>
#include <asm/sections.h>
@@ -84,8 +85,9 @@ static void start_report(unsigned long *flags)
pr_err("==================================================================\n");
}
-static void end_report(unsigned long *flags)
+static void end_report(unsigned long *flags, unsigned long addr)
{
+ trace_error_report_end(ERROR_DETECTOR_KASAN, addr);
pr_err("==================================================================\n");
add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
spin_unlock_irqrestore(&report_lock, *flags);
@@ -355,7 +357,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
print_address_description(object, tag);
pr_err("\n");
print_memory_metadata(object);
- end_report(&flags);
+ end_report(&flags, (unsigned long)object);
}
static void __kasan_report(unsigned long addr, size_t size, bool is_write,
@@ -401,7 +403,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
dump_stack();
}
- end_report(&flags);
+ end_report(&flags, addr);
}
bool kasan_report(unsigned long addr, size_t size, bool is_write,
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 80adc85d0393..63f43443f5d7 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -13,6 +13,7 @@
#include <linux/init.h>
#include <linux/kasan.h>
#include <linux/kernel.h>
+#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/memory.h>
#include <linux/mm.h>
@@ -68,11 +69,7 @@ void *memcpy(void *dest, const void *src, size_t len)
return __memcpy(dest, src, len);
}
-/*
- * Poisons the shadow memory for 'size' bytes starting from 'addr'.
- * Memory addresses should be aligned to KASAN_GRANULE_SIZE.
- */
-void kasan_poison(const void *address, size_t size, u8 value)
+void kasan_poison(const void *addr, size_t size, u8 value)
{
void *shadow_start, *shadow_end;
@@ -81,37 +78,62 @@ void kasan_poison(const void *address, size_t size, u8 value)
* some of the callers (e.g. kasan_poison_object_data) pass tagged
* addresses to this function.
*/
- address = kasan_reset_tag(address);
- size = round_up(size, KASAN_GRANULE_SIZE);
+ addr = kasan_reset_tag(addr);
- shadow_start = kasan_mem_to_shadow(address);
- shadow_end = kasan_mem_to_shadow(address + size);
+ /* Skip KFENCE memory if called explicitly outside of sl*b. */
+ if (is_kfence_address(addr))
+ return;
+
+ if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
+ return;
+ if (WARN_ON(size & KASAN_GRANULE_MASK))
+ return;
+
+ shadow_start = kasan_mem_to_shadow(addr);
+ shadow_end = kasan_mem_to_shadow(addr + size);
__memset(shadow_start, value, shadow_end - shadow_start);
}
EXPORT_SYMBOL(kasan_poison);
-void kasan_unpoison(const void *address, size_t size)
+#ifdef CONFIG_KASAN_GENERIC
+void kasan_poison_last_granule(const void *addr, size_t size)
{
- u8 tag = get_tag(address);
+ if (size & KASAN_GRANULE_MASK) {
+ u8 *shadow = (u8 *)kasan_mem_to_shadow(addr + size);
+ *shadow = size & KASAN_GRANULE_MASK;
+ }
+}
+#endif
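The shadow value written for a partial granule is simply the number of accessible bytes in that granule. A standalone userspace demonstration of the encoding, assuming KASAN_GRANULE_SIZE == 8 as in generic KASAN:

	#include <stdio.h>

	#define KASAN_GRANULE_SIZE	8
	#define KASAN_GRANULE_MASK	(KASAN_GRANULE_SIZE - 1)

	int main(void)
	{
		size_t sizes[] = { 8, 13, 21, 32 };

		for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
			/*
			 * 0 means the whole granule is accessible; values
			 * 1..7 say how many leading bytes of the last
			 * granule are valid.
			 */
			printf("object size %2zu -> last-granule shadow %zu\n",
			       sizes[i], sizes[i] & KASAN_GRANULE_MASK);
		}
		return 0;
	}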
+
+void kasan_unpoison(const void *addr, size_t size)
+{
+ u8 tag = get_tag(addr);
/*
* Perform shadow offset calculation based on untagged address, as
* some of the callers (e.g. kasan_unpoison_object_data) pass tagged
* addresses to this function.
*/
- address = kasan_reset_tag(address);
+ addr = kasan_reset_tag(addr);
+
+ /*
+ * Skip KFENCE memory if called explicitly outside of sl*b. Also note
+ * that calls to ksize(), where size is not a multiple of machine-word
+ * size, would otherwise poison the invalid portion of the word.
+ */
+ if (is_kfence_address(addr))
+ return;
- kasan_poison(address, size, tag);
+ if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
+ return;
- if (size & KASAN_GRANULE_MASK) {
- u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size);
+ /* Unpoison all granules that cover the object. */
+ kasan_poison(addr, round_up(size, KASAN_GRANULE_SIZE), tag);
- if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
- *shadow = tag;
- else /* CONFIG_KASAN_GENERIC */
- *shadow = size & KASAN_GRANULE_MASK;
- }
+ /* Partially poison the last granule for the generic mode. */
+ if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+ kasan_poison_last_granule(addr, size);
}
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile
new file mode 100644
index 000000000000..6872cd5e5390
--- /dev/null
+++ b/mm/kfence/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_KFENCE) := core.o report.o
+
+CFLAGS_kfence_test.o := -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
+obj-$(CONFIG_KFENCE_KUNIT_TEST) += kfence_test.o
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
new file mode 100644
index 000000000000..3b8ec938470a
--- /dev/null
+++ b/mm/kfence/core.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KFENCE guarded object allocator and fault handling.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#define pr_fmt(fmt) "kfence: " fmt
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/debugfs.h>
+#include <linux/kcsan-checks.h>
+#include <linux/kfence.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/memblock.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include <asm/kfence.h>
+
+#include "kfence.h"
+
+/* Disables KFENCE on the first warning assuming an irrecoverable error. */
+#define KFENCE_WARN_ON(cond) \
+ ({ \
+ const bool __cond = WARN_ON(cond); \
+ if (unlikely(__cond)) \
+ WRITE_ONCE(kfence_enabled, false); \
+ __cond; \
+ })
+
+/* === Data ================================================================= */
+
+static bool kfence_enabled __read_mostly;
+
+static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "kfence."
+
+static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
+{
+ unsigned long num;
+ int ret = kstrtoul(val, 0, &num);
+
+ if (ret < 0)
+ return ret;
+
+ if (!num) /* Using 0 to indicate KFENCE is disabled. */
+ WRITE_ONCE(kfence_enabled, false);
+ else if (!READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
+ return -EINVAL; /* Cannot (re-)enable KFENCE on-the-fly. */
+
+ *((unsigned long *)kp->arg) = num;
+ return 0;
+}
+
+static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
+{
+ if (!READ_ONCE(kfence_enabled))
+ return sprintf(buffer, "0\n");
+
+ return param_get_ulong(buffer, kp);
+}
+
+static const struct kernel_param_ops sample_interval_param_ops = {
+ .set = param_set_sample_interval,
+ .get = param_get_sample_interval,
+};
+module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);
+
+/* The pool of pages used for guard pages and objects. */
+char *__kfence_pool __ro_after_init;
+EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
+
+/*
+ * Per-object metadata, with one-to-one mapping of object metadata to
+ * backing pages (in __kfence_pool).
+ */
+static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
+struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+
+/* Freelist with available objects. */
+static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
+static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
+
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+/* The static key to set up a KFENCE allocation. */
+DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);
+#endif
+
+/* Gates the allocation, ensuring only one succeeds in a given period. */
+atomic_t kfence_allocation_gate = ATOMIC_INIT(1);
+
+/* Statistics counters for debugfs. */
+enum kfence_counter_id {
+ KFENCE_COUNTER_ALLOCATED,
+ KFENCE_COUNTER_ALLOCS,
+ KFENCE_COUNTER_FREES,
+ KFENCE_COUNTER_ZOMBIES,
+ KFENCE_COUNTER_BUGS,
+ KFENCE_COUNTER_COUNT,
+};
+static atomic_long_t counters[KFENCE_COUNTER_COUNT];
+static const char *const counter_names[] = {
+ [KFENCE_COUNTER_ALLOCATED] = "currently allocated",
+ [KFENCE_COUNTER_ALLOCS] = "total allocations",
+ [KFENCE_COUNTER_FREES] = "total frees",
+ [KFENCE_COUNTER_ZOMBIES] = "zombie allocations",
+ [KFENCE_COUNTER_BUGS] = "total bugs",
+};
+static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);
+
+/* === Internals ============================================================ */
+
+static bool kfence_protect(unsigned long addr)
+{
+ return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
+}
+
+static bool kfence_unprotect(unsigned long addr)
+{
+ return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
+}
+
+static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
+{
+ long index;
+
+ /* The checks do not affect performance; only called from slow-paths. */
+
+ if (!is_kfence_address((void *)addr))
+ return NULL;
+
+ /*
+ * May be an invalid index if called with an address at the edge of
+ * __kfence_pool, in which case we would report an "invalid access"
+ * error.
+ */
+ index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1;
+ if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS)
+ return NULL;
+
+ return &kfence_metadata[index];
+}
+
+static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
+{
+ unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
+ unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];
+
+ /* The checks do not affect performance; only called from slow-paths. */
+
+ /* Only call with a pointer into kfence_metadata. */
+ if (KFENCE_WARN_ON(meta < kfence_metadata ||
+ meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS))
+ return 0;
+
+ /*
+ * This metadata object only ever maps to 1 page; verify that the stored
+ * address is in the expected range.
+ */
+ if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != pageaddr))
+ return 0;
+
+ return pageaddr;
+}
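The pool is laid out as two leading guard pages followed by object/guard page pairs, so mapping between addresses and metadata indices is pure arithmetic. A standalone userspace demo of the round trip, with PAGE_SIZE assumed to be 4096 and a made-up pool base:

	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	int main(void)
	{
		unsigned long pool = 0x100000;	/* stand-in for __kfence_pool */

		for (long index = 0; index < 3; index++) {
			/* metadata_to_pageaddr(): skip 2 guard pages, 2 pages/object */
			unsigned long pageaddr = pool + (index + 1) * 2 * PAGE_SIZE;
			/* addr_to_metadata() inverse, for any addr within that page */
			long back = ((pageaddr + 123) - pool) / (PAGE_SIZE * 2) - 1;

			printf("object %ld -> page 0x%lx -> index %ld\n",
			       index, pageaddr, back);
		}
		return 0;
	}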
+
+/*
+ * Update the object's metadata state, including updating the alloc/free stacks
+ * depending on the state transition.
+ */
+static noinline void metadata_update_state(struct kfence_metadata *meta,
+ enum kfence_object_state next)
+{
+ struct kfence_track *track =
+ next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;
+
+ lockdep_assert_held(&meta->lock);
+
+ /*
+ * Skip over 1 (this) function; noinline ensures we do not accidentally
+ * skip over the caller, since this function is never inlined.
+ */
+ track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
+ track->pid = task_pid_nr(current);
+
+ /*
+ * Pairs with READ_ONCE() in
+ * kfence_shutdown_cache(),
+ * kfence_handle_page_fault().
+ */
+ WRITE_ONCE(meta->state, next);
+}
+
+/* Write canary byte to @addr. */
+static inline bool set_canary_byte(u8 *addr)
+{
+ *addr = KFENCE_CANARY_PATTERN(addr);
+ return true;
+}
+
+/* Check canary byte at @addr. */
+static inline bool check_canary_byte(u8 *addr)
+{
+ if (likely(*addr == KFENCE_CANARY_PATTERN(addr)))
+ return true;
+
+ atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+ kfence_report_error((unsigned long)addr, false, NULL, addr_to_metadata((unsigned long)addr),
+ KFENCE_ERROR_CORRUPTION);
+ return false;
+}
+
+/* __always_inline this to ensure we won't do an indirect call to fn. */
+static __always_inline void for_each_canary(const struct kfence_metadata *meta, bool (*fn)(u8 *))
+{
+ const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
+ unsigned long addr;
+
+ lockdep_assert_held(&meta->lock);
+
+ /*
+ * We'll iterate over each canary byte per-side until fn() returns
+ * false. However, we'll still iterate over the canary bytes to the
+ * right of the object even if there was an error in the canary bytes to
+ * the left of the object. Specifically, if check_canary_byte()
+ * generates an error, showing both sides might give more clues as to
+ * what the error is about when displaying which bytes were corrupted.
+ */
+
+ /* Apply to left of object. */
+ for (addr = pageaddr; addr < meta->addr; addr++) {
+ if (!fn((u8 *)addr))
+ break;
+ }
+
+ /* Apply to right of object. */
+ for (addr = meta->addr + meta->size; addr < pageaddr + PAGE_SIZE; addr++) {
+ if (!fn((u8 *)addr))
+ break;
+ }
+}
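The per-byte pattern is a function of the byte's address (KFENCE_CANARY_PATTERN() in mm/kfence/kfence.h XORs 0xaa with the low three address bits), which makes overwrites that shift data detectable. A standalone demo of the pattern:

	#include <stdio.h>

	/* Mirrors KFENCE_CANARY_PATTERN() from mm/kfence/kfence.h. */
	#define KFENCE_CANARY_PATTERN(addr) \
		((unsigned char)0xaa ^ (unsigned char)((unsigned long)(addr) & 0x7))

	int main(void)
	{
		for (unsigned long addr = 0x2000; addr < 0x2008; addr++)
			printf("canary byte at 0x%lx = 0x%02x\n",
			       addr, KFENCE_CANARY_PATTERN(addr));
		return 0;
	}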
+
+static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp)
+{
+ struct kfence_metadata *meta = NULL;
+ unsigned long flags;
+ struct page *page;
+ void *addr;
+
+ /* Try to obtain a free object. */
+ raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+ if (!list_empty(&kfence_freelist)) {
+ meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
+ list_del_init(&meta->list);
+ }
+ raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+ if (!meta)
+ return NULL;
+
+ if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
+ /*
+ * This is extremely unlikely -- we are reporting on a
+ * use-after-free, which locked meta->lock, and the reporting
+ * code via printk calls kmalloc() which ends up in
+ * kfence_alloc() and tries to grab the same object that we're
+ * reporting on. While it has never been observed, lockdep does
+ * report that there is a possibility of deadlock. Fix it by
+ * using trylock and bailing out gracefully.
+ */
+ raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+ /* Put the object back on the freelist. */
+ list_add_tail(&meta->list, &kfence_freelist);
+ raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+
+ return NULL;
+ }
+
+ meta->addr = metadata_to_pageaddr(meta);
+ /* Unprotect if we're reusing this page. */
+ if (meta->state == KFENCE_OBJECT_FREED)
+ kfence_unprotect(meta->addr);
+
+ /*
+ * Note: for allocations made before RNG initialization, prandom_u32_max()
+ * will always return zero. We still benefit from enabling KFENCE as early as
+ * possible, even when the RNG is not yet available, as this will allow
+ * KFENCE to detect bugs due to earlier allocations. The only downside
+ * is that the out-of-bounds accesses detected are deterministic for
+ * such allocations.
+ */
+ if (prandom_u32_max(2)) {
+ /* Allocate on the "right" side, re-calculate address. */
+ meta->addr += PAGE_SIZE - size;
+ meta->addr = ALIGN_DOWN(meta->addr, cache->align);
+ }
+
+ addr = (void *)meta->addr;
+
+ /* Update remaining metadata. */
+ metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED);
+ /* Pairs with READ_ONCE() in kfence_shutdown_cache(). */
+ WRITE_ONCE(meta->cache, cache);
+ meta->size = size;
+ for_each_canary(meta, set_canary_byte);
+
+ /* Set required struct page fields. */
+ page = virt_to_page(meta->addr);
+ page->slab_cache = cache;
+ if (IS_ENABLED(CONFIG_SLUB))
+ page->objects = 1;
+ if (IS_ENABLED(CONFIG_SLAB))
+ page->s_mem = addr;
+
+ raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+ /* Memory initialization. */
+
+ /*
+ * We check slab_want_init_on_alloc() ourselves, rather than letting
+ * SL*B do the initialization, as otherwise we might overwrite KFENCE's
+ * redzone.
+ */
+ if (unlikely(slab_want_init_on_alloc(gfp, cache)))
+ memzero_explicit(addr, size);
+ if (cache->ctor)
+ cache->ctor(addr);
+
+ if (CONFIG_KFENCE_STRESS_TEST_FAULTS && !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS))
+ kfence_protect(meta->addr); /* Random "faults" by protecting the object. */
+
+ atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
+ atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]);
+
+ return addr;
+}
+
+static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
+{
+ struct kcsan_scoped_access assert_page_exclusive;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&meta->lock, flags);
+
+ if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
+ /* Invalid or double-free, bail out. */
+ atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+ kfence_report_error((unsigned long)addr, false, NULL, meta,
+ KFENCE_ERROR_INVALID_FREE);
+ raw_spin_unlock_irqrestore(&meta->lock, flags);
+ return;
+ }
+
+ /* Detect racy use-after-free, or incorrect reallocation of this page by KFENCE. */
+ kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE,
+ KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT,
+ &assert_page_exclusive);
+
+ if (CONFIG_KFENCE_STRESS_TEST_FAULTS)
+ kfence_unprotect((unsigned long)addr); /* To check canary bytes. */
+
+ /* Restore page protection if there was an OOB access. */
+ if (meta->unprotected_page) {
+ kfence_protect(meta->unprotected_page);
+ meta->unprotected_page = 0;
+ }
+
+ /* Check canary bytes for memory corruption. */
+ for_each_canary(meta, check_canary_byte);
+
+ /*
+ * Clear memory if init-on-free is set. While we protect the page, the
+ * data is still there, and after a use-after-free is detected, we
+ * unprotect the page, so the data is still accessible.
+ */
+ if (!zombie && unlikely(slab_want_init_on_free(meta->cache)))
+ memzero_explicit(addr, meta->size);
+
+ /* Mark the object as freed. */
+ metadata_update_state(meta, KFENCE_OBJECT_FREED);
+
+ raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+ /* Protect to detect use-after-frees. */
+ kfence_protect((unsigned long)addr);
+
+ kcsan_end_scoped_access(&assert_page_exclusive);
+ if (!zombie) {
+ /* Add it to the tail of the freelist for reuse. */
+ raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+ KFENCE_WARN_ON(!list_empty(&meta->list));
+ list_add_tail(&meta->list, &kfence_freelist);
+ raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+
+ atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
+ atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
+ } else {
+ /* See kfence_shutdown_cache(). */
+ atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]);
+ }
+}
+
+static void rcu_guarded_free(struct rcu_head *h)
+{
+ struct kfence_metadata *meta = container_of(h, struct kfence_metadata, rcu_head);
+
+ kfence_guarded_free((void *)meta->addr, meta, false);
+}
+
+static bool __init kfence_init_pool(void)
+{
+ unsigned long addr = (unsigned long)__kfence_pool;
+ struct page *pages;
+ int i;
+
+ if (!__kfence_pool)
+ return false;
+
+ if (!arch_kfence_init_pool())
+ goto err;
+
+ pages = virt_to_page(addr);
+
+ /*
+ * Set up object pages: they must have PG_slab set, to avoid freeing
+ * these as real pages.
+ *
+ * We also want to avoid inserting kfence_free() in the kfree()
+ * fast-path in SLUB, and therefore need to ensure kfree() correctly
+ * enters __slab_free() slow-path.
+ */
+ for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
+ if (!i || (i % 2))
+ continue;
+
+ /* Verify we do not have a compound head page. */
+ if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
+ goto err;
+
+ __SetPageSlab(&pages[i]);
+ }
+
+ /*
+ * Protect the first 2 pages. The first page is mostly unnecessary, and
+ * merely serves as an extended guard page. However, adding one
+ * additional page in the beginning gives us an even number of pages,
+ * which simplifies the mapping of address to metadata index.
+ */
+ for (i = 0; i < 2; i++) {
+ if (unlikely(!kfence_protect(addr)))
+ goto err;
+
+ addr += PAGE_SIZE;
+ }
+
+ for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+ struct kfence_metadata *meta = &kfence_metadata[i];
+
+ /* Initialize metadata. */
+ INIT_LIST_HEAD(&meta->list);
+ raw_spin_lock_init(&meta->lock);
+ meta->state = KFENCE_OBJECT_UNUSED;
+ meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */
+ list_add_tail(&meta->list, &kfence_freelist);
+
+ /* Protect the right redzone. */
+ if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
+ goto err;
+
+ addr += 2 * PAGE_SIZE;
+ }
+
+ return true;
+
+err:
+ /*
+ * Only release unprotected pages, and do not try to go back and change
+ * page attributes due to risk of failing to do so as well. If changing
+ * page attributes for some pages fails, it is very likely that it also
+ * fails for the first page, and therefore expect addr==__kfence_pool in
+ * most failure cases.
+ */
+ memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
+ __kfence_pool = NULL;
+ return false;
+}
+
+/* === DebugFS Interface ==================================================== */
+
+static int stats_show(struct seq_file *seq, void *v)
+{
+ int i;
+
+ seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled));
+ for (i = 0; i < KFENCE_COUNTER_COUNT; i++)
+ seq_printf(seq, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i]));
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(stats);
+
+/*
+ * debugfs seq_file operations for /sys/kernel/debug/kfence/objects.
+ * start_object() and next_object() return the object index + 1, because NULL is used
+ * to stop iteration.
+ */
+static void *start_object(struct seq_file *seq, loff_t *pos)
+{
+ if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
+ return (void *)((long)*pos + 1);
+ return NULL;
+}
+
+static void stop_object(struct seq_file *seq, void *v)
+{
+}
+
+static void *next_object(struct seq_file *seq, void *v, loff_t *pos)
+{
+ ++*pos;
+ if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
+ return (void *)((long)*pos + 1);
+ return NULL;
+}
+
+static int show_object(struct seq_file *seq, void *v)
+{
+ struct kfence_metadata *meta = &kfence_metadata[(long)v - 1];
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&meta->lock, flags);
+ kfence_print_object(seq, meta);
+ raw_spin_unlock_irqrestore(&meta->lock, flags);
+ seq_puts(seq, "---------------------------------\n");
+
+ return 0;
+}
+
+static const struct seq_operations object_seqops = {
+ .start = start_object,
+ .next = next_object,
+ .stop = stop_object,
+ .show = show_object,
+};
+
+static int open_objects(struct inode *inode, struct file *file)
+{
+ return seq_open(file, &object_seqops);
+}
+
+static const struct file_operations objects_fops = {
+ .open = open_objects,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+
+static int __init kfence_debugfs_init(void)
+{
+ struct dentry *kfence_dir = debugfs_create_dir("kfence", NULL);
+
+ debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops);
+ debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops);
+ return 0;
+}
+
+late_initcall(kfence_debugfs_init);
+
+/* === Allocation Gate Timer ================================================ */
+
+/*
+ * Set up delayed work, which will enable and disable the static key. We need to
+ * use a work queue (rather than a simple timer), since enabling and disabling a
+ * static key cannot be done from an interrupt.
+ *
+ * Note: Toggling a static branch currently causes IPIs, and here we'll end up
+ * with a total of 2 IPIs to all CPUs. If this ends up being a problem in the future (with
+ * more aggressive sampling intervals), we could get away with a variant that
+ * avoids IPIs, at the cost of not immediately capturing allocations if the
+ * instructions remain cached.
+ */
+static struct delayed_work kfence_timer;
+static void toggle_allocation_gate(struct work_struct *work)
+{
+ if (!READ_ONCE(kfence_enabled))
+ return;
+
+ /* Enable static key, and await allocation to happen. */
+ atomic_set(&kfence_allocation_gate, 0);
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+ static_branch_enable(&kfence_allocation_key);
+ /*
+ * Await an allocation. Timeout after 1 second, in case the kernel stops
+ * doing allocations, to avoid stalling this worker task for too long.
+ */
+ {
+ unsigned long end_wait = jiffies + HZ;
+
+ do {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&kfence_allocation_gate) != 0)
+ break;
+ schedule_timeout(1);
+ } while (time_before(jiffies, end_wait));
+ __set_current_state(TASK_RUNNING);
+ }
+ /* Disable static key and reset timer. */
+ static_branch_disable(&kfence_allocation_key);
+#endif
+ schedule_delayed_work(&kfence_timer, msecs_to_jiffies(kfence_sample_interval));
+}
+static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate);
+
+/* === Public interface ===================================================== */
+
+void __init kfence_alloc_pool(void)
+{
+ if (!kfence_sample_interval)
+ return;
+
+ __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+
+ if (!__kfence_pool)
+ pr_err("failed to allocate pool\n");
+}
+
+void __init kfence_init(void)
+{
+ /* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
+ if (!kfence_sample_interval)
+ return;
+
+ if (!kfence_init_pool()) {
+ pr_err("%s failed\n", __func__);
+ return;
+ }
+
+ WRITE_ONCE(kfence_enabled, true);
+ schedule_delayed_work(&kfence_timer, 0);
+ pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
+ CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
+ (void *)(__kfence_pool + KFENCE_POOL_SIZE));
+}
+
+void kfence_shutdown_cache(struct kmem_cache *s)
+{
+ unsigned long flags;
+ struct kfence_metadata *meta;
+ int i;
+
+ for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+ bool in_use;
+
+ meta = &kfence_metadata[i];
+
+ /*
+ * If we observe some inconsistent cache and state pair where we
+ * should have returned false here, cache destruction is racing
+ * with either kmem_cache_alloc() or kmem_cache_free(). Taking
+ * the lock will not help, as different critical section
+ * serialization will have the same outcome.
+ */
+ if (READ_ONCE(meta->cache) != s ||
+ READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED)
+ continue;
+
+ raw_spin_lock_irqsave(&meta->lock, flags);
+ in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED;
+ raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+ if (in_use) {
+ /*
+ * This cache still has allocations, and we should not
+ * release them back into the freelist so they can still
+ * safely be used and retain the kernel's default
+ * behaviour of keeping the allocations alive (leak the
+ * cache); however, they effectively become "zombie
+ * allocations" as the KFENCE objects are the only ones
+ * still in use and the owning cache is being destroyed.
+ *
+ * We mark them freed, so that any subsequent use shows
+ * more useful error messages that will include stack
+ * traces of the user of the object, the original
+ * allocation, and the caller of shutdown_cache().
+ */
+ kfence_guarded_free((void *)meta->addr, meta, /*zombie=*/true);
+ }
+ }
+
+ for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+ meta = &kfence_metadata[i];
+
+ /* See above. */
+ if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED)
+ continue;
+
+ raw_spin_lock_irqsave(&meta->lock, flags);
+ if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED)
+ meta->cache = NULL;
+ raw_spin_unlock_irqrestore(&meta->lock, flags);
+ }
+}
+
+void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+{
+ /*
+ * allocation_gate only needs to become non-zero, so it doesn't make
+ * sense to continue writing to it and pay the associated contention
+ * cost, in case we have a large number of concurrent allocations.
+ */
+ if (atomic_read(&kfence_allocation_gate) || atomic_inc_return(&kfence_allocation_gate) > 1)
+ return NULL;
+
+ if (!READ_ONCE(kfence_enabled))
+ return NULL;
+
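+	/* Guarded objects occupy at most one page; larger allocations are never sampled. */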
+ if (size > PAGE_SIZE)
+ return NULL;
+
+ return kfence_guarded_alloc(s, size, flags);
+}
+
+size_t kfence_ksize(const void *addr)
+{
+ const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+ /*
+ * Read locklessly -- if there is a race with __kfence_alloc(), this is
+ * either a use-after-free or invalid access.
+ */
+ return meta ? meta->size : 0;
+}
+
+void *kfence_object_start(const void *addr)
+{
+ const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+ /*
+ * Read locklessly -- if there is a race with __kfence_alloc(), this is
+ * either a use-after-free or invalid access.
+ */
+ return meta ? (void *)meta->addr : NULL;
+}
+
+void __kfence_free(void *addr)
+{
+ struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+ /*
+ * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
+ * the object, as the object page may be recycled for other-typed
+ * objects once it has been freed. meta->cache may be NULL if the cache
+ * was destroyed.
+ */
+ if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU)))
+ call_rcu(&meta->rcu_head, rcu_guarded_free);
+ else
+ kfence_guarded_free(addr, meta, false);
+}
+
+bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
+{
+ const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
+ struct kfence_metadata *to_report = NULL;
+ enum kfence_error_type error_type;
+ unsigned long flags;
+
+ if (!is_kfence_address((void *)addr))
+ return false;
+
+ if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */
+ return kfence_unprotect(addr); /* ... unprotect and proceed. */
+
+ atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+
+ if (page_index % 2) {
+ /* This is a redzone, report a buffer overflow. */
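+		/* Guard pages occupy the odd page indices of the pool; attribute the access to the closer of the two neighbouring objects. */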
+ struct kfence_metadata *meta;
+ int distance = 0;
+
+ meta = addr_to_metadata(addr - PAGE_SIZE);
+ if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
+ to_report = meta;
+ /* Data race ok; distance calculation approximate. */
+ distance = addr - data_race(meta->addr + meta->size);
+ }
+
+ meta = addr_to_metadata(addr + PAGE_SIZE);
+ if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
+ /* Data race ok; distance calculation approximate. */
+ if (!to_report || distance > data_race(meta->addr) - addr)
+ to_report = meta;
+ }
+
+ if (!to_report)
+ goto out;
+
+ raw_spin_lock_irqsave(&to_report->lock, flags);
+ to_report->unprotected_page = addr;
+ error_type = KFENCE_ERROR_OOB;
+
+ /*
+		 * If the object was freed before we took the lock, we can still
+ * report this as an OOB -- the report will simply show the
+ * stacktrace of the free as well.
+ */
+ } else {
+ to_report = addr_to_metadata(addr);
+ if (!to_report)
+ goto out;
+
+ raw_spin_lock_irqsave(&to_report->lock, flags);
+ error_type = KFENCE_ERROR_UAF;
+ /*
+ * We may race with __kfence_alloc(), and it is possible that a
+ * freed object may be reallocated. We simply report this as a
+ * use-after-free, with the stack trace showing the place where
+ * the object was re-allocated.
+ */
+ }
+
+out:
+ if (to_report) {
+ kfence_report_error(addr, is_write, regs, to_report, error_type);
+ raw_spin_unlock_irqrestore(&to_report->lock, flags);
+ } else {
+ /* This may be a UAF or OOB access, but we can't be sure. */
+ kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID);
+ }
+
+ return kfence_unprotect(addr); /* Unprotect and let access proceed. */
+}
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
new file mode 100644
index 000000000000..24065321ff8a
--- /dev/null
+++ b/mm/kfence/kfence.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel Electric-Fence (KFENCE). For more info please see
+ * Documentation/dev-tools/kfence.rst.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef MM_KFENCE_KFENCE_H
+#define MM_KFENCE_KFENCE_H
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "../slab.h" /* for struct kmem_cache */
+
+/*
+ * Get the canary byte pattern for @addr. Use a pattern that varies based on the
+ * lower 3 bits of the address, so that corrupting writes which use similar
+ * constants are detected with higher probability.
+ */
+#define KFENCE_CANARY_PATTERN(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7))
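+/* For example, an address ending in 0x3 yields the pattern 0xaa ^ 0x3 == 0xa9. */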
+
+/* Maximum stack depth for reports. */
+#define KFENCE_STACK_DEPTH 64
+
+/* KFENCE object states. */
+enum kfence_object_state {
+ KFENCE_OBJECT_UNUSED, /* Object is unused. */
+ KFENCE_OBJECT_ALLOCATED, /* Object is currently allocated. */
+ KFENCE_OBJECT_FREED, /* Object was allocated, and then freed. */
+};
+
+/* Alloc/free tracking information. */
+struct kfence_track {
+ pid_t pid;
+ int num_stack_entries;
+ unsigned long stack_entries[KFENCE_STACK_DEPTH];
+};
+
+/* KFENCE metadata per guarded allocation. */
+struct kfence_metadata {
+ struct list_head list; /* Freelist node; access under kfence_freelist_lock. */
+ struct rcu_head rcu_head; /* For delayed freeing. */
+
+ /*
+ * Lock protecting below data; to ensure consistency of the below data,
+ * since the following may execute concurrently: __kfence_alloc(),
+ * __kfence_free(), kfence_handle_page_fault(). However, note that we
+ * cannot grab the same metadata off the freelist twice, and multiple
+ * __kfence_alloc() cannot run concurrently on the same metadata.
+ */
+ raw_spinlock_t lock;
+
+ /* The current state of the object; see above. */
+ enum kfence_object_state state;
+
+ /*
+ * Allocated object address; cannot be calculated from size, because of
+ * alignment requirements.
+ *
+ * Invariant: ALIGN_DOWN(addr, PAGE_SIZE) is constant.
+ */
+ unsigned long addr;
+
+ /*
+ * The size of the original allocation.
+ */
+ size_t size;
+
+ /*
+ * The kmem_cache cache of the last allocation; NULL if never allocated
+ * or the cache has already been destroyed.
+ */
+ struct kmem_cache *cache;
+
+ /*
+ * In case of an invalid access, the page that was unprotected; we
+ * optimistically only store one address.
+ */
+ unsigned long unprotected_page;
+
+ /* Allocation and free stack information. */
+ struct kfence_track alloc_track;
+ struct kfence_track free_track;
+};
+
+extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+
+/* KFENCE error types for report generation. */
+enum kfence_error_type {
+	KFENCE_ERROR_OOB,		/* Detected an out-of-bounds access. */
+ KFENCE_ERROR_UAF, /* Detected a use-after-free access. */
+ KFENCE_ERROR_CORRUPTION, /* Detected a memory corruption on free. */
+ KFENCE_ERROR_INVALID, /* Invalid access of unknown type. */
+ KFENCE_ERROR_INVALID_FREE, /* Invalid free. */
+};
+
+void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
+ const struct kfence_metadata *meta, enum kfence_error_type type);
+
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta);
+
+#endif /* MM_KFENCE_KFENCE_H */
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
new file mode 100644
index 000000000000..4acf4251ee04
--- /dev/null
+++ b/mm/kfence/kfence_test.c
@@ -0,0 +1,858 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test cases for KFENCE memory safety error detector. Since the interface with
+ * which KFENCE's reports are obtained is via the console, this is the output we
+ * should verify. Each test case checks the presence (or absence) of
+ * generated reports. Relies on 'console' tracepoint to capture reports as they
+ * appear in the kernel log.
+ *
+ * Copyright (C) 2020, Google LLC.
+ * Author: Alexander Potapenko <glider@google.com>
+ * Marco Elver <elver@google.com>
+ */
+
+#include <kunit/test.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kfence.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/tracepoint.h>
+#include <trace/events/printk.h>
+
+#include "kfence.h"
+
+/* Report as observed from console. */
+static struct {
+ spinlock_t lock;
+ int nlines;
+ char lines[2][256];
+} observed = {
+ .lock = __SPIN_LOCK_UNLOCKED(observed.lock),
+};
+
+/* Probe for console output: obtains observed lines of interest. */
+static void probe_console(void *ignore, const char *buf, size_t len)
+{
+ unsigned long flags;
+ int nlines;
+
+ spin_lock_irqsave(&observed.lock, flags);
+ nlines = observed.nlines;
+
+ if (strnstr(buf, "BUG: KFENCE: ", len) && strnstr(buf, "test_", len)) {
+ /*
+ * KFENCE report and related to the test.
+ *
+ * The provided @buf is not NUL-terminated; copy no more than
+ * @len bytes and let strscpy() add the missing NUL-terminator.
+ */
+ strscpy(observed.lines[0], buf, min(len + 1, sizeof(observed.lines[0])));
+ nlines = 1;
+ } else if (nlines == 1 && (strnstr(buf, "at 0x", len) || strnstr(buf, "of 0x", len))) {
+ strscpy(observed.lines[nlines++], buf, min(len + 1, sizeof(observed.lines[0])));
+ }
+
+ WRITE_ONCE(observed.nlines, nlines); /* Publish new nlines. */
+ spin_unlock_irqrestore(&observed.lock, flags);
+}
+
+/* Check if a report related to the test exists. */
+static bool report_available(void)
+{
+ return READ_ONCE(observed.nlines) == ARRAY_SIZE(observed.lines);
+}
+
+/* Information we expect in a report. */
+struct expect_report {
+	enum kfence_error_type type; /* The type of error. */
+ void *fn; /* Function pointer to expected function where access occurred. */
+ char *addr; /* Address at which the bad access occurred. */
+ bool is_write; /* Is access a write. */
+};
+
+static const char *get_access_type(const struct expect_report *r)
+{
+ return r->is_write ? "write" : "read";
+}
+
+/* Check observed report matches information in @r. */
+static bool report_matches(const struct expect_report *r)
+{
+ bool ret = false;
+ unsigned long flags;
+ typeof(observed.lines) expect;
+ const char *end;
+ char *cur;
+
+	/* Double-checked locking. */
+ if (!report_available())
+ return false;
+
+ /* Generate expected report contents. */
+
+ /* Title */
+ cur = expect[0];
+ end = &expect[0][sizeof(expect[0]) - 1];
+ switch (r->type) {
+ case KFENCE_ERROR_OOB:
+ cur += scnprintf(cur, end - cur, "BUG: KFENCE: out-of-bounds %s",
+ get_access_type(r));
+ break;
+ case KFENCE_ERROR_UAF:
+ cur += scnprintf(cur, end - cur, "BUG: KFENCE: use-after-free %s",
+ get_access_type(r));
+ break;
+ case KFENCE_ERROR_CORRUPTION:
+ cur += scnprintf(cur, end - cur, "BUG: KFENCE: memory corruption");
+ break;
+ case KFENCE_ERROR_INVALID:
+ cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid %s",
+ get_access_type(r));
+ break;
+ case KFENCE_ERROR_INVALID_FREE:
+ cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid free");
+ break;
+ }
+
+ scnprintf(cur, end - cur, " in %pS", r->fn);
+ /* The exact offset won't match, remove it; also strip module name. */
+ cur = strchr(expect[0], '+');
+ if (cur)
+ *cur = '\0';
+
+ /* Access information */
+ cur = expect[1];
+ end = &expect[1][sizeof(expect[1]) - 1];
+
+ switch (r->type) {
+ case KFENCE_ERROR_OOB:
+ cur += scnprintf(cur, end - cur, "Out-of-bounds %s at", get_access_type(r));
+ break;
+ case KFENCE_ERROR_UAF:
+ cur += scnprintf(cur, end - cur, "Use-after-free %s at", get_access_type(r));
+ break;
+ case KFENCE_ERROR_CORRUPTION:
+ cur += scnprintf(cur, end - cur, "Corrupted memory at");
+ break;
+ case KFENCE_ERROR_INVALID:
+ cur += scnprintf(cur, end - cur, "Invalid %s at", get_access_type(r));
+ break;
+ case KFENCE_ERROR_INVALID_FREE:
+ cur += scnprintf(cur, end - cur, "Invalid free of");
+ break;
+ }
+
+ cur += scnprintf(cur, end - cur, " 0x%p", (void *)r->addr);
+
+ spin_lock_irqsave(&observed.lock, flags);
+ if (!report_available())
+ goto out; /* A new report is being captured. */
+
+ /* Finally match expected output to what we actually observed. */
+ ret = strstr(observed.lines[0], expect[0]) && strstr(observed.lines[1], expect[1]);
+out:
+ spin_unlock_irqrestore(&observed.lock, flags);
+ return ret;
+}
+
+/* ===== Test cases ===== */
+
+#define TEST_PRIV_WANT_MEMCACHE ((void *)1)
+
+/* Cache used by tests; if NULL, allocate from kmalloc instead. */
+static struct kmem_cache *test_cache;
+
+static size_t setup_test_cache(struct kunit *test, size_t size, slab_flags_t flags,
+ void (*ctor)(void *))
+{
+ if (test->priv != TEST_PRIV_WANT_MEMCACHE)
+ return size;
+
+ kunit_info(test, "%s: size=%zu, ctor=%ps\n", __func__, size, ctor);
+
+ /*
+ * Use SLAB_NOLEAKTRACE to prevent merging with existing caches. Any
+ * other flag in SLAB_NEVER_MERGE also works. Use SLAB_ACCOUNT to
+ * allocate via memcg, if enabled.
+ */
+ flags |= SLAB_NOLEAKTRACE | SLAB_ACCOUNT;
+ test_cache = kmem_cache_create("test", size, 1, flags, ctor);
+ KUNIT_ASSERT_TRUE_MSG(test, test_cache, "could not create cache");
+
+ return size;
+}
+
+static void test_cache_destroy(void)
+{
+ if (!test_cache)
+ return;
+
+ kmem_cache_destroy(test_cache);
+ test_cache = NULL;
+}
+
+static inline size_t kmalloc_cache_alignment(size_t size)
+{
+ return kmalloc_caches[kmalloc_type(GFP_KERNEL)][kmalloc_index(size)]->align;
+}
+
+/* Must always inline to match stack trace against caller. */
+static __always_inline void test_free(void *ptr)
+{
+ if (test_cache)
+ kmem_cache_free(test_cache, ptr);
+ else
+ kfree(ptr);
+}
+
+/*
+ * Whether this should be a KFENCE allocation, and on which side of the page
+ * the allocation and the closest guard page should be.
+ */
+enum allocation_policy {
+ ALLOCATE_ANY, /* KFENCE, any side. */
+ ALLOCATE_LEFT, /* KFENCE, left side of page. */
+ ALLOCATE_RIGHT, /* KFENCE, right side of page. */
+ ALLOCATE_NONE, /* No KFENCE allocation. */
+};
+
+/*
+ * Try to get a guarded allocation from KFENCE. Uses either kmalloc() or the
+ * current test_cache if set up.
+ */
+static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocation_policy policy)
+{
+ void *alloc;
+ unsigned long timeout, resched_after;
+ const char *policy_name;
+
+ switch (policy) {
+ case ALLOCATE_ANY:
+ policy_name = "any";
+ break;
+ case ALLOCATE_LEFT:
+ policy_name = "left";
+ break;
+ case ALLOCATE_RIGHT:
+ policy_name = "right";
+ break;
+ case ALLOCATE_NONE:
+ policy_name = "none";
+ break;
+ }
+
+ kunit_info(test, "%s: size=%zu, gfp=%x, policy=%s, cache=%i\n", __func__, size, gfp,
+ policy_name, !!test_cache);
+
+ /*
+ * 100x the sample interval should be more than enough to ensure we get
+ * a KFENCE allocation eventually.
+ */
+ timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+ /*
+ * Especially for non-preemption kernels, ensure the allocation-gate
+ * timer can catch up: after @resched_after, every failed allocation
+ * attempt yields, to ensure the allocation-gate timer is scheduled.
+ */
+ resched_after = jiffies + msecs_to_jiffies(CONFIG_KFENCE_SAMPLE_INTERVAL);
+ do {
+ if (test_cache)
+ alloc = kmem_cache_alloc(test_cache, gfp);
+ else
+ alloc = kmalloc(size, gfp);
+
+ if (is_kfence_address(alloc)) {
+ struct page *page = virt_to_head_page(alloc);
+ struct kmem_cache *s = test_cache ?: kmalloc_caches[kmalloc_type(GFP_KERNEL)][kmalloc_index(size)];
+
+ /*
+ * Verify that various helpers return the right values
+ * even for KFENCE objects; these are required so that
+ * memcg accounting works correctly.
+ */
+ KUNIT_EXPECT_EQ(test, obj_to_index(s, page, alloc), 0U);
+ KUNIT_EXPECT_EQ(test, objs_per_slab_page(s, page), 1);
+
+ if (policy == ALLOCATE_ANY)
+ return alloc;
+ if (policy == ALLOCATE_LEFT && IS_ALIGNED((unsigned long)alloc, PAGE_SIZE))
+ return alloc;
+ if (policy == ALLOCATE_RIGHT &&
+ !IS_ALIGNED((unsigned long)alloc, PAGE_SIZE))
+ return alloc;
+ } else if (policy == ALLOCATE_NONE)
+ return alloc;
+
+ test_free(alloc);
+
+ if (time_after(jiffies, resched_after))
+ cond_resched();
+ } while (time_before(jiffies, timeout));
+
+ KUNIT_ASSERT_TRUE_MSG(test, false, "failed to allocate from KFENCE");
+ return NULL; /* Unreachable. */
+}
+
+static void test_out_of_bounds_read(struct kunit *test)
+{
+ size_t size = 32;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_OOB,
+ .fn = test_out_of_bounds_read,
+ .is_write = false,
+ };
+ char *buf;
+
+ setup_test_cache(test, size, 0, NULL);
+
+ /*
+ * If we don't have our own cache, adjust based on alignment, so that we
+ * actually access guard pages on either side.
+ */
+ if (!test_cache)
+ size = kmalloc_cache_alignment(size);
+
+ /* Test both sides. */
+
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+ expect.addr = buf - 1;
+ READ_ONCE(*expect.addr);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+ test_free(buf);
+
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+ expect.addr = buf + size;
+ READ_ONCE(*expect.addr);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+ test_free(buf);
+}
+
+static void test_out_of_bounds_write(struct kunit *test)
+{
+ size_t size = 32;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_OOB,
+ .fn = test_out_of_bounds_write,
+ .is_write = true,
+ };
+ char *buf;
+
+ setup_test_cache(test, size, 0, NULL);
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+ expect.addr = buf - 1;
+ WRITE_ONCE(*expect.addr, 42);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+ test_free(buf);
+}
+
+static void test_use_after_free_read(struct kunit *test)
+{
+ const size_t size = 32;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_UAF,
+ .fn = test_use_after_free_read,
+ .is_write = false,
+ };
+
+ setup_test_cache(test, size, 0, NULL);
+ expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+ test_free(expect.addr);
+ READ_ONCE(*expect.addr);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_double_free(struct kunit *test)
+{
+ const size_t size = 32;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_INVALID_FREE,
+ .fn = test_double_free,
+ };
+
+ setup_test_cache(test, size, 0, NULL);
+ expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+ test_free(expect.addr);
+ test_free(expect.addr); /* Double-free. */
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_invalid_addr_free(struct kunit *test)
+{
+ const size_t size = 32;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_INVALID_FREE,
+ .fn = test_invalid_addr_free,
+ };
+ char *buf;
+
+ setup_test_cache(test, size, 0, NULL);
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+ expect.addr = buf + 1; /* Free on invalid address. */
+ test_free(expect.addr); /* Invalid address free. */
+ test_free(buf); /* No error. */
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_corruption(struct kunit *test)
+{
+ size_t size = 32;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_CORRUPTION,
+ .fn = test_corruption,
+ };
+ char *buf;
+
+ setup_test_cache(test, size, 0, NULL);
+
+ /* Test both sides. */
+
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+ expect.addr = buf + size;
+ WRITE_ONCE(*expect.addr, 42);
+ test_free(buf);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+ expect.addr = buf - 1;
+ WRITE_ONCE(*expect.addr, 42);
+ test_free(buf);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/*
+ * KFENCE is unable to detect an OOB if the allocation's alignment requirements
+ * leave a gap between the object and the guard page. Specifically, an
+ * allocation of e.g. 73 bytes is aligned on 8 and 128 bytes for SLUB or SLAB
+ * respectively. Therefore it is impossible for the allocated object to
+ * contiguously line up with the right guard page.
+ *
+ * However, we test that an access to memory beyond the gap results in KFENCE
+ * detecting an OOB access.
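+ *
+ * A concrete sketch (assuming SLUB's 8-byte minimum alignment): a 73-byte
+ * right-aligned object starts at buf = page_end - ALIGN(73, 8), leaving an
+ * undetectable 7-byte gap at buf + 73..79; a read of buf + size does not
+ * fault, whereas buf + size + align already lands in the guard page.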
+ */
+static void test_kmalloc_aligned_oob_read(struct kunit *test)
+{
+ const size_t size = 73;
+ const size_t align = kmalloc_cache_alignment(size);
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_OOB,
+ .fn = test_kmalloc_aligned_oob_read,
+ .is_write = false,
+ };
+ char *buf;
+
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+
+ /*
+ * The object is offset to the right, so there won't be an OOB to the
+ * left of it.
+ */
+ READ_ONCE(*(buf - 1));
+ KUNIT_EXPECT_FALSE(test, report_available());
+
+ /*
+ * @buf must be aligned on @align, therefore buf + size belongs to the
+ * same page -> no OOB.
+ */
+ READ_ONCE(*(buf + size));
+ KUNIT_EXPECT_FALSE(test, report_available());
+
+ /* Overflowing by @align bytes will result in an OOB. */
+ expect.addr = buf + size + align;
+ READ_ONCE(*expect.addr);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+
+ test_free(buf);
+}
+
+static void test_kmalloc_aligned_oob_write(struct kunit *test)
+{
+ const size_t size = 73;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_CORRUPTION,
+ .fn = test_kmalloc_aligned_oob_write,
+ };
+ char *buf;
+
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+ /*
+ * The object is offset to the right, so we won't get a page
+ * fault immediately after it.
+ */
+ expect.addr = buf + size;
+ WRITE_ONCE(*expect.addr, READ_ONCE(*expect.addr) + 1);
+ KUNIT_EXPECT_FALSE(test, report_available());
+ test_free(buf);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test cache shrinking and destroying with KFENCE. */
+static void test_shrink_memcache(struct kunit *test)
+{
+ const size_t size = 32;
+ void *buf;
+
+ setup_test_cache(test, size, 0, NULL);
+ KUNIT_EXPECT_TRUE(test, test_cache);
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+ kmem_cache_shrink(test_cache);
+ test_free(buf);
+
+ KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+static void ctor_set_x(void *obj)
+{
+ /* Every object has at least 8 bytes. */
+ memset(obj, 'x', 8);
+}
+
+/* Ensure that SL*B does not modify KFENCE objects on bulk free. */
+static void test_free_bulk(struct kunit *test)
+{
+ int iter;
+
+ for (iter = 0; iter < 5; iter++) {
+ const size_t size = setup_test_cache(test, 8 + prandom_u32_max(300), 0,
+ (iter & 1) ? ctor_set_x : NULL);
+ void *objects[] = {
+ test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT),
+ test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+ test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT),
+ test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+ test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+ };
+
+ kmem_cache_free_bulk(test_cache, ARRAY_SIZE(objects), objects);
+ KUNIT_ASSERT_FALSE(test, report_available());
+ test_cache_destroy();
+ }
+}
+
+/* Test init-on-free works. */
+static void test_init_on_free(struct kunit *test)
+{
+ const size_t size = 32;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_UAF,
+ .fn = test_init_on_free,
+ .is_write = false,
+ };
+ int i;
+
+ if (!IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON))
+ return;
+ /* Assume it hasn't been disabled on command line. */
+
+ setup_test_cache(test, size, 0, NULL);
+ expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+ for (i = 0; i < size; i++)
+ expect.addr[i] = i + 1;
+ test_free(expect.addr);
+
+ for (i = 0; i < size; i++) {
+ /*
+ * This may fail if the page was recycled by KFENCE and then
+		 * written to again -- this, however, is nearly impossible
+		 * with a default config.
+ */
+ KUNIT_EXPECT_EQ(test, expect.addr[i], (char)0);
+
+ if (!i) /* Only check first access to not fail test if page is ever re-protected. */
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+ }
+}
+
+/* Ensure that constructors work properly. */
+static void test_memcache_ctor(struct kunit *test)
+{
+ const size_t size = 32;
+ char *buf;
+ int i;
+
+ setup_test_cache(test, size, 0, ctor_set_x);
+ buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+
+ for (i = 0; i < 8; i++)
+ KUNIT_EXPECT_EQ(test, buf[i], (char)'x');
+
+ test_free(buf);
+
+ KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+/* Test that memory is zeroed if requested. */
+static void test_gfpzero(struct kunit *test)
+{
+ const size_t size = PAGE_SIZE; /* PAGE_SIZE so we can use ALLOCATE_ANY. */
+ char *buf1, *buf2;
+ int i;
+
+ if (CONFIG_KFENCE_SAMPLE_INTERVAL > 100) {
+ kunit_warn(test, "skipping ... would take too long\n");
+ return;
+ }
+
+ setup_test_cache(test, size, 0, NULL);
+ buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+ for (i = 0; i < size; i++)
+ buf1[i] = i + 1;
+ test_free(buf1);
+
+ /* Try to get same address again -- this can take a while. */
+ for (i = 0;; i++) {
+ buf2 = test_alloc(test, size, GFP_KERNEL | __GFP_ZERO, ALLOCATE_ANY);
+ if (buf1 == buf2)
+ break;
+ test_free(buf2);
+
+ if (i == CONFIG_KFENCE_NUM_OBJECTS) {
+ kunit_warn(test, "giving up ... cannot get same object back\n");
+ return;
+ }
+ }
+
+ for (i = 0; i < size; i++)
+ KUNIT_EXPECT_EQ(test, buf2[i], (char)0);
+
+ test_free(buf2);
+
+ KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+static void test_invalid_access(struct kunit *test)
+{
+ const struct expect_report expect = {
+ .type = KFENCE_ERROR_INVALID,
+ .fn = test_invalid_access,
+ .addr = &__kfence_pool[10],
+ .is_write = false,
+ };
+
+ READ_ONCE(__kfence_pool[10]);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test SLAB_TYPESAFE_BY_RCU works. */
+static void test_memcache_typesafe_by_rcu(struct kunit *test)
+{
+ const size_t size = 32;
+ struct expect_report expect = {
+ .type = KFENCE_ERROR_UAF,
+ .fn = test_memcache_typesafe_by_rcu,
+ .is_write = false,
+ };
+
+ setup_test_cache(test, size, SLAB_TYPESAFE_BY_RCU, NULL);
+ KUNIT_EXPECT_TRUE(test, test_cache); /* Want memcache. */
+
+ expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+ *expect.addr = 42;
+
+ rcu_read_lock();
+ test_free(expect.addr);
+ KUNIT_EXPECT_EQ(test, *expect.addr, (char)42);
+ /*
+ * Up to this point, memory should not have been freed yet, and
+ * therefore there should be no KFENCE report from the above access.
+ */
+ rcu_read_unlock();
+
+ /* Above access to @expect.addr should not have generated a report! */
+ KUNIT_EXPECT_FALSE(test, report_available());
+
+ /* Only after rcu_barrier() is the memory guaranteed to be freed. */
+ rcu_barrier();
+
+ /* Expect use-after-free. */
+ KUNIT_EXPECT_EQ(test, *expect.addr, (char)42);
+ KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test krealloc(). */
+static void test_krealloc(struct kunit *test)
+{
+ const size_t size = 32;
+ const struct expect_report expect = {
+ .type = KFENCE_ERROR_UAF,
+ .fn = test_krealloc,
+ .addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY),
+ .is_write = false,
+ };
+ char *buf = expect.addr;
+ int i;
+
+ KUNIT_EXPECT_FALSE(test, test_cache);
+ KUNIT_EXPECT_EQ(test, ksize(buf), size); /* Precise size match after KFENCE alloc. */
+ for (i = 0; i < size; i++)
+ buf[i] = i + 1;
+
+ /* Check that we successfully change the size. */
+ buf = krealloc(buf, size * 3, GFP_KERNEL); /* Grow. */
+ /* Note: Might no longer be a KFENCE alloc. */
+ KUNIT_EXPECT_GE(test, ksize(buf), size * 3);
+ for (i = 0; i < size; i++)
+ KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1));
+ for (; i < size * 3; i++) /* Fill to extra bytes. */
+ buf[i] = i + 1;
+
+ buf = krealloc(buf, size * 2, GFP_KERNEL); /* Shrink. */
+ KUNIT_EXPECT_GE(test, ksize(buf), size * 2);
+ for (i = 0; i < size * 2; i++)
+ KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1));
+
+ buf = krealloc(buf, 0, GFP_KERNEL); /* Free. */
+ KUNIT_EXPECT_EQ(test, (unsigned long)buf, (unsigned long)ZERO_SIZE_PTR);
+ KUNIT_ASSERT_FALSE(test, report_available()); /* No reports yet! */
+
+ READ_ONCE(*expect.addr); /* Ensure krealloc() actually freed earlier KFENCE object. */
+ KUNIT_ASSERT_TRUE(test, report_matches(&expect));
+}
+
+/* Test that some objects from a bulk allocation belong to KFENCE pool. */
+static void test_memcache_alloc_bulk(struct kunit *test)
+{
+ const size_t size = 32;
+ bool pass = false;
+ unsigned long timeout;
+
+ setup_test_cache(test, size, 0, NULL);
+ KUNIT_EXPECT_TRUE(test, test_cache); /* Want memcache. */
+ /*
+ * 100x the sample interval should be more than enough to ensure we get
+ * a KFENCE allocation eventually.
+ */
+ timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+ do {
+ void *objects[100];
+ int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
+ objects);
+ if (!num)
+ continue;
+ for (i = 0; i < ARRAY_SIZE(objects); i++) {
+ if (is_kfence_address(objects[i])) {
+ pass = true;
+ break;
+ }
+ }
+ kmem_cache_free_bulk(test_cache, num, objects);
+ /*
+ * kmem_cache_alloc_bulk() disables interrupts, and calling it
+ * in a tight loop may not give KFENCE a chance to switch the
+ * static branch. Call cond_resched() to let KFENCE chime in.
+ */
+ cond_resched();
+ } while (!pass && time_before(jiffies, timeout));
+
+ KUNIT_EXPECT_TRUE(test, pass);
+ KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+/*
+ * KUnit does not provide a way to pass arguments to tests, so we encode
+ * additional info in the name. Set up 2 tests per test case, one using the
+ * default allocator, and another using a custom memcache (suffix '-memcache').
+ */
+#define KFENCE_KUNIT_CASE(test_name) \
+ { .run_case = test_name, .name = #test_name }, \
+ { .run_case = test_name, .name = #test_name "-memcache" }
+
+static struct kunit_case kfence_test_cases[] = {
+ KFENCE_KUNIT_CASE(test_out_of_bounds_read),
+ KFENCE_KUNIT_CASE(test_out_of_bounds_write),
+ KFENCE_KUNIT_CASE(test_use_after_free_read),
+ KFENCE_KUNIT_CASE(test_double_free),
+ KFENCE_KUNIT_CASE(test_invalid_addr_free),
+ KFENCE_KUNIT_CASE(test_corruption),
+ KFENCE_KUNIT_CASE(test_free_bulk),
+ KFENCE_KUNIT_CASE(test_init_on_free),
+ KUNIT_CASE(test_kmalloc_aligned_oob_read),
+ KUNIT_CASE(test_kmalloc_aligned_oob_write),
+ KUNIT_CASE(test_shrink_memcache),
+ KUNIT_CASE(test_memcache_ctor),
+ KUNIT_CASE(test_invalid_access),
+ KUNIT_CASE(test_gfpzero),
+ KUNIT_CASE(test_memcache_typesafe_by_rcu),
+ KUNIT_CASE(test_krealloc),
+ KUNIT_CASE(test_memcache_alloc_bulk),
+ {},
+};
+
+/* ===== End test cases ===== */
+
+static int test_init(struct kunit *test)
+{
+ unsigned long flags;
+ int i;
+
+ spin_lock_irqsave(&observed.lock, flags);
+ for (i = 0; i < ARRAY_SIZE(observed.lines); i++)
+ observed.lines[i][0] = '\0';
+ observed.nlines = 0;
+ spin_unlock_irqrestore(&observed.lock, flags);
+
+ /* Any test with 'memcache' in its name will want a memcache. */
+ if (strstr(test->name, "memcache"))
+ test->priv = TEST_PRIV_WANT_MEMCACHE;
+ else
+ test->priv = NULL;
+
+ return 0;
+}
+
+static void test_exit(struct kunit *test)
+{
+ test_cache_destroy();
+}
+
+static struct kunit_suite kfence_test_suite = {
+ .name = "kfence",
+ .test_cases = kfence_test_cases,
+ .init = test_init,
+ .exit = test_exit,
+};
+static struct kunit_suite *kfence_test_suites[] = { &kfence_test_suite, NULL };
+
+static void register_tracepoints(struct tracepoint *tp, void *ignore)
+{
+ check_trace_callback_type_console(probe_console);
+ if (!strcmp(tp->name, "console"))
+ WARN_ON(tracepoint_probe_register(tp, probe_console, NULL));
+}
+
+static void unregister_tracepoints(struct tracepoint *tp, void *ignore)
+{
+ if (!strcmp(tp->name, "console"))
+ tracepoint_probe_unregister(tp, probe_console, NULL);
+}
+
+/*
+ * We only want to do tracepoints setup and teardown once, therefore we have to
+ * customize the init and exit functions and cannot rely on kunit_test_suite().
+ */
+static int __init kfence_test_init(void)
+{
+ /*
+ * Because we want to be able to build the test as a module, we need to
+ * iterate through all known tracepoints, since the static registration
+ * won't work here.
+ */
+ for_each_kernel_tracepoint(register_tracepoints, NULL);
+ return __kunit_test_suites_init(kfence_test_suites);
+}
+
+static void kfence_test_exit(void)
+{
+ __kunit_test_suites_exit(kfence_test_suites);
+ for_each_kernel_tracepoint(unregister_tracepoints, NULL);
+ tracepoint_synchronize_unregister();
+}
+
+late_initcall(kfence_test_init);
+module_exit(kfence_test_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Alexander Potapenko <glider@google.com>, Marco Elver <elver@google.com>");
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
new file mode 100644
index 000000000000..ab83d5a59bb1
--- /dev/null
+++ b/mm/kfence/report.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KFENCE reporting.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#include <stdarg.h>
+
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/printk.h>
+#include <linux/sched/debug.h>
+#include <linux/seq_file.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+#include <trace/events/error_report.h>
+
+#include <asm/kfence.h>
+
+#include "kfence.h"
+
+extern bool no_hash_pointers;
+
+/* Helper function to either print to a seq_file or to console. */
+__printf(2, 3)
+static void seq_con_printf(struct seq_file *seq, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ if (seq)
+ seq_vprintf(seq, fmt, args);
+ else
+ vprintk(fmt, args);
+ va_end(args);
+}
+
+/*
+ * Get the number of stack entries to skip to get out of MM internals. @type is
+ * optional, and if set to NULL, assumes an allocation or free stack.
+ */
+static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries,
+ const enum kfence_error_type *type)
+{
+ char buf[64];
+ int skipnr, fallback = 0;
+
+ if (type) {
+ /* Depending on error type, find different stack entries. */
+ switch (*type) {
+ case KFENCE_ERROR_UAF:
+ case KFENCE_ERROR_OOB:
+ case KFENCE_ERROR_INVALID:
+ /*
+ * kfence_handle_page_fault() may be called with pt_regs
+ * set to NULL; in that case we'll simply show the full
+ * stack trace.
+ */
+ return 0;
+ case KFENCE_ERROR_CORRUPTION:
+ case KFENCE_ERROR_INVALID_FREE:
+ break;
+ }
+ }
+
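+	/* Walk outwards from the innermost frame to the first allocator entry point; the report then starts at its caller. */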
+ for (skipnr = 0; skipnr < num_entries; skipnr++) {
+ int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]);
+
+ if (str_has_prefix(buf, "kfence_") || str_has_prefix(buf, "__kfence_") ||
+ !strncmp(buf, "__slab_free", len)) {
+ /*
+ * In case of tail calls from any of the below
+ * to any of the above.
+ */
+ fallback = skipnr + 1;
+ }
+
+ /* Also the *_bulk() variants by only checking prefixes. */
+ if (str_has_prefix(buf, "kfree") ||
+ str_has_prefix(buf, "kmem_cache_free") ||
+ str_has_prefix(buf, "__kmalloc") ||
+ str_has_prefix(buf, "kmem_cache_alloc"))
+ goto found;
+ }
+ if (fallback < num_entries)
+ return fallback;
+found:
+ skipnr++;
+ return skipnr < num_entries ? skipnr : 0;
+}
+
+static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadata *meta,
+ bool show_alloc)
+{
+ const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track;
+
+ if (track->num_stack_entries) {
+ /* Skip allocation/free internals stack. */
+ int i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL);
+
+ /* stack_trace_seq_print() does not exist; open code our own. */
+ for (; i < track->num_stack_entries; i++)
+ seq_con_printf(seq, " %pS\n", (void *)track->stack_entries[i]);
+ } else {
+ seq_con_printf(seq, " no %s stack\n", show_alloc ? "allocation" : "deallocation");
+ }
+}
+
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta)
+{
+ const int size = abs(meta->size);
+ const unsigned long start = meta->addr;
+ const struct kmem_cache *const cache = meta->cache;
+
+ lockdep_assert_held(&meta->lock);
+
+ if (meta->state == KFENCE_OBJECT_UNUSED) {
+ seq_con_printf(seq, "kfence-#%zd unused\n", meta - kfence_metadata);
+ return;
+ }
+
+ seq_con_printf(seq,
+ "kfence-#%zd [0x%p-0x%p"
+ ", size=%d, cache=%s] allocated by task %d:\n",
+ meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size,
+ (cache && cache->name) ? cache->name : "<destroyed>", meta->alloc_track.pid);
+ kfence_print_stack(seq, meta, true);
+
+ if (meta->state == KFENCE_OBJECT_FREED) {
+ seq_con_printf(seq, "\nfreed by task %d:\n", meta->free_track.pid);
+ kfence_print_stack(seq, meta, false);
+ }
+}
+
+/*
+ * Show bytes at @addr that are different from the expected canary values, up to
+ * @max_bytes.
+ */
+static void print_diff_canary(unsigned long address, size_t bytes_to_show,
+ const struct kfence_metadata *meta)
+{
+ const unsigned long show_until_addr = address + bytes_to_show;
+ const u8 *cur, *end;
+
+ /* Do not show contents of object nor read into following guard page. */
+ end = (const u8 *)(address < meta->addr ? min(show_until_addr, meta->addr)
+ : min(show_until_addr, PAGE_ALIGN(address)));
+
+ pr_cont("[");
+ for (cur = (const u8 *)address; cur < end; cur++) {
+ if (*cur == KFENCE_CANARY_PATTERN(cur))
+ pr_cont(" .");
+ else if (no_hash_pointers)
+ pr_cont(" 0x%02x", *cur);
+ else /* Do not leak kernel memory in non-debug builds. */
+ pr_cont(" !");
+ }
+ pr_cont(" ]");
+}
+
+static const char *get_access_type(bool is_write)
+{
+ return is_write ? "write" : "read";
+}
+
+void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
+ const struct kfence_metadata *meta, enum kfence_error_type type)
+{
+ unsigned long stack_entries[KFENCE_STACK_DEPTH] = { 0 };
+ const ptrdiff_t object_index = meta ? meta - kfence_metadata : -1;
+ int num_stack_entries;
+ int skipnr = 0;
+
+ if (regs) {
+ num_stack_entries = stack_trace_save_regs(regs, stack_entries, KFENCE_STACK_DEPTH, 0);
+ } else {
+ num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 1);
+ skipnr = get_stack_skipnr(stack_entries, num_stack_entries, &type);
+ }
+
+ /* Require non-NULL meta, except if KFENCE_ERROR_INVALID. */
+ if (WARN_ON(type != KFENCE_ERROR_INVALID && !meta))
+ return;
+
+ if (meta)
+ lockdep_assert_held(&meta->lock);
+ /*
+ * Because we may generate reports in printk-unfriendly parts of the
+ * kernel, such as scheduler code, the use of printk() could deadlock.
+ * Until such time that all printing code here is safe in all parts of
+ * the kernel, accept the risk, and just get our message out (given the
+ * system might already behave unpredictably due to the memory error).
+ * As such, also disable lockdep to hide warnings, and avoid disabling
+ * lockdep for the rest of the kernel.
+ */
+ lockdep_off();
+
+ pr_err("==================================================================\n");
+ /* Print report header. */
+ switch (type) {
+ case KFENCE_ERROR_OOB: {
+ const bool left_of_object = address < meta->addr;
+
+ pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write),
+ (void *)stack_entries[skipnr]);
+ pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%zd):\n",
+ get_access_type(is_write), (void *)address,
+ left_of_object ? meta->addr - address : address - meta->addr,
+ left_of_object ? "left" : "right", object_index);
+ break;
+ }
+ case KFENCE_ERROR_UAF:
+ pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write),
+ (void *)stack_entries[skipnr]);
+ pr_err("Use-after-free %s at 0x%p (in kfence-#%zd):\n",
+ get_access_type(is_write), (void *)address, object_index);
+ break;
+ case KFENCE_ERROR_CORRUPTION:
+ pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]);
+ pr_err("Corrupted memory at 0x%p ", (void *)address);
+ print_diff_canary(address, 16, meta);
+ pr_cont(" (in kfence-#%zd):\n", object_index);
+ break;
+ case KFENCE_ERROR_INVALID:
+ pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write),
+ (void *)stack_entries[skipnr]);
+ pr_err("Invalid %s at 0x%p:\n", get_access_type(is_write),
+ (void *)address);
+ break;
+ case KFENCE_ERROR_INVALID_FREE:
+ pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]);
+ pr_err("Invalid free of 0x%p (in kfence-#%zd):\n", (void *)address,
+ object_index);
+ break;
+ }
+
+ /* Print stack trace and object info. */
+ stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 0);
+
+ if (meta) {
+ pr_err("\n");
+ kfence_print_object(NULL, meta);
+ }
+
+ /* Print report footer. */
+ pr_err("\n");
+ if (no_hash_pointers && regs)
+ show_regs(regs);
+ else
+ dump_stack_print_info(KERN_ERR);
+ trace_error_report_end(ERROR_DETECTOR_KFENCE, address);
+ pr_err("==================================================================\n");
+
+ lockdep_on();
+
+ if (panic_on_warn)
+ panic("panic_on_warn set ...\n");
+
+ /* We encountered a memory unsafety error, taint the kernel! */
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK);
+}
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 75e246f680f4..a7d6cb912b05 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -442,18 +442,28 @@ static inline int khugepaged_test_exit(struct mm_struct *mm)
static bool hugepage_vma_check(struct vm_area_struct *vma,
unsigned long vm_flags)
{
- if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
- (vm_flags & VM_NOHUGEPAGE) ||
+ /* Explicitly disabled through madvise. */
+ if ((vm_flags & VM_NOHUGEPAGE) ||
test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
return false;
- if (shmem_file(vma->vm_file) ||
- (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
- vma->vm_file &&
- (vm_flags & VM_DENYWRITE))) {
+ /* Enabled via shmem mount options or sysfs settings. */
+ if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
HPAGE_PMD_NR);
}
+
+ /* THP settings require madvise. */
+ if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
+ return false;
+
+ /* Read-only file mappings need to be aligned for THP to work. */
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
+ (vm_flags & VM_DENYWRITE)) {
+ return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
+ HPAGE_PMD_NR);
+ }
+
if (!vma->anon_vma || vma->vm_ops)
return false;
if (vma_is_temporary_stack(vma))
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 55c671904aac..24210c9bd843 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1312,6 +1312,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
*/
put_page(page);
+ /* device metadata space is not recoverable */
+ if (!pgmap_pfn_valid(pgmap, pfn)) {
+ rc = -ENXIO;
+ goto out;
+ }
+
/*
* Prevent the inode from being freed while we are interrogating
* the address_space, typically this would be handled by
diff --git a/mm/memory.c b/mm/memory.c
index 784249f3307b..c8e357627318 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2902,7 +2902,6 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
}
flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
/*
@@ -3560,7 +3559,6 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
__SetPageUptodate(page);
entry = mk_pte(page, vma->vm_page_prot);
- entry = pte_sw_mkyoung(entry);
if (vma->vm_flags & VM_WRITE)
entry = pte_mkwrite(pte_mkdirty(entry));
@@ -3745,8 +3743,6 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
if (prefault && arch_wants_old_prefaulted_pte())
entry = pte_mkold(entry);
- else
- entry = pte_sw_mkyoung(entry);
if (write)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index abe43c1ae920..5ba51a8bdaeb 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -67,17 +67,17 @@ void put_online_mems(void)
bool movable_node_enabled = false;
#ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
-int memhp_default_online_type = MMOP_OFFLINE;
+int mhp_default_online_type = MMOP_OFFLINE;
#else
-int memhp_default_online_type = MMOP_ONLINE;
+int mhp_default_online_type = MMOP_ONLINE;
#endif
static int __init setup_memhp_default_state(char *str)
{
- const int online_type = memhp_online_type_from_str(str);
+ const int online_type = mhp_online_type_from_str(str);
if (online_type >= 0)
- memhp_default_online_type = online_type;
+ mhp_default_online_type = online_type;
return 1;
}
@@ -107,6 +107,9 @@ static struct resource *register_memory_resource(u64 start, u64 size,
if (strcmp(resource_name, "System RAM"))
flags |= IORESOURCE_SYSRAM_DRIVER_MANAGED;
+ if (!mhp_range_allowed(start, size, true))
+ return ERR_PTR(-E2BIG);
+
/*
* Make sure value parsed from 'mem=' only restricts memory adding
* while booting, so that memory hotplug won't be impacted. Please
@@ -284,21 +287,53 @@ static int check_pfn_span(unsigned long pfn, unsigned long nr_pages,
return 0;
}
-static int check_hotplug_memory_addressable(unsigned long pfn,
- unsigned long nr_pages)
+/*
+ * Return page for the valid pfn only if the page is online. All pfn
+ * walkers which rely on fully initialized page state (e.g. page->flags)
+ * should use this rather than pfn_valid && pfn_to_page.
+ */
+struct page *pfn_to_online_page(unsigned long pfn)
{
- const u64 max_addr = PFN_PHYS(pfn + nr_pages) - 1;
+ unsigned long nr = pfn_to_section_nr(pfn);
+ struct dev_pagemap *pgmap;
+ struct mem_section *ms;
- if (max_addr >> MAX_PHYSMEM_BITS) {
- const u64 max_allowed = (1ull << (MAX_PHYSMEM_BITS + 1)) - 1;
- WARN(1,
- "Hotplugged memory exceeds maximum addressable address, range=%#llx-%#llx, maximum=%#llx\n",
- (u64)PFN_PHYS(pfn), max_addr, max_allowed);
- return -E2BIG;
- }
+ if (nr >= NR_MEM_SECTIONS)
+ return NULL;
- return 0;
+ ms = __nr_to_section(nr);
+ if (!online_section(ms))
+ return NULL;
+
+ /*
+ * Save some code text when online_section() +
+ * pfn_section_valid() are sufficient.
+ */
+ if (IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) && !pfn_valid(pfn))
+ return NULL;
+
+ if (!pfn_section_valid(ms, pfn))
+ return NULL;
+
+ if (!online_device_section(ms))
+ return pfn_to_page(pfn);
+
+ /*
+ * Slowpath: when ZONE_DEVICE collides with
+ * ZONE_{NORMAL,MOVABLE} within the same section some pfns in
+ * the section may be 'offline' but 'valid'. Only
+ * get_dev_pagemap() can determine sub-section online status.
+ */
+ pgmap = get_dev_pagemap(pfn, NULL);
+ put_dev_pagemap(pgmap);
+
+ /* The presence of a pgmap indicates ZONE_DEVICE offline pfn */
+ if (pgmap)
+ return NULL;
+
+ return pfn_to_page(pfn);
}
+EXPORT_SYMBOL_GPL(pfn_to_online_page);
/*
* Reasonably generic function for adding memory. It is
@@ -317,9 +352,7 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
if (WARN_ON_ONCE(!params->pgprot.pgprot))
return -EINVAL;
- err = check_hotplug_memory_addressable(pfn, nr_pages);
- if (err)
- return err;
+ VM_BUG_ON(!mhp_range_allowed(PFN_PHYS(pfn), nr_pages * PAGE_SIZE, false));
if (altmap) {
/*
@@ -445,20 +478,19 @@ static void update_pgdat_span(struct pglist_data *pgdat)
for (zone = pgdat->node_zones;
zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
- unsigned long zone_end_pfn = zone->zone_start_pfn +
- zone->spanned_pages;
+ unsigned long end_pfn = zone_end_pfn(zone);
/* No need to lock the zones, they can't change. */
if (!zone->spanned_pages)
continue;
if (!node_end_pfn) {
node_start_pfn = zone->zone_start_pfn;
- node_end_pfn = zone_end_pfn;
+ node_end_pfn = end_pfn;
continue;
}
- if (zone_end_pfn > node_end_pfn)
- node_end_pfn = zone_end_pfn;
+ if (end_pfn > node_end_pfn)
+ node_end_pfn = end_pfn;
if (zone->zone_start_pfn < node_start_pfn)
node_start_pfn = zone->zone_start_pfn;
}
@@ -678,6 +710,14 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
}
+
+static void section_taint_zone_device(unsigned long pfn)
+{
+ struct mem_section *ms = __pfn_to_section(pfn);
+
+ ms->section_mem_map |= SECTION_TAINT_ZONE_DEVICE;
+}
+
/*
* Associate the pfn range with the given zone, initializing the memmaps
* and resizing the pgdat/zone data to span the added pages. After this
@@ -708,6 +748,19 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
pgdat_resize_unlock(pgdat, &flags);
/*
+ * Subsection population requires care in pfn_to_online_page().
+ * Set the taint to enable the slow path detection of
+ * ZONE_DEVICE pages in an otherwise ZONE_{NORMAL,MOVABLE}
+ * section.
+ */
+ if (zone_is_zone_device(zone)) {
+ if (!IS_ALIGNED(start_pfn, PAGES_PER_SECTION))
+ section_taint_zone_device(start_pfn);
+ if (!IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION))
+ section_taint_zone_device(start_pfn + nr_pages);
+ }
+
+ /*
* TODO now we have a visible range of pages which are not associated
* with their zone properly. Not nice but set_pfnblock_flags_mask
* expects the zone spans the pfn range. All the pages in the range
@@ -1007,7 +1060,7 @@ static int check_hotplug_memory_range(u64 start, u64 size)
static int online_memory_block(struct memory_block *mem, void *arg)
{
- mem->online_type = memhp_default_online_type;
+ mem->online_type = mhp_default_online_type;
return device_online(&mem->dev);
}
@@ -1084,11 +1137,11 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
* In case we're allowed to merge the resource, flag it and trigger
* merging now that adding succeeded.
*/
- if (mhp_flags & MEMHP_MERGE_RESOURCE)
+ if (mhp_flags & MHP_MERGE_RESOURCE)
merge_system_ram_resource(res);
/* online pages if requested */
- if (memhp_default_online_type != MMOP_OFFLINE)
+ if (mhp_default_online_type != MMOP_OFFLINE)
walk_memory_blocks(start, size, NULL, online_memory_block);
return ret;
@@ -1180,6 +1233,61 @@ out_unlock:
}
EXPORT_SYMBOL_GPL(add_memory_driver_managed);
+/*
+ * Platforms should define arch_get_mappable_range() that provides
+ * maximum possible addressable physical memory range for which the
+ * linear mapping could be created. The platform-returned address
+ * range must adhere to the following semantics.
+ *
+ * - range.start <= range.end
+ * - Range includes both end points [range.start..range.end]
+ *
+ * There is also a fallback definition provided here, allowing the
+ * entire possible physical address range in case any platform does
+ * not define arch_get_mappable_range().
+ */
+struct range __weak arch_get_mappable_range(void)
+{
+ struct range mhp_range = {
+ .start = 0UL,
+ .end = -1ULL,
+ };
+ return mhp_range;
+}
+
+struct range mhp_get_pluggable_range(bool need_mapping)
+{
+ const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1;
+ struct range mhp_range;
+
+ if (need_mapping) {
+ mhp_range = arch_get_mappable_range();
+ if (mhp_range.start > max_phys) {
+ mhp_range.start = 0;
+ mhp_range.end = 0;
+ }
+ mhp_range.end = min_t(u64, mhp_range.end, max_phys);
+ } else {
+ mhp_range.start = 0;
+ mhp_range.end = max_phys;
+ }
+ return mhp_range;
+}
+EXPORT_SYMBOL_GPL(mhp_get_pluggable_range);
+
+bool mhp_range_allowed(u64 start, u64 size, bool need_mapping)
+{
+ struct range mhp_range = mhp_get_pluggable_range(need_mapping);
+ u64 end = start + size;
+
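+	/* The range is allowed iff it is non-empty and [start, end - 1] lies within the inclusive pluggable range. */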
+ if (start < end && start >= mhp_range.start && (end - 1) <= mhp_range.end)
+ return true;
+
+ pr_warn("Hotplug memory [%#llx-%#llx] exceeds maximum addressable range [%#llx-%#llx]\n",
+ start, end, mhp_range.start, mhp_range.end);
+ return false;
+}
+
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
* Confirm all pages in a range [start, end) belong to the same zone (skipping
diff --git a/mm/memremap.c b/mm/memremap.c
index 16b2fb482da1..7aa7d6e80ee5 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -80,6 +80,21 @@ static unsigned long pfn_first(struct dev_pagemap *pgmap, int range_id)
return pfn + vmem_altmap_offset(pgmap_altmap(pgmap));
}
+bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
+{
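+	/*
+	 * The pfn is valid iff it falls within one of the pgmap's ranges and
+	 * past any altmap-reserved metadata at the start of that range (see
+	 * pfn_first()).
+	 */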
+ int i;
+
+ for (i = 0; i < pgmap->nr_range; i++) {
+ struct range *range = &pgmap->ranges[i];
+
+ if (pfn >= PHYS_PFN(range->start) &&
+ pfn <= PHYS_PFN(range->end))
+ return pfn >= pfn_first(pgmap, i);
+ }
+
+ return false;
+}
+
static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id)
{
const struct range *range = &pgmap->ranges[range_id];
@@ -185,6 +200,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
int range_id, int nid)
{
+ const bool is_private = pgmap->type == MEMORY_DEVICE_PRIVATE;
struct range *range = &pgmap->ranges[range_id];
struct dev_pagemap *conflict_pgmap;
int error, is_ram;
@@ -230,6 +246,11 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
if (error)
goto err_pfn_remap;
+ if (!mhp_range_allowed(range->start, range_len(range), !is_private)) {
+ error = -EINVAL;
+ goto err_pfn_remap;
+ }
+
mem_hotplug_begin();
/*
@@ -243,7 +264,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
* the CPU, we do want the linear mapping and thus use
* arch_add_memory().
*/
- if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+ if (is_private) {
error = add_pages(nid, PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), params);
} else {
diff --git a/mm/mlock.c b/mm/mlock.c
index 73960bb3464d..f8f8cc32d03d 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -622,7 +622,7 @@ static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
vma = find_vma(mm, start);
if (vma == NULL)
- vma = mm->mmap;
+ return 0;
for (; vma ; vma = vma->vm_next) {
if (start >= vma->vm_end)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ddccc59f2f72..3e4b29ee2b1e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2168,6 +2168,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
}
adjust_managed_page_count(page, pageblock_nr_pages);
+ page_zone(page)->cma_pages += pageblock_nr_pages;
}
#endif
diff --git a/mm/page_io.c b/mm/page_io.c
index 485fa5cca4a2..c493ce9ebcf5 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -254,11 +254,6 @@ out:
return ret;
}
-static sector_t swap_page_sector(struct page *page)
-{
- return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
-}
-
static inline void count_swpout_vm_event(struct page *page)
{
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/mm/rmap.c b/mm/rmap.c
index e26ae119a131..b0fc27e77d6d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -168,7 +168,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
*
* Anon-vma allocations are very subtle, because we may have
* optimistically looked up an anon_vma in page_lock_anon_vma_read()
- * and that may actually touch the spinlock even in the newly
+ * and that may actually touch the rwsem even in the newly
* allocated vma (it depends on RCU to make sure that the
* anon_vma isn't actually destroyed).
*
@@ -359,7 +359,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
goto out_error_free_anon_vma;
/*
- * The root anon_vma's spinlock is the lock actually used when we
+ * The root anon_vma's rwsem is the lock actually used when we
* lock any of the anon_vmas in this anon_vma tree.
*/
anon_vma->root = pvma->anon_vma->root;
@@ -462,8 +462,8 @@ void __init anon_vma_init(void)
* Getting a lock on a stable anon_vma from a page off the LRU is tricky!
*
* Since there is no serialization what so ever against page_remove_rmap()
- * the best this function can do is return a locked anon_vma that might
- * have been relevant to this page.
+ * the best this function can do is return a refcount increased anon_vma
+ * that might have been relevant to this page.
*
* The page might have been remapped to a different anon_vma or the anon_vma
* returned may already be freed (and even reused).
@@ -1086,8 +1086,7 @@ static void __page_check_anon_rmap(struct page *page,
* be set up correctly at this point.
*
* We have exclusion against page_add_anon_rmap because the caller
- * always holds the page locked, except if called from page_dup_rmap,
- * in which case the page is already known to be setup.
+ * always holds the page locked.
*
* We have exclusion against page_add_new_anon_rmap because those pages
* are initially only visible via the pagetables, and the pte is locked
@@ -1737,9 +1736,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
return vma_is_temporary_stack(vma);
}
-static int page_mapcount_is_zero(struct page *page)
+static int page_not_mapped(struct page *page)
{
- return !total_mapcount(page);
+ return !page_mapped(page);
}
/**
@@ -1757,7 +1756,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
struct rmap_walk_control rwc = {
.rmap_one = try_to_unmap_one,
.arg = (void *)flags,
- .done = page_mapcount_is_zero,
+ .done = page_not_mapped,
.anon_lock = page_lock_anon_vma_read,
};
@@ -1781,11 +1780,6 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
return !page_mapcount(page) ? true : false;
}
-static int page_not_mapped(struct page *page)
-{
- return !page_mapped(page);
-};
-
/**
* try_to_munlock - try to munlock a page
* @page: the page to be munlocked
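
The page_mapcount_is_zero() to page_not_mapped() swap above replaces an exact count with a cheaper yes/no test: total_mapcount() must sum a compound page's per-subpage counters, while page_mapped() can stop at the first mapped subpage, which is all the rmap-walk done callback needs. A userspace model (hypothetical struct, not the kernel's real page layout) contrasting the two predicates:

#include <stdbool.h>
#include <stdio.h>

#define NR_SUBPAGES 4

struct model_page {
	int compound_mapcount;
	int subpage_mapcount[NR_SUBPAGES];
};

/* Sums every counter — the cost page_mapcount_is_zero() had to pay. */
static int total_mapcount(const struct model_page *p)
{
	int i, total = p->compound_mapcount;

	for (i = 0; i < NR_SUBPAGES; i++)
		total += p->subpage_mapcount[i];
	return total;
}

/* Bails out at the first mapped subpage — the page_mapped() shape. */
static bool page_mapped(const struct model_page *p)
{
	int i;

	if (p->compound_mapcount)
		return true;
	for (i = 0; i < NR_SUBPAGES; i++)
		if (p->subpage_mapcount[i])
			return true;
	return false;
}

int main(void)
{
	struct model_page p = { .subpage_mapcount = { 0, 2, 0, 0 } };

	printf("total=%d mapped=%d\n", total_mapcount(&p), page_mapped(&p));
	return 0;
}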
diff --git a/mm/shmem.c b/mm/shmem.c
index ff741d229701..b2db4ed0fbc7 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -842,7 +842,6 @@ unsigned long shmem_swap_usage(struct vm_area_struct *vma)
void shmem_unlock_mapping(struct address_space *mapping)
{
struct pagevec pvec;
- pgoff_t indices[PAGEVEC_SIZE];
pgoff_t index = 0;
pagevec_init(&pvec);
@@ -850,16 +849,8 @@ void shmem_unlock_mapping(struct address_space *mapping)
* Minor point, but we might as well stop if someone else SHM_LOCKs it.
*/
while (!mapping_unevictable(mapping)) {
- /*
- * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
- * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
- */
- pvec.nr = find_get_entries(mapping, index,
- PAGEVEC_SIZE, pvec.pages, indices);
- if (!pvec.nr)
+ if (!pagevec_lookup(&pvec, mapping, &index))
break;
- index = indices[pvec.nr - 1] + 1;
- pagevec_remove_exceptionals(&pvec);
check_move_unevictable_pages(&pvec);
pagevec_release(&pvec);
cond_resched();
@@ -916,18 +907,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
pagevec_init(&pvec);
index = start;
- while (index < end) {
- pvec.nr = find_get_entries(mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE),
- pvec.pages, indices);
- if (!pvec.nr)
- break;
+ while (index < end && find_lock_entries(mapping, index, end - 1,
+ &pvec, indices)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
index = indices[i];
- if (index >= end)
- break;
if (xa_is_value(page)) {
if (unfalloc)
@@ -936,18 +921,10 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
index, page);
continue;
}
+ index += thp_nr_pages(page) - 1;
- VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
-
- if (!trylock_page(page))
- continue;
-
- if ((!unfalloc || !PageUptodate(page)) &&
- page_mapping(page) == mapping) {
- VM_BUG_ON_PAGE(PageWriteback(page), page);
- if (shmem_punch_compound(page, start, end))
- truncate_inode_page(mapping, page);
- }
+ if (!unfalloc || !PageUptodate(page))
+ truncate_inode_page(mapping, page);
unlock_page(page);
}
pagevec_remove_exceptionals(&pvec);
@@ -988,10 +965,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
while (index < end) {
cond_resched();
- pvec.nr = find_get_entries(mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE),
- pvec.pages, indices);
- if (!pvec.nr) {
+ if (!find_get_entries(mapping, index, end - 1, &pvec,
+ indices)) {
/* If all gone or hole-punch or unfalloc, we're done */
if (index == start || end != -1)
break;
@@ -1003,9 +978,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
struct page *page = pvec.pages[i];
index = indices[i];
- if (index >= end)
- break;
-
if (xa_is_value(page)) {
if (unfalloc)
continue;
@@ -1533,6 +1505,30 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
return page;
}
+/*
+ * Make sure huge_gfp is always more limited than limit_gfp.
+ * Some of the flags set permissions, while others set limitations.
+ */
+static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
+{
+ gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
+ gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
+ gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
+ gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);
+
+ /* Allow allocations only from the originally specified zones. */
+ result |= zoneflags;
+
+ /*
+ * Minimize the result gfp by taking the union with the deny flags,
+ * and the intersection of the allow flags.
+ */
+ result |= (limit_gfp & denyflags);
+ result |= (huge_gfp & limit_gfp) & allowflags;
+
+ return result;
+}
+
static struct page *shmem_alloc_hugepage(gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
@@ -1547,8 +1543,8 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
return NULL;
shmem_pseudo_vma_init(&pvma, info, hindex);
- page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
- HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+ page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(),
+ true);
shmem_pseudo_vma_destroy(&pvma);
if (page)
prep_transhuge_page(page);
@@ -1804,6 +1800,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
struct page *page;
enum sgp_type sgp_huge = sgp;
pgoff_t hindex = index;
+ gfp_t huge_gfp;
int error;
int once = 0;
int alloced = 0;
@@ -1821,7 +1818,8 @@ repeat:
sbinfo = SHMEM_SB(inode->i_sb);
charge_mm = vma ? vma->vm_mm : current->mm;
- page = find_lock_entry(mapping, index);
+ page = pagecache_get_page(mapping, index,
+ FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
if (xa_is_value(page)) {
error = shmem_swapin_page(inode, index, &page,
sgp, gfp, vma, fault_type);
@@ -1889,7 +1887,9 @@ repeat:
}
alloc_huge:
- page = shmem_alloc_and_acct_page(gfp, inode, index, true);
+ huge_gfp = vma_thp_gfp_mask(vma);
+ huge_gfp = limit_gfp_mask(huge_gfp, gfp);
+ page = shmem_alloc_and_acct_page(huge_gfp, inode, index, true);
if (IS_ERR(page)) {
alloc_nohuge:
page = shmem_alloc_and_acct_page(gfp, inode,
@@ -2676,86 +2676,20 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval ? retval : error;
}
-/*
- * llseek SEEK_DATA or SEEK_HOLE through the page cache.
- */
-static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
- pgoff_t index, pgoff_t end, int whence)
-{
- struct page *page;
- struct pagevec pvec;
- pgoff_t indices[PAGEVEC_SIZE];
- bool done = false;
- int i;
-
- pagevec_init(&pvec);
- pvec.nr = 1; /* start small: we may be there already */
- while (!done) {
- pvec.nr = find_get_entries(mapping, index,
- pvec.nr, pvec.pages, indices);
- if (!pvec.nr) {
- if (whence == SEEK_DATA)
- index = end;
- break;
- }
- for (i = 0; i < pvec.nr; i++, index++) {
- if (index < indices[i]) {
- if (whence == SEEK_HOLE) {
- done = true;
- break;
- }
- index = indices[i];
- }
- page = pvec.pages[i];
- if (page && !xa_is_value(page)) {
- if (!PageUptodate(page))
- page = NULL;
- }
- if (index >= end ||
- (page && whence == SEEK_DATA) ||
- (!page && whence == SEEK_HOLE)) {
- done = true;
- break;
- }
- }
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- pvec.nr = PAGEVEC_SIZE;
- cond_resched();
- }
- return index;
-}
-
static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
{
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- pgoff_t start, end;
- loff_t new_offset;
if (whence != SEEK_DATA && whence != SEEK_HOLE)
return generic_file_llseek_size(file, offset, whence,
MAX_LFS_FILESIZE, i_size_read(inode));
+ if (offset < 0)
+ return -ENXIO;
+
inode_lock(inode);
/* We're holding i_mutex so we can access i_size directly */
-
- if (offset < 0 || offset >= inode->i_size)
- offset = -ENXIO;
- else {
- start = offset >> PAGE_SHIFT;
- end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
- new_offset = shmem_seek_hole_data(mapping, start, end, whence);
- new_offset <<= PAGE_SHIFT;
- if (new_offset > offset) {
- if (new_offset < inode->i_size)
- offset = new_offset;
- else if (whence == SEEK_DATA)
- offset = -ENXIO;
- else
- offset = inode->i_size;
- }
- }
-
+ offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence);
if (offset >= 0)
offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
inode_unlock(inode);
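
The limit_gfp_mask() helper added above is pure bit arithmetic, so it can be exercised in userspace. A sketch with hypothetical flag values (the real gfp bit layout differs): permission-type flags are intersected, restriction-type flags are unioned, and zone-selection bits come only from the limit mask.

#include <stdio.h>

typedef unsigned int gfp_t;
#define __GFP_IO	0x01u	/* permission-type flags */
#define __GFP_FS	0x02u
#define __GFP_RECLAIM	0x04u
#define __GFP_NOWARN	0x10u	/* restriction-type flags */
#define __GFP_NORETRY	0x20u
#define GFP_ZONEMASK	0xc0u	/* zone selection bits */

static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
{
	gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
	gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
	gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);

	result |= limit_gfp & GFP_ZONEMASK;		/* caller's zones only */
	result |= limit_gfp & denyflags;		/* union of restrictions */
	result |= (huge_gfp & limit_gfp) & allowflags;	/* intersection of permissions */
	return result;
}

int main(void)
{
	/* huge_gfp wants FS+RECLAIM; the limit allows IO+RECLAIM and adds NORETRY. */
	gfp_t r = limit_gfp_mask(__GFP_FS | __GFP_RECLAIM,
				 __GFP_IO | __GFP_RECLAIM | __GFP_NORETRY);

	printf("result=%#x\n", r);	/* 0x24: RECLAIM kept, FS dropped, NORETRY added */
	return 0;
}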
diff --git a/mm/slab.c b/mm/slab.c
index 35c68d99d460..51fd424e0d6d 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -100,6 +100,7 @@
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/kallsyms.h>
+#include <linux/kfence.h>
#include <linux/cpu.h>
#include <linux/sysctl.h>
#include <linux/module.h>
@@ -3208,7 +3209,7 @@ must_grow:
}
static __always_inline void *
-slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size,
unsigned long caller)
{
unsigned long save_flags;
@@ -3221,6 +3222,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
if (unlikely(!cachep))
return NULL;
+ ptr = kfence_alloc(cachep, orig_size, flags);
+ if (unlikely(ptr))
+ goto out_hooks;
+
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
@@ -3253,6 +3258,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr)
memset(ptr, 0, cachep->object_size);
+out_hooks:
slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr);
return ptr;
}
@@ -3290,7 +3296,7 @@ __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
#endif /* CONFIG_NUMA */
static __always_inline void *
-slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
+slab_alloc(struct kmem_cache *cachep, gfp_t flags, size_t orig_size, unsigned long caller)
{
unsigned long save_flags;
void *objp;
@@ -3301,6 +3307,10 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
if (unlikely(!cachep))
return NULL;
+ objp = kfence_alloc(cachep, orig_size, flags);
+ if (unlikely(objp))
+ goto out;
+
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
objp = __do_cache_alloc(cachep, flags);
@@ -3311,6 +3321,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp)
memset(objp, 0, cachep->object_size);
+out:
slab_post_alloc_hook(cachep, objcg, flags, 1, &objp);
return objp;
}
@@ -3416,6 +3427,12 @@ free_done:
static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
unsigned long caller)
{
+ if (is_kfence_address(objp)) {
+ kmemleak_free_recursive(objp, cachep->flags);
+ __kfence_free(objp);
+ return;
+ }
+
if (unlikely(slab_want_init_on_free(cachep)))
memset(objp, 0, cachep->object_size);
@@ -3482,7 +3499,7 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- void *ret = slab_alloc(cachep, flags, _RET_IP_);
+ void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_);
trace_kmem_cache_alloc(_RET_IP_, ret,
cachep->object_size, cachep->size, flags);
@@ -3515,7 +3532,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
local_irq_disable();
for (i = 0; i < size; i++) {
- void *objp = __do_cache_alloc(s, flags);
+ void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags);
if (unlikely(!objp))
goto error;
@@ -3548,7 +3565,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
{
void *ret;
- ret = slab_alloc(cachep, flags, _RET_IP_);
+ ret = slab_alloc(cachep, flags, size, _RET_IP_);
ret = kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc(_RET_IP_, ret,
@@ -3574,7 +3591,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
*/
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
- void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+ void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
cachep->object_size, cachep->size,
@@ -3592,7 +3609,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
{
void *ret;
- ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+ ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
ret = kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc_node(_RET_IP_, ret,
@@ -3673,7 +3690,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- ret = slab_alloc(cachep, flags, caller);
+ ret = slab_alloc(cachep, flags, size, caller);
ret = kasan_kmalloc(cachep, ret, size, flags);
trace_kmalloc(caller, ret,
@@ -4172,7 +4189,10 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
BUG_ON(objnr >= cachep->num);
/* Find offset within object. */
- offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+ if (is_kfence_address(ptr))
+ offset = ptr - kfence_object_start(ptr);
+ else
+ offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
/* Allow address range falling entirely within usercopy region. */
if (offset >= cachep->useroffset &&
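
The SLAB changes above all follow one pattern: the hot allocation path first offers each request to KFENCE, and a NULL return means "not sampled this time, take the normal path". A stand-alone sketch of that interception shape, with a stub allocator and a counter-based sample rate standing in for the real KFENCE machinery:

#include <stdio.h>
#include <stdlib.h>

#define SAMPLE_INTERVAL 100	/* hypothetical: 1 in 100 allocations */

static unsigned long alloc_count;

/* Stand-in for kfence_alloc(): occasionally serves a guarded object,
 * otherwise returns NULL so the caller uses its fast path. */
static void *kfence_alloc_stub(size_t size)
{
	if (++alloc_count % SAMPLE_INTERVAL)
		return NULL;
	return malloc(size);	/* "guarded" object, simulated with malloc */
}

static void *slab_alloc_stub(size_t size)
{
	void *obj = kfence_alloc_stub(size);

	if (obj)		/* unlikely(): mirrors the kernel hook */
		return obj;
	return malloc(size);	/* the normal slab fast path */
}

int main(void)
{
	for (int i = 0; i < 200; i++)
		free(slab_alloc_stub(64));
	printf("done after %lu allocations\n", alloc_count);
	return 0;
}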
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 7c8298c17145..88e833986332 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -12,6 +12,7 @@
#include <linux/memory.h>
#include <linux/cache.h>
#include <linux/compiler.h>
+#include <linux/kfence.h>
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/uaccess.h>
@@ -430,6 +431,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
rcu_barrier();
list_for_each_entry_safe(s, s2, &to_destroy, list) {
+ kfence_shutdown_cache(s);
#ifdef SLAB_SUPPORTS_SYSFS
sysfs_slab_release(s);
#else
@@ -455,6 +457,7 @@ static int shutdown_cache(struct kmem_cache *s)
list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
schedule_work(&slab_caches_to_rcu_destroy_work);
} else {
+ kfence_shutdown_cache(s);
#ifdef SLAB_SUPPORTS_SYSFS
sysfs_slab_unlink(s);
sysfs_slab_release(s);
@@ -640,6 +643,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
panic("Out of memory when creating slab %s\n", name);
create_boot_cache(s, name, size, flags, useroffset, usersize);
+ kasan_cache_create_kmalloc(s);
list_add(&s->list, &slab_caches);
s->refcount = 1;
return s;
@@ -1132,16 +1136,27 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size,
void *ret;
size_t ks;
- ks = ksize(p);
+ /* Don't use instrumented ksize to allow precise KASAN poisoning. */
+ if (likely(!ZERO_OR_NULL_PTR(p))) {
+ if (!kasan_check_byte(p))
+ return NULL;
+ ks = kfence_ksize(p) ?: __ksize(p);
+ } else
+ ks = 0;
+ /* If the object still fits, repoison it precisely. */
if (ks >= new_size) {
p = kasan_krealloc((void *)p, new_size, flags);
return (void *)p;
}
ret = kmalloc_track_caller(new_size, flags);
- if (ret && p)
- memcpy(ret, p, ks);
+ if (ret && p) {
+ /* Disable KASAN checks as the object's redzone is accessed. */
+ kasan_disable_current();
+ memcpy(ret, kasan_reset_tag(p), ks);
+ kasan_enable_current();
+ }
return ret;
}
@@ -1235,7 +1250,7 @@ size_t ksize(const void *objp)
if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp))
return 0;
- size = __ksize(objp);
+ size = kfence_ksize(objp) ?: __ksize(objp);
/*
* We assume that ksize callers could use whole allocated area,
* so we need to unpoison this area.
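
The "kfence_ksize(p) ?: __ksize(p)" expressions above use the GNU C "a ?: b" extension the kernel builds with: it evaluates a once and yields it when nonzero, else b — here, "ask KFENCE first, fall back to slab metadata". A compilable illustration with stub sizes (gcc/clang in GNU mode):

#include <stdio.h>

/* Returns the object size if p were a KFENCE object, else 0. */
static unsigned long kfence_ksize_stub(int is_kfence_object)
{
	return is_kfence_object ? 4096 : 0;
}

static unsigned long __ksize_stub(void)
{
	return 192;	/* size a slab cache would report */
}

int main(void)
{
	/* "x ?: y" behaves like "x ? x : y", but evaluates x only once. */
	unsigned long ks = kfence_ksize_stub(0) ?: __ksize_stub();

	printf("ks=%lu\n", ks);	/* 192: fell through to __ksize */
	return 0;
}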
diff --git a/mm/slub.c b/mm/slub.c
index b2833ce85c92..e26c274b4657 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -27,6 +27,7 @@
#include <linux/ctype.h>
#include <linux/debugobjects.h>
#include <linux/kallsyms.h>
+#include <linux/kfence.h>
#include <linux/memory.h>
#include <linux/math64.h>
#include <linux/fault-inject.h>
@@ -1570,6 +1571,11 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
void *old_tail = *tail ? *tail : *head;
int rsize;
+ if (is_kfence_address(next)) {
+ slab_free_hook(s, next);
+ return true;
+ }
+
/* Head and tail of the reconstructed freelist */
*head = NULL;
*tail = NULL;
@@ -2809,7 +2815,7 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
* Otherwise we can simply pick the next object from the lockless free list.
*/
static __always_inline void *slab_alloc_node(struct kmem_cache *s,
- gfp_t gfpflags, int node, unsigned long addr)
+ gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
{
void *object;
struct kmem_cache_cpu *c;
@@ -2820,6 +2826,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
if (!s)
return NULL;
+
+ object = kfence_alloc(s, orig_size, gfpflags);
+ if (unlikely(object))
+ goto out;
+
redo:
/*
* Must read kmem_cache cpu data via this cpu ptr. Preemption is
@@ -2892,20 +2903,21 @@ redo:
if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
memset(kasan_reset_tag(object), 0, s->object_size);
+out:
slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
return object;
}
static __always_inline void *slab_alloc(struct kmem_cache *s,
- gfp_t gfpflags, unsigned long addr)
+ gfp_t gfpflags, unsigned long addr, size_t orig_size)
{
- return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
+ return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
}
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
{
- void *ret = slab_alloc(s, gfpflags, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
s->size, gfpflags);
@@ -2917,7 +2929,7 @@ EXPORT_SYMBOL(kmem_cache_alloc);
#ifdef CONFIG_TRACING
void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
- void *ret = slab_alloc(s, gfpflags, _RET_IP_);
+ void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
ret = kasan_kmalloc(s, ret, size, gfpflags);
return ret;
@@ -2928,7 +2940,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
{
- void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
+ void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
s->object_size, s->size, gfpflags, node);
@@ -2942,7 +2954,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
gfp_t gfpflags,
int node, size_t size)
{
- void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
+ void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
trace_kmalloc_node(_RET_IP_, ret,
size, s->size, gfpflags, node);
@@ -2976,6 +2988,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
stat(s, FREE_SLOWPATH);
+ if (kfence_free(head))
+ return;
+
if (kmem_cache_debug(s) &&
!free_debug_processing(s, page, head, tail, cnt, addr))
return;
@@ -3220,6 +3235,13 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
df->s = cache_from_obj(s, object); /* Support for memcg */
}
+ if (is_kfence_address(object)) {
+ slab_free_hook(df->s, object);
+ __kfence_free(object);
+ p[size] = NULL; /* mark object processed */
+ return size;
+ }
+
/* Start new detached freelist */
df->page = page;
set_freepointer(df->s, object, NULL);
@@ -3295,8 +3317,14 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
c = this_cpu_ptr(s->cpu_slab);
for (i = 0; i < size; i++) {
- void *object = c->freelist;
+ void *object = kfence_alloc(s, s->object_size, flags);
+ if (unlikely(object)) {
+ p[i] = object;
+ continue;
+ }
+
+ object = c->freelist;
if (unlikely(!object)) {
/*
* We may have removed an object from c->freelist using
@@ -3551,8 +3579,7 @@ static void early_kmem_cache_node_alloc(int node)
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
init_tracking(kmem_cache_node, n);
#endif
- n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
- GFP_KERNEL);
+ n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL);
page->freelist = get_freepointer(kmem_cache_node, n);
page->inuse = 1;
page->frozen = 0;
@@ -4021,7 +4048,7 @@ void *__kmalloc(size_t size, gfp_t flags)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- ret = slab_alloc(s, flags, _RET_IP_);
+ ret = slab_alloc(s, flags, _RET_IP_, size);
trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
@@ -4069,7 +4096,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- ret = slab_alloc_node(s, flags, node, _RET_IP_);
+ ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
@@ -4095,6 +4122,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
struct kmem_cache *s;
unsigned int offset;
size_t object_size;
+ bool is_kfence = is_kfence_address(ptr);
ptr = kasan_reset_tag(ptr);
@@ -4107,10 +4135,13 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
to_user, 0, n);
/* Find offset within object. */
- offset = (ptr - page_address(page)) % s->size;
+ if (is_kfence)
+ offset = ptr - kfence_object_start(ptr);
+ else
+ offset = (ptr - page_address(page)) % s->size;
/* Adjust for redzone and reject if within the redzone. */
- if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
+ if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
if (offset < s->red_left_pad)
usercopy_abort("SLUB object in left red zone",
s->name, to_user, offset, n);
@@ -4527,7 +4558,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- ret = slab_alloc(s, gfpflags, caller);
+ ret = slab_alloc(s, gfpflags, caller, size);
/* Honor the call site pointer we received. */
trace_kmalloc(caller, ret, size, s->size, gfpflags);
@@ -4558,7 +4589,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
if (unlikely(ZERO_OR_NULL_PTR(s)))
return s;
- ret = slab_alloc_node(s, gfpflags, node, caller);
+ ret = slab_alloc_node(s, gfpflags, node, caller, size);
/* Honor the call site pointer we received. */
trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
diff --git a/mm/swap.c b/mm/swap.c
index ab3258afcbeb..31b844d4ed94 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -1018,45 +1018,11 @@ void __pagevec_lru_add(struct pagevec *pvec)
}
/**
- * pagevec_lookup_entries - gang pagecache lookup
- * @pvec: Where the resulting entries are placed
- * @mapping: The address_space to search
- * @start: The starting entry index
- * @nr_entries: The maximum number of pages
- * @indices: The cache indices corresponding to the entries in @pvec
- *
- * pagevec_lookup_entries() will search for and return a group of up
- * to @nr_pages pages and shadow entries in the mapping. All
- * entries are placed in @pvec. pagevec_lookup_entries() takes a
- * reference against actual pages in @pvec.
- *
- * The search returns a group of mapping-contiguous entries with
- * ascending indexes. There may be holes in the indices due to
- * not-present entries.
- *
- * Only one subpage of a Transparent Huge Page is returned in one call:
- * allowing truncate_inode_pages_range() to evict the whole THP without
- * cycling through a pagevec of extra references.
- *
- * pagevec_lookup_entries() returns the number of entries which were
- * found.
- */
-unsigned pagevec_lookup_entries(struct pagevec *pvec,
- struct address_space *mapping,
- pgoff_t start, unsigned nr_entries,
- pgoff_t *indices)
-{
- pvec->nr = find_get_entries(mapping, start, nr_entries,
- pvec->pages, indices);
- return pagevec_count(pvec);
-}
-
-/**
* pagevec_remove_exceptionals - pagevec exceptionals pruning
* @pvec: The pagevec to prune
*
- * pagevec_lookup_entries() fills both pages and exceptional radix
- * tree entries into the pagevec. This function prunes all
+ * find_get_entries() fills both pages and XArray value entries (aka
+ * exceptional entries) into the pagevec. This function prunes all
* exceptionals from @pvec without leaving holes, so that it can be
* passed on to page-only pagevec operations.
*/
diff --git a/mm/swap_state.c b/mm/swap_state.c
index c1a648d9092b..3cdee7b11da9 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -87,11 +87,9 @@ void *get_shadow_from_swap_cache(swp_entry_t entry)
pgoff_t idx = swp_offset(entry);
struct page *page;
- page = find_get_entry(address_space, idx);
+ page = xa_load(&address_space->i_pages, idx);
if (xa_is_value(page))
return page;
- if (page)
- put_page(page);
return NULL;
}
@@ -405,7 +403,8 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
{
swp_entry_t swp;
struct swap_info_struct *si;
- struct page *page = find_get_entry(mapping, index);
+ struct page *page = pagecache_get_page(mapping, index,
+ FGP_ENTRY | FGP_HEAD, 0);
if (!page)
return page;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index f039745989d2..084a5b9a18e5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -219,6 +219,19 @@ offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset)
BUG();
}
+sector_t swap_page_sector(struct page *page)
+{
+ struct swap_info_struct *sis = page_swap_info(page);
+ struct swap_extent *se;
+ sector_t sector;
+ pgoff_t offset;
+
+ offset = __page_file_index(page);
+ se = offset_to_swap_extent(sis, offset);
+ sector = se->start_block + (offset - se->start_page);
+ return sector << (PAGE_SHIFT - 9);
+}
+
/*
* swap allocation tell device that a cluster of swap can now be discarded,
* to allow the swap device to optimize its wear-levelling.
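
The re-added swap_page_sector() walks swap extents instead of the old linear offset << (PAGE_SHIFT - 9) mapping, which is only valid when the swap area is physically contiguous. A worked userspace example with hypothetical extent values (4 KiB pages, 512-byte sectors):

#include <stdio.h>

#define PAGE_SHIFT 12

/* A swap extent maps a run of swap page offsets onto a run of
 * page-sized on-disk blocks. */
struct swap_extent {
	unsigned long start_page;		/* first swap offset covered */
	unsigned long long start_block;		/* matching on-disk block */
};

int main(void)
{
	struct swap_extent se = { .start_page = 100, .start_block = 5000 };
	unsigned long offset = 103;		/* page offset within swap */
	unsigned long long sector =
		(se.start_block + (offset - se.start_page)) << (PAGE_SHIFT - 9);

	/* PAGE_SHIFT - 9 converts pages to 512-byte sectors: 5003 * 8. */
	printf("sector=%llu\n", sector);	/* 40024 */
	return 0;
}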
diff --git a/mm/truncate.c b/mm/truncate.c
index 8aa4907e06e0..455944264663 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -57,11 +57,10 @@ static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
* exceptional entries similar to what pagevec_remove_exceptionals does.
*/
static void truncate_exceptional_pvec_entries(struct address_space *mapping,
- struct pagevec *pvec, pgoff_t *indices,
- pgoff_t end)
+ struct pagevec *pvec, pgoff_t *indices)
{
int i, j;
- bool dax, lock;
+ bool dax;
/* Handled by shmem itself */
if (shmem_mapping(mapping))
@@ -75,8 +74,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
return;
dax = dax_mapping(mapping);
- lock = !dax && indices[j] < end;
- if (lock)
+ if (!dax)
xa_lock_irq(&mapping->i_pages);
for (i = j; i < pagevec_count(pvec); i++) {
@@ -88,9 +86,6 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
continue;
}
- if (index >= end)
- continue;
-
if (unlikely(dax)) {
dax_delete_mapping_entry(mapping, index);
continue;
@@ -99,7 +94,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
__clear_shadow_entry(mapping, index, page);
}
- if (lock)
+ if (!dax)
xa_unlock_irq(&mapping->i_pages);
pvec->nr = j;
}
@@ -326,51 +321,19 @@ void truncate_inode_pages_range(struct address_space *mapping,
pagevec_init(&pvec);
index = start;
- while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE),
- indices)) {
- /*
- * Pagevec array has exceptional entries and we may also fail
- * to lock some pages. So we store pages that can be deleted
- * in a new pagevec.
- */
- struct pagevec locked_pvec;
-
- pagevec_init(&locked_pvec);
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
-
- /* We rely upon deletion not changing page->index */
- index = indices[i];
- if (index >= end)
- break;
-
- if (xa_is_value(page))
- continue;
-
- if (!trylock_page(page))
- continue;
- WARN_ON(page_to_index(page) != index);
- if (PageWriteback(page)) {
- unlock_page(page);
- continue;
- }
- if (page->mapping != mapping) {
- unlock_page(page);
- continue;
- }
- pagevec_add(&locked_pvec, page);
- }
- for (i = 0; i < pagevec_count(&locked_pvec); i++)
- truncate_cleanup_page(mapping, locked_pvec.pages[i]);
- delete_from_page_cache_batch(mapping, &locked_pvec);
- for (i = 0; i < pagevec_count(&locked_pvec); i++)
- unlock_page(locked_pvec.pages[i]);
- truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
+ while (index < end && find_lock_entries(mapping, index, end - 1,
+ &pvec, indices)) {
+ index = indices[pagevec_count(&pvec) - 1] + 1;
+ truncate_exceptional_pvec_entries(mapping, &pvec, indices);
+ for (i = 0; i < pagevec_count(&pvec); i++)
+ truncate_cleanup_page(mapping, pvec.pages[i]);
+ delete_from_page_cache_batch(mapping, &pvec);
+ for (i = 0; i < pagevec_count(&pvec); i++)
+ unlock_page(pvec.pages[i]);
pagevec_release(&pvec);
cond_resched();
- index++;
}
+
if (partial_start) {
struct page *page = find_lock_page(mapping, start - 1);
if (page) {
@@ -413,8 +376,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
index = start;
for ( ; ; ) {
cond_resched();
- if (!pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
+ if (!find_get_entries(mapping, index, end - 1, &pvec,
+ indices)) {
/* If all gone from start onwards, we're done */
if (index == start)
break;
@@ -422,23 +385,12 @@ void truncate_inode_pages_range(struct address_space *mapping,
index = start;
continue;
}
- if (index == start && indices[0] >= end) {
- /* All gone out of hole to be punched, we're done */
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- break;
- }
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
/* We rely upon deletion not changing page->index */
index = indices[i];
- if (index >= end) {
- /* Restart punch to make sure all gone */
- index = start - 1;
- break;
- }
if (xa_is_value(page))
continue;
@@ -449,7 +401,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
truncate_inode_page(mapping, page);
unlock_page(page);
}
- truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
+ truncate_exceptional_pvec_entries(mapping, &pvec, indices);
pagevec_release(&pvec);
index++;
}
@@ -539,55 +491,19 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
int i;
pagevec_init(&pvec);
- while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
- indices)) {
+ while (find_lock_entries(mapping, index, end, &pvec, indices)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
/* We rely upon deletion not changing page->index */
index = indices[i];
- if (index > end)
- break;
if (xa_is_value(page)) {
invalidate_exceptional_entry(mapping, index,
page);
continue;
}
-
- if (!trylock_page(page))
- continue;
-
- WARN_ON(page_to_index(page) != index);
-
- /* Middle of THP: skip */
- if (PageTransTail(page)) {
- unlock_page(page);
- continue;
- } else if (PageTransHuge(page)) {
- index += HPAGE_PMD_NR - 1;
- i += HPAGE_PMD_NR - 1;
- /*
- * 'end' is in the middle of THP. Don't
- * invalidate the page as the part outside of
- * 'end' could be still useful.
- */
- if (index > end) {
- unlock_page(page);
- continue;
- }
-
- /* Take a pin outside pagevec */
- get_page(page);
-
- /*
- * Drop extra pins before trying to invalidate
- * the huge page.
- */
- pagevec_remove_exceptionals(&pvec);
- pagevec_release(&pvec);
- }
+ index += thp_nr_pages(page) - 1;
ret = invalidate_inode_page(page);
unlock_page(page);
@@ -601,9 +517,6 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
if (nr_pagevec)
(*nr_pagevec)++;
}
-
- if (PageTransHuge(page))
- put_page(page);
count += ret;
}
pagevec_remove_exceptionals(&pvec);
@@ -725,16 +638,12 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
pagevec_init(&pvec);
index = start;
- while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
- indices)) {
+ while (find_get_entries(mapping, index, end, &pvec, indices)) {
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
/* We rely upon deletion not changing page->index */
index = indices[i];
- if (index > end)
- break;
if (xa_is_value(page)) {
if (!invalidate_exceptional_entry2(mapping,
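
All of the truncate/invalidate loops above shrink the same way: the old code asked for up to PAGEVEC_SIZE entries and re-checked index against end on every iteration, while find_lock_entries()/find_get_entries() now take end themselves. A userspace model of the refactor, with a toy "mapping" in which every even index holds an entry:

#include <stdio.h>

#define BATCH 15	/* plays the role of PAGEVEC_SIZE */

/* The bound lives in the helper now, not in the caller's loop body. */
static int find_entries(unsigned long start, unsigned long end,
			unsigned long *indices)
{
	int n = 0;

	for (unsigned long i = start; i <= end && n < BATCH; i++)
		if (i % 2 == 0)
			indices[n++] = i;
	return n;
}

int main(void)
{
	unsigned long indices[BATCH], index = 0;
	int i, n;

	while ((n = find_entries(index, 9, indices))) {
		for (i = 0; i < n; i++)	/* no per-entry "index >= end" check */
			printf("truncate %lu\n", indices[i]);
		index = indices[n - 1] + 1;
	}
	return 0;
}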
diff --git a/mm/vmstat.c b/mm/vmstat.c
index a0e949542204..74b2c374b86c 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -342,6 +342,12 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
long t;
if (vmstat_item_in_bytes(item)) {
+ /*
+ * Only cgroups use subpage accounting right now; at
+ * the global level, these items still change in
+ * multiples of whole pages. Store them as pages
+ * internally to keep the per-cpu counters compact.
+ */
VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
delta >>= PAGE_SHIFT;
}
@@ -551,6 +557,12 @@ static inline void mod_node_state(struct pglist_data *pgdat,
long o, n, t, z;
if (vmstat_item_in_bytes(item)) {
+ /*
+ * Only cgroups use subpage accounting right now; at
+ * the global level, these items still change in
+ * multiples of whole pages. Store them as pages
+ * internally to keep the per-cpu counters compact.
+ */
VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
delta >>= PAGE_SHIFT;
}
@@ -1637,14 +1649,16 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n high %lu"
"\n spanned %lu"
"\n present %lu"
- "\n managed %lu",
+ "\n managed %lu"
+ "\n cma %lu",
zone_page_state(zone, NR_FREE_PAGES),
min_wmark_pages(zone),
low_wmark_pages(zone),
high_wmark_pages(zone),
zone->spanned_pages,
zone->present_pages,
- zone_managed_pages(zone));
+ zone_managed_pages(zone),
+ zone_cma_pages(zone));
seq_printf(m,
"\n protection: (%ld",
@@ -1892,16 +1906,12 @@ static void vmstat_update(struct work_struct *w)
*/
static bool need_update(int cpu)
{
+ pg_data_t *last_pgdat = NULL;
struct zone *zone;
for_each_populated_zone(zone) {
struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
-
- BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
-#ifdef CONFIG_NUMA
- BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
-#endif
-
+ struct per_cpu_nodestat *n;
/*
* The fast way of checking if there are any vmstat diffs.
*/
@@ -1913,6 +1923,13 @@ static bool need_update(int cpu)
sizeof(p->vm_numa_stat_diff[0])))
return true;
#endif
+ if (last_pgdat == zone->zone_pgdat)
+ continue;
+ last_pgdat = zone->zone_pgdat;
+ n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
+ if (memchr_inv(n->vm_node_stat_diff, 0, NR_VM_NODE_STAT_ITEMS *
+ sizeof(n->vm_node_stat_diff[0])))
+ return true;
}
return false;
}
@@ -1963,6 +1980,8 @@ static void vmstat_shepherd(struct work_struct *w)
if (!delayed_work_pending(dw) && need_update(cpu))
queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
+
+ cond_resched();
}
put_online_cpus();
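
The comment added to __mod_node_page_state() and mod_node_state() describes a storage convention that is easy to check numerically: byte-counted items still change in whole pages at the global level, so the delta is shifted down and stored compactly as pages. A sketch with hypothetical values:

#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)

int main(void)
{
	long delta = 3 * PAGE_SIZE;		/* global callers pass whole pages */

	assert((delta & (PAGE_SIZE - 1)) == 0);	/* the VM_WARN_ON_ONCE condition */
	delta >>= PAGE_SHIFT;
	printf("stored delta = %ld pages\n", delta);	/* 3 */
	return 0;
}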
diff --git a/mm/z3fold.c b/mm/z3fold.c
index c1ccf6bb0ffb..b5dafa7e44e4 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -1771,6 +1771,7 @@ static u64 z3fold_zpool_total_size(void *pool)
static struct zpool_driver z3fold_zpool_driver = {
.type = "z3fold",
+ .sleep_mapped = true,
.owner = THIS_MODULE,
.create = z3fold_zpool_create,
.destroy = z3fold_zpool_destroy,
diff --git a/mm/zbud.c b/mm/zbud.c
index c49966ece674..7ec5f27a68b0 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -203,6 +203,7 @@ static u64 zbud_zpool_total_size(void *pool)
static struct zpool_driver zbud_zpool_driver = {
.type = "zbud",
+ .sleep_mapped = true,
.owner = THIS_MODULE,
.create = zbud_zpool_create,
.destroy = zbud_zpool_destroy,
diff --git a/mm/zpool.c b/mm/zpool.c
index 3744a2d1a624..5ed71207ced7 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -23,6 +23,7 @@ struct zpool {
void *pool;
const struct zpool_ops *ops;
bool evictable;
+ bool can_sleep_mapped;
struct list_head list;
};
@@ -183,6 +184,7 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp,
zpool->pool = driver->create(name, gfp, ops, zpool);
zpool->ops = ops;
zpool->evictable = driver->shrink && ops && ops->evict;
+ zpool->can_sleep_mapped = driver->sleep_mapped;
if (!zpool->pool) {
pr_err("couldn't create %s pool\n", type);
@@ -393,6 +395,17 @@ bool zpool_evictable(struct zpool *zpool)
return zpool->evictable;
}
+/**
+ * zpool_can_sleep_mapped - Test whether the backend can sleep while a handle is mapped.
+ * @zpool: The zpool to test
+ *
+ * Returns: true if zpool can sleep; false otherwise.
+ */
+bool zpool_can_sleep_mapped(struct zpool *zpool)
+{
+ return zpool->can_sleep_mapped;
+}
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
MODULE_DESCRIPTION("Common API for compressed memory storage");
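
The new capability bit exists because some zpool backends map handles atomically, while z3fold and zbud — which set .sleep_mapped = true above — do not. A caller that must sleep while the data is in hand (zswap's decompression, below, is the in-tree user) bounces the object through a private buffer first. A minimal userspace sketch of that caller-side pattern, with hypothetical types and data:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct zpool { bool can_sleep_mapped; };

static bool zpool_can_sleep_mapped(const struct zpool *zp)
{
	return zp->can_sleep_mapped;
}

static char mapped_object[] = "deadbeef";	/* stands in for the mapped handle */

int main(void)
{
	struct zpool zsmalloc_like = { .can_sleep_mapped = false };
	char tmp[sizeof(mapped_object)];
	char *src = mapped_object;

	if (!zpool_can_sleep_mapped(&zsmalloc_like)) {
		memcpy(tmp, src, sizeof(tmp));	/* bounce out of the atomic mapping */
		src = tmp;			/* the mapping could now be dropped */
	}
	printf("work on %s (sleeping is now safe)\n", src);
	return 0;
}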
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 7289f502ffac..30c358b72025 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -357,7 +357,7 @@ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
{
- return kmem_cache_alloc(pool->zspage_cachep,
+ return kmem_cache_zalloc(pool->zspage_cachep,
flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
}
@@ -816,7 +816,7 @@ static int get_pages_per_zspage(int class_size)
static struct zspage *get_zspage(struct page *page)
{
- struct zspage *zspage = (struct zspage *)page->private;
+ struct zspage *zspage = (struct zspage *)page_private(page);
BUG_ON(zspage->magic != ZSPAGE_MAGIC);
return zspage;
@@ -1064,7 +1064,6 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
if (!zspage)
return NULL;
- memset(zspage, 0, sizeof(struct zspage));
zspage->magic = ZSPAGE_MAGIC;
migrate_lock_init(zspage);
@@ -2213,11 +2212,13 @@ static unsigned long zs_can_compact(struct size_class *class)
return obj_wasted * class->pages_per_zspage;
}
-static void __zs_compact(struct zs_pool *pool, struct size_class *class)
+static unsigned long __zs_compact(struct zs_pool *pool,
+ struct size_class *class)
{
struct zs_compact_control cc;
struct zspage *src_zspage;
struct zspage *dst_zspage = NULL;
+ unsigned long pages_freed = 0;
spin_lock(&class->lock);
while ((src_zspage = isolate_zspage(class, true))) {
@@ -2247,7 +2248,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
putback_zspage(class, dst_zspage);
if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
free_zspage(pool, class, src_zspage);
- pool->stats.pages_compacted += class->pages_per_zspage;
+ pages_freed += class->pages_per_zspage;
}
spin_unlock(&class->lock);
cond_resched();
@@ -2258,12 +2259,15 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
putback_zspage(class, src_zspage);
spin_unlock(&class->lock);
+
+ return pages_freed;
}
unsigned long zs_compact(struct zs_pool *pool)
{
int i;
struct size_class *class;
+ unsigned long pages_freed = 0;
for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
class = pool->size_class[i];
@@ -2271,10 +2275,11 @@ unsigned long zs_compact(struct zs_pool *pool)
continue;
if (class->index != i)
continue;
- __zs_compact(pool, class);
+ pages_freed += __zs_compact(pool, class);
}
+ atomic_long_add(pages_freed, &pool->stats.pages_compacted);
- return pool->stats.pages_compacted;
+ return pages_freed;
}
EXPORT_SYMBOL_GPL(zs_compact);
@@ -2291,13 +2296,12 @@ static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
struct zs_pool *pool = container_of(shrinker, struct zs_pool,
shrinker);
- pages_freed = pool->stats.pages_compacted;
/*
* Compact classes and calculate compaction delta.
* Can run concurrently with a manually triggered
* (by user) compaction.
*/
- pages_freed = zs_compact(pool) - pages_freed;
+ pages_freed = zs_compact(pool);
return pages_freed ? pages_freed : SHRINK_STOP;
}
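
The zsmalloc change fixes an accounting race: the shrinker used to subtract two reads of the cumulative pages_compacted counter, so pages freed by a concurrent (e.g. manually triggered) compaction landed in the shrinker's delta. Returning the per-call count and adding it to the pool counter atomically sidesteps that. A sketch of the new contract, with a plain long standing in for the atomic:

#include <stdio.h>

static long pages_compacted;	/* cumulative pool->stats counter */

/* New contract: the return value is what *this* call freed. */
static long zs_compact_stub(long freed_this_call)
{
	pages_compacted += freed_this_call;	/* atomic_long_add() in-tree */
	return freed_this_call;
}

int main(void)
{
	long a = zs_compact_stub(7);	/* shrinker-triggered compaction */
	long b = zs_compact_stub(5);	/* concurrent manual compaction */

	/* Each caller sees only its own delta; no before/after subtraction. */
	printf("deltas %ld and %ld, cumulative %ld\n", a, b, pages_compacted);
	return 0;
}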
diff --git a/mm/zswap.c b/mm/zswap.c
index 182f6ad5aa69..578d9f256920 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -935,13 +935,19 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx;
- u8 *src;
+ u8 *src, *tmp = NULL;
unsigned int dlen;
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_NONE,
};
+ if (!zpool_can_sleep_mapped(pool)) {
+ tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
+ if (!tmp)
+ return -ENOMEM;
+ }
+
/* extract swpentry from data */
zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
swpentry = zhdr->swpentry; /* here */
@@ -955,6 +961,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
/* entry was invalidated */
spin_unlock(&tree->lock);
zpool_unmap_handle(pool, handle);
+ kfree(tmp);
return 0;
}
spin_unlock(&tree->lock);
@@ -979,6 +986,14 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
dlen = PAGE_SIZE;
src = (u8 *)zhdr + sizeof(struct zswap_header);
+ if (!zpool_can_sleep_mapped(pool)) {
+
+ memcpy(tmp, src, entry->length);
+ src = tmp;
+
+ zpool_unmap_handle(pool, handle);
+ }
+
mutex_lock(acomp_ctx->mutex);
sg_init_one(&input, src, entry->length);
sg_init_table(&output, 1);
@@ -1022,10 +1037,10 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
/*
* if we get here due to ZSWAP_SWAPCACHE_EXIST
- * a load may happening concurrently
- * it is safe and okay to not free the entry
+ * a load may be happening concurrently.
+ * it is safe and okay to not free the entry.
* if we free the entry in the following put
- * it it either okay to return !0
+ * it is also okay to return !0
*/
fail:
spin_lock(&tree->lock);
@@ -1033,7 +1048,11 @@ fail:
spin_unlock(&tree->lock);
end:
- zpool_unmap_handle(pool, handle);
+ if (zpool_can_sleep_mapped(pool))
+ zpool_unmap_handle(pool, handle);
+ else
+ kfree(tmp);
+
return ret;
}
@@ -1235,7 +1254,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
struct zswap_entry *entry;
struct scatterlist input, output;
struct crypto_acomp_ctx *acomp_ctx;
- u8 *src, *dst;
+ u8 *src, *dst, *tmp;
unsigned int dlen;
int ret;
@@ -1253,15 +1272,33 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
dst = kmap_atomic(page);
zswap_fill_page(dst, entry->value);
kunmap_atomic(dst);
+ ret = 0;
goto freeentry;
}
+ if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
+
+ tmp = kmalloc(entry->length, GFP_ATOMIC);
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto freeentry;
+ }
+ }
+
/* decompress */
dlen = PAGE_SIZE;
src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
if (zpool_evictable(entry->pool->zpool))
src += sizeof(struct zswap_header);
+ if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
+
+ memcpy(tmp, src, entry->length);
+ src = tmp;
+
+ zpool_unmap_handle(entry->pool->zpool, entry->handle);
+ }
+
acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
mutex_lock(acomp_ctx->mutex);
sg_init_one(&input, src, entry->length);
@@ -1271,7 +1308,11 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
mutex_unlock(acomp_ctx->mutex);
- zpool_unmap_handle(entry->pool->zpool, entry->handle);
+ if (zpool_can_sleep_mapped(entry->pool->zpool))
+ zpool_unmap_handle(entry->pool->zpool, entry->handle);
+ else
+ kfree(tmp);
+
BUG_ON(ret);
freeentry:
@@ -1279,7 +1320,7 @@ freeentry:
zswap_entry_put(tree, entry);
spin_unlock(&tree->lock);
- return 0;
+ return ret;
}
/* frees an entry in zswap */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a02ce89b56b5..1355e6c0d567 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1021,7 +1021,6 @@ static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
const struct proto_ops inet_stream_ops = {
.family = PF_INET,
- .flags = PROTO_CMSG_DATA_ONLY,
.owner = THIS_MODULE,
.release = inet_release,
.bind = inet_bind,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 1fb75f01756c..802f5111805a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -665,7 +665,6 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
const struct proto_ops inet6_stream_ops = {
.family = PF_INET6,
- .flags = PROTO_CMSG_DATA_ONLY,
.owner = THIS_MODULE,
.release = inet6_release,
.bind = inet6_bind,
diff --git a/net/socket.c b/net/socket.c
index 23c7842389de..84a8049c2b09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2413,10 +2413,6 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
unsigned int flags)
{
- /* disallow ancillary data requests from this path */
- if (msg->msg_control || msg->msg_controllen)
- return -EINVAL;
-
return ____sys_sendmsg(sock, msg, flags, NULL, 0);
}
@@ -2625,12 +2621,6 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
struct user_msghdr __user *umsg,
struct sockaddr __user *uaddr, unsigned int flags)
{
- if (msg->msg_control || msg->msg_controllen) {
- /* disallow ancillary data reqs unless cmsg is plain data */
- if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
- return -EINVAL;
- }
-
return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 8241f5a4a01c..09c000d490a1 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -478,6 +478,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
inode->i_fop = &simple_dir_operations;
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
+ break;
default:
break;
}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 946edf2db646..a249837d6a55 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2015-2020, Oracle and/or its affiliates.
*
- * Support for backward direction RPCs on RPC/RDMA.
+ * Support for reverse-direction RPCs on RPC/RDMA.
*/
#include <linux/sunrpc/xprt.h>
@@ -208,7 +208,7 @@ create_req:
}
/**
- * rpcrdma_bc_receive_call - Handle a backward direction call
+ * rpcrdma_bc_receive_call - Handle a reverse-direction Call
* @r_xprt: transport receiving the call
* @rep: receive buffer containing the call
*
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index baca49fe83af..766a1048a48a 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -306,20 +306,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
if (nsegs > ep->re_max_fr_depth)
nsegs = ep->re_max_fr_depth;
for (i = 0; i < nsegs;) {
- if (seg->mr_page)
- sg_set_page(&mr->mr_sg[i],
- seg->mr_page,
- seg->mr_len,
- offset_in_page(seg->mr_offset));
- else
- sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
- seg->mr_len);
+ sg_set_page(&mr->mr_sg[i], seg->mr_page,
+ seg->mr_len, seg->mr_offset);
++seg;
++i;
if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
continue;
- if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+ if ((i < nsegs && seg->mr_offset) ||
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 8f5d0cb68360..292f066d006e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -204,9 +204,7 @@ rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
return 0;
}
-/* Split @vec on page boundaries into SGEs. FMR registers pages, not
- * a byte range. Other modes coalesce these SGEs into a single MR
- * when they can.
+/* Convert @vec to a single SGL element.
*
* Returns pointer to next available SGE, and bumps the total number
* of SGEs consumed.
@@ -215,22 +213,11 @@ static struct rpcrdma_mr_seg *
rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
unsigned int *n)
{
- u32 remaining, page_offset;
- char *base;
-
- base = vec->iov_base;
- page_offset = offset_in_page(base);
- remaining = vec->iov_len;
- while (remaining) {
- seg->mr_page = NULL;
- seg->mr_offset = base;
- seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
- remaining -= seg->mr_len;
- base += seg->mr_len;
- ++seg;
- ++(*n);
- page_offset = 0;
- }
+ seg->mr_page = virt_to_page(vec->iov_base);
+ seg->mr_offset = offset_in_page(vec->iov_base);
+ seg->mr_len = vec->iov_len;
+ ++seg;
+ ++(*n);
return seg;
}
@@ -259,7 +246,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
page_base = offset_in_page(xdrbuf->page_base);
while (len) {
seg->mr_page = *ppages;
- seg->mr_offset = (char *)page_base;
+ seg->mr_offset = page_base;
seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
len -= seg->mr_len;
++ppages;
@@ -268,10 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
page_base = 0;
}
- /* When encoding a Read chunk, the tail iovec contains an
- * XDR pad and may be omitted.
- */
- if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
+ if (type == rpcrdma_readch)
goto out;
/* When encoding a Write chunk, some servers need to see an
@@ -283,7 +267,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
goto out;
if (xdrbuf->tail[0].iov_len)
- seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
+ rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
out:
if (unlikely(n > RPCRDMA_MAX_SEGS))
@@ -644,9 +628,8 @@ out_mapping_err:
return false;
}
-/* The tail iovec may include an XDR pad for the page list,
- * as well as additional content, and may not reside in the
- * same page as the head iovec.
+/* The tail iovec might not reside in the same page as the
+ * head iovec.
*/
static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
struct xdr_buf *xdr,
@@ -764,27 +747,19 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_req *req,
struct xdr_buf *xdr)
{
+ struct kvec *tail = &xdr->tail[0];
+
if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
return false;
- /* If there is a Read chunk, the page list is being handled
+ /* If there is a Read chunk, the page list is handled
* via explicit RDMA, and thus is skipped here.
*/
- /* Do not include the tail if it is only an XDR pad */
- if (xdr->tail[0].iov_len > 3) {
- unsigned int page_base, len;
-
- /* If the content in the page list is an odd length,
- * xdr_write_pages() adds a pad at the beginning of
- * the tail iovec. Force the tail's non-pad content to
- * land at the next XDR position in the Send message.
- */
- page_base = offset_in_page(xdr->tail[0].iov_base);
- len = xdr->tail[0].iov_len;
- page_base += len & 3;
- len -= len & 3;
- if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
+ if (tail->iov_len) {
+ if (!rpcrdma_prepare_tail_iov(req, xdr,
+ offset_in_page(tail->iov_base),
+ tail->iov_len))
return false;
kref_get(&req->rl_kref);
}
@@ -1164,14 +1139,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
*/
p = xdr_inline_decode(xdr, 3 * sizeof(*p));
if (unlikely(!p))
- goto out_short;
+ return true;
rpcrdma_bc_receive_call(r_xprt, rep);
return true;
-
-out_short:
- pr_warn("RPC/RDMA short backward direction call\n");
- return true;
}
#else /* CONFIG_SUNRPC_BACKCHANNEL */
{
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 63f8be974df2..4a1edbb4028e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2015-2018 Oracle. All rights reserved.
*
- * Support for backward direction RPCs on RPC/RDMA (server-side).
+ * Support for reverse-direction RPCs on RPC/RDMA (server-side).
*/
#include <linux/sunrpc/svc_rdma.h>
@@ -59,7 +59,7 @@ out_unlock:
spin_unlock(&xprt->queue_lock);
}
-/* Send a backwards direction RPC call.
+/* Send a reverse-direction RPC Call.
*
* Caller holds the connection's mutex and has already marshaled
* the RPC/RDMA request.
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 94b28657aeeb..fe3be985e239 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -98,9 +98,9 @@ struct rpcrdma_ep {
atomic_t re_completion_ids;
};
-/* Pre-allocate extra Work Requests for handling backward receives
- * and sends. This is a fixed value because the Work Queues are
- * allocated when the forward channel is set up, long before the
+/* Pre-allocate extra Work Requests for handling reverse-direction
+ * Receives and Sends. This is a fixed value because the Work Queues
+ * are allocated when the forward channel is set up, long before the
* backchannel is provisioned. This value is two times
* NFS4_DEF_CB_SLOT_TABLE_SIZE.
*/
@@ -283,10 +283,11 @@ enum {
RPCRDMA_MAX_IOV_SEGS,
};
-struct rpcrdma_mr_seg { /* chunk descriptors */
- u32 mr_len; /* length of chunk or segment */
- struct page *mr_page; /* owning page, if any */
- char *mr_offset; /* kva if no page, else offset */
+/* Arguments for DMA mapping and registration */
+struct rpcrdma_mr_seg {
+ u32 mr_len; /* length of segment */
+ struct page *mr_page; /* underlying struct page */
+ u64 mr_offset; /* IN: page offset, OUT: iova */
};
/* The Send SGE array is provisioned to send a maximum size
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c56a66cdf4ac..e35760f238a4 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -829,7 +829,7 @@ xs_stream_record_marker(struct xdr_buf *xdr)
* EAGAIN: The socket was blocked, please call again later to
* complete the request
* ENOTCONN: Caller needs to invoke connect logic then call again
- * other: Some other error occured, the request was not sent
+ * other: Some other error occurred, the request was not sent
*/
static int xs_local_send_request(struct rpc_rqst *req)
{
@@ -1665,7 +1665,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
* This ensures that we can continue to establish TCP
* connections even when all local ephemeral ports are already
* a part of some TCP connection. This makes no difference
- * for UDP sockets, but also doens't harm them.
+ * for UDP sockets, but also doesn't harm them.
*
* If we're asking for any reserved port (i.e. port == 0 &&
* transport->xprt.resvport == 1) xs_get_srcport above will
@@ -1875,6 +1875,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
xprt->stat.connect_time += (long)jiffies -
xprt->stat.connect_start;
xprt_set_connected(xprt);
+ break;
case -ENOBUFS:
break;
case -ENOENT:
@@ -2276,10 +2277,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
case -EHOSTUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
- /*
- * xs_tcp_force_close() wakes tasks with -EIO.
- * We need to wake them first to ensure the
- * correct error code.
+ /* xs_tcp_force_close() wakes tasks with a fixed error code.
+ * We need to wake them first to ensure the correct error code.
*/
xprt_wake_pending_tasks(xprt, status);
xs_tcp_force_close(xprt);
@@ -2380,7 +2379,7 @@ static void xs_error_handle(struct work_struct *work)
}
/**
- * xs_local_print_stats - display AF_LOCAL socket-specifc stats
+ * xs_local_print_stats - display AF_LOCAL socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
@@ -2409,7 +2408,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
}
/**
- * xs_udp_print_stats - display UDP socket-specifc stats
+ * xs_udp_print_stats - display UDP socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
@@ -2433,7 +2432,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
}
/**
- * xs_tcp_print_stats - display TCP socket-specifc stats
+ * xs_tcp_print_stats - display TCP socket-specific stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
*
diff --git a/samples/auxdisplay/cfag12864b-example.c b/samples/auxdisplay/cfag12864b-example.c
index bfeab44f81d0..2e3bb7375c99 100644
--- a/samples/auxdisplay/cfag12864b-example.c
+++ b/samples/auxdisplay/cfag12864b-example.c
@@ -4,7 +4,7 @@
* Version: 0.1.0
* Description: cfag12864b LCD userspace example program
*
- * Author: Copyright (C) Miguel Ojeda Sandonis
+ * Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
* Date: 2006-10-31
*/
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 0e53a93e8f15..9e2092fd5206 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -8,8 +8,6 @@ ubsan-cflags-$(CONFIG_UBSAN_LOCAL_BOUNDS) += -fsanitize=local-bounds
ubsan-cflags-$(CONFIG_UBSAN_SHIFT) += -fsanitize=shift
ubsan-cflags-$(CONFIG_UBSAN_DIV_ZERO) += -fsanitize=integer-divide-by-zero
ubsan-cflags-$(CONFIG_UBSAN_UNREACHABLE) += -fsanitize=unreachable
-ubsan-cflags-$(CONFIG_UBSAN_SIGNED_OVERFLOW) += -fsanitize=signed-integer-overflow
-ubsan-cflags-$(CONFIG_UBSAN_UNSIGNED_OVERFLOW) += -fsanitize=unsigned-integer-overflow
ubsan-cflags-$(CONFIG_UBSAN_OBJECT_SIZE) += -fsanitize=object-size
ubsan-cflags-$(CONFIG_UBSAN_BOOL) += -fsanitize=bool
ubsan-cflags-$(CONFIG_UBSAN_ENUM) += -fsanitize=enum
diff --git a/scripts/adjust_autoksyms.sh b/scripts/adjust_autoksyms.sh
index 2b366d945ccb..d8f6f9c63043 100755
--- a/scripts/adjust_autoksyms.sh
+++ b/scripts/adjust_autoksyms.sh
@@ -34,9 +34,6 @@ case "$KBUILD_VERBOSE" in
;;
esac
-# We need access to CONFIG_ symbols
-. include/config/auto.conf
-
# Generate a new symbol list file
$CONFIG_SHELL $srctree/scripts/gen_autoksyms.sh "$new_ksyms_file"
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index ca4201753d5e..df8b23dc1eb0 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -382,6 +382,7 @@ our $InitAttribute = qr{$InitAttributeData|$InitAttributeConst|$InitAttributeIni
# We need \b after 'init' otherwise 'initconst' will cause a false positive in a check
our $Attribute = qr{
const|
+ volatile|
__percpu|
__nocast|
__safe|
@@ -486,7 +487,7 @@ our $logFunctions = qr{(?x:
our $allocFunctions = qr{(?x:
(?:(?:devm_)?
- (?:kv|k|v)[czm]alloc(?:_node|_array)? |
+ (?:kv|k|v)[czm]alloc(?:_array)?(?:_node)? |
kstrdup(?:_const)? |
kmemdup(?:_nul)?) |
(?:\w+)?alloc_skb(?:_ip_align)? |
@@ -506,6 +507,30 @@ our $signature_tags = qr{(?xi:
Cc:
)};
+our $tracing_logging_tags = qr{(?xi:
+ [=-]*> |
+ <[=-]* |
+ \[ |
+ \] |
+ start |
+ called |
+ entered |
+ entry |
+ enter |
+ in |
+ inside |
+ here |
+ begin |
+ exit |
+ end |
+ done |
+ leave |
+ completed |
+ out |
+ return |
+ [\.\!:\s]*
+)};
+
sub edit_distance_min {
my (@arr) = @_;
my $len = scalar @arr;
@@ -2428,6 +2453,15 @@ sub get_raw_comment {
return $comment;
}
+sub exclude_global_initialisers {
+ my ($realfile) = @_;
+
+ # Do not check for BPF programs (tools/testing/selftests/bpf/progs/*.c, samples/bpf/*_kern.c, *.bpf.c).
+ return $realfile =~ m@^tools/testing/selftests/bpf/progs/.*\.c$@ ||
+ $realfile =~ m@^samples/bpf/.*_kern\.c$@ ||
+ $realfile =~ m@/bpf/.*\.bpf\.c$@;
+}
+
sub process {
my $filename = shift;
@@ -2973,7 +3007,7 @@ sub process {
}
if (!defined $lines[$linenr]) {
WARN("BAD_SIGN_OFF",
- "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline);
+ "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline);
} elsif ($rawlines[$linenr] !~ /^\s*signed-off-by:\s*(.*)/i) {
WARN("BAD_SIGN_OFF",
"Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline . "\n" .$rawlines[$linenr]);
@@ -2996,8 +3030,8 @@ sub process {
if (ERROR("GERRIT_CHANGE_ID",
"Remove Gerrit Change-Id's before submitting upstream\n" . $herecurr) &&
$fix) {
- fix_delete_line($fixlinenr, $rawline);
- }
+ fix_delete_line($fixlinenr, $rawline);
+ }
}
# Check if the commit log is in a possible stack dump
@@ -3239,10 +3273,10 @@ sub process {
next if ($start_char =~ /^\S$/);
next if (index(" \t.,;?!", $end_char) == -1);
- # avoid repeating hex occurrences like 'ff ff fe 09 ...'
- if ($first =~ /\b[0-9a-f]{2,}\b/i) {
- next if (!exists($allow_repeated_words{lc($first)}));
- }
+ # avoid repeating hex occurrences like 'ff ff fe 09 ...'
+ if ($first =~ /\b[0-9a-f]{2,}\b/i) {
+ next if (!exists($allow_repeated_words{lc($first)}));
+ }
if (WARN("REPEATED_WORD",
"Possible repeated word: '$first'\n" . $herecurr) &&
@@ -3574,6 +3608,13 @@ sub process {
}
}
+# check for .L prefix local symbols in .S files
+ if ($realfile =~ /\.S$/ &&
+ $line =~ /^\+\s*(?:[A-Z]+_)?SYM_[A-Z]+_(?:START|END)(?:_[A-Z_]+)?\s*\(\s*\.L/) {
+ WARN("AVOID_L_PREFIX",
+ "Avoid using '.L' prefixed local symbol names for denoting a range of code via 'SYM_*_START/END' annotations; see Documentation/asm-annotations.rst\n" . $herecurr);
+ }
+
# check we are in a valid source file C or perl if not then ignore this hunk
next if ($realfile !~ /\.(h|c|pl|dtsi|dts)$/);
@@ -3776,43 +3817,48 @@ sub process {
}
# check for missing blank lines after declarations
- if ($sline =~ /^\+\s+\S/ && #Not at char 1
- # actual declarations
- ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+# (declarations must have the same indentation and not be at the start of line)
+ if (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/) {
+ # use temporaries
+ my $sl = $sline;
+ my $pl = $prevline;
+ # remove $Attribute/$Sparse uses to simplify comparisons
+ $sl =~ s/\b(?:$Attribute|$Sparse)\b//g;
+ $pl =~ s/\b(?:$Attribute|$Sparse)\b//g;
+ if (($pl =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
# function pointer declarations
- $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
+ $pl =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
# foo bar; where foo is some local typedef or #define
- $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+ $pl =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
# known declaration macros
- $prevline =~ /^\+\s+$declaration_macros/) &&
+ $pl =~ /^\+\s+$declaration_macros/) &&
# for "else if" which can look like "$Ident $Ident"
- !($prevline =~ /^\+\s+$c90_Keywords\b/ ||
+ !($pl =~ /^\+\s+$c90_Keywords\b/ ||
# other possible extensions of declaration lines
- $prevline =~ /(?:$Compare|$Assignment|$Operators)\s*$/ ||
+ $pl =~ /(?:$Compare|$Assignment|$Operators)\s*$/ ||
# not starting a section or a macro "\" extended line
- $prevline =~ /(?:\{\s*|\\)$/) &&
+ $pl =~ /(?:\{\s*|\\)$/) &&
# looks like a declaration
- !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ !($sl =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
# function pointer declarations
- $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
+ $sl =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
# foo bar; where foo is some local typedef or #define
- $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+ $sl =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
# known declaration macros
- $sline =~ /^\+\s+$declaration_macros/ ||
+ $sl =~ /^\+\s+$declaration_macros/ ||
# start of struct or union or enum
- $sline =~ /^\+\s+(?:static\s+)?(?:const\s+)?(?:union|struct|enum|typedef)\b/ ||
+ $sl =~ /^\+\s+(?:static\s+)?(?:const\s+)?(?:union|struct|enum|typedef)\b/ ||
# start or end of block or continuation of declaration
- $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ ||
+ $sl =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ ||
# bitfield continuation
- $sline =~ /^\+\s+$Ident\s*:\s*\d+\s*[,;]/ ||
+ $sl =~ /^\+\s+$Ident\s*:\s*\d+\s*[,;]/ ||
# other possible extensions of declaration lines
- $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
- # indentation of previous and current line are the same
- (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
- if (WARN("LINE_SPACING",
- "Missing a blank line after declarations\n" . $hereprev) &&
- $fix) {
- fix_insert_line($fixlinenr, "\+");
+ $sl =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/)) {
+ if (WARN("LINE_SPACING",
+ "Missing a blank line after declarations\n" . $hereprev) &&
+ $fix) {
+ fix_insert_line($fixlinenr, "\+");
+ }
}
}
@@ -4321,7 +4367,8 @@ sub process {
}
# check for global initialisers.
- if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*($zero_initializer)\s*;/) {
+ if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*($zero_initializer)\s*;/ &&
+ !exclude_global_initialisers($realfile)) {
if (ERROR("GLOBAL_INITIALISERS",
"do not initialise globals to $1\n" . $herecurr) &&
$fix) {
@@ -4417,7 +4464,7 @@ sub process {
WARN("STATIC_CONST_CHAR_ARRAY",
"char * array declaration might be better as static const\n" .
$herecurr);
- }
+ }
# check for sizeof(foo)/sizeof(foo[0]) that could be ARRAY_SIZE(foo)
if ($line =~ m@\bsizeof\s*\(\s*($Lval)\s*\)@) {
@@ -5007,7 +5054,7 @@ sub process {
# A colon needs no spaces before when it is
# terminating a case value or a label.
} elsif ($opv eq ':C' || $opv eq ':L') {
- if ($ctx =~ /Wx./) {
+ if ($ctx =~ /Wx./ and $realfile !~ m@.*\.lds\.h$@) {
if (ERROR("SPACING",
"space prohibited before that '$op' $at\n" . $hereptr)) {
$good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
@@ -5270,7 +5317,7 @@ sub process {
$lines[$linenr - 3] !~ /^[ +]\s*$Ident\s*:/) {
WARN("RETURN_VOID",
"void function return statements are not generally useful\n" . $hereprev);
- }
+ }
# if statements using unnecessary parentheses - ie: if ((foo == bar))
if ($perl_version_ok &&
@@ -5966,6 +6013,17 @@ sub process {
"Prefer using '\"%s...\", __func__' to using '$context_function', this function's name, in a string\n" . $herecurr);
}
+# check for unnecessary function-tracing-like uses
+# This does not use $logFunctions because there are many instances like
+# 'dprintk(FOO, "%s()\n", __func__);' which do not match $logFunctions
+ if ($rawline =~ /^\+.*\([^"]*"$tracing_logging_tags{0,3}%s(?:\s*\(\s*\)\s*)?$tracing_logging_tags{0,3}(?:\\n)?"\s*,\s*__func__\s*\)\s*;/) {
+ if (WARN("TRACING_LOGGING",
+ "Unnecessary ftrace-like logging - prefer using ftrace\n" . $herecurr) &&
+ $fix) {
+ fix_delete_line($fixlinenr, $rawline);
+ }
+ }
+
# check for spaces before a quoted newline
if ($rawline =~ /^.*\".*\s\\n/) {
if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
@@ -6477,18 +6535,18 @@ sub process {
if ($line =~ /(\(\s*$C90_int_types\s*\)\s*)($Constant)\b/) {
my $cast = $1;
my $const = $2;
+ my $suffix = "";
+ my $newconst = $const;
+ $newconst =~ s/${Int_type}$//;
+ $suffix .= 'U' if ($cast =~ /\bunsigned\b/);
+ if ($cast =~ /\blong\s+long\b/) {
+ $suffix .= 'LL';
+ } elsif ($cast =~ /\blong\b/) {
+ $suffix .= 'L';
+ }
if (WARN("TYPECAST_INT_CONSTANT",
- "Unnecessary typecast of c90 int constant\n" . $herecurr) &&
+ "Unnecessary typecast of c90 int constant - '$cast$const' could be '$const$suffix'\n" . $herecurr) &&
$fix) {
- my $suffix = "";
- my $newconst = $const;
- $newconst =~ s/${Int_type}$//;
- $suffix .= 'U' if ($cast =~ /\bunsigned\b/);
- if ($cast =~ /\blong\s+long\b/) {
- $suffix .= 'LL';
- } elsif ($cast =~ /\blong\b/) {
- $suffix .= 'L';
- }
$fixed[$fixlinenr] =~ s/\Q$cast\E$const\b/$newconst$suffix/;
}
}
@@ -7019,12 +7077,14 @@ sub process {
# use of NR_CPUS is usually wrong
# ignore definitions of NR_CPUS and usage to define arrays as likely right
+# ignore designated initializers using NR_CPUS
if ($line =~ /\bNR_CPUS\b/ &&
$line !~ /^.\s*\s*#\s*if\b.*\bNR_CPUS\b/ &&
$line !~ /^.\s*\s*#\s*define\b.*\bNR_CPUS\b/ &&
$line !~ /^.\s*$Declare\s.*\[[^\]]*NR_CPUS[^\]]*\]/ &&
$line !~ /\[[^\]]*\.\.\.[^\]]*NR_CPUS[^\]]*\]/ &&
- $line !~ /\[[^\]]*NR_CPUS[^\]]*\.\.\.[^\]]*\]/)
+ $line !~ /\[[^\]]*NR_CPUS[^\]]*\.\.\.[^\]]*\]/ &&
+ $line !~ /^.\s*\.\w+\s*=\s*.*\bNR_CPUS\b/)
{
WARN("NR_CPUS",
"usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc\n" . $herecurr);
diff --git a/scripts/dtc/include-prefixes/c6x b/scripts/dtc/include-prefixes/c6x
deleted file mode 120000
index 49ded4cae2be..000000000000
--- a/scripts/dtc/include-prefixes/c6x
+++ /dev/null
@@ -1 +0,0 @@
-../../../arch/c6x/boot/dts \ No newline at end of file
diff --git a/scripts/gcc-plugins/latent_entropy_plugin.c b/scripts/gcc-plugins/latent_entropy_plugin.c
index 9dced66d158e..589454bce930 100644
--- a/scripts/gcc-plugins/latent_entropy_plugin.c
+++ b/scripts/gcc-plugins/latent_entropy_plugin.c
@@ -524,7 +524,7 @@ static unsigned int latent_entropy_execute(void)
while (bb != EXIT_BLOCK_PTR_FOR_FN(cfun)) {
perturb_local_entropy(bb, local_entropy);
bb = bb->next_bb;
- };
+ }
/* 4. mix local entropy into the global entropy variable */
perturb_latent_entropy(local_entropy);
diff --git a/scripts/gcc-plugins/structleak_plugin.c b/scripts/gcc-plugins/structleak_plugin.c
index 29b480c33a8d..d7190e443a14 100644
--- a/scripts/gcc-plugins/structleak_plugin.c
+++ b/scripts/gcc-plugins/structleak_plugin.c
@@ -170,7 +170,6 @@ static void initialize(tree var)
static unsigned int structleak_execute(void)
{
basic_block bb;
- unsigned int ret = 0;
tree var;
unsigned int i;
@@ -200,7 +199,7 @@ static unsigned int structleak_execute(void)
initialize(var);
}
- return ret;
+ return 0;
}
#define PASS_NAME structleak
diff --git a/scripts/gdb/linux/lists.py b/scripts/gdb/linux/lists.py
index c487ddf09d38..bae4d70b7eae 100644
--- a/scripts/gdb/linux/lists.py
+++ b/scripts/gdb/linux/lists.py
@@ -27,6 +27,11 @@ def list_for_each(head):
raise TypeError("Must be struct list_head not {}"
.format(head.type))
+ if head['next'] == 0:
+ gdb.write("list_for_each: Uninitialized list '{}' treated as empty\n"
+ .format(head.address))
+ return
+
node = head['next'].dereference()
while node.address != head.address:
yield node.address
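For context, a hedged kernel-side sketch of the state the new guard handles: a list_head that has only been zero-filled (for example a static variable in BSS before INIT_LIST_HEAD() runs) has next == NULL, which the helper now reports instead of dereferencing:

/* Illustrative only; the identifiers are hypothetical. */
static LIST_HEAD(ready_list);		/* initialized at build time */
static struct list_head pending_list;	/* zero-filled in BSS: next == NULL */

static int __init example_init(void)
{
	/* until this runs, gdb's list_for_each(pending_list) sees next == 0 */
	INIT_LIST_HEAD(&pending_list);
	return 0;
}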
diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh
index d54dfba15bf2..da320151e7c3 100755
--- a/scripts/gen_autoksyms.sh
+++ b/scripts/gen_autoksyms.sh
@@ -19,7 +19,26 @@ esac
# We need access to CONFIG_ symbols
. include/config/auto.conf
-ksym_wl=/dev/null
+needed_symbols=
+
+# Special case for modversions (see modpost.c)
+if [ -n "$CONFIG_MODVERSIONS" ]; then
+ needed_symbols="$needed_symbols module_layout"
+fi
+
+# With CONFIG_LTO_CLANG, LLVM bitcode has not yet been compiled into a binary
+# when the .mod files are generated, which means they don't yet contain
+# references to certain symbols that will be present in the final binaries.
+if [ -n "$CONFIG_LTO_CLANG" ]; then
+ # intrinsic functions
+ needed_symbols="$needed_symbols memcpy memmove memset"
+ # ftrace
+ needed_symbols="$needed_symbols _mcount"
+ # stack protector symbols
+ needed_symbols="$needed_symbols __stack_chk_fail __stack_chk_guard"
+fi
+
+ksym_wl=
if [ -n "$CONFIG_UNUSED_KSYMS_WHITELIST" ]; then
# Use 'eval' to expand the whitelist path and check if it is relative
eval ksym_wl="$CONFIG_UNUSED_KSYMS_WHITELIST"
@@ -40,16 +59,14 @@ cat > "$output_file" << EOT
EOT
[ -f modules.order ] && modlist=modules.order || modlist=/dev/null
-sed 's/ko$/mod/' $modlist |
-xargs -n1 sed -n -e '2{s/ /\n/g;/^$/!p;}' -- |
-cat - "$ksym_wl" |
+
+{
+ sed 's/ko$/mod/' $modlist | xargs -n1 sed -n -e '2p'
+ echo "$needed_symbols"
+ [ -n "$ksym_wl" ] && cat "$ksym_wl"
+} | sed -e 's/ /\n/g' | sed -n -e '/^$/!p' |
# Remove the dot prefix for ppc64; symbol names with a dot (.) hold entry
# point addresses.
sed -e 's/^\.//' |
sort -u |
sed -e 's/\(.*\)/#define __KSYM_\1 1/' >> "$output_file"
-
-# Special case for modversions (see modpost.c)
-if [ -n "$CONFIG_MODVERSIONS" ]; then
- echo "#define __KSYM_module_layout 1" >> "$output_file"
-fi
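The net effect, sketched with example symbols rather than output copied from a real build: every needed symbol, whether it comes from a module's .mod file, the modversions special case, or the new LTO/ftrace/stack-protector list, ends up as one __KSYM_ define in the generated header:

/* Illustrative autoksyms.h contents produced by the pipeline above. */
#define __KSYM_module_layout 1		/* CONFIG_MODVERSIONS special case */
#define __KSYM_memcpy 1			/* CONFIG_LTO_CLANG intrinsic */
#define __KSYM__mcount 1		/* ftrace symbol under CONFIG_LTO_CLANG */
#define __KSYM___stack_chk_fail 1	/* stack protector symbol */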
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index e046e16e4411..8b5bc7bf4bb8 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1553,7 +1553,7 @@ sub create_parameterlist($$$$) {
} elsif ($arg =~ m/\(.+\)\s*\(/) {
# pointer-to-function
$arg =~ tr/#/,/;
- $arg =~ m/[^\(]+\(\*?\s*([\w\.]*)\s*\)/;
+ $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/;
$param = $1;
$type = $arg;
$type =~ s/([^\(]+\(\*?)\s*$param/$1/;
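Assuming a prototype like the one below (hypothetical), the '[' and ']' added to the character class let kernel-doc capture the parameter name of an array of function pointers, which the old pattern failed to match:

/* With the widened class, $param now captures "handlers[4]". */
int register_handlers(void (*handlers[4])(int irq));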
diff --git a/scripts/lto-used-symbollist.txt b/scripts/lto-used-symbollist.txt
deleted file mode 100644
index 38e7bb9ebaae..000000000000
--- a/scripts/lto-used-symbollist.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-memcpy
-memmove
-memset
-__stack_chk_fail
-__stack_chk_guard
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index b9c2ee7ab43f..cce12e1971d8 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -438,7 +438,7 @@ static int arm_is_fake_mcount(Elf32_Rel const *rp)
static int arm64_is_fake_mcount(Elf64_Rel const *rp)
{
- return ELF64_R_TYPE(w(rp->r_info)) != R_AARCH64_CALL26;
+ return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26;
}
/* 64-bit EM_MIPS has weird ELF64_Rela.r_info.
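The underlying issue, restated with the intermediate value made explicit (a sketch of the fixed helper, not additional code from the patch): Elf64_Rel.r_info is a 64-bit field, so it must go through the 8-byte endian accessor w8(); folding it with the 4-byte w() decodes the wrong half on cross-endian builds and misclassifies relocation types:

static int arm64_is_fake_mcount(Elf64_Rel const *rp)
{
	/* r_info is 64-bit: use the 8-byte endian helper w8(), not w() */
	Elf64_Xword info = w8(rp->r_info);

	/* ELF64_R_TYPE() keeps the low 32 bits, the relocation type */
	return ELF64_R_TYPE(info) != R_AARCH64_CALL26;
}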
diff --git a/sound/hda/Kconfig b/sound/hda/Kconfig
index 9ed5cfa3c18c..57595f1552c9 100644
--- a/sound/hda/Kconfig
+++ b/sound/hda/Kconfig
@@ -44,9 +44,13 @@ config SND_INTEL_NHLT
config SND_INTEL_DSP_CONFIG
tristate
select SND_INTEL_NHLT if ACPI
+ select SND_INTEL_SOUNDWIRE_ACPI if ACPI
# this config should be selected only for Intel DSP platforms.
# A fallback is provided so that the code compiles in all cases.
+config SND_INTEL_SOUNDWIRE_ACPI
+ tristate
+
config SND_INTEL_BYT_PREFER_SOF
bool "Prefer SOF driver over SST on BY/CHT platforms"
depends on SND_SST_ATOM_HIFI2_PLATFORM_ACPI && SND_SOC_SOF_BAYTRAIL
diff --git a/sound/hda/Makefile b/sound/hda/Makefile
index 601e617918b8..78f487a635f8 100644
--- a/sound/hda/Makefile
+++ b/sound/hda/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_SND_HDA_EXT_CORE) += ext/
snd-intel-dspcfg-objs := intel-dsp-config.o
snd-intel-dspcfg-$(CONFIG_SND_INTEL_NHLT) += intel-nhlt.o
obj-$(CONFIG_SND_INTEL_DSP_CONFIG) += snd-intel-dspcfg.o
+
+snd-intel-sdw-acpi-objs := intel-sdw-acpi.o
+obj-$(CONFIG_SND_INTEL_SOUNDWIRE_ACPI) += snd-intel-sdw-acpi.o
diff --git a/sound/hda/ext/hdac_ext_controller.c b/sound/hda/ext/hdac_ext_controller.c
index a9bd39b93697..b2df7b4f9227 100644
--- a/sound/hda/ext/hdac_ext_controller.c
+++ b/sound/hda/ext/hdac_ext_controller.c
@@ -133,7 +133,7 @@ void snd_hdac_link_free_all(struct hdac_bus *bus)
EXPORT_SYMBOL_GPL(snd_hdac_link_free_all);
/**
- * snd_hdac_ext_bus_get_link_index - get link based on codec name
+ * snd_hdac_ext_bus_get_link - get link based on codec name
* @bus: the pointer to HDAC bus object
* @codec_name: codec name
*/
diff --git a/sound/hda/ext/hdac_ext_stream.c b/sound/hda/ext/hdac_ext_stream.c
index c4d54a838773..0c005d67fa89 100644
--- a/sound/hda/ext/hdac_ext_stream.c
+++ b/sound/hda/ext/hdac_ext_stream.c
@@ -133,7 +133,7 @@ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple);
/**
- * snd_hdac_ext_linkstream_start - start a stream
+ * snd_hdac_ext_link_stream_start - start a stream
* @stream: HD-audio ext core stream to start
*/
void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *stream)
diff --git a/sound/hda/hdac_regmap.c b/sound/hda/hdac_regmap.c
index d75f31eb9d78..fe3587547cfe 100644
--- a/sound/hda/hdac_regmap.c
+++ b/sound/hda/hdac_regmap.c
@@ -386,7 +386,7 @@ int snd_hdac_regmap_init(struct hdac_device *codec)
EXPORT_SYMBOL_GPL(snd_hdac_regmap_init);
/**
- * snd_hdac_regmap_init - Release the regmap from HDA codec
+ * snd_hdac_regmap_exit - Release the regmap from HDA codec
* @codec: the codec object
*/
void snd_hdac_regmap_exit(struct hdac_device *codec)
diff --git a/sound/hda/intel-dsp-config.c b/sound/hda/intel-dsp-config.c
index d1eb9d34993a..ab5ff7867eb9 100644
--- a/sound/hda/intel-dsp-config.c
+++ b/sound/hda/intel-dsp-config.c
@@ -557,4 +557,4 @@ EXPORT_SYMBOL_GPL(snd_intel_acpi_dsp_driver_probe);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Intel DSP config driver");
-MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT);
+MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI);
diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c
index 059aaf04f536..d053beccfaec 100644
--- a/sound/hda/intel-nhlt.c
+++ b/sound/hda/intel-nhlt.c
@@ -31,18 +31,44 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
struct nhlt_endpoint *epnt;
struct nhlt_dmic_array_config *cfg;
struct nhlt_vendor_dmic_array_config *cfg_vendor;
+ struct nhlt_fmt *fmt_configs;
unsigned int dmic_geo = 0;
- u8 j;
+ u16 max_ch = 0;
+ u8 i, j;
if (!nhlt)
return 0;
- epnt = (struct nhlt_endpoint *)nhlt->desc;
+ for (j = 0, epnt = nhlt->desc; j < nhlt->endpoint_count; j++,
+ epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length)) {
- for (j = 0; j < nhlt->endpoint_count; j++) {
- if (epnt->linktype == NHLT_LINK_DMIC) {
- cfg = (struct nhlt_dmic_array_config *)
- (epnt->config.caps);
+ if (epnt->linktype != NHLT_LINK_DMIC)
+ continue;
+
+ cfg = (struct nhlt_dmic_array_config *)(epnt->config.caps);
+ fmt_configs = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size);
+
+ /* find max number of channels based on format_configuration */
+ if (fmt_configs->fmt_count) {
+ dev_dbg(dev, "%s: found %d format definitions\n",
+ __func__, fmt_configs->fmt_count);
+
+ for (i = 0; i < fmt_configs->fmt_count; i++) {
+ struct wav_fmt_ext *fmt_ext;
+
+ fmt_ext = &fmt_configs->fmt_config[i].fmt_ext;
+
+ if (fmt_ext->fmt.channels > max_ch)
+ max_ch = fmt_ext->fmt.channels;
+ }
+ dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch);
+ } else {
+ dev_dbg(dev, "%s: No format information found\n", __func__);
+ }
+
+ if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) {
+ dmic_geo = max_ch;
+ } else {
switch (cfg->array_type) {
case NHLT_MIC_ARRAY_2CH_SMALL:
case NHLT_MIC_ARRAY_2CH_BIG:
@@ -59,13 +85,23 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
dmic_geo = cfg_vendor->nb_mics;
break;
default:
- dev_warn(dev, "undefined DMIC array_type 0x%0x\n",
- cfg->array_type);
+ dev_warn(dev, "%s: undefined DMIC array_type 0x%0x\n",
+ __func__, cfg->array_type);
+ }
+
+ if (dmic_geo > 0) {
+ dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo);
+ }
+ if (max_ch > dmic_geo) {
+ dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n",
+ __func__, max_ch, dmic_geo);
}
}
- epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
}
+ dev_dbg(dev, "%s: dmic number %d max_ch %d\n",
+ __func__, dmic_geo, max_ch);
+
return dmic_geo;
}
EXPORT_SYMBOL_GPL(intel_nhlt_get_dmic_geo);
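In short (a hedged restatement, not code from the patch): for DMIC endpoints whose device_config is not a mic-array descriptor, the channel count is now derived from the endpoint's format configurations instead of being left at zero:

/* Illustrative walk over an endpoint's formats (types from intel-nhlt.h). */
static u16 nhlt_max_channels(struct nhlt_fmt *fmt_configs)
{
	u16 max_ch = 0;
	u8 i;

	for (i = 0; i < fmt_configs->fmt_count; i++) {
		struct wav_fmt_ext *fmt_ext = &fmt_configs->fmt_config[i].fmt_ext;

		if (fmt_ext->fmt.channels > max_ch)
			max_ch = fmt_ext->fmt.channels;
	}
	return max_ch;
}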
diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c
new file mode 100644
index 000000000000..c0123bc31c0d
--- /dev/null
+++ b/sound/hda/intel-sdw-acpi.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2015-2021 Intel Corporation.
+
+/*
+ * SDW Intel ACPI scan helpers
+ */
+
+#include <linux/acpi.h>
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fwnode.h>
+#include <linux/module.h>
+#include <linux/soundwire/sdw_intel.h>
+#include <linux/string.h>
+
+#define SDW_LINK_TYPE 4 /* from Intel ACPI documentation */
+#define SDW_MAX_LINKS 4
+
+static int ctrl_link_mask;
+module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444);
+MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)");
+
+static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
+{
+ struct fwnode_handle *link;
+ char name[32];
+ u32 quirk_mask = 0;
+
+ /* Find master handle */
+ snprintf(name, sizeof(name),
+ "mipi-sdw-link-%d-subproperties", i);
+
+ link = fwnode_get_named_child_node(fw_node, name);
+ if (!link)
+ return false;
+
+ fwnode_property_read_u32(link,
+ "intel-quirk-mask",
+ &quirk_mask);
+
+ if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
+ return false;
+
+ return true;
+}
+
+static int
+sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
+{
+ struct acpi_device *adev;
+ int ret, i;
+ u8 count;
+
+ if (acpi_bus_get_device(info->handle, &adev))
+ return -EINVAL;
+
+ /* Found controller, find links supported */
+ count = 0;
+ ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
+ "mipi-sdw-master-count", &count, 1);
+
+ /*
+ * In theory we could check the number of links supported in
+ * hardware, but at this step we cannot assume the SoundWire IP
+ * is powered.
+ *
+ * In addition, if the BIOS doesn't even provide the
+ * 'master-count' property, then all the inits based on link
+ * masks will fail as well.
+ *
+ * We will check the hardware capabilities in the startup() step.
+ */
+
+ if (ret) {
+ dev_err(&adev->dev,
+ "Failed to read mipi-sdw-master-count: %d\n", ret);
+ return -EINVAL;
+ }
+
+ /* Check count is within bounds */
+ if (count > SDW_MAX_LINKS) {
+ dev_err(&adev->dev, "Link count %d exceeds max %d\n",
+ count, SDW_MAX_LINKS);
+ return -EINVAL;
+ }
+
+ if (!count) {
+ dev_warn(&adev->dev, "No SoundWire links detected\n");
+ return -EINVAL;
+ }
+ dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count);
+
+ info->count = count;
+ info->link_mask = 0;
+
+ for (i = 0; i < count; i++) {
+ if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) {
+ dev_dbg(&adev->dev,
+ "Link %d masked, will not be enabled\n", i);
+ continue;
+ }
+
+ if (!is_link_enabled(acpi_fwnode_handle(adev), i)) {
+ dev_dbg(&adev->dev,
+ "Link %d not selected in firmware\n", i);
+ continue;
+ }
+
+ info->link_mask |= BIT(i);
+ }
+
+ return 0;
+}
+
+static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
+ void *cdata, void **return_value)
+{
+ struct sdw_intel_acpi_info *info = cdata;
+ struct acpi_device *adev;
+ acpi_status status;
+ u64 adr;
+
+ status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr);
+ if (ACPI_FAILURE(status))
+ return AE_OK; /* keep going */
+
+ if (acpi_bus_get_device(handle, &adev)) {
+ pr_err("%s: Couldn't find ACPI handle\n", __func__);
+ return AE_NOT_FOUND;
+ }
+
+ info->handle = handle;
+
+ /*
+ * On some Intel platforms, multiple children of the HDAS
+ * device can be found, but only one of them is the SoundWire
+ * controller. The SNDW device is always exposed with
+ * Name(_ADR, 0x40000000), with bits 31..28 representing the
+ * SoundWire link type, so filter accordingly.
+ */
+ if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
+ return AE_OK; /* keep going */
+
+ /* device found, stop namespace walk */
+ return AE_CTRL_TERMINATE;
+}
+
+/**
+ * sdw_intel_acpi_scan() - SoundWire Intel init routine
+ * @parent_handle: ACPI parent handle
+ * @info: description of what firmware/DSDT tables expose
+ *
+ * This scans the namespace and queries firmware to figure out which
+ * links to enable. A follow-up use of sdw_intel_probe() and
+ * sdw_intel_startup() is required for creation of devices and bus
+ * startup.
+ */
+int sdw_intel_acpi_scan(acpi_handle *parent_handle,
+ struct sdw_intel_acpi_info *info)
+{
+ acpi_status status;
+
+ info->handle = NULL;
+ status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
+ parent_handle, 1,
+ sdw_intel_acpi_cb,
+ NULL, info, NULL);
+ if (ACPI_FAILURE(status) || info->handle == NULL)
+ return -ENODEV;
+
+ return sdw_intel_scan_controller(info);
+}
+EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SND_INTEL_SOUNDWIRE_ACPI);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Intel Soundwire ACPI helpers");
diff --git a/sound/mips/snd-n64.c b/sound/mips/snd-n64.c
index ca6b4b99da98..e35e93157755 100644
--- a/sound/mips/snd-n64.c
+++ b/sound/mips/snd-n64.c
@@ -312,14 +312,14 @@ static int __init n64audio_probe(struct platform_device *pdev)
}
priv->mi_reg_base = devm_platform_ioremap_resource(pdev, 0);
- if (!priv->mi_reg_base) {
- err = -EINVAL;
+ if (IS_ERR(priv->mi_reg_base)) {
+ err = PTR_ERR(priv->mi_reg_base);
goto fail_dma_alloc;
}
priv->ai_reg_base = devm_platform_ioremap_resource(pdev, 1);
- if (!priv->ai_reg_base) {
- err = -EINVAL;
+ if (IS_ERR(priv->ai_reg_base)) {
+ err = PTR_ERR(priv->ai_reg_base);
goto fail_dma_alloc;
}
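The general rule the fix applies (an illustrative probe body, not code from the driver): devm_platform_ioremap_resource() reports failure through ERR_PTR(), never NULL, so callers must test with IS_ERR() and propagate PTR_ERR():

static int example_probe(struct platform_device *pdev)
{
	void __iomem *base = devm_platform_ioremap_resource(pdev, 0);

	if (IS_ERR(base))		/* a NULL check would miss errors */
		return PTR_ERR(base);	/* propagate the encoded errno */

	return 0;
}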
diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c
index a855fb8c58bd..55af8ef29838 100644
--- a/sound/pci/ctxfi/cthw20k2.c
+++ b/sound/pci/ctxfi/cthw20k2.c
@@ -991,7 +991,7 @@ static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf)
if (idx < 4) {
/* S/PDIF output */
- switch ((conf & 0x7)) {
+ switch ((conf & 0xf)) {
case 1:
set_field(&ctl->txctl[idx], ATXCTL_NUC, 0);
break;
diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c
index 9b755062d841..2026f1ccaf5a 100644
--- a/sound/pci/hda/hda_codec.c
+++ b/sound/pci/hda/hda_codec.c
@@ -3483,7 +3483,7 @@ EXPORT_SYMBOL_GPL(snd_hda_check_amp_list_power);
*/
/**
- * snd_hda_input_mux_info_info - Info callback helper for the input-mux enum
+ * snd_hda_input_mux_info - Info callback helper for the input-mux enum
* @imux: imux helper object
* @uinfo: pointer to get/store the data
*/
@@ -3506,7 +3506,7 @@ int snd_hda_input_mux_info(const struct hda_input_mux *imux,
EXPORT_SYMBOL_GPL(snd_hda_input_mux_info);
/**
- * snd_hda_input_mux_info_put - Put callback helper for the input-mux enum
+ * snd_hda_input_mux_put - Put callback helper for the input-mux enum
* @codec: the HDA codec
* @imux: imux helper object
* @ucontrol: pointer to get/store the data
@@ -3941,7 +3941,7 @@ unsigned int snd_hda_correct_pin_ctl(struct hda_codec *codec,
EXPORT_SYMBOL_GPL(snd_hda_correct_pin_ctl);
/**
- * _snd_hda_pin_ctl - Helper to set pin ctl value
+ * _snd_hda_set_pin_ctl - Helper to set pin ctl value
* @codec: the HDA codec
* @pin: referred pin NID
* @val: pin control value to set
diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 5e40944e7342..8b7c5508f368 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c
@@ -3923,7 +3923,7 @@ static void vmaster_update_mute_led(void *private_data, int enabled)
}
/**
- * snd_dha_gen_add_mute_led_cdev - Create a LED classdev and enable as vmaster mute LED
+ * snd_hda_gen_add_mute_led_cdev - Create a LED classdev and enable as vmaster mute LED
* @codec: the HDA codec
* @callback: the callback for LED classdev brightness_set_blocking
*/
@@ -4074,7 +4074,7 @@ static int add_micmute_led_hook(struct hda_codec *codec)
}
/**
- * snd_dha_gen_add_micmute_led_cdev - Create a LED classdev and enable as mic-mute LED
+ * snd_hda_gen_add_micmute_led_cdev - Create a LED classdev and enable as mic-mute LED
* @codec: the HDA codec
* @callback: the callback for LED classdev brightness_set_blocking
*
diff --git a/sound/pci/hda/hda_jack.c b/sound/pci/hda/hda_jack.c
index b8b568046592..ac00866d8032 100644
--- a/sound/pci/hda/hda_jack.c
+++ b/sound/pci/hda/hda_jack.c
@@ -213,7 +213,7 @@ static void jack_detect_update(struct hda_codec *codec,
}
/**
- * snd_hda_set_dirty_all - Mark all the cached as dirty
+ * snd_hda_jack_set_dirty_all - Mark all the cached as dirty
* @codec: the HDA codec
*
* This function sets the dirty flag to all entries of jack table.
@@ -293,7 +293,7 @@ find_callback_from_list(struct hda_jack_tbl *jack,
}
/**
- * snd_hda_jack_detect_enable_mst - enable the jack-detection
+ * snd_hda_jack_detect_enable_callback_mst - enable the jack-detection
* @codec: the HDA codec
* @nid: pin NID to enable
* @func: callback function to register
diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c
index 7e62aed172a9..c966f49fa942 100644
--- a/sound/pci/hda/patch_ca0132.c
+++ b/sound/pci/hda/patch_ca0132.c
@@ -2338,7 +2338,7 @@ static int dspio_send_scp_message(struct hda_codec *codec,
}
/**
- * Prepare and send the SCP message to DSP
+ * dspio_scp - Prepare and send the SCP message to DSP
* @codec: the HDA codec
* @mod_id: ID of the DSP module to send the command
* @src_id: ID of the source
@@ -2865,7 +2865,7 @@ static int dsp_dma_stop(struct hda_codec *codec,
}
/**
- * Allocate router ports
+ * dsp_allocate_router_ports - Allocate router ports
*
* @codec: the HDA codec
* @num_chans: number of channels in the stream
@@ -3178,8 +3178,7 @@ static int dspxfr_hci_write(struct hda_codec *codec,
}
/**
- * Write a block of data into DSP code or data RAM using pre-allocated
- * DMA engine.
+ * dspxfr_one_seg - Write a block of data into DSP code or data RAM using pre-allocated DMA engine.
*
* @codec: the HDA codec
* @fls: pointer to a fast load image
@@ -3376,7 +3375,7 @@ static int dspxfr_one_seg(struct hda_codec *codec,
}
/**
- * Write the entire DSP image of a DSP code/data overlay to DSP memories
+ * dspxfr_image - Write the entire DSP image of a DSP code/data overlay to DSP memories
*
* @codec: the HDA codec
* @fls_data: pointer to a fast load image
diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index e405be7929e3..e6d0843ee9df 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c
@@ -157,6 +157,7 @@ struct hdmi_spec {
bool dyn_pin_out;
bool dyn_pcm_assign;
+ bool dyn_pcm_no_legacy;
bool intel_hsw_fixup; /* apply Intel platform-specific fixups */
/*
* Non-generic VIA/NVIDIA specific
@@ -1345,6 +1346,12 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
{
int i;
+ /* On new machines, try to assign the PCM slot dynamically
+ * instead of using the preferred fixed map (the legacy way).
+ */
+ if (spec->dyn_pcm_no_legacy)
+ goto last_try;
+
/*
* generic_hdmi_build_pcms() may allocate extra PCMs on some
* platforms (with maximum of 'num_nids + dev_num - 1')
@@ -1374,6 +1381,7 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
return i;
}
+ last_try:
/* the last try; check the empty slots in pins */
for (i = 0; i < spec->num_nids; i++) {
if (!test_bit(i, &spec->pcm_bitmap))
@@ -2987,8 +2995,16 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec)
* the index indicate the port number.
*/
static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf};
+ int ret;
- return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
+ ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
+ if (!ret) {
+ struct hdmi_spec *spec = codec->spec;
+
+ spec->dyn_pcm_no_legacy = true;
+ }
+
+ return ret;
}
/* Intel Baytrail and Braswell; with eld notifier */
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index 1927605f0f7e..b47504fa8dfd 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -2532,6 +2532,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950),
SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
+ SND_PCI_QUIRK(0x1462, 0xcc34, "MSI Godlike X570", ALC1220_FIXUP_GB_DUAL_CODECS),
SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
@@ -6396,6 +6397,7 @@ enum {
ALC269_FIXUP_LEMOTE_A1802,
ALC269_FIXUP_LEMOTE_A190X,
ALC256_FIXUP_INTEL_NUC8_RUGGED,
+ ALC256_FIXUP_INTEL_NUC10,
ALC255_FIXUP_XIAOMI_HEADSET_MIC,
ALC274_FIXUP_HP_MIC,
ALC274_FIXUP_HP_HEADSET_MIC,
@@ -6406,6 +6408,7 @@ enum {
ALC236_FIXUP_DELL_AIO_HEADSET_MIC,
ALC282_FIXUP_ACER_DISABLE_LINEOUT,
ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST,
+ ALC256_FIXUP_ACER_HEADSET_MIC,
};
static const struct hda_fixup alc269_fixups[] = {
@@ -7782,6 +7785,15 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC269_FIXUP_HEADSET_MODE
},
+ [ALC256_FIXUP_INTEL_NUC10] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE
+ },
[ALC255_FIXUP_XIAOMI_HEADSET_MIC] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
@@ -7853,6 +7865,16 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC255_FIXUP_ACER_MIC_NO_PRESENCE,
},
+ [ALC256_FIXUP_ACER_HEADSET_MIC] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x02a1113c }, /* use as headset mic, without its own jack detect */
+ { 0x1a, 0x90a1092f }, /* use as internal mic */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7879,9 +7901,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK),
SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
@@ -8128,6 +8152,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[5|7][0-9]RZ[Q]", ALC269_FIXUP_DMIC),
SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -8222,6 +8247,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
+ SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
#if 0
/* Below is a quirk table taken from the old code.
diff --git a/sound/soc/sof/Kconfig b/sound/soc/sof/Kconfig
index 3e8b6c035ce3..8dfc165c3690 100644
--- a/sound/soc/sof/Kconfig
+++ b/sound/soc/sof/Kconfig
@@ -9,29 +9,34 @@ config SND_SOC_SOF_TOPLEVEL
if SND_SOC_SOF_TOPLEVEL
+config SND_SOC_SOF_PCI_DEV
+ tristate
+
config SND_SOC_SOF_PCI
tristate "SOF PCI enumeration support"
depends on PCI
- select SND_SOC_SOF
- select SND_SOC_ACPI if ACPI
help
This adds support for PCI enumeration. This option is
required to enable Intel Skylake+ devices.
+ For backwards compatibility with previous configurations, this selection is
+ used as the default for platform-specific drivers.
Say Y if you need this option.
If unsure select "N".
config SND_SOC_SOF_ACPI
tristate "SOF ACPI enumeration support"
depends on ACPI || COMPILE_TEST
- select SND_SOC_SOF
- select SND_SOC_ACPI if ACPI
- select IOSF_MBI if X86 && PCI
help
This adds support for ACPI enumeration. This option is required
to enable Intel Broadwell/Baytrail/Cherrytrail devices.
+ For backwards compatibility with previous configurations, this selection is
+ used as the default for platform-specific drivers.
Say Y if you need this option.
If unsure select "N".
+config SND_SOC_SOF_ACPI_DEV
+ tristate
+
config SND_SOC_SOF_OF
tristate "SOF OF enumeration support"
depends on OF || COMPILE_TEST
diff --git a/sound/soc/sof/Makefile b/sound/soc/sof/Makefile
index 05718dfe6cd2..606d8137cd98 100644
--- a/sound/soc/sof/Makefile
+++ b/sound/soc/sof/Makefile
@@ -14,9 +14,9 @@ obj-$(CONFIG_SND_SOC_SOF) += snd-sof.o
obj-$(CONFIG_SND_SOC_SOF_NOCODEC) += snd-sof-nocodec.o
-obj-$(CONFIG_SND_SOC_SOF_ACPI) += snd-sof-acpi.o
+obj-$(CONFIG_SND_SOC_SOF_ACPI_DEV) += snd-sof-acpi.o
obj-$(CONFIG_SND_SOC_SOF_OF) += snd-sof-of.o
-obj-$(CONFIG_SND_SOC_SOF_PCI) += snd-sof-pci.o
+obj-$(CONFIG_SND_SOC_SOF_PCI_DEV) += snd-sof-pci.o
obj-$(CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL) += intel/
obj-$(CONFIG_SND_SOC_SOF_IMX_TOPLEVEL) += imx/
diff --git a/sound/soc/sof/intel/Kconfig b/sound/soc/sof/intel/Kconfig
index 4797a1cf8c80..da1c396f529d 100644
--- a/sound/soc/sof/intel/Kconfig
+++ b/sound/soc/sof/intel/Kconfig
@@ -9,31 +9,6 @@ config SND_SOC_SOF_INTEL_TOPLEVEL
if SND_SOC_SOF_INTEL_TOPLEVEL
-config SND_SOC_SOF_INTEL_ACPI
- def_tristate SND_SOC_SOF_ACPI
- select SND_SOC_SOF_BAYTRAIL if SND_SOC_SOF_BAYTRAIL_SUPPORT
- select SND_SOC_SOF_BROADWELL if SND_SOC_SOF_BROADWELL_SUPPORT
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_INTEL_PCI
- def_tristate SND_SOC_SOF_PCI
- select SND_SOC_SOF_MERRIFIELD if SND_SOC_SOF_MERRIFIELD_SUPPORT
- select SND_SOC_SOF_APOLLOLAKE if SND_SOC_SOF_APOLLOLAKE_SUPPORT
- select SND_SOC_SOF_GEMINILAKE if SND_SOC_SOF_GEMINILAKE_SUPPORT
- select SND_SOC_SOF_CANNONLAKE if SND_SOC_SOF_CANNONLAKE_SUPPORT
- select SND_SOC_SOF_COFFEELAKE if SND_SOC_SOF_COFFEELAKE_SUPPORT
- select SND_SOC_SOF_ICELAKE if SND_SOC_SOF_ICELAKE_SUPPORT
- select SND_SOC_SOF_COMETLAKE if SND_SOC_SOF_COMETLAKE_SUPPORT
- select SND_SOC_SOF_TIGERLAKE if SND_SOC_SOF_TIGERLAKE_SUPPORT
- select SND_SOC_SOF_ELKHARTLAKE if SND_SOC_SOF_ELKHARTLAKE_SUPPORT
- select SND_SOC_SOF_JASPERLAKE if SND_SOC_SOF_JASPERLAKE_SUPPORT
- select SND_SOC_SOF_ALDERLAKE if SND_SOC_SOF_ALDERLAKE_SUPPORT
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
config SND_SOC_SOF_INTEL_HIFI_EP_IPC
tristate
help
@@ -50,18 +25,25 @@ config SND_SOC_SOF_INTEL_ATOM_HIFI_EP
config SND_SOC_SOF_INTEL_COMMON
tristate
+ select SND_SOC_SOF
select SND_SOC_ACPI_INTEL_MATCH
select SND_SOC_SOF_XTENSA
select SND_SOC_INTEL_MACH
select SND_SOC_ACPI if ACPI
+ select SND_INTEL_DSP_CONFIG
help
This option is not user-selectable but automagically handled by
'select' statements at a higher level.
-if SND_SOC_SOF_INTEL_ACPI
+if SND_SOC_SOF_ACPI
-config SND_SOC_SOF_BAYTRAIL_SUPPORT
- bool "SOF support for Baytrail, Braswell and Cherrytrail"
+config SND_SOC_SOF_BAYTRAIL
+ tristate "SOF support for Baytrail, Braswell and Cherrytrail"
+ default SND_SOC_SOF_ACPI
+ select SND_SOC_SOF_INTEL_COMMON
+ select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
+ select SND_SOC_SOF_ACPI_DEV
+ select IOSF_MBI if X86 && PCI
help
This adds support for Sound Open Firmware for Intel(R) platforms
using the Baytrail, Braswell or Cherrytrail processors.
@@ -75,17 +57,12 @@ config SND_SOC_SOF_BAYTRAIL_SUPPORT
Say Y if you want to enable SOF on Baytrail/Cherrytrail.
If unsure select "N".
-config SND_SOC_SOF_BAYTRAIL
- tristate
- select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
- select SND_INTEL_DSP_CONFIG
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_BROADWELL_SUPPORT
- bool "SOF support for Broadwell"
- select SND_INTEL_DSP_CONFIG
+config SND_SOC_SOF_BROADWELL
+ tristate "SOF support for Broadwell"
+ default SND_SOC_SOF_ACPI
+ select SND_SOC_SOF_INTEL_COMMON
+ select SND_SOC_SOF_INTEL_HIFI_EP_IPC
+ select SND_SOC_SOF_ACPI_DEV
help
This adds support for Sound Open Firmware for Intel(R) platforms
using the Broadwell processors.
@@ -100,197 +77,143 @@ config SND_SOC_SOF_BROADWELL_SUPPORT
Say Y if you want to enable SOF on Broadwell.
If unsure select "N".
-config SND_SOC_SOF_BROADWELL
- tristate
- select SND_SOC_SOF_INTEL_COMMON
- select SND_SOC_SOF_INTEL_HIFI_EP_IPC
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-endif ## SND_SOC_SOF_INTEL_ACPI
+endif ## SND_SOC_SOF_ACPI
-if SND_SOC_SOF_INTEL_PCI
+if SND_SOC_SOF_PCI
-config SND_SOC_SOF_MERRIFIELD_SUPPORT
- bool "SOF support for Tangier/Merrifield"
+config SND_SOC_SOF_MERRIFIELD
+ tristate "SOF support for Tangier/Merrifield"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
help
This adds support for Sound Open Firmware for Intel(R) platforms
using the Tangier/Merrifield processors.
Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_MERRIFIELD
+config SND_SOC_SOF_INTEL_APL
tristate
- select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
+ select SND_SOC_SOF_HDA_COMMON
-config SND_SOC_SOF_APOLLOLAKE_SUPPORT
- bool "SOF support for Apollolake"
+config SND_SOC_SOF_APOLLOLAKE
+ tristate "SOF support for Apollolake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_APL
help
This adds support for Sound Open Firmware for Intel(R) platforms
using the Apollolake processors.
Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_APOLLOLAKE
- tristate
- select SND_SOC_SOF_HDA_COMMON
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_GEMINILAKE_SUPPORT
- bool "SOF support for GeminiLake"
+config SND_SOC_SOF_GEMINILAKE
+ tristate "SOF support for GeminiLake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_APL
help
This adds support for Sound Open Firmware for Intel(R) platforms
using the Geminilake processors.
Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_GEMINILAKE
+config SND_SOC_SOF_INTEL_CNL
tristate
select SND_SOC_SOF_HDA_COMMON
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
+ select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-config SND_SOC_SOF_CANNONLAKE_SUPPORT
- bool "SOF support for Cannonlake"
+config SND_SOC_SOF_CANNONLAKE
+ tristate "SOF support for Cannonlake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_CNL
help
This adds support for Sound Open Firmware for Intel(R) platforms
using the Cannonlake processors.
Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_CANNONLAKE
- tristate
- select SND_SOC_SOF_HDA_COMMON
- select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_COFFEELAKE_SUPPORT
- bool "SOF support for CoffeeLake"
+config SND_SOC_SOF_COFFEELAKE
+ tristate "SOF support for CoffeeLake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_CNL
help
This adds support for Sound Open Firmware for Intel(R) platforms
using the Coffeelake processors.
Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_COFFEELAKE
- tristate
- select SND_SOC_SOF_HDA_COMMON
- select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_ICELAKE_SUPPORT
- bool "SOF support for Icelake"
+config SND_SOC_SOF_COMETLAKE
+ tristate "SOF support for CometLake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_CNL
help
This adds support for Sound Open Firmware for Intel(R) platforms
- using the Icelake processors.
- Say Y if you have such a device.
+ using the Cometlake processors.
If unsure select "N".
-config SND_SOC_SOF_ICELAKE
- tristate
- select SND_SOC_SOF_HDA_COMMON
- select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_COMETLAKE
+config SND_SOC_SOF_INTEL_ICL
tristate
select SND_SOC_SOF_HDA_COMMON
select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_COMETLAKE_SUPPORT
- bool
-config SND_SOC_SOF_COMETLAKE_LP_SUPPORT
- bool "SOF support for CometLake"
- select SND_SOC_SOF_COMETLAKE_SUPPORT
+config SND_SOC_SOF_ICELAKE
+ tristate "SOF support for Icelake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_ICL
help
This adds support for Sound Open Firmware for Intel(R) platforms
- using the Cometlake processors.
+ using the Icelake processors.
+ Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_TIGERLAKE_SUPPORT
- bool "SOF support for Tigerlake"
+config SND_SOC_SOF_JASPERLAKE
+ tristate "SOF support for JasperLake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_ICL
help
This adds support for Sound Open Firmware for Intel(R) platforms
- using the Tigerlake processors.
+ using the JasperLake processors.
Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_TIGERLAKE
+config SND_SOC_SOF_INTEL_TGL
tristate
select SND_SOC_SOF_HDA_COMMON
select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-config SND_SOC_SOF_ELKHARTLAKE_SUPPORT
- bool "SOF support for ElkhartLake"
+config SND_SOC_SOF_TIGERLAKE
+ tristate "SOF support for Tigerlake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_TGL
help
This adds support for Sound Open Firmware for Intel(R) platforms
- using the ElkhartLake processors.
+ using the Tigerlake processors.
Say Y if you have such a device.
If unsure select "N".
config SND_SOC_SOF_ELKHARTLAKE
- tristate
- select SND_SOC_SOF_HDA_COMMON
- select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_JASPERLAKE_SUPPORT
- bool "SOF support for JasperLake"
+ tristate "SOF support for ElkhartLake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_TGL
help
This adds support for Sound Open Firmware for Intel(R) platforms
- using the JasperLake processors.
+ using the ElkhartLake processors.
Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_JASPERLAKE
- tristate
- select SND_SOC_SOF_HDA_COMMON
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
-
-config SND_SOC_SOF_ALDERLAKE_SUPPORT
- bool "SOF support for Alderlake"
+config SND_SOC_SOF_ALDERLAKE
+ tristate "SOF support for Alderlake"
+ default SND_SOC_SOF_PCI
+ select SND_SOC_SOF_INTEL_TGL
help
This adds support for Sound Open Firmware for Intel(R) platforms
using the Alderlake processors.
Say Y if you have such a device.
If unsure select "N".
-config SND_SOC_SOF_ALDERLAKE
- tristate
- select SND_SOC_SOF_HDA_COMMON
- select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level
-
config SND_SOC_SOF_HDA_COMMON
tristate
- select SND_INTEL_DSP_CONFIG
select SND_SOC_SOF_INTEL_COMMON
+ select SND_SOC_SOF_PCI_DEV
+ select SND_INTEL_DSP_CONFIG
select SND_SOC_SOF_HDA_LINK_BASELINE
help
This option is not user-selectable but automagically handled by
@@ -353,29 +276,22 @@ config SND_SOC_SOF_HDA
This option is not user-selectable but automagically handled by
'select' statements at a higher level.
-config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK
- bool "SOF support for SoundWire"
- depends on ACPI
- help
- This adds support for SoundWire with Sound Open Firmware
- for Intel(R) platforms.
- Say Y if you want to enable SoundWire links with SOF.
- If unsure select "N".
-
config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
tristate
- select SND_SOC_SOF_INTEL_SOUNDWIRE if SND_SOC_SOF_INTEL_SOUNDWIRE_LINK
- help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
config SND_SOC_SOF_INTEL_SOUNDWIRE
- tristate
- select SOUNDWIRE
+ tristate "SOF support for SoundWire"
+ default SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
+ depends on SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
+ depends on ACPI && SOUNDWIRE
+ depends on !(SOUNDWIRE=m && SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=y)
select SOUNDWIRE_INTEL
+ select SND_INTEL_SOUNDWIRE_ACPI
help
- This option is not user-selectable but automagically handled by
- 'select' statements at a higher level.
+ This adds support for SoundWire with Sound Open Firmware
+ for Intel(R) platforms.
+ Say Y if you want to enable SoundWire links with SOF.
+ If unsure select "N".
endif ## SND_SOC_SOF_INTEL_PCI
diff --git a/sound/soc/sof/intel/Makefile b/sound/soc/sof/intel/Makefile
index 2589111c2fae..f3d6f7070fb3 100644
--- a/sound/soc/sof/intel/Makefile
+++ b/sound/soc/sof/intel/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
-snd-sof-intel-byt-objs := byt.o
-snd-sof-intel-bdw-objs := bdw.o
+snd-sof-acpi-intel-byt-objs := byt.o
+snd-sof-acpi-intel-bdw-objs := bdw.o
snd-sof-intel-ipc-objs := intel-ipc.o
@@ -13,8 +13,20 @@ snd-sof-intel-hda-common-$(CONFIG_SND_SOC_SOF_HDA_PROBES) += hda-compress.o
snd-sof-intel-hda-objs := hda-codec.o
-obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += snd-sof-intel-byt.o
-obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-intel-bdw.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += snd-sof-acpi-intel-byt.o
+obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-acpi-intel-bdw.o
obj-$(CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC) += snd-sof-intel-ipc.o
obj-$(CONFIG_SND_SOC_SOF_HDA_COMMON) += snd-sof-intel-hda-common.o
obj-$(CONFIG_SND_SOC_SOF_HDA) += snd-sof-intel-hda.o
+
+snd-sof-pci-intel-tng-objs := pci-tng.o
+snd-sof-pci-intel-apl-objs := pci-apl.o
+snd-sof-pci-intel-cnl-objs := pci-cnl.o
+snd-sof-pci-intel-icl-objs := pci-icl.o
+snd-sof-pci-intel-tgl-objs := pci-tgl.o
+
+obj-$(CONFIG_SND_SOC_SOF_MERRIFIELD) += snd-sof-pci-intel-tng.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_APL) += snd-sof-pci-intel-apl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_CNL) += snd-sof-pci-intel-cnl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_ICL) += snd-sof-pci-intel-icl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_TGL) += snd-sof-pci-intel-tgl.o
diff --git a/sound/soc/sof/intel/bdw.c b/sound/soc/sof/intel/bdw.c
index 50a4a73e6b9f..fd5ae628732d 100644
--- a/sound/soc/sof/intel/bdw.c
+++ b/sound/soc/sof/intel/bdw.c
@@ -15,8 +15,12 @@
#include <linux/module.h>
#include <sound/sof.h>
#include <sound/sof/xtensa.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/intel-dsp-config.h>
#include "../ops.h"
#include "shim.h"
+#include "../sof-acpi-dev.h"
#include "../sof-audio.h"
/* BARs */
@@ -590,7 +594,7 @@ static struct snd_soc_dai_driver bdw_dai[] = {
};
/* broadwell ops */
-const struct snd_sof_dsp_ops sof_bdw_ops = {
+static const struct snd_sof_dsp_ops sof_bdw_ops = {
/*Device init */
.probe = bdw_probe,
@@ -651,14 +655,69 @@ const struct snd_sof_dsp_ops sof_bdw_ops = {
.arch_ops = &sof_xtensa_arch_ops,
};
-EXPORT_SYMBOL_NS(sof_bdw_ops, SND_SOC_SOF_BROADWELL);
-const struct sof_intel_dsp_desc bdw_chip_info = {
+static const struct sof_intel_dsp_desc bdw_chip_info = {
.cores_num = 1,
.host_managed_cores_mask = 1,
};
-EXPORT_SYMBOL_NS(bdw_chip_info, SND_SOC_SOF_BROADWELL);
+
+static const struct sof_dev_desc sof_acpi_broadwell_desc = {
+ .machines = snd_soc_acpi_intel_broadwell_machines,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = 1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = 0,
+ .chip_info = &bdw_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-bdw.ri",
+ .nocodec_tplg_filename = "sof-bdw-nocodec.tplg",
+ .ops = &sof_bdw_ops,
+};
+
+static const struct acpi_device_id sof_broadwell_match[] = {
+ { "INT3438", (unsigned long)&sof_acpi_broadwell_desc },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, sof_broadwell_match);
+
+static int sof_broadwell_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ const struct acpi_device_id *id;
+ const struct sof_dev_desc *desc;
+ int ret;
+
+ id = acpi_match_device(dev->driver->acpi_match_table, dev);
+ if (!id)
+ return -ENODEV;
+
+ ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
+ if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+ dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
+ return -ENODEV;
+ }
+
+ desc = device_get_match_data(dev);
+ if (!desc)
+ return -ENODEV;
+
+ return sof_acpi_probe(pdev, device_get_match_data(dev));
+}
+
+/* acpi_driver definition */
+static struct platform_driver snd_sof_acpi_intel_bdw_driver = {
+ .probe = sof_broadwell_probe,
+ .remove = sof_acpi_remove,
+ .driver = {
+ .name = "sof-audio-acpi-intel-bdw",
+ .pm = &sof_acpi_pm,
+ .acpi_match_table = sof_broadwell_match,
+ },
+};
+module_platform_driver(snd_sof_acpi_intel_bdw_driver);
MODULE_LICENSE("Dual BSD/GPL");
MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);
diff --git a/sound/soc/sof/intel/byt.c b/sound/soc/sof/intel/byt.c
index 19260dbecac5..2846fdec9d95 100644
--- a/sound/soc/sof/intel/byt.c
+++ b/sound/soc/sof/intel/byt.c
@@ -15,8 +15,12 @@
#include <linux/module.h>
#include <sound/sof.h>
#include <sound/sof/xtensa.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/intel-dsp-config.h>
#include "../ops.h"
#include "shim.h"
+#include "../sof-acpi-dev.h"
#include "../sof-audio.h"
#include "../../intel/common/soc-intel-quirks.h"
@@ -822,7 +826,7 @@ irq:
}
/* baytrail ops */
-const struct snd_sof_dsp_ops sof_byt_ops = {
+static const struct snd_sof_dsp_ops sof_byt_ops = {
/* device init */
.probe = byt_acpi_probe,
.remove = byt_remove,
@@ -892,16 +896,14 @@ const struct snd_sof_dsp_ops sof_byt_ops = {
.arch_ops = &sof_xtensa_arch_ops,
};
-EXPORT_SYMBOL_NS(sof_byt_ops, SND_SOC_SOF_BAYTRAIL);
-const struct sof_intel_dsp_desc byt_chip_info = {
+static const struct sof_intel_dsp_desc byt_chip_info = {
.cores_num = 1,
.host_managed_cores_mask = 1,
};
-EXPORT_SYMBOL_NS(byt_chip_info, SND_SOC_SOF_BAYTRAIL);
/* cherrytrail and braswell ops */
-const struct snd_sof_dsp_ops sof_cht_ops = {
+static const struct snd_sof_dsp_ops sof_cht_ops = {
/* device init */
.probe = byt_acpi_probe,
.remove = byt_remove,
@@ -972,16 +974,104 @@ const struct snd_sof_dsp_ops sof_cht_ops = {
.arch_ops = &sof_xtensa_arch_ops,
};
-EXPORT_SYMBOL_NS(sof_cht_ops, SND_SOC_SOF_BAYTRAIL);
-const struct sof_intel_dsp_desc cht_chip_info = {
+static const struct sof_intel_dsp_desc cht_chip_info = {
.cores_num = 1,
.host_managed_cores_mask = 1,
};
-EXPORT_SYMBOL_NS(cht_chip_info, SND_SOC_SOF_BAYTRAIL);
+
+/* BYTCR uses different IRQ index */
+static const struct sof_dev_desc sof_acpi_baytrailcr_desc = {
+ .machines = snd_soc_acpi_intel_baytrail_machines,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = 1,
+ .resindex_imr_base = 2,
+ .irqindex_host_ipc = 0,
+ .chip_info = &byt_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-byt.ri",
+ .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
+ .ops = &sof_byt_ops,
+};
+
+static const struct sof_dev_desc sof_acpi_baytrail_desc = {
+ .machines = snd_soc_acpi_intel_baytrail_machines,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = 1,
+ .resindex_imr_base = 2,
+ .irqindex_host_ipc = 5,
+ .chip_info = &byt_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-byt.ri",
+ .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
+ .ops = &sof_byt_ops,
+};
+
+static const struct sof_dev_desc sof_acpi_cherrytrail_desc = {
+ .machines = snd_soc_acpi_intel_cherrytrail_machines,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = 1,
+ .resindex_imr_base = 2,
+ .irqindex_host_ipc = 5,
+ .chip_info = &cht_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-cht.ri",
+ .nocodec_tplg_filename = "sof-cht-nocodec.tplg",
+ .ops = &sof_cht_ops,
+};
+
+static const struct acpi_device_id sof_baytrail_match[] = {
+ { "80860F28", (unsigned long)&sof_acpi_baytrail_desc },
+ { "808622A8", (unsigned long)&sof_acpi_cherrytrail_desc },
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, sof_baytrail_match);
+
+static int sof_baytrail_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ const struct sof_dev_desc *desc;
+ const struct acpi_device_id *id;
+ int ret;
+
+ id = acpi_match_device(dev->driver->acpi_match_table, dev);
+ if (!id)
+ return -ENODEV;
+
+ ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
+ if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+ dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
+ return -ENODEV;
+ }
+
+ desc = device_get_match_data(&pdev->dev);
+ if (!desc)
+ return -ENODEV;
+
+ if (desc == &sof_acpi_baytrail_desc && soc_intel_is_byt_cr(pdev))
+ desc = &sof_acpi_baytrailcr_desc;
+
+ return sof_acpi_probe(pdev, desc);
+}
+
+/* acpi_driver definition */
+static struct platform_driver snd_sof_acpi_intel_byt_driver = {
+ .probe = sof_baytrail_probe,
+ .remove = sof_acpi_remove,
+ .driver = {
+ .name = "sof-audio-acpi-intel-byt",
+ .pm = &sof_acpi_pm,
+ .acpi_match_table = sof_baytrail_match,
+ },
+};
+module_platform_driver(snd_sof_acpi_intel_byt_driver);
#endif /* CONFIG_SND_SOC_SOF_BAYTRAIL */
MODULE_LICENSE("Dual BSD/GPL");
MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index 0dc3a8c0f5e3..1d29b1fd6a94 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -22,10 +22,12 @@
#include <linux/module.h>
#include <linux/soundwire/sdw.h>
#include <linux/soundwire/sdw_intel.h>
+#include <sound/intel-dsp-config.h>
#include <sound/intel-nhlt.h>
#include <sound/sof.h>
#include <sound/sof/xtensa.h>
#include "../sof-audio.h"
+#include "../sof-pci-dev.h"
#include "../ops.h"
#include "hda.h"
@@ -1258,8 +1260,24 @@ void hda_machine_select(struct snd_sof_dev *sdev)
dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n");
}
+int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+ int ret;
+
+ ret = snd_intel_dsp_driver_probe(pci);
+ if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+ dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n");
+ return -ENODEV;
+ }
+
+ return sof_pci_probe(pci, pci_id);
+}
+EXPORT_SYMBOL_NS(hda_pci_intel_probe, SND_SOC_SOF_INTEL_HDA_COMMON);
+
MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC);
MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915);
MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI);
MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT);
diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
index d1c38c37bc9d..7c7579daee7f 100644
--- a/sound/soc/sof/intel/hda.h
+++ b/sound/soc/sof/intel/hda.h
@@ -764,4 +764,7 @@ void hda_machine_select(struct snd_sof_dev *sdev);
void hda_set_mach_params(const struct snd_soc_acpi_mach *mach,
struct device *dev);
+/* PCI driver selection and probe */
+int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id);
+
#endif
diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c
new file mode 100644
index 000000000000..f89e746c2570
--- /dev/null
+++ b/sound/soc/sof/intel/pci-apl.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license. When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc bxt_desc = {
+ .machines = snd_soc_acpi_intel_bxt_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &apl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-apl.ri",
+ .nocodec_tplg_filename = "sof-apl-nocodec.tplg",
+ .ops = &sof_apl_ops,
+};
+
+static const struct sof_dev_desc glk_desc = {
+ .machines = snd_soc_acpi_intel_glk_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &apl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-glk.ri",
+ .nocodec_tplg_filename = "sof-glk-nocodec.tplg",
+ .ops = &sof_apl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+ { PCI_DEVICE(0x8086, 0x5a98), /* BXT-P (ApolloLake) */
+ .driver_data = (unsigned long)&bxt_desc},
+ { PCI_DEVICE(0x8086, 0x1a98), /* BXT-T */
+ .driver_data = (unsigned long)&bxt_desc},
+ { PCI_DEVICE(0x8086, 0x3198), /* GeminiLake */
+ .driver_data = (unsigned long)&glk_desc},
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_apl_driver = {
+ .name = "sof-audio-pci-intel-apl",
+ .id_table = sof_pci_ids,
+ .probe = hda_pci_intel_probe,
+ .remove = sof_pci_remove,
+ .shutdown = sof_pci_shutdown,
+ .driver = {
+ .pm = &sof_pci_pm,
+ },
+};
+module_pci_driver(snd_sof_pci_intel_apl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c
new file mode 100644
index 000000000000..f23257adf2ab
--- /dev/null
+++ b/sound/soc/sof/intel/pci-cnl.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license. When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc cnl_desc = {
+ .machines = snd_soc_acpi_intel_cnl_machines,
+ .alt_machines = snd_soc_acpi_intel_cnl_sdw_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &cnl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-cnl.ri",
+ .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+ .ops = &sof_cnl_ops,
+};
+
+static const struct sof_dev_desc cfl_desc = {
+ .machines = snd_soc_acpi_intel_cfl_machines,
+ .alt_machines = snd_soc_acpi_intel_cfl_sdw_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &cnl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-cfl.ri",
+ .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+ .ops = &sof_cnl_ops,
+};
+
+static const struct sof_dev_desc cml_desc = {
+ .machines = snd_soc_acpi_intel_cml_machines,
+ .alt_machines = snd_soc_acpi_intel_cml_sdw_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &cnl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-cml.ri",
+ .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+ .ops = &sof_cnl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+ { PCI_DEVICE(0x8086, 0x9dc8), /* CNL-LP */
+ .driver_data = (unsigned long)&cnl_desc},
+ { PCI_DEVICE(0x8086, 0xa348), /* CNL-H */
+ .driver_data = (unsigned long)&cfl_desc},
+ { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */
+ .driver_data = (unsigned long)&cml_desc},
+ { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */
+ .driver_data = (unsigned long)&cml_desc},
+ { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */
+ .driver_data = (unsigned long)&cml_desc},
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_cnl_driver = {
+ .name = "sof-audio-pci-intel-cnl",
+ .id_table = sof_pci_ids,
+ .probe = hda_pci_intel_probe,
+ .remove = sof_pci_remove,
+ .shutdown = sof_pci_shutdown,
+ .driver = {
+ .pm = &sof_pci_pm,
+ },
+};
+module_pci_driver(snd_sof_pci_intel_cnl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c
new file mode 100644
index 000000000000..2f60c28ae81f
--- /dev/null
+++ b/sound/soc/sof/intel/pci-icl.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license. When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc icl_desc = {
+ .machines = snd_soc_acpi_intel_icl_machines,
+ .alt_machines = snd_soc_acpi_intel_icl_sdw_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &icl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-icl.ri",
+ .nocodec_tplg_filename = "sof-icl-nocodec.tplg",
+ .ops = &sof_icl_ops,
+};
+
+static const struct sof_dev_desc jsl_desc = {
+ .machines = snd_soc_acpi_intel_jsl_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &jsl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-jsl.ri",
+ .nocodec_tplg_filename = "sof-jsl-nocodec.tplg",
+ .ops = &sof_cnl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+ { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */
+ .driver_data = (unsigned long)&icl_desc},
+ { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */
+ .driver_data = (unsigned long)&icl_desc},
+ { PCI_DEVICE(0x8086, 0x38c8), /* ICL-N */
+ .driver_data = (unsigned long)&jsl_desc},
+ { PCI_DEVICE(0x8086, 0x4dc8), /* JSL-N */
+ .driver_data = (unsigned long)&jsl_desc},
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_icl_driver = {
+ .name = "sof-audio-pci-intel-icl",
+ .id_table = sof_pci_ids,
+ .probe = hda_pci_intel_probe,
+ .remove = sof_pci_remove,
+ .shutdown = sof_pci_shutdown,
+ .driver = {
+ .pm = &sof_pci_pm,
+ },
+};
+module_pci_driver(snd_sof_pci_intel_icl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c
new file mode 100644
index 000000000000..485607471181
--- /dev/null
+++ b/sound/soc/sof/intel/pci-tgl.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license. When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc tgl_desc = {
+ .machines = snd_soc_acpi_intel_tgl_machines,
+ .alt_machines = snd_soc_acpi_intel_tgl_sdw_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &tgl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-tgl.ri",
+ .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
+ .ops = &sof_tgl_ops,
+};
+
+static const struct sof_dev_desc tglh_desc = {
+ .machines = snd_soc_acpi_intel_tgl_machines,
+ .alt_machines = snd_soc_acpi_intel_tgl_sdw_machines,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &tglh_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-tgl-h.ri",
+ .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
+ .ops = &sof_tgl_ops,
+};
+
+static const struct sof_dev_desc ehl_desc = {
+ .machines = snd_soc_acpi_intel_ehl_machines,
+ .use_acpi_target_states = true,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &ehl_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-ehl.ri",
+ .nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
+ .ops = &sof_cnl_ops,
+};
+
+static const struct sof_dev_desc adls_desc = {
+ .machines = snd_soc_acpi_intel_adl_machines,
+ .alt_machines = snd_soc_acpi_intel_adl_sdw_machines,
+ .resindex_lpe_base = 0,
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = -1,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &adls_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-adl-s.ri",
+ .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
+ .ops = &sof_tgl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+ { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */
+ .driver_data = (unsigned long)&tgl_desc},
+ { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */
+ .driver_data = (unsigned long)&tglh_desc},
+ { PCI_DEVICE(0x8086, 0x4b55), /* EHL */
+ .driver_data = (unsigned long)&ehl_desc},
+ { PCI_DEVICE(0x8086, 0x4b58), /* EHL */
+ .driver_data = (unsigned long)&ehl_desc},
+ { PCI_DEVICE(0x8086, 0x7ad0), /* ADL-S */
+ .driver_data = (unsigned long)&adls_desc},
+ { PCI_DEVICE(0x8086, 0x51c8), /* ADL-P */
+ .driver_data = (unsigned long)&tgl_desc},
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_tgl_driver = {
+ .name = "sof-audio-pci-intel-tgl",
+ .id_table = sof_pci_ids,
+ .probe = hda_pci_intel_probe,
+ .remove = sof_pci_remove,
+ .shutdown = sof_pci_shutdown,
+ .driver = {
+ .pm = &sof_pci_pm,
+ },
+};
+module_pci_driver(snd_sof_pci_intel_tgl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
+
diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c
new file mode 100644
index 000000000000..94b9704c0117
--- /dev/null
+++ b/sound/soc/sof/intel/pci-tng.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license. When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "shim.h"
+
+static struct snd_soc_acpi_mach sof_tng_machines[] = {
+ {
+ .id = "INT343A",
+ .drv_name = "edison",
+ .sof_fw_filename = "sof-byt.ri",
+ .sof_tplg_filename = "sof-byt.tplg",
+ },
+ {}
+};
+
+static const struct sof_dev_desc tng_desc = {
+ .machines = sof_tng_machines,
+ .resindex_lpe_base = 3, /* IRAM, but subtract IRAM offset */
+ .resindex_pcicfg_base = -1,
+ .resindex_imr_base = 0,
+ .irqindex_host_ipc = -1,
+ .resindex_dma_base = -1,
+ .chip_info = &tng_chip_info,
+ .default_fw_path = "intel/sof",
+ .default_tplg_path = "intel/sof-tplg",
+ .default_fw_filename = "sof-byt.ri",
+ .nocodec_tplg_filename = "sof-byt.tplg",
+ .ops = &sof_tng_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+ { PCI_DEVICE(0x8086, 0x119a),
+ .driver_data = (unsigned long)&tng_desc},
+ { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_tng_driver = {
+ .name = "sof-audio-pci-intel-tng",
+ .id_table = sof_pci_ids,
+ .probe = sof_pci_probe,
+ .remove = sof_pci_remove,
+ .shutdown = sof_pci_shutdown,
+ .driver = {
+ .pm = &sof_pci_pm,
+ },
+};
+module_pci_driver(snd_sof_pci_intel_tng_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_MERRIFIELD);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/shim.h b/sound/soc/sof/intel/shim.h
index 1e0afb5c8720..529f68d0ca47 100644
--- a/sound/soc/sof/intel/shim.h
+++ b/sound/soc/sof/intel/shim.h
@@ -167,13 +167,7 @@ struct sof_intel_dsp_desc {
};
extern const struct snd_sof_dsp_ops sof_tng_ops;
-extern const struct snd_sof_dsp_ops sof_byt_ops;
-extern const struct snd_sof_dsp_ops sof_cht_ops;
-extern const struct snd_sof_dsp_ops sof_bdw_ops;
-extern const struct sof_intel_dsp_desc byt_chip_info;
-extern const struct sof_intel_dsp_desc cht_chip_info;
-extern const struct sof_intel_dsp_desc bdw_chip_info;
extern const struct sof_intel_dsp_desc tng_chip_info;
struct sof_intel_stream {
diff --git a/sound/soc/sof/sof-acpi-dev.c b/sound/soc/sof/sof-acpi-dev.c
index cc2e257087e4..1fec0420f662 100644
--- a/sound/soc/sof/sof-acpi-dev.c
+++ b/sound/soc/sof/sof-acpi-dev.c
@@ -12,12 +12,12 @@
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pm_runtime.h>
-#include <sound/intel-dsp-config.h>
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
#include <sound/sof.h>
#include "../intel/common/soc-intel-quirks.h"
#include "ops.h"
+#include "sof-acpi-dev.h"
/* platform specific devices */
#include "intel/shim.h"
@@ -36,74 +36,12 @@ MODULE_PARM_DESC(sof_acpi_debug, "SOF ACPI debug options (0x0 all off)");
#define SOF_ACPI_DISABLE_PM_RUNTIME BIT(0)
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BROADWELL)
-static const struct sof_dev_desc sof_acpi_broadwell_desc = {
- .machines = snd_soc_acpi_intel_broadwell_machines,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = 1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = 0,
- .chip_info = &bdw_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-bdw.ri",
- .nocodec_tplg_filename = "sof-bdw-nocodec.tplg",
- .ops = &sof_bdw_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
-
-/* BYTCR uses different IRQ index */
-static const struct sof_dev_desc sof_acpi_baytrailcr_desc = {
- .machines = snd_soc_acpi_intel_baytrail_machines,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = 1,
- .resindex_imr_base = 2,
- .irqindex_host_ipc = 0,
- .chip_info = &byt_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-byt.ri",
- .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
- .ops = &sof_byt_ops,
-};
-
-static const struct sof_dev_desc sof_acpi_baytrail_desc = {
- .machines = snd_soc_acpi_intel_baytrail_machines,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = 1,
- .resindex_imr_base = 2,
- .irqindex_host_ipc = 5,
- .chip_info = &byt_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-byt.ri",
- .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
- .ops = &sof_byt_ops,
-};
-
-static const struct sof_dev_desc sof_acpi_cherrytrail_desc = {
- .machines = snd_soc_acpi_intel_cherrytrail_machines,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = 1,
- .resindex_imr_base = 2,
- .irqindex_host_ipc = 5,
- .chip_info = &cht_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-cht.ri",
- .nocodec_tplg_filename = "sof-cht-nocodec.tplg",
- .ops = &sof_cht_ops,
-};
-
-#endif
-
-static const struct dev_pm_ops sof_acpi_pm = {
+const struct dev_pm_ops sof_acpi_pm = {
SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume)
SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume,
snd_sof_runtime_idle)
};
+EXPORT_SYMBOL_NS(sof_acpi_pm, SND_SOC_SOF_ACPI_DEV);
static void sof_acpi_probe_complete(struct device *dev)
{
@@ -118,41 +56,19 @@ static void sof_acpi_probe_complete(struct device *dev)
pm_runtime_enable(dev);
}
-static int sof_acpi_probe(struct platform_device *pdev)
+int sof_acpi_probe(struct platform_device *pdev, const struct sof_dev_desc *desc)
{
struct device *dev = &pdev->dev;
- const struct acpi_device_id *id;
- const struct sof_dev_desc *desc;
struct snd_sof_pdata *sof_pdata;
const struct snd_sof_dsp_ops *ops;
int ret;
- id = acpi_match_device(dev->driver->acpi_match_table, dev);
- if (!id)
- return -ENODEV;
-
- if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) {
- ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
- if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
- dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
- return -ENODEV;
- }
- }
dev_dbg(dev, "ACPI DSP detected");
sof_pdata = devm_kzalloc(dev, sizeof(*sof_pdata), GFP_KERNEL);
if (!sof_pdata)
return -ENOMEM;
- desc = device_get_match_data(dev);
- if (!desc)
- return -ENODEV;
-
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
- if (desc == &sof_acpi_baytrail_desc && soc_intel_is_byt_cr(pdev))
- desc = &sof_acpi_baytrailcr_desc;
-#endif
-
/* get ops for platform */
ops = desc->ops;
if (!ops) {
@@ -194,44 +110,20 @@ static int sof_acpi_probe(struct platform_device *pdev)
return ret;
}
+EXPORT_SYMBOL_NS(sof_acpi_probe, SND_SOC_SOF_ACPI_DEV);
-static int sof_acpi_remove(struct platform_device *pdev)
+int sof_acpi_remove(struct platform_device *pdev)
{
+ struct device *dev = &pdev->dev;
+
if (!(sof_acpi_debug & SOF_ACPI_DISABLE_PM_RUNTIME))
- pm_runtime_disable(&pdev->dev);
+ pm_runtime_disable(dev);
/* call sof helper for DSP hardware remove */
- snd_sof_device_remove(&pdev->dev);
+ snd_sof_device_remove(dev);
return 0;
}
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id sof_acpi_match[] = {
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_BROADWELL)
- { "INT3438", (unsigned long)&sof_acpi_broadwell_desc },
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
- { "80860F28", (unsigned long)&sof_acpi_baytrail_desc },
- { "808622A8", (unsigned long)&sof_acpi_cherrytrail_desc },
-#endif
- { }
-};
-MODULE_DEVICE_TABLE(acpi, sof_acpi_match);
-#endif
-
-/* acpi_driver definition */
-static struct platform_driver snd_sof_acpi_driver = {
- .probe = sof_acpi_probe,
- .remove = sof_acpi_remove,
- .driver = {
- .name = "sof-audio-acpi",
- .pm = &sof_acpi_pm,
- .acpi_match_table = ACPI_PTR(sof_acpi_match),
- },
-};
-module_platform_driver(snd_sof_acpi_driver);
+EXPORT_SYMBOL_NS(sof_acpi_remove, SND_SOC_SOF_ACPI_DEV);
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_IMPORT_NS(SND_SOC_SOF_BAYTRAIL);
-MODULE_IMPORT_NS(SND_SOC_SOF_BROADWELL);
diff --git a/sound/soc/sof/sof-acpi-dev.h b/sound/soc/sof/sof-acpi-dev.h
new file mode 100644
index 000000000000..5c2b558d2ace
--- /dev/null
+++ b/sound/soc/sof/sof-acpi-dev.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __SOUND_SOC_SOF_ACPI_H
+#define __SOUND_SOC_SOF_ACPI_H
+
+extern const struct dev_pm_ops sof_acpi_pm;
+int sof_acpi_probe(struct platform_device *pdev, const struct sof_dev_desc *desc);
+int sof_acpi_remove(struct platform_device *pdev);
+
+#endif
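With sof_acpi_probe() now taking the descriptor explicitly and the helpers exported in the SND_SOC_SOF_ACPI_DEV namespace, each platform module resolves its own match data before delegating, exactly as sof_baytrail_probe() does above. A minimal consumer sketch follows; the "xyz" platform, driver name, and match data are hypothetical:

#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/property.h>
#include <sound/sof.h>
#include "sof-acpi-dev.h"

static int sof_xyz_probe(struct platform_device *pdev)
{
	/* the platform module, not the core, now resolves the descriptor */
	const struct sof_dev_desc *desc = device_get_match_data(&pdev->dev);

	return desc ? sof_acpi_probe(pdev, desc) : -ENODEV;
}

static struct platform_driver sof_xyz_driver = {
	.probe = sof_xyz_probe,
	.remove = sof_acpi_remove,
	.driver = {
		.name = "sof-audio-acpi-xyz",	/* hypothetical */
		.pm = &sof_acpi_pm,
	},
};
module_platform_driver(sof_xyz_driver);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);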
diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c
index fd1f0d8c2853..b842a414e1df 100644
--- a/sound/soc/sof/sof-pci-dev.c
+++ b/sound/soc/sof/sof-pci-dev.c
@@ -13,15 +13,11 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
-#include <sound/intel-dsp-config.h>
#include <sound/soc-acpi.h>
#include <sound/soc-acpi-intel-match.h>
#include <sound/sof.h>
#include "ops.h"
-
-/* platform specific devices */
-#include "intel/shim.h"
-#include "intel/hda.h"
+#include "sof-pci-dev.h"
static char *fw_path;
module_param(fw_path, charp, 0444);
@@ -81,243 +77,14 @@ static const struct dmi_system_id community_key_platforms[] = {
{},
};
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE)
-static const struct sof_dev_desc bxt_desc = {
- .machines = snd_soc_acpi_intel_bxt_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &apl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-apl.ri",
- .nocodec_tplg_filename = "sof-apl-nocodec.tplg",
- .ops = &sof_apl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE)
-static const struct sof_dev_desc glk_desc = {
- .machines = snd_soc_acpi_intel_glk_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &apl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-glk.ri",
- .nocodec_tplg_filename = "sof-glk-nocodec.tplg",
- .ops = &sof_apl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_MERRIFIELD)
-static struct snd_soc_acpi_mach sof_tng_machines[] = {
- {
- .id = "INT343A",
- .drv_name = "edison",
- .sof_fw_filename = "sof-byt.ri",
- .sof_tplg_filename = "sof-byt.tplg",
- },
- {}
-};
-
-static const struct sof_dev_desc tng_desc = {
- .machines = sof_tng_machines,
- .resindex_lpe_base = 3, /* IRAM, but subtract IRAM offset */
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = 0,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &tng_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-byt.ri",
- .nocodec_tplg_filename = "sof-byt.tplg",
- .ops = &sof_tng_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_CANNONLAKE)
-static const struct sof_dev_desc cnl_desc = {
- .machines = snd_soc_acpi_intel_cnl_machines,
- .alt_machines = snd_soc_acpi_intel_cnl_sdw_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &cnl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-cnl.ri",
- .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
- .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COFFEELAKE)
-static const struct sof_dev_desc cfl_desc = {
- .machines = snd_soc_acpi_intel_cfl_machines,
- .alt_machines = snd_soc_acpi_intel_cfl_sdw_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &cnl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-cfl.ri",
- .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
- .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE)
-static const struct sof_dev_desc cml_desc = {
- .machines = snd_soc_acpi_intel_cml_machines,
- .alt_machines = snd_soc_acpi_intel_cml_sdw_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &cnl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-cml.ri",
- .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
- .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE)
-static const struct sof_dev_desc icl_desc = {
- .machines = snd_soc_acpi_intel_icl_machines,
- .alt_machines = snd_soc_acpi_intel_icl_sdw_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &icl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-icl.ri",
- .nocodec_tplg_filename = "sof-icl-nocodec.tplg",
- .ops = &sof_icl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) || IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
-static const struct sof_dev_desc tgl_desc = {
- .machines = snd_soc_acpi_intel_tgl_machines,
- .alt_machines = snd_soc_acpi_intel_tgl_sdw_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &tgl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-tgl.ri",
- .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
- .ops = &sof_tgl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
-static const struct sof_dev_desc tglh_desc = {
- .machines = snd_soc_acpi_intel_tgl_machines,
- .alt_machines = snd_soc_acpi_intel_tgl_sdw_machines,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &tglh_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-tgl-h.ri",
- .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
- .ops = &sof_tgl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE)
-static const struct sof_dev_desc ehl_desc = {
- .machines = snd_soc_acpi_intel_ehl_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &ehl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-ehl.ri",
- .nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
- .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
-static const struct sof_dev_desc jsl_desc = {
- .machines = snd_soc_acpi_intel_jsl_machines,
- .use_acpi_target_states = true,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &jsl_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-jsl.ri",
- .nocodec_tplg_filename = "sof-jsl-nocodec.tplg",
- .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
-static const struct sof_dev_desc adls_desc = {
- .machines = snd_soc_acpi_intel_adl_machines,
- .alt_machines = snd_soc_acpi_intel_adl_sdw_machines,
- .resindex_lpe_base = 0,
- .resindex_pcicfg_base = -1,
- .resindex_imr_base = -1,
- .irqindex_host_ipc = -1,
- .resindex_dma_base = -1,
- .chip_info = &adls_chip_info,
- .default_fw_path = "intel/sof",
- .default_tplg_path = "intel/sof-tplg",
- .default_fw_filename = "sof-adl-s.ri",
- .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
- .ops = &sof_tgl_ops,
-};
-#endif
-
-static const struct dev_pm_ops sof_pci_pm = {
+const struct dev_pm_ops sof_pci_pm = {
.prepare = snd_sof_prepare,
.complete = snd_sof_complete,
SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume)
SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume,
snd_sof_runtime_idle)
};
+EXPORT_SYMBOL_NS(sof_pci_pm, SND_SOC_SOF_PCI_DEV);
static void sof_pci_probe_complete(struct device *dev)
{
@@ -343,8 +110,7 @@ static void sof_pci_probe_complete(struct device *dev)
pm_runtime_put_noidle(dev);
}
-static int sof_pci_probe(struct pci_dev *pci,
- const struct pci_device_id *pci_id)
+int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
{
struct device *dev = &pci->dev;
const struct sof_dev_desc *desc =
@@ -353,13 +119,6 @@ static int sof_pci_probe(struct pci_dev *pci,
const struct snd_sof_dsp_ops *ops;
int ret;
- if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) {
- ret = snd_intel_dsp_driver_probe(pci);
- if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
- dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n");
- return -ENODEV;
- }
- }
dev_dbg(&pci->dev, "PCI DSP detected");
/* get ops for platform */
@@ -447,8 +206,9 @@ release_regions:
return ret;
}
+EXPORT_SYMBOL_NS(sof_pci_probe, SND_SOC_SOF_PCI_DEV);
-static void sof_pci_remove(struct pci_dev *pci)
+void sof_pci_remove(struct pci_dev *pci)
{
/* call sof helper for DSP hardware remove */
snd_sof_device_remove(&pci->dev);
@@ -461,94 +221,12 @@ static void sof_pci_remove(struct pci_dev *pci)
/* release pci regions and disable device */
pci_release_regions(pci);
}
+EXPORT_SYMBOL_NS(sof_pci_remove, SND_SOC_SOF_PCI_DEV);
-static void sof_pci_shutdown(struct pci_dev *pci)
+void sof_pci_shutdown(struct pci_dev *pci)
{
snd_sof_device_shutdown(&pci->dev);
}
-
-/* PCI IDs */
-static const struct pci_device_id sof_pci_ids[] = {
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_MERRIFIELD)
- { PCI_DEVICE(0x8086, 0x119a),
- .driver_data = (unsigned long)&tng_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE)
- /* BXT-P & Apollolake */
- { PCI_DEVICE(0x8086, 0x5a98),
- .driver_data = (unsigned long)&bxt_desc},
- { PCI_DEVICE(0x8086, 0x1a98),
- .driver_data = (unsigned long)&bxt_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE)
- { PCI_DEVICE(0x8086, 0x3198),
- .driver_data = (unsigned long)&glk_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_CANNONLAKE)
- { PCI_DEVICE(0x8086, 0x9dc8),
- .driver_data = (unsigned long)&cnl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COFFEELAKE)
- { PCI_DEVICE(0x8086, 0xa348),
- .driver_data = (unsigned long)&cfl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE)
- { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */
- .driver_data = (unsigned long)&icl_desc},
- { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */
- .driver_data = (unsigned long)&icl_desc},
-
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
- { PCI_DEVICE(0x8086, 0x38c8),
- .driver_data = (unsigned long)&jsl_desc},
- { PCI_DEVICE(0x8086, 0x4dc8),
- .driver_data = (unsigned long)&jsl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE)
- { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */
- .driver_data = (unsigned long)&cml_desc},
- { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */
- .driver_data = (unsigned long)&cml_desc},
- { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */
- .driver_data = (unsigned long)&cml_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
- { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */
- .driver_data = (unsigned long)&tgl_desc},
- { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */
- .driver_data = (unsigned long)&tglh_desc},
-
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE)
- { PCI_DEVICE(0x8086, 0x4b55),
- .driver_data = (unsigned long)&ehl_desc},
- { PCI_DEVICE(0x8086, 0x4b58),
- .driver_data = (unsigned long)&ehl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
- { PCI_DEVICE(0x8086, 0x7ad0),
- .driver_data = (unsigned long)&adls_desc},
- { PCI_DEVICE(0x8086, 0x51c8),
- .driver_data = (unsigned long)&tgl_desc},
-#endif
- { 0, }
-};
-MODULE_DEVICE_TABLE(pci, sof_pci_ids);
-
-/* pci_driver definition */
-static struct pci_driver snd_sof_pci_driver = {
- .name = "sof-audio-pci",
- .id_table = sof_pci_ids,
- .probe = sof_pci_probe,
- .remove = sof_pci_remove,
- .shutdown = sof_pci_shutdown,
- .driver = {
- .pm = &sof_pci_pm,
- },
-};
-module_pci_driver(snd_sof_pci_driver);
+EXPORT_SYMBOL_NS(sof_pci_shutdown, SND_SOC_SOF_PCI_DEV);
MODULE_LICENSE("Dual BSD/GPL");
-MODULE_IMPORT_NS(SND_SOC_SOF_MERRIFIELD);
-MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
diff --git a/sound/soc/sof/sof-pci-dev.h b/sound/soc/sof/sof-pci-dev.h
new file mode 100644
index 000000000000..81155a59e63a
--- /dev/null
+++ b/sound/soc/sof/sof-pci-dev.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __SOUND_SOC_SOF_PCI_H
+#define __SOUND_SOC_SOF_PCI_H
+
+extern const struct dev_pm_ops sof_pci_pm;
+int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id);
+void sof_pci_remove(struct pci_dev *pci);
+void sof_pci_shutdown(struct pci_dev *pci);
+
+#endif
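The PCI side mirrors this: sof_pci_probe/remove/shutdown and sof_pci_pm are exported under SND_SOC_SOF_PCI_DEV, so a per-platform module only adds its ID table and driver name, as pci-tng.c does above. A sketch with a hypothetical device ID; a real table would point .driver_data at a sof_dev_desc:

#include <linux/module.h>
#include <linux/pci.h>
#include "sof-pci-dev.h"

static const struct pci_device_id sof_xyz_ids[] = {
	{ PCI_DEVICE(0x8086, 0xffff) },	/* hypothetical ID; set .driver_data */
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, sof_xyz_ids);

static struct pci_driver sof_xyz_pci_driver = {
	.name = "sof-audio-pci-xyz",	/* hypothetical */
	.id_table = sof_xyz_ids,
	.probe = sof_pci_probe,	/* HDA platforms wrap this via hda_pci_intel_probe */
	.remove = sof_pci_remove,
	.shutdown = sof_pci_shutdown,
	.driver = { .pm = &sof_pci_pm },
};
module_pci_driver(sof_xyz_pci_driver);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);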
diff --git a/sound/usb/clock.c b/sound/usb/clock.c
index 8243652d5604..a746802d0ac3 100644
--- a/sound/usb/clock.c
+++ b/sound/usb/clock.c
@@ -652,10 +652,10 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip,
cur_rate = prev_rate;
if (cur_rate != rate) {
- usb_audio_warn(chip,
- "%d:%d: freq mismatch (RO clock): req %d, clock runs @%d\n",
- fmt->iface, fmt->altsetting, rate, cur_rate);
- return -ENXIO;
+ usb_audio_dbg(chip,
+ "%d:%d: freq mismatch: req %d, clock runs @%d\n",
+ fmt->iface, fmt->altsetting, rate, cur_rate);
+ /* continue processing */
}
validation:
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index b1c78db0d470..b004b2e63a5d 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -1307,6 +1307,17 @@ no_res_check:
/* totally crap, return an error */
return -EINVAL;
}
+ } else {
+ /* if the max volume is too low, it's likely a bogus range;
+ * here we use -96dB as the threshold
+ */
+ if (cval->dBmax <= -9600) {
+ usb_audio_info(cval->head.mixer->chip,
+ "%d:%d: bogus dB values (%d/%d), disabling dB reporting\n",
+ cval->head.id, mixer_ctrl_intf(cval->head.mixer),
+ cval->dBmin, cval->dBmax);
+ cval->dBmin = cval->dBmax = 0;
+ }
}
return 0;
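For orientation: cval->dBmin/dBmax carry values in 1/100 dB steps (the ALSA TLV convention), which is why the -9600 cutoff in the hunk above corresponds to -96 dB. An illustrative conversion helper, not part of the patch:

/* illustrative only: TLV dB fields are stored in 1/100 dB units */
static inline int tlv_to_whole_db(int tlv_val)
{
	return tlv_val / 100;	/* -9600 -> -96 dB */
}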
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index a7212f16660e..646deb6244b1 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -537,6 +537,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
.map = bose_companion5_map,
},
{
+ /* Corsair Virtuoso SE (wired mode) */
+ .id = USB_ID(0x1b1c, 0x0a3d),
+ .map = corsair_virtuoso_map,
+ },
+ {
+ /* Corsair Virtuoso SE (wireless mode) */
+ .id = USB_ID(0x1b1c, 0x0a3e),
+ .map = corsair_virtuoso_map,
+ },
+ {
/* Corsair Virtuoso (wired mode) */
.id = USB_ID(0x1b1c, 0x0a41),
.map = corsair_virtuoso_map,
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index bf5a0f3c1fad..e5311b6bb3f6 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -845,13 +845,19 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs)
list_for_each_entry(fp, &subs->fmt_list, list) {
ep = snd_usb_get_endpoint(chip, fp->endpoint);
- if (ep && ep->cur_rate)
- return ep;
+ if (ep && ep->cur_audiofmt) {
+ /* if the EP is already opened solely for this substream,
+ * we can still change the parameters; otherwise this
+ * substream has to follow the existing parameters
+ */
+ if (ep->cur_audiofmt != subs->cur_audiofmt || ep->opened > 1)
+ return ep;
+ }
if (!fp->implicit_fb)
continue;
/* for the implicit fb, check the sync ep as well */
ep = snd_usb_get_endpoint(chip, fp->sync_ep);
- if (ep && ep->cur_rate)
+ if (ep && ep->cur_audiofmt)
return ep;
}
return NULL;
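The condition added in the first hunk reads as: an endpoint's parameters are only negotiable while it serves this substream's format alone. A restatement as a predicate, with the helper name hypothetical:

/* sketch restating the hunk's reuse condition; not part of the patch */
static bool ep_params_locked(const struct snd_usb_endpoint *ep,
			     const struct snd_usb_substream *subs)
{
	return ep->cur_audiofmt &&
	       (ep->cur_audiofmt != subs->cur_audiofmt || ep->opened > 1);
}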
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 9ba4682ebc48..737b2729c0d3 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1482,7 +1482,7 @@ static int pioneer_djm_set_format_quirk(struct snd_usb_substream *subs,
usb_set_interface(subs->dev, 0, 1);
// we should derive windex from fmt->sync_ep but it's not set
snd_usb_ctl_msg(subs->stream->chip->dev,
- usb_rcvctrlpipe(subs->stream->chip->dev, 0),
+ usb_sndctrlpipe(subs->stream->chip->dev, 0),
0x01, 0x22, 0x0100, windex, &sr, 0x0003);
return 0;
}
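The one-liner above is a direction fix: requesttype 0x22 is USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_ENDPOINT, so the message belongs on a send control pipe. A hedged helper expressing the invariant, not part of the patch:

#include <linux/usb.h>

/* sketch: the control pipe direction must agree with bRequestType */
static bool pipe_matches_reqtype(unsigned int pipe, __u8 requesttype)
{
	return !!usb_pipein(pipe) == !!(requesttype & USB_DIR_IN);
}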
diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h
index 543dd70e12c8..ad64d673b5e6 100644
--- a/tools/arch/s390/include/uapi/asm/ptrace.h
+++ b/tools/arch/s390/include/uapi/asm/ptrace.h
@@ -179,8 +179,9 @@
#define ACR_SIZE 4
-#define PTRACE_OLDSETOPTIONS 21
-
+#define PTRACE_OLDSETOPTIONS 21
+#define PTRACE_SYSEMU 31
+#define PTRACE_SYSEMU_SINGLESTEP 32
#ifndef __ASSEMBLY__
#include <linux/stddef.h>
#include <linux/types.h>
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 84b887825f12..cc96e26d69f7 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 19 /* N 32-bit words worth of info */
+#define NCAPINTS 20 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */
/*
@@ -96,7 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */
+/* FREE! ( 3*32+17) */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -201,7 +201,7 @@
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
+/* FREE! ( 7*32+10) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
@@ -211,7 +211,7 @@
#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */
+/* FREE! ( 7*32+20) */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
@@ -236,8 +236,6 @@
#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -294,6 +292,7 @@
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
@@ -337,6 +336,7 @@
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
@@ -385,6 +385,13 @@
#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
+#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
+
/*
* BUG word(s)
*/
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 8e76d3701db3..5a3022c8af82 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -112,6 +112,7 @@ struct kvm_ioapic_state {
#define KVM_NR_IRQCHIPS 3
#define KVM_RUN_X86_SMM (1 << 0)
+#define KVM_RUN_X86_BUS_LOCK (1 << 1)
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index ada955c5ebb6..b8e650a985e3 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -89,6 +89,7 @@
#define EXIT_REASON_XRSTORS 64
#define EXIT_REASON_UMWAIT 67
#define EXIT_REASON_TPAUSE 68
+#define EXIT_REASON_BUS_LOCK 74
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -150,7 +151,8 @@
{ EXIT_REASON_XSAVES, "XSAVES" }, \
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
- { EXIT_REASON_TPAUSE, "TPAUSE" }
+ { EXIT_REASON_TPAUSE, "TPAUSE" }, \
+ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
diff --git a/tools/build/Makefile b/tools/build/Makefile
index bae48e6fa995..5ed41b96fcde 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -30,12 +30,18 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
all: $(OUTPUT)fixdep
+# Make sure there is something to clean;
+# the feature Makefile contains a check for an existing OUTPUT
+TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./)
+
clean:
$(call QUIET_CLEAN, fixdep)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)rm -f $(OUTPUT)fixdep
$(call QUIET_CLEAN, feature-detect)
- $(Q)$(MAKE) -C feature/ clean >/dev/null
+ifneq ($(wildcard $(TMP_O)),)
+ $(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null
+endif
$(OUTPUT)fixdep-in.o: FORCE
$(Q)$(MAKE) $(build)=fixdep
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
index b0e35eec6499..4ac5c081af93 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -10,17 +10,27 @@
#define CORESIGHT_ETM_PMU_NAME "cs_etm"
#define CORESIGHT_ETM_PMU_SEED 0x10
-/* ETMv3.5/PTM's ETMCR config bit */
-#define ETM_OPT_CYCACC 12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS 28
-#define ETM_OPT_RETSTK 29
+/*
+ * Below are the definitions of the bit offsets for the perf options; they
+ * work as arbitrary values across all ETM versions.
+ *
+ * Most of them originally come from ETMv3.5/PTM's ETMCR config; therefore,
+ * ETMv3.5/PTM doesn't define ETMCR config bits with an "ETM3_" prefix and
+ * directly uses the macros below as config bits.
+ */
+#define ETM_OPT_CYCACC 12
+#define ETM_OPT_CTXTID 14
+#define ETM_OPT_CTXTID2 15
+#define ETM_OPT_TS 28
+#define ETM_OPT_RETSTK 29
/* ETMv4 CONFIGR programming bits for the ETM OPTs */
#define ETM4_CFG_BIT_CYCACC 4
#define ETM4_CFG_BIT_CTXTID 6
+#define ETM4_CFG_BIT_VMID 7
#define ETM4_CFG_BIT_TS 11
#define ETM4_CFG_BIT_RETSTK 12
+#define ETM4_CFG_BIT_VMID_OPT 15
static inline int coresight_get_trace_id(int cpu)
{
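These offsets end up in a perf event's config word; a minimal sketch using the new context-ID bit, illustrative only:

#include <linux/bits.h>
#include <linux/types.h>

/* sketch: compose a perf config word from the option bit offsets above */
static inline u64 etm_example_config(void)
{
	return BIT_ULL(ETM_OPT_TS) | BIT_ULL(ETM_OPT_CTXTID2);
}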
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 808b48a93330..0827037c5484 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -1,11 +1,10 @@
-/**
- * \file drm.h
+/*
* Header for the Direct Rendering Manager
*
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
+ * Author: Rickard E. (Rik) Faith <faith@valinux.com>
*
- * \par Acknowledgments:
- * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
+ * Acknowledgments:
+ * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic cmpxchg.
*/
/*
@@ -85,7 +84,7 @@ typedef unsigned int drm_context_t;
typedef unsigned int drm_drawable_t;
typedef unsigned int drm_magic_t;
-/**
+/*
* Cliprect.
*
* \warning: If you change this structure, make sure you change
@@ -101,7 +100,7 @@ struct drm_clip_rect {
unsigned short y2;
};
-/**
+/*
* Drawable information.
*/
struct drm_drawable_info {
@@ -109,7 +108,7 @@ struct drm_drawable_info {
struct drm_clip_rect *rects;
};
-/**
+/*
* Texture region,
*/
struct drm_tex_region {
@@ -120,7 +119,7 @@ struct drm_tex_region {
unsigned int age;
};
-/**
+/*
* Hardware lock.
*
* The lock structure is a simple cache-line aligned integer. To avoid
@@ -132,7 +131,7 @@ struct drm_hw_lock {
char padding[60]; /**< Pad to cache line */
};
-/**
+/*
* DRM_IOCTL_VERSION ioctl argument type.
*
* \sa drmGetVersion().
@@ -149,7 +148,7 @@ struct drm_version {
char __user *desc; /**< User-space buffer to hold desc */
};
-/**
+/*
* DRM_IOCTL_GET_UNIQUE ioctl argument type.
*
* \sa drmGetBusid() and drmSetBusId().
@@ -168,7 +167,7 @@ struct drm_block {
int unused;
};
-/**
+/*
* DRM_IOCTL_CONTROL ioctl argument type.
*
* \sa drmCtlInstHandler() and drmCtlUninstHandler().
@@ -183,7 +182,7 @@ struct drm_control {
int irq;
};
-/**
+/*
* Type of memory to map.
*/
enum drm_map_type {
@@ -195,7 +194,7 @@ enum drm_map_type {
_DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */
};
-/**
+/*
* Memory mapping flags.
*/
enum drm_map_flags {
@@ -214,7 +213,7 @@ struct drm_ctx_priv_map {
void *handle; /**< Handle of map */
};
-/**
+/*
* DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
* argument type.
*
@@ -231,7 +230,7 @@ struct drm_map {
/* Private data */
};
-/**
+/*
* DRM_IOCTL_GET_CLIENT ioctl argument type.
*/
struct drm_client {
@@ -263,7 +262,7 @@ enum drm_stat_type {
/* Add to the *END* of the list */
};
-/**
+/*
* DRM_IOCTL_GET_STATS ioctl argument type.
*/
struct drm_stats {
@@ -274,7 +273,7 @@ struct drm_stats {
} data[15];
};
-/**
+/*
* Hardware locking flags.
*/
enum drm_lock_flags {
@@ -289,7 +288,7 @@ enum drm_lock_flags {
_DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */
};
-/**
+/*
* DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
*
* \sa drmGetLock() and drmUnlock().
@@ -299,7 +298,7 @@ struct drm_lock {
enum drm_lock_flags flags;
};
-/**
+/*
* DMA flags
*
* \warning
@@ -328,7 +327,7 @@ enum drm_dma_flags {
_DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */
};
-/**
+/*
* DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
*
* \sa drmAddBufs().
@@ -351,7 +350,7 @@ struct drm_buf_desc {
*/
};
-/**
+/*
* DRM_IOCTL_INFO_BUFS ioctl argument type.
*/
struct drm_buf_info {
@@ -359,7 +358,7 @@ struct drm_buf_info {
struct drm_buf_desc __user *list;
};
-/**
+/*
* DRM_IOCTL_FREE_BUFS ioctl argument type.
*/
struct drm_buf_free {
@@ -367,7 +366,7 @@ struct drm_buf_free {
int __user *list;
};
-/**
+/*
* Buffer information
*
* \sa drm_buf_map.
@@ -379,7 +378,7 @@ struct drm_buf_pub {
void __user *address; /**< Address of buffer */
};
-/**
+/*
* DRM_IOCTL_MAP_BUFS ioctl argument type.
*/
struct drm_buf_map {
@@ -392,7 +391,7 @@ struct drm_buf_map {
struct drm_buf_pub __user *list; /**< Buffer information */
};
-/**
+/*
* DRM_IOCTL_DMA ioctl argument type.
*
* Indices here refer to the offset into the buffer list in drm_buf_get.
@@ -417,7 +416,7 @@ enum drm_ctx_flags {
_DRM_CONTEXT_2DONLY = 0x02
};
-/**
+/*
* DRM_IOCTL_ADD_CTX ioctl argument type.
*
* \sa drmCreateContext() and drmDestroyContext().
@@ -427,7 +426,7 @@ struct drm_ctx {
enum drm_ctx_flags flags;
};
-/**
+/*
* DRM_IOCTL_RES_CTX ioctl argument type.
*/
struct drm_ctx_res {
@@ -435,14 +434,14 @@ struct drm_ctx_res {
struct drm_ctx __user *contexts;
};
-/**
+/*
* DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
*/
struct drm_draw {
drm_drawable_t handle;
};
-/**
+/*
* DRM_IOCTL_UPDATE_DRAW ioctl argument type.
*/
typedef enum {
@@ -456,14 +455,14 @@ struct drm_update_draw {
unsigned long long data;
};
-/**
+/*
* DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
*/
struct drm_auth {
drm_magic_t magic;
};
-/**
+/*
* DRM_IOCTL_IRQ_BUSID ioctl argument type.
*
* \sa drmGetInterruptFromBusID().
@@ -505,7 +504,7 @@ struct drm_wait_vblank_reply {
long tval_usec;
};
-/**
+/*
* DRM_IOCTL_WAIT_VBLANK ioctl argument type.
*
* \sa drmWaitVBlank().
@@ -518,7 +517,7 @@ union drm_wait_vblank {
#define _DRM_PRE_MODESET 1
#define _DRM_POST_MODESET 2
-/**
+/*
* DRM_IOCTL_MODESET_CTL ioctl argument type
*
* \sa drmModesetCtl().
@@ -528,7 +527,7 @@ struct drm_modeset_ctl {
__u32 cmd;
};
-/**
+/*
* DRM_IOCTL_AGP_ENABLE ioctl argument type.
*
* \sa drmAgpEnable().
@@ -537,7 +536,7 @@ struct drm_agp_mode {
unsigned long mode; /**< AGP mode */
};
-/**
+/*
* DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
*
* \sa drmAgpAlloc() and drmAgpFree().
@@ -549,7 +548,7 @@ struct drm_agp_buffer {
unsigned long physical; /**< Physical used by i810 */
};
-/**
+/*
* DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
*
* \sa drmAgpBind() and drmAgpUnbind().
@@ -559,7 +558,7 @@ struct drm_agp_binding {
unsigned long offset; /**< In bytes -- will round to page boundary */
};
-/**
+/*
* DRM_IOCTL_AGP_INFO ioctl argument type.
*
* \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
@@ -580,7 +579,7 @@ struct drm_agp_info {
unsigned short id_device;
};
-/**
+/*
* DRM_IOCTL_SG_ALLOC ioctl argument type.
*/
struct drm_scatter_gather {
@@ -588,7 +587,7 @@ struct drm_scatter_gather {
unsigned long handle; /**< Used for mapping / unmapping */
};
-/**
+/*
* DRM_IOCTL_SET_VERSION ioctl argument type.
*/
struct drm_set_version {
@@ -598,14 +597,14 @@ struct drm_set_version {
int drm_dd_minor;
};
-/** DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
struct drm_gem_close {
/** Handle of the object to be closed. */
__u32 handle;
__u32 pad;
};
-/** DRM_IOCTL_GEM_FLINK ioctl argument type */
+/* DRM_IOCTL_GEM_FLINK ioctl argument type */
struct drm_gem_flink {
/** Handle for the object being named */
__u32 handle;
@@ -614,7 +613,7 @@ struct drm_gem_flink {
__u32 name;
};
-/** DRM_IOCTL_GEM_OPEN ioctl argument type */
+/* DRM_IOCTL_GEM_OPEN ioctl argument type */
struct drm_gem_open {
/** Name of object being opened */
__u32 name;
@@ -652,7 +651,7 @@ struct drm_gem_open {
#define DRM_CAP_SYNCOBJ 0x13
#define DRM_CAP_SYNCOBJ_TIMELINE 0x14
-/** DRM_IOCTL_GET_CAP ioctl argument type */
+/* DRM_IOCTL_GET_CAP ioctl argument type */
struct drm_get_cap {
__u64 capability;
__u64 value;
@@ -678,7 +677,9 @@ struct drm_get_cap {
/**
* DRM_CLIENT_CAP_ATOMIC
*
- * If set to 1, the DRM core will expose atomic properties to userspace
+ * If set to 1, the DRM core will expose atomic properties to userspace. This
+ * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and
+ * &DRM_CLIENT_CAP_ASPECT_RATIO.
*/
#define DRM_CLIENT_CAP_ATOMIC 3
@@ -698,7 +699,7 @@ struct drm_get_cap {
*/
#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5
-/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
+/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
struct drm_set_client_cap {
__u64 capability;
__u64 value;
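
For orientation (not part of the patch): a minimal sketch of how a client opts in to the cap documented above, using the DRM_IOCTL_SET_CLIENT_CAP ioctl and struct drm_set_client_cap from this header. The /dev/dri/card0 node and error handling are illustrative assumptions.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <drm/drm.h>

int enable_atomic(void)
{
	/* value = 1 opts in to atomic; per the comment above, this also
	 * implies UNIVERSAL_PLANES and ASPECT_RATIO behaviour. */
	struct drm_set_client_cap cap = {
		.capability = DRM_CLIENT_CAP_ATOMIC,
		.value = 1,
	};
	int fd = open("/dev/dri/card0", O_RDWR); /* assumed device node */

	if (fd < 0)
		return -1;
	if (ioctl(fd, DRM_IOCTL_SET_CLIENT_CAP, &cap))
		perror("DRM_IOCTL_SET_CLIENT_CAP");
	return fd;
}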
@@ -950,7 +951,7 @@ extern "C" {
#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
-/**
+/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
* Generic IOCTLS restart at 0xA0.
@@ -961,7 +962,7 @@ extern "C" {
#define DRM_COMMAND_BASE 0x40
#define DRM_COMMAND_END 0xA0
-/**
+/*
* Header for events written back to userspace on the drm fd. The
* type defines the type of event, the length specifies the total
* length of the event (including the header), and user_data is
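
The event-stream format described by that comment lends itself to a simple reader loop; a hedged sketch, assuming only struct drm_event from this header:

#include <string.h>
#include <unistd.h>
#include <drm/drm.h>

void drain_drm_events(int fd)
{
	char buf[1024];
	ssize_t len = read(fd, buf, sizeof(buf));
	ssize_t off = 0;

	/* Each record begins with a struct drm_event whose .length covers
	 * the whole record, so the buffer can be walked header by header. */
	while (off + (ssize_t)sizeof(struct drm_event) <= len) {
		struct drm_event ev;

		memcpy(&ev, buf + off, sizeof(ev));
		if (ev.length < sizeof(ev))
			break;	/* malformed record; bail out */
		/* ev.type selects the payload, e.g. DRM_EVENT_VBLANK */
		off += ev.length;
	}
}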
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index fa1f3d62f9a6..1987e2ea79a3 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
#define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2)
#define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3)
+#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4)
-#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index abb89bbe5635..8b281f722e5b 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -216,6 +216,20 @@ struct kvm_hyperv_exit {
} u;
};
+struct kvm_xen_exit {
+#define KVM_EXIT_XEN_HCALL 1
+ __u32 type;
+ union {
+ struct {
+ __u32 longmode;
+ __u32 cpl;
+ __u64 input;
+ __u64 result;
+ __u64 params[6];
+ } hcall;
+ } u;
+};
+
#define KVM_S390_GET_SKEYS_NONE 1
#define KVM_S390_SKEYS_MAX 1048576
@@ -252,6 +266,8 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_X86_WRMSR 30
#define KVM_EXIT_DIRTY_RING_FULL 31
#define KVM_EXIT_AP_RESET_HOLD 32
+#define KVM_EXIT_X86_BUS_LOCK 33
+#define KVM_EXIT_XEN 34
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -428,6 +444,8 @@ struct kvm_run {
__u32 index; /* kernel -> user */
__u64 data; /* kernel <-> user */
} msr;
+ /* KVM_EXIT_XEN */
+ struct kvm_xen_exit xen;
/* Fix the size of the union. */
char padding[256];
};
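
For context (not part of the patch): a VMM's run loop would consume the two new exit reasons roughly as below, assuming run points at the vcpu's mmapped struct kvm_run and headers carrying this change.

#include <linux/kvm.h>

void handle_exit(struct kvm_run *run)
{
	switch (run->exit_reason) {
	case KVM_EXIT_XEN:
		if (run->xen.type == KVM_EXIT_XEN_HCALL) {
			/* run->xen.u.hcall.input is the hypercall number,
			 * params[] its arguments; the VMM writes the return
			 * value back before re-entering the guest. */
			run->xen.u.hcall.result = -38; /* e.g. -ENOSYS */
		}
		break;
	case KVM_EXIT_X86_BUS_LOCK:
		/* e.g. throttle or log the offending vcpu */
		break;
	}
}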
@@ -1058,6 +1076,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
#define KVM_CAP_SYS_HYPERV_CPUID 191
#define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_X86_BUS_LOCK_EXIT 193
#define KVM_CAP_PPC_DAWR1 194
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1132,6 +1151,10 @@ struct kvm_x86_mce {
#endif
#ifdef KVM_CAP_XEN_HVM
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+
struct kvm_xen_hvm_config {
__u32 flags;
__u32 msr;
@@ -1566,6 +1589,45 @@ struct kvm_pv_cmd {
/* Available with KVM_CAP_DIRTY_LOG_RING */
#define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7)
+/* Per-VM Xen attributes */
+#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
+#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr)
+
+struct kvm_xen_hvm_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u8 long_mode;
+ __u8 vector;
+ struct {
+ __u64 gfn;
+ } shared_info;
+ __u64 pad[8];
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
+
+/* Per-vCPU Xen attributes */
+#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
+#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
+
+struct kvm_xen_vcpu_attr {
+ __u16 type;
+ __u16 pad[3];
+ union {
+ __u64 gpa;
+ __u64 pad[8];
+ } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+
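
A sketch of driving the new per-VM attribute interface, assuming vmfd is an open VM file descriptor and headers carrying the definitions above:

#include <sys/ioctl.h>
#include <linux/kvm.h>

int set_shared_info(int vmfd, __u64 gfn)
{
	/* Point KVM at the guest frame holding the Xen shared_info page. */
	struct kvm_xen_hvm_attr attr = {
		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
		.u.shared_info.gfn = gfn,
	};

	return ioctl(vmfd, KVM_XEN_HVM_SET_ATTR, &attr);
}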
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
@@ -1594,6 +1656,8 @@ enum sev_cmd_id {
KVM_SEV_DBG_ENCRYPT,
/* Guest certificates commands */
KVM_SEV_CERT_EXPORT,
+ /* Attestation report */
+ KVM_SEV_GET_ATTESTATION_REPORT,
KVM_SEV_NR_MAX,
};
@@ -1646,6 +1710,12 @@ struct kvm_sev_dbg {
__u32 len;
};
+struct kvm_sev_attestation_report {
+ __u8 mnonce[16];
+ __u64 uaddr;
+ __u32 len;
+};
+
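
The new command is issued through the existing KVM_MEMORY_ENCRYPT_OP wrapper; a hedged sketch, assuming the caller has already filled mnonce with fresh randomness and sized the output buffer:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int get_attestation_report(int vmfd, int sev_fd,
			   struct kvm_sev_attestation_report *rep)
{
	struct kvm_sev_cmd cmd = {
		.id = KVM_SEV_GET_ATTESTATION_REPORT,
		.data = (uint64_t)(unsigned long)rep,
		.sev_fd = sev_fd,
	};

	/* rep->uaddr/rep->len describe the report output buffer. */
	return ioctl(vmfd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}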
#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)
#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2)
@@ -1767,4 +1837,7 @@ struct kvm_dirty_gfn {
__u64 offset;
};
+#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0)
+#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1)
+
#endif /* __LINUX_KVM_H */
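
These flags pair with KVM_CAP_X86_BUS_LOCK_EXIT added earlier in this header; a minimal sketch of opting in on a VM fd via the standard KVM_ENABLE_CAP ioctl:

#include <sys/ioctl.h>
#include <linux/kvm.h>

int enable_bus_lock_exit(int vmfd)
{
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_X86_BUS_LOCK_EXIT,
		.args = { KVM_BUS_LOCK_DETECTION_EXIT },
	};

	/* Afterwards a guest bus lock surfaces as KVM_EXIT_X86_BUS_LOCK. */
	return ioctl(vmfd, KVM_ENABLE_CAP, &cap);
}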
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index dd8306ea336c..e6524ead2b7b 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -1,6 +1,8 @@
#ifndef _UAPI_LINUX_MOUNT_H
#define _UAPI_LINUX_MOUNT_H
+#include <linux/types.h>
+
/*
* These are the fs-independent mount-flags: up to 32 flags are supported
*
@@ -117,5 +119,19 @@ enum fsconfig_command {
#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */
#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+ __u64 attr_set;
+ __u64 attr_clr;
+ __u64 propagation;
+ __u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */
#endif /* _UAPI_LINUX_MOUNT_H */
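
How the new structure is meant to be consumed, as a hedged sketch: an idmapped mount attaches a user namespace fd to a mount fd. The fallback syscall number 442 matches the x86_64/powerpc/s390 tables updated later in this patch.

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/mount.h>

#ifndef __NR_mount_setattr
#define __NR_mount_setattr 442	/* per the syscall tables below */
#endif

int idmap_mount(int mnt_fd, int userns_fd)
{
	struct mount_attr attr = {
		.attr_set = MOUNT_ATTR_IDMAP,
		.userns_fd = userns_fd,
	};

	/* AT_EMPTY_PATH: operate on mnt_fd itself. */
	return syscall(__NR_mount_setattr, mnt_fd, "", AT_EMPTY_PATH,
		       &attr, sizeof(attr));
}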
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
index 58b1eb711360..a5feb7604948 100644
--- a/tools/include/uapi/linux/openat2.h
+++ b/tools/include/uapi/linux/openat2.h
@@ -35,5 +35,9 @@ struct open_how {
#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
be scoped inside the dirfd
(similar to chroot(2)). */
+#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
+ completed through cached lookup. May
+ return -EAGAIN if that's not
+ possible. */
#endif /* _UAPI_LINUX_OPENAT2_H */
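
A sketch of the intended pattern, assuming headers with this flag and __NR_openat2 (437 in the tables below): try a cached-only resolution first and retry without the flag on -EAGAIN.

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <linux/openat2.h>

#ifndef __NR_openat2
#define __NR_openat2 437
#endif

int open_cached_first(const char *path)
{
	struct open_how how = { .flags = O_RDONLY, .resolve = RESOLVE_CACHED };
	int fd = syscall(__NR_openat2, AT_FDCWD, path, &how, sizeof(how));

	if (fd < 0 && errno == EAGAIN) {
		/* Resolution needed blocking I/O; retry without the flag. */
		how.resolve &= ~RESOLVE_CACHED;
		fd = syscall(__NR_openat2, AT_FDCWD, path, &how, sizeof(how));
	}
	return fd;
}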
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 17465d454a0e..a0aaf385cbb5 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -26,13 +26,10 @@
void perf_evlist__init(struct perf_evlist *evlist)
{
- int i;
-
- for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
- INIT_HLIST_HEAD(&evlist->heads[i]);
INIT_LIST_HEAD(&evlist->entries);
evlist->nr_entries = 0;
fdarray__init(&evlist->pollfd, 64);
+ perf_evlist__reset_id_hash(evlist);
}
static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
@@ -237,6 +234,14 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist,
hlist_add_head(&sid->node, &evlist->heads[hash]);
}
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
+{
+ int i;
+
+ for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+ INIT_HLIST_HEAD(&evlist->heads[i]);
+}
+
void perf_evlist__id_add(struct perf_evlist *evlist,
struct perf_evsel *evsel,
int cpu, int thread, u64 id)
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 2d0fa02b036f..212c29063ad4 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -124,4 +124,6 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
struct perf_evsel *evsel,
int cpu, int thread, int fd);
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
+
#endif /* __LIBPERF_INTERNAL_EVLIST_H */
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index c0a66400a960..9af8b8dfb7b6 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -29,7 +29,7 @@ OPTIONS
Show just the sample frequency used for each event.
-v::
---verbose=::
+--verbose::
Show all fields.
-g::
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index 1e91121bac0f..6e82b7cc0bf0 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -28,8 +28,8 @@ OPTIONS
specified: function_graph or function.
-v::
---verbose=::
- Verbosity level.
+--verbose::
+ Increase the verbosity level.
-F::
--funcs::
diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt
index f3c620951f6e..c97527df8ecd 100644
--- a/tools/perf/Documentation/perf-kallsyms.txt
+++ b/tools/perf/Documentation/perf-kallsyms.txt
@@ -20,5 +20,5 @@ modules).
OPTIONS
-------
-v::
---verbose=::
+--verbose::
Increase verbosity level, showing details about symbol table loading, etc.
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index abc9b5d83312..f0da8cf63e9a 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -97,8 +97,8 @@ filter out the startup phase of the program, which is often very different.
Filter out events for these pids and for 'trace' itself (comma separated list).
-v::
---verbose=::
- Verbosity level.
+--verbose::
+ Increase the verbosity level.
--no-inherit::
Child tasks do not inherit counters.
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 5345ac70cd83..f6e609673de2 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -607,7 +607,7 @@ arch_errno_hdr_dir := $(srctree)/tools
arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
- $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
+ $(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@
sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -1001,14 +1001,6 @@ $(INSTALL_DOC_TARGETS):
### Cleaning rules
-#
-# This is here, not in Makefile.config, because Makefile.config does
-# not get included for the clean target:
-#
-config-clean:
- $(call QUIET_CLEAN, config)
- $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-
python-clean:
$(python-clean)
@@ -1048,7 +1040,7 @@ endif # BUILD_BPF_SKEL
bpf-skel-clean:
$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
$(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
$(Q)$(RM) $(OUTPUT).config-detected
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index bd446aba64f7..c25c878fd06c 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -156,6 +156,10 @@ out:
return err;
}
+#define ETM_SET_OPT_CTXTID (1 << 0)
+#define ETM_SET_OPT_TS (1 << 1)
+#define ETM_SET_OPT_MASK (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS)
+
static int cs_etm_set_option(struct auxtrace_record *itr,
struct evsel *evsel, u32 option)
{
@@ -169,17 +173,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
!cpu_map__has(online_cpus, i))
continue;
- if (option & ETM_OPT_CTXTID) {
+ if (option & ETM_SET_OPT_CTXTID) {
err = cs_etm_set_context_id(itr, evsel, i);
if (err)
goto out;
}
- if (option & ETM_OPT_TS) {
+ if (option & ETM_SET_OPT_TS) {
err = cs_etm_set_timestamp(itr, evsel, i);
if (err)
goto out;
}
- if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS))
+ if (option & ~(ETM_SET_OPT_MASK))
/* Nothing else is currently supported */
goto out;
}
@@ -406,7 +410,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
evsel__set_sample_bit(cs_etm_evsel, CPU);
err = cs_etm_set_option(itr, cs_etm_evsel,
- ETM_OPT_CTXTID | ETM_OPT_TS);
+ ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS);
if (err)
goto out;
}
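
The point of this change: the tool's request bits (ETM_SET_OPT_*) become private to perf and no longer borrow the kernel's ETM_OPT_* config bit positions. The guard pattern, reduced to a self-contained sketch:

#define ETM_SET_OPT_CTXTID	(1 << 0)
#define ETM_SET_OPT_TS		(1 << 1)
#define ETM_SET_OPT_MASK	(ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS)

static int apply_options(unsigned int option, int *ctxtid, int *ts)
{
	if (option & ~ETM_SET_OPT_MASK)
		return -1;	/* nothing else is currently supported */
	*ctxtid = !!(option & ETM_SET_OPT_CTXTID);
	*ts = !!(option & ETM_SET_OPT_TS);
	return 0;
}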
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index f744eb5cba88..0b2480cf3e47 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -9,9 +9,7 @@
#
0 nospu restart_syscall sys_restart_syscall
1 nospu exit sys_exit
-2 32 fork ppc_fork sys_fork
-2 64 fork sys_fork
-2 spu fork sys_ni_syscall
+2 nospu fork sys_fork
3 common read sys_read
4 common write sys_write
5 common open sys_open compat_sys_open
@@ -160,9 +158,7 @@
119 32 sigreturn sys_sigreturn compat_sys_sigreturn
119 64 sigreturn sys_ni_syscall
119 spu sigreturn sys_ni_syscall
-120 32 clone ppc_clone sys_clone
-120 64 clone sys_clone
-120 spu clone sys_ni_syscall
+120 nospu clone sys_clone
121 common setdomainname sys_setdomainname
122 common uname sys_newuname
123 common modify_ldt sys_ni_syscall
@@ -244,9 +240,7 @@
186 spu sendfile sys_sendfile64
187 common getpmsg sys_ni_syscall
188 common putpmsg sys_ni_syscall
-189 32 vfork ppc_vfork sys_vfork
-189 64 vfork sys_vfork
-189 spu vfork sys_ni_syscall
+189 nospu vfork sys_vfork
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 common readahead sys_readahead compat_sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
@@ -322,9 +316,7 @@
248 32 clock_nanosleep sys_clock_nanosleep_time32
248 64 clock_nanosleep sys_clock_nanosleep
248 spu clock_nanosleep sys_clock_nanosleep
-249 32 swapcontext ppc_swapcontext compat_sys_swapcontext
-249 64 swapcontext sys_swapcontext
-249 spu swapcontext sys_ni_syscall
+249 nospu swapcontext sys_swapcontext compat_sys_swapcontext
250 common tgkill sys_tgkill
251 32 utimes sys_utimes_time32
251 64 utimes sys_utimes
@@ -522,12 +514,11 @@
432 common fsmount sys_fsmount
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
-435 32 clone3 ppc_clone3 sys_clone3
-435 64 clone3 sys_clone3
-435 spu clone3 sys_ni_syscall
+435 nospu clone3 sys_clone3
436 common close_range sys_close_range
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index d443423495e5..3abef2144dac 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -444,3 +444,4 @@
439 common faccessat2 sys_faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr sys_mount_setattr
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 8cc6642fce7a..5a9f9a7bf07d 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -10,10 +10,11 @@ PERF_HAVE_JITDUMP := 1
# Syscall table generation
#
-out := $(OUTPUT)arch/x86/include/generated/asm
-header := $(out)/syscalls_64.c
-sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
-systbl := $(sys)/syscalltbl.sh
+generated := $(OUTPUT)arch/x86/include/generated
+out := $(generated)/asm
+header := $(out)/syscalls_64.c
+sys := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl := $(sys)/syscalltbl.sh
# Create output directory if not already present
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
@@ -22,6 +23,6 @@ $(header): $(sys)/syscall_64.tbl $(systbl)
$(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
clean::
- $(call QUIET_CLEAN, x86) $(RM) $(header)
+ $(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated)
archheaders: $(header)
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 78672124d28b..7bf01cbe582f 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -363,6 +363,7 @@
439 common faccessat2 sys_faccessat2
440 common process_madvise sys_process_madvise
441 common epoll_pwait2 sys_epoll_pwait2
+442 common mount_setattr sys_mount_setattr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 6a54b94f1c25..0e20f3dc69f3 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -10,6 +10,7 @@ int test__rdpmc(struct test *test __maybe_unused, int subtest);
int test__insn_x86(struct test *test __maybe_unused, int subtest);
int test__intel_pt_pkt_decoder(struct test *test, int subtest);
int test__bp_modify(struct test *test, int subtest);
+int test__x86_sample_parsing(struct test *test, int subtest);
#ifdef HAVE_DWARF_UNWIND_SUPPORT
struct thread;
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 36d4f248b51d..28d793390198 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,5 +3,6 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-y += arch-tests.o
perf-y += rdpmc.o
+perf-y += sample-parsing.o
perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index bc25d727b4e9..71aa67367ad6 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -31,6 +31,10 @@ struct test arch_tests[] = {
},
#endif
{
+ .desc = "x86 Sample parsing",
+ .func = test__x86_sample_parsing,
+ },
+ {
.func = NULL,
},
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index f782ef8c5982..4f75ae990140 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/types.h>
-#include "../../../../arch/x86/include/asm/insn.h"
#include <string.h>
#include "debug.h"
#include "tests/tests.h"
#include "arch-tests.h"
+#include "../../../../arch/x86/include/asm/insn.h"
#include "intel-pt-decoder/intel-pt-insn-decoder.h"
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
new file mode 100644
index 000000000000..c92db87e4479
--- /dev/null
+++ b/tools/perf/arch/x86/tests/sample-parsing.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdbool.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "event.h"
+#include "evsel.h"
+#include "debug.h"
+#include "util/synthetic-events.h"
+
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define COMP(m) do { \
+ if (s1->m != s2->m) { \
+ pr_debug("Samples differ at '"#m"'\n"); \
+ return false; \
+ } \
+} while (0)
+
+static bool samples_same(const struct perf_sample *s1,
+ const struct perf_sample *s2,
+ u64 type)
+{
+ if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+ COMP(ins_lat);
+
+ return true;
+}
+
+static int do_test(u64 sample_type)
+{
+ struct evsel evsel = {
+ .needs_swap = false,
+ .core = {
+ . attr = {
+ .sample_type = sample_type,
+ .read_format = 0,
+ },
+ },
+ };
+ union perf_event *event;
+ struct perf_sample sample = {
+ .weight = 101,
+ .ins_lat = 102,
+ };
+ struct perf_sample sample_out;
+ size_t i, sz, bufsz;
+ int err, ret = -1;
+
+ sz = perf_event__sample_event_size(&sample, sample_type, 0);
+ bufsz = sz + 4096; /* Add a bit for overrun checking */
+ event = malloc(bufsz);
+ if (!event) {
+ pr_debug("malloc failed\n");
+ return -1;
+ }
+
+ memset(event, 0xff, bufsz);
+ event->header.type = PERF_RECORD_SAMPLE;
+ event->header.misc = 0;
+ event->header.size = sz;
+
+ err = perf_event__synthesize_sample(event, sample_type, 0, &sample);
+ if (err) {
+ pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+ "perf_event__synthesize_sample", sample_type, err);
+ goto out_free;
+ }
+
+ /* The data does not contain 0xff so we use that to check the size */
+ for (i = bufsz; i > 0; i--) {
+ if (*(i - 1 + (u8 *)event) != 0xff)
+ break;
+ }
+ if (i != sz) {
+ pr_debug("Event size mismatch: actual %zu vs expected %zu\n",
+ i, sz);
+ goto out_free;
+ }
+
+ evsel.sample_size = __evsel__sample_size(sample_type);
+
+ err = evsel__parse_sample(&evsel, event, &sample_out);
+ if (err) {
+ pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+ "evsel__parse_sample", sample_type, err);
+ goto out_free;
+ }
+
+ if (!samples_same(&sample, &sample_out, sample_type)) {
+ pr_debug("parsing failed for sample_type %#"PRIx64"\n",
+ sample_type);
+ goto out_free;
+ }
+
+ ret = 0;
+out_free:
+ free(event);
+
+ return ret;
+}
+
+/**
+ * test__x86_sample_parsing - test X86 specific sample parsing
+ *
+ * This function implements a test that synthesizes a sample event, parses it
+ * and then checks that the parsed sample matches the original sample. If the
+ * test passes, %0 is returned, otherwise %-1 is returned.
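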
+ *
+ * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type.
+ * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type.
+ */
+int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ return do_test(PERF_SAMPLE_WEIGHT_STRUCT);
+}
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
index 3e6791531ca5..34d600c51044 100644
--- a/tools/perf/arch/x86/util/archinsn.c
+++ b/tools/perf/arch/x86/util/archinsn.c
@@ -1,10 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
-#include "../../../../arch/x86/include/asm/insn.h"
#include "archinsn.h"
#include "event.h"
#include "machine.h"
#include "thread.h"
#include "symbol.h"
+#include "../../../../arch/x86/include/asm/insn.h"
void arch_fetch_insn(struct perf_sample *sample,
struct thread *thread,
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 11726ec6285f..20b87e29c96f 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -344,18 +344,22 @@ static void mempol_restore(void)
static void bind_to_memnode(int node)
{
- unsigned long nodemask;
+ struct bitmask *node_mask;
int ret;
if (node == NUMA_NO_NODE)
return;
- BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
- nodemask = 1L << node;
+ node_mask = numa_allocate_nodemask();
+ BUG_ON(!node_mask);
- ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
- dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+ numa_bitmask_clearall(node_mask);
+ numa_bitmask_setbit(node_mask, node);
+ ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1);
+ dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret);
+
+ numa_bitmask_free(node_mask);
BUG_ON(ret);
}
@@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
prctl(0, bytes_worked);
}
-#define MAX_NR_NODES 64
-
/*
* Count the number of nodes a process's threads
* are spread out on.
@@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
*/
static int count_process_nodes(int process_nr)
{
- char node_present[MAX_NR_NODES] = { 0, };
+ char *node_present;
int nodes;
int n, t;
+ node_present = (char *)malloc(g->p.nr_nodes * sizeof(char));
+ BUG_ON(!node_present);
+ for (nodes = 0; nodes < g->p.nr_nodes; nodes++)
+ node_present[nodes] = 0;
+
for (t = 0; t < g->p.nr_threads; t++) {
struct thread_data *td;
int task_nr;
@@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr)
td = g->threads + task_nr;
node = numa_node_of_cpu(td->curr_cpu);
- if (node < 0) /* curr_cpu was likely still -1 */
+ if (node < 0) /* curr_cpu was likely still -1 */ {
+ free(node_present);
return 0;
+ }
node_present[node] = 1;
}
nodes = 0;
- for (n = 0; n < MAX_NR_NODES; n++)
+ for (n = 0; n < g->p.nr_nodes; n++)
nodes += node_present[n];
+ free(node_present);
return nodes;
}
@@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
{
unsigned int loops_done_min, loops_done_max;
int process_groups;
- int nodes[MAX_NR_NODES];
+ int *nodes;
int distance;
int nr_min;
int nr_max;
@@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
if (!g->p.show_convergence && !g->p.measure_convergence)
return;
+ nodes = (int *)malloc(g->p.nr_nodes * sizeof(int));
+ BUG_ON(!nodes);
for (node = 0; node < g->p.nr_nodes; node++)
nodes[node] = 0;
@@ -1035,8 +1047,10 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
BUG_ON(sum > g->p.nr_tasks);
- if (0 && (sum < g->p.nr_tasks))
+ if (0 && (sum < g->p.nr_tasks)) {
+ free(nodes);
return;
+ }
/*
* Count the number of distinct process groups present
@@ -1088,6 +1102,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
}
tprintf("\n");
}
+
+ free(nodes);
}
static void show_summary(double runtime_ns_max, int l, double *convergence)
@@ -1413,7 +1429,7 @@ static int init(void)
g->p.nr_nodes = numa_max_node() + 1;
/* char array in count_process_nodes(): */
- BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+ BUG_ON(g->p.nr_nodes < 0);
if (g->p.show_quiet && !g->p.show_details)
g->p.show_details = -1;
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index cecce93ccc63..488f6e6ba1a5 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -309,11 +309,11 @@ int bench_sched_messaging(int argc, const char **argv)
num_groups, num_groups * 2 * num_fds,
thread_mode ? "threads" : "processes");
printf(" %14s: %lu.%03lu [sec]\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
case BENCH_FORMAT_SIMPLE:
- printf("%lu.%03lu\n", diff.tv_sec,
+ printf("%lu.%03lu\n", (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
default:
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 3c88d1f201f1..a960e7a93aec 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -156,7 +156,7 @@ int bench_sched_pipe(int argc, const char **argv)
result_usec += diff.tv_usec;
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
printf(" %14lf usecs/op\n",
@@ -168,7 +168,7 @@ int bench_sched_pipe(int argc, const char **argv)
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / USEC_PER_MSEC));
break;
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
index 5fe621cff8e9..9b751016f4b6 100644
--- a/tools/perf/bench/syscall.c
+++ b/tools/perf/bench/syscall.c
@@ -54,7 +54,7 @@ int bench_syscall_basic(int argc, const char **argv)
result_usec += diff.tv_usec;
printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec/1000));
printf(" %14lf usecs/op\n",
@@ -66,7 +66,7 @@ int bench_syscall_basic(int argc, const char **argv)
case BENCH_FORMAT_SIMPLE:
printf("%lu.%03lu\n",
- diff.tv_sec,
+ (unsigned long) diff.tv_sec,
(unsigned long) (diff.tv_usec / 1000));
break;
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 617feaf020f6..ace8772a4f03 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -161,7 +161,7 @@ static int session_config(struct daemon *daemon, const char *var, const char *va
struct daemon_session *session;
char name[100];
- if (get_session_name(var, name, sizeof(name)))
+ if (get_session_name(var, name, sizeof(name) - 1))
return -EINVAL;
var = strchr(var, '.');
@@ -373,12 +373,12 @@ static int daemon_session__run(struct daemon_session *session,
dup2(fd, 2);
close(fd);
- if (mkfifo(SESSION_CONTROL, O_RDWR) && errno != EEXIST) {
+ if (mkfifo(SESSION_CONTROL, 0600) && errno != EEXIST) {
perror("failed: create control fifo");
return -1;
}
- if (mkfifo(SESSION_ACK, O_RDWR) && errno != EEXIST) {
+ if (mkfifo(SESSION_ACK, 0600) && errno != EEXIST) {
perror("failed: create ack fifo");
return -1;
}
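
The bug fixed here is worth spelling out: mkfifo(3) takes a mode_t, not open(2) flags, so passing O_RDWR (numerically 02 on Linux) silently created the fifo with mode 0002. A corrected sketch:

#include <sys/stat.h>
#include <sys/types.h>

int make_session_fifo(const char *path)
{
	/* 0600: readable and writable by the owning user only. */
	return mkfifo(path, 0600);
}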
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 8f6c784ce629..878e04b1fab7 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1236,7 +1236,8 @@ static int __cmd_diff(void)
out_delete:
data__for_each_file(i, d) {
- perf_session__delete(d->session);
+ if (!IS_ERR(d->session))
+ perf_session__delete(d->session);
data__free(d);
}
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 85b6a46e85b6..7ec18ff57fc4 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3964,9 +3964,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
evlist__config(evlist, &trace->opts, &callchain_param);
- signal(SIGCHLD, sig_handler);
- signal(SIGINT, sig_handler);
-
if (forks) {
err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
if (err < 0) {
@@ -4827,6 +4824,8 @@ int cmd_trace(int argc, const char **argv)
signal(SIGSEGV, sighandler_dump_stack);
signal(SIGFPE, sighandler_dump_stack);
+ signal(SIGCHLD, sig_handler);
+ signal(SIGINT, sig_handler);
trace.evlist = evlist__new();
trace.sctbl = syscalltbl__new();
diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh
index 0cfb3e2cefef..133f0eddbcc4 100644
--- a/tools/perf/perf-archive.sh
+++ b/tools/perf/perf-archive.sh
@@ -20,9 +20,8 @@ else
fi
BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
-NOBUILDID=0000000000000000000000000000000000000000
-perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
+perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS
if [ ! -s $BUILDIDS ] ; then
echo "perf archive: no build-ids found"
rm $BUILDIDS || true
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index ec972e0892ab..dd39ce9b0277 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -182,14 +182,20 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
struct stat st;
char path_perf[PATH_MAX];
char path_dir[PATH_MAX];
+ char *exec_path;
/* First try development tree tests. */
if (!lstat("./tests", &st))
return run_dir("./tests", "./perf");
+ exec_path = get_argv_exec_path();
+ if (exec_path == NULL)
+ return -1;
+
/* Then installed path. */
- snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path());
+ snprintf(path_dir, PATH_MAX, "%s/tests", exec_path);
snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
+ free(exec_path);
if (!lstat(path_dir, &st) &&
!lstat(path_perf, &st))
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 280f0348a09c..2fdc7b2f996e 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -706,13 +706,9 @@ static int do_test_code_reading(bool try_kcore)
out_put:
thread__put(thread);
out_err:
-
- if (evlist) {
- evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
- }
+ evlist__delete(evlist);
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
machine__delete_threads(machine);
machine__delete(machine);
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 29c793ac7d10..0472b110fe65 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -106,6 +106,8 @@ static int cpu_map_print(const char *str)
return -1;
cpu_map__snprint(map, buf, sizeof(buf));
+ perf_cpu_map__put(map);
+
return !strcmp(buf, str);
}
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index e6f1b2a38e03..a0438b0f0805 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -154,10 +154,9 @@ out_err:
if (evlist) {
evlist__disable(evlist);
evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
}
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 57093aeacc6f..73ae8f7aa066 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -158,8 +158,6 @@ out_init:
out_delete_evlist:
evlist__delete(evlist);
- cpus = NULL;
- threads = NULL;
out_free_cpus:
perf_cpu_map__put(cpus);
out_free_threads:
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index 7cff02664d0e..680c3cffb128 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -167,6 +167,8 @@ next_event:
out_err:
evlist__delete(evlist);
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
}
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 0dbe3aa99853..8fd8a4ef97da 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -129,9 +129,6 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_WEIGHT)
COMP(weight);
- if (type & PERF_SAMPLE_WEIGHT_STRUCT)
- COMP(ins_lat);
-
if (type & PERF_SAMPLE_DATA_SRC)
COMP(data_src);
@@ -245,7 +242,6 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.cgroup = 114,
.data_page_size = 115,
.code_page_size = 116,
- .ins_lat = 117,
.aux_sample = {
.size = sizeof(aux_data),
.data = (void *)aux_data,
diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh
index e5b824dd08d9..5ad3ca8d681b 100755
--- a/tools/perf/tests/shell/daemon.sh
+++ b/tools/perf/tests/shell/daemon.sh
@@ -140,10 +140,10 @@ test_list()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -159,14 +159,14 @@ EOF
# check 1st session
# pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0
local line=`perf daemon --config ${config} -x: | head -2 | tail -1`
- check_line_other "${line}" size "-e cpu-clock" ${base}/session-size \
+ check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \
${base}/session-size/output ${base}/session-size/control \
${base}/session-size/ack "0"
# check 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
- check_line_other "${line}" time "-e task-clock" ${base}/session-time \
+ check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control \
${base}/session-time/ack "0"
@@ -190,10 +190,10 @@ test_reconfig()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -204,7 +204,7 @@ EOF
# check 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
- check_line_other "${line}" time "-e task-clock" ${base}/session-time \
+ check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
@@ -215,10 +215,10 @@ EOF
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
EOF
# TEST 1 - change config
@@ -238,7 +238,7 @@ EOF
# check reconfigured 2nd session
# pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
- check_line_other "${line}" time "-e cpu-clock" ${base}/session-time \
+ check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \
${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
# TEST 2 - empty config
@@ -309,10 +309,10 @@ test_stop()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -361,7 +361,7 @@ test_signal()
base=BASE
[session-test]
-run = -e cpu-clock --switch-output
+run = -e cpu-clock --switch-output -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -400,10 +400,10 @@ test_ping()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
[session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
@@ -439,7 +439,7 @@ test_lock()
base=BASE
[session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
EOF
sed -i -e "s|BASE|${base}|" ${config}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index a49c9e23053b..74988846be1d 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -42,8 +42,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
.disabled = 1,
.freq = 1,
};
- struct perf_cpu_map *cpus;
- struct perf_thread_map *threads;
+ struct perf_cpu_map *cpus = NULL;
+ struct perf_thread_map *threads = NULL;
struct mmap *md;
attr.sample_freq = 500;
@@ -66,14 +66,11 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
if (!cpus || !threads) {
err = -ENOMEM;
pr_debug("Not enough memory to create thread/cpu maps\n");
- goto out_free_maps;
+ goto out_delete_evlist;
}
perf_evlist__set_maps(&evlist->core, cpus, threads);
- cpus = NULL;
- threads = NULL;
-
if (evlist__open(evlist)) {
const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate";
@@ -129,10 +126,9 @@ out_init:
err = -1;
}
-out_free_maps:
+out_delete_evlist:
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
-out_delete_evlist:
evlist__delete(evlist);
return err;
}
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 15a2ab765d89..3ebaa758df77 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -574,10 +574,9 @@ out:
if (evlist) {
evlist__disable(evlist);
evlist__delete(evlist);
- } else {
- perf_cpu_map__put(cpus);
- perf_thread_map__put(threads);
}
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
return err;
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index bbf94e4aa145..4c2969db59b0 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -75,14 +75,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
if (!cpus || !threads) {
err = -ENOMEM;
pr_debug("Not enough memory to create thread/cpu maps\n");
- goto out_free_maps;
+ goto out_delete_evlist;
}
perf_evlist__set_maps(&evlist->core, cpus, threads);
- cpus = NULL;
- threads = NULL;
-
err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal);
if (err < 0) {
pr_debug("Couldn't run the workload!\n");
@@ -137,7 +134,7 @@ out_init:
if (retry_count++ > 1000) {
pr_debug("Failed after retrying 1000 times\n");
err = -1;
- goto out_free_maps;
+ goto out_delete_evlist;
}
goto retry;
@@ -148,10 +145,9 @@ out_init:
err = -1;
}
-out_free_maps:
+out_delete_evlist:
perf_cpu_map__put(cpus);
perf_thread_map__put(threads);
-out_delete_evlist:
evlist__delete(evlist);
return err;
}
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index 28f51c4bd373..d1e208b4a571 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -102,6 +102,7 @@ int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __
TEST_ASSERT_VAL("failed to synthesize map",
!perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
+ perf_thread_map__put(threads);
return 0;
}
@@ -109,12 +110,12 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
{
struct perf_thread_map *threads;
char *str;
- int i;
TEST_ASSERT_VAL("failed to allocate map string",
asprintf(&str, "%d,%d", getpid(), getppid()) >= 0);
threads = thread_map__new_str(str, NULL, 0, false);
+ free(str);
TEST_ASSERT_VAL("failed to allocate thread_map",
threads);
@@ -141,9 +142,6 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
TEST_ASSERT_VAL("failed to not remove thread",
thread_map__remove(threads, 0));
- for (i = 0; i < threads->nr; i++)
- zfree(&threads->map[i].comm);
-
- free(threads);
+ perf_thread_map__put(threads);
return 0;
}
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 5121b4db66fe..882cd1f721d9 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1306,6 +1306,7 @@ void evlist__close(struct evlist *evlist)
perf_evsel__free_fd(&evsel->core);
perf_evsel__free_id(&evsel->core);
}
+ perf_evlist__reset_id_hash(&evlist->core);
}
static int evlist__create_syswide_maps(struct evlist *evlist)
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1bf76864c4f2..7ecbc8e2fbfa 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -46,6 +46,7 @@
#include "string2.h"
#include "memswap.h"
#include "util.h"
+#include "hashmap.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
#include <internal/xyarray.h>
@@ -1390,7 +1391,9 @@ void evsel__exit(struct evsel *evsel)
zfree(&evsel->group_name);
zfree(&evsel->name);
zfree(&evsel->pmu_name);
- zfree(&evsel->per_pkg_mask);
+ evsel__zero_per_pkg(evsel);
+ hashmap__free(evsel->per_pkg_mask);
+ evsel->per_pkg_mask = NULL;
zfree(&evsel->metric_events);
perf_evsel__object.fini(evsel);
}
@@ -2781,3 +2784,16 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
return store_evsel_ids(evsel, evlist);
}
+
+void evsel__zero_per_pkg(struct evsel *evsel)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ if (evsel->per_pkg_mask) {
+ hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
+ free((char *)cur->key);
+
+ hashmap__clear(evsel->per_pkg_mask);
+ }
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 4e8e49fb7e9d..6026487353dd 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -19,6 +19,7 @@ struct perf_stat_evsel;
union perf_event;
struct bpf_counter_ops;
struct target;
+struct hashmap;
typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
@@ -112,7 +113,7 @@ struct evsel {
bool merged_stat;
bool reset_group;
bool errored;
- unsigned long *per_pkg_mask;
+ struct hashmap *per_pkg_mask;
struct evsel *leader;
struct list_head config_terms;
int err;
@@ -433,4 +434,5 @@ struct perf_env *evsel__env(struct evsel *evsel);
int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+void evsel__zero_per_pkg(struct evsel *evsel);
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4fe9e2a54346..20effdff76ce 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1618,8 +1618,8 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp)
fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
- tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
- clockid_ns.tv_sec, clockid_ns.tv_nsec,
+ tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec,
+ (long) clockid_ns.tv_sec, clockid_ns.tv_nsec,
clockid_name(clockid));
}
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 692e56dc832e..fbc40a2c17d4 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -77,8 +77,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
if (strstarts(filename, "/system/lib/")) {
char *ndk, *app;
const char *arch;
- size_t ndk_length;
- size_t app_length;
+ int ndk_length, app_length;
ndk = getenv("NDK_ROOT");
app = getenv("APP_PLATFORM");
@@ -106,8 +105,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
if (new_length > PATH_MAX)
return false;
snprintf(newfilename, new_length,
- "%s/platforms/%s/arch-%s/usr/lib/%s",
- ndk, app, arch, libname);
+ "%.*s/platforms/%.*s/arch-%s/usr/lib/%s",
+ ndk_length, ndk, app_length, app, arch, libname);
return true;
}
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index d5b6aff82f21..d57ac86ce7ca 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -89,6 +89,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_EVENT_NAME
%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
+%type <str> event_pmu_name
%destructor { free ($$); } <str>
%type <term> event_term
%destructor { parse_events_term__delete ($$); } <term>
@@ -272,8 +273,11 @@ event_def: event_pmu |
event_legacy_raw sep_dc |
event_bpf_file
+event_pmu_name:
+PE_NAME | PE_PMU_EVENT_PRE
+
event_pmu:
-PE_NAME opt_pmu_config
+event_pmu_name opt_pmu_config
{
struct parse_events_state *parse_state = _parse_state;
struct parse_events_error *error = parse_state->error;
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 71b753523fac..845dd46e3c61 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -36,3 +36,4 @@ util/symbol_fprintf.c
util/units.c
util/affinity.c
util/rwsem.c
+util/hashmap.c
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 0d5ad42812b9..552b590485bf 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -3140,7 +3140,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
- if (sort__mode != SORT_MODE__MEMORY)
+ if (sort__mode != SORT_MODE__BRANCH)
return -EINVAL;
return __sort_dimension__add_output(list, sd);
@@ -3152,7 +3152,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
- if (sort__mode != SORT_MODE__BRANCH)
+ if (sort__mode != SORT_MODE__MEMORY)
return -EINVAL;
return __sort_dimension__add_output(list, sd);
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index cce7a76d6473..7f09cdaf5b60 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -983,7 +983,7 @@ static void print_interval(struct perf_stat_config *config,
if (config->interval_clear)
puts(CONSOLE_CLEAR);
- sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep);
+ sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) {
switch (config->aggr_mode) {
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 5d8af29447f4..c400f8dde017 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -13,6 +13,7 @@
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
+#include "hashmap.h"
#include <linux/zalloc.h>
void update_stats(struct stats *stats, u64 val)
@@ -277,18 +278,29 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
}
}
-static void zero_per_pkg(struct evsel *counter)
+static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
{
- if (counter->per_pkg_mask)
- memset(counter->per_pkg_mask, 0, cpu__max_cpu());
+ uint64_t *key = (uint64_t *) __key;
+
+ return *key & 0xffffffff;
+}
+
+static bool pkg_id_equal(const void *__key1, const void *__key2,
+ void *ctx __maybe_unused)
+{
+ uint64_t *key1 = (uint64_t *) __key1;
+ uint64_t *key2 = (uint64_t *) __key2;
+
+ return *key1 == *key2;
}
static int check_per_pkg(struct evsel *counter,
struct perf_counts_values *vals, int cpu, bool *skip)
{
- unsigned long *mask = counter->per_pkg_mask;
+ struct hashmap *mask = counter->per_pkg_mask;
struct perf_cpu_map *cpus = evsel__cpus(counter);
- int s;
+ int s, d, ret = 0;
+ uint64_t *key;
*skip = false;
@@ -299,7 +311,7 @@ static int check_per_pkg(struct evsel *counter,
return 0;
if (!mask) {
- mask = zalloc(cpu__max_cpu());
+ mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
if (!mask)
return -ENOMEM;
@@ -321,8 +333,25 @@ static int check_per_pkg(struct evsel *counter,
if (s < 0)
return -1;
- *skip = test_and_set_bit(s, mask) == 1;
- return 0;
+ /*
+ * On a multi-die system, die_id > 0; on a no-die system, die_id = 0.
+ * We use a hashmap keyed on (socket, die) to record pairs already seen.
+ */
+ d = cpu_map__get_die(cpus, cpu, NULL).die;
+ if (d < 0)
+ return -1;
+
+ key = malloc(sizeof(*key));
+ if (!key)
+ return -ENOMEM;
+
+ *key = (uint64_t)d << 32 | s;
+ if (hashmap__find(mask, (void *)key, NULL))
+ *skip = true;
+ else
+ ret = hashmap__add(mask, (void *)key, (void *)1);
+
+ return ret;
}
static int
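
A worked example of the key layout used above, with the die in the high 32 bits and the socket in the low 32: it shows why pkg_id_equal() is still needed even though pkg_id_hash() only looks at the socket half.

#include <inttypes.h>
#include <stdio.h>

int main(void)
{
	uint64_t k1 = (uint64_t)1 << 32 | 2;	/* die 1, socket 2 */
	uint64_t k2 = (uint64_t)0 << 32 | 2;	/* die 0, socket 2 */

	/* Both hash to bucket 2 (low 32 bits), but compare unequal,
	 * so each (socket, die) pair is counted exactly once. */
	printf("hash %#x / %#x, keys %#" PRIx64 " / %#" PRIx64 "\n",
	       (unsigned)(k1 & 0xffffffff), (unsigned)(k2 & 0xffffffff),
	       k1, k2);
	return 0;
}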
@@ -422,7 +451,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
}
if (counter->per_pkg)
- zero_per_pkg(counter);
+ evsel__zero_per_pkg(counter);
ret = process_counter_maps(config, counter);
if (ret)
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index f507dff713c9..8a01af783310 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -361,6 +361,7 @@ static int read_saved_cmdline(struct tep_handle *pevent)
pr_debug("error reading saved cmdlines\n");
goto out;
}
+ buf[ret] = '\0';
parse_saved_cmdline(pevent, buf, size);
ret = 0;
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
index 4c69408f3e84..a4969f7ee020 100644
--- a/tools/testing/selftests/gpio/.gitignore
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-gpio-mockup-chardev
+gpio-mockup-cdev
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 3a84394829ea..32b87cc77c8e 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -33,6 +33,7 @@
/demand_paging_test
/dirty_log_test
/dirty_log_perf_test
+/hardware_disable_test
/kvm_create_max_vcpus
/memslot_modification_stress_test
/set_memory_region_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 8c8eda429576..a6d61f451f88 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
new file mode 100644
index 000000000000..2f2eeb8a1d86
--- /dev/null
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test is intended to reproduce a crash that happens when
+ * kvm_arch_hardware_disable is called and it attempts to unregister the user
+ * return notifiers.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <test_util.h>
+
+#include "kvm_util.h"
+
+#define VCPU_NUM 4
+#define SLEEPING_THREAD_NUM (1 << 4)
+#define FORK_NUM (1ULL << 9)
+#define DELAY_US_MAX 2000
+#define GUEST_CODE_PIO_PORT 4
+
+sem_t *sem;
+
+/* Arguments for the pthreads */
+struct payload {
+ struct kvm_vm *vm;
+ uint32_t index;
+};
+
+static void guest_code(void)
+{
+ for (;;)
+ ; /* Some busy work */
+ printf("Should not be reached.\n");
+}
+
+static void *run_vcpu(void *arg)
+{
+ struct payload *payload = (struct payload *)arg;
+ struct kvm_run *state = vcpu_state(payload->vm, payload->index);
+
+ vcpu_run(payload->vm, payload->index);
+
+ TEST_ASSERT(false, "%s: exited with reason %d: %s\n",
+ __func__, state->exit_reason,
+ exit_reason_str(state->exit_reason));
+ pthread_exit(NULL);
+}
+
+static void *sleeping_thread(void *arg)
+{
+ int fd;
+
+ while (true) {
+ fd = open("/dev/null", O_RDWR);
+ close(fd);
+ }
+ TEST_ASSERT(false, "%s: exited\n", __func__);
+ pthread_exit(NULL);
+}
+
+static inline void check_create_thread(pthread_t *thread, pthread_attr_t *attr,
+ void *(*f)(void *), void *arg)
+{
+ int r;
+
+ r = pthread_create(thread, attr, f, arg);
+ TEST_ASSERT(r == 0, "%s: failed to create thread", __func__);
+}
+
+static inline void check_set_affinity(pthread_t thread, cpu_set_t *cpu_set)
+{
+ int r;
+
+ r = pthread_setaffinity_np(thread, sizeof(cpu_set_t), cpu_set);
+ TEST_ASSERT(r == 0, "%s: failed set affinity", __func__);
+}
+
+static inline void check_join(pthread_t thread, void **retval)
+{
+ int r;
+
+ r = pthread_join(thread, retval);
+ TEST_ASSERT(r == 0, "%s: failed to join thread", __func__);
+}
+
+static void run_test(uint32_t run)
+{
+ struct kvm_vm *vm;
+ cpu_set_t cpu_set;
+ pthread_t threads[VCPU_NUM];
+ pthread_t throw_away;
+ struct payload payloads[VCPU_NUM];
+ void *b;
+ uint32_t i, j;
+
+ CPU_ZERO(&cpu_set);
+ for (i = 0; i < VCPU_NUM; i++)
+ CPU_SET(i, &cpu_set);
+
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ vm_create_irqchip(vm);
+
+ fprintf(stderr, "%s: [%d] start vcpus\n", __func__, run);
+ for (i = 0; i < VCPU_NUM; ++i) {
+ vm_vcpu_add_default(vm, i, guest_code);
+ payloads[i].vm = vm;
+ payloads[i].index = i;
+
+ check_create_thread(&threads[i], NULL, run_vcpu,
+ (void *)&payloads[i]);
+ check_set_affinity(threads[i], &cpu_set);
+
+ for (j = 0; j < SLEEPING_THREAD_NUM; ++j) {
+ check_create_thread(&throw_away, NULL, sleeping_thread,
+ (void *)NULL);
+ check_set_affinity(throw_away, &cpu_set);
+ }
+ }
+ fprintf(stderr, "%s: [%d] all threads launched\n", __func__, run);
+ sem_post(sem);
+ for (i = 0; i < VCPU_NUM; ++i)
+ check_join(threads[i], &b);
+ /* Should not be reached */
+ TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t i;
+ int s, r;
+ pid_t pid;
+
+ sem = sem_open("vm_sem", O_CREAT | O_EXCL, 0644, 0);
+ sem_unlink("vm_sem");
+
+ for (i = 0; i < FORK_NUM; ++i) {
+ pid = fork();
+ TEST_ASSERT(pid >= 0, "%s: unable to fork", __func__);
+ if (pid == 0)
+ run_test(i); /* This function always exits */
+
+ fprintf(stderr, "%s: [%d] waiting semaphore\n", __func__, i);
+ sem_wait(sem);
+ r = (rand() % DELAY_US_MAX) + 1;
+ fprintf(stderr, "%s: [%d] waiting %dus\n", __func__, i, r);
+ usleep(r);
+ r = waitpid(pid, &s, WNOHANG);
+ TEST_ASSERT(r != pid,
+ "%s: [%d] child exited unexpectedly status: [%d]",
+ __func__, i, s);
+ fprintf(stderr, "%s: [%d] killing child\n", __func__, i);
+ kill(pid, SIGKILL);
+ }
+
+ sem_destroy(sem);
+ exit(0);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index d787cb802b4a..e5fbf16f725b 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -21,6 +21,8 @@
#define KVM_UTIL_PGS_PER_HUGEPG 512
#define KVM_UTIL_MIN_PFN 2
+static int vcpu_mmap_sz(void);
+
/* Aligns x up to the next multiple of size. Size must be a power of 2. */
static void *align(void *x, size_t size)
{
@@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
vcpu->dirty_gfns = NULL;
}
- ret = munmap(vcpu->state, sizeof(*vcpu->state));
+ ret = munmap(vcpu->state, vcpu_mmap_sz());
TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
"errno: %i", ret, errno);
close(vcpu->fd);
@@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
vcpu_mmap_sz(), sizeof(*vcpu->state));
- vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
"vcpu id: %u errno: %i", vcpuid, errno);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index de0c76177d02..a8906e60a108 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -720,7 +720,8 @@ struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct kvm_cpuid2 *cpuid;
- int rc, max_ent;
+ int max_ent;
+ int rc = -1;
TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 9246ea310587..804ff5ff022d 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -13,19 +13,27 @@
#include <stdint.h>
#include <time.h>
+#include <sched.h>
+#include <sys/syscall.h>
#define VCPU_ID 5
+#define SHINFO_REGION_GVA 0xc0000000ULL
#define SHINFO_REGION_GPA 0xc0000000ULL
#define SHINFO_REGION_SLOT 10
#define PAGE_SIZE 4096
#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
static struct kvm_vm *vm;
#define XEN_HYPERCALL_MSR 0x40000000
+#define MIN_STEAL_TIME 50000
+
struct pvclock_vcpu_time_info {
u32 version;
u32 pad0;
@@ -43,11 +51,67 @@ struct pvclock_wall_clock {
u32 nsec;
} __attribute__((__packed__));
+struct vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[4];
+};
+
+#define RUNSTATE_running 0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked 2
+#define RUNSTATE_offline 3
+
static void guest_code(void)
{
+ struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+
+ /* Test having the host set runstates manually */
+ GUEST_SYNC(RUNSTATE_runnable);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(RUNSTATE_blocked);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(RUNSTATE_offline);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ /* Test runstate time adjust */
+ GUEST_SYNC(4);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+ /* Test runstate time set */
+ GUEST_SYNC(5);
+ GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+ /* sched_yield() should result in some 'runnable' time */
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
GUEST_DONE();
}
+static long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+ fclose(fp);
+
+ return val[1];
+}
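+
+/*
+ * For reference, /proc/<tid>/schedstat holds three fields: nanoseconds
+ * spent on-CPU, nanoseconds spent runnable but waiting on a runqueue
+ * (the run delay read above), and the number of timeslices run. A
+ * checked sketch reading all three (illustrative helper, not part of
+ * the test):
+ *
+ *	static int read_schedstat(long *oncpu_ns, long *wait_ns, long *slices)
+ *	{
+ *		char path[64];
+ *		FILE *fp;
+ *		int n;
+ *
+ *		snprintf(path, sizeof(path), "/proc/%ld/schedstat",
+ *			 syscall(SYS_gettid));
+ *		fp = fopen(path, "r");
+ *		if (!fp)
+ *			return -1;
+ *		n = fscanf(fp, "%ld %ld %ld", oncpu_ns, wait_ns, slices);
+ *		fclose(fp);
+ *		return n == 3 ? 0 : -1;
+ *	}
+ */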
+
static int cmp_timespec(struct timespec *a, struct timespec *b)
{
if (a->tv_sec > b->tv_sec)
@@ -66,12 +130,14 @@ int main(int argc, char *argv[])
{
struct timespec min_ts, max_ts, vm_ts;
- if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
- KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO)) {
print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
exit(KSFT_SKIP);
}
+ bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+
clock_gettime(CLOCK_REALTIME, &min_ts);
vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
@@ -80,6 +146,7 @@ int main(int argc, char *argv[])
/* Map a region for the shared_info page */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
struct kvm_xen_hvm_config hvmc = {
.flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
@@ -111,6 +178,17 @@ int main(int argc, char *argv[])
};
vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+ if (do_runstate_tests) {
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = RUNSTATE_ADDR,
+ };
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
+ }
+
+ struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+ rs->state = 0x5a;
+
for (;;) {
volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
@@ -126,8 +204,56 @@ int main(int argc, char *argv[])
case UCALL_ABORT:
TEST_FAIL("%s", (const char *)uc.args[0]);
/* NOT REACHED */
- case UCALL_SYNC:
+ case UCALL_SYNC: {
+ struct kvm_xen_vcpu_attr rst;
+ long rundelay;
+
+ /* If no runstate support, bail out early */
+ if (!do_runstate_tests)
+ goto done;
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+
+ switch (uc.args[1]) {
+ case RUNSTATE_running...RUNSTATE_offline:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+ rst.u.runstate.state = uc.args[1];
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+ case 4:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = (uint64_t)-1;
+ rst.u.runstate.time_blocked =
+ 0x5a - rs->time[RUNSTATE_blocked];
+ rst.u.runstate.time_offline =
+ 0x6b6b - rs->time[RUNSTATE_offline];
+ rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+ rst.u.runstate.time_offline;
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case 5:
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = RUNSTATE_running;
+ rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+ rst.u.runstate.time_blocked = 0x6b6b;
+ rst.u.runstate.time_offline = 0x5a;
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+ case 6:
+ /* Yield until scheduler delay exceeds target */
+ rundelay = get_run_delay() + MIN_STEAL_TIME;
+ do {
+ sched_yield();
+ } while (get_run_delay() < rundelay);
+ break;
+ }
break;
+ }
case UCALL_DONE:
goto done;
default:
@@ -162,6 +288,33 @@ int main(int argc, char *argv[])
TEST_ASSERT(ti2->version && !(ti2->version & 1),
"Bad time_info version %x", ti->version);
+ if (do_runstate_tests) {
+ /*
+ * Fetch the runstate and sanity-check it. Strictly speaking, in the
+ * general case the numbers need not be identical, but here we know
+ * the vCPU is no longer running, so they must match exactly.
+ */
+ struct kvm_xen_vcpu_attr rst = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+ };
+ vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+ TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+ }
kvm_vm_free(vm);
return 0;
}
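
The GUEST_SYNC(4) case above relies on the semantics of KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: each time_* field is a signed delta, and state == (uint64_t)-1 leaves the current state untouched. Setting time_runnable to -(time_blocked + time_offline) keeps state_entry_time equal to the sum of the four buckets, the invariant the test asserts on every sync. A sketch of that delta computation (struct and helper names are illustrative):

#include <stdint.h>

struct runstate_deltas {
	int64_t runnable, blocked, offline;
};

/* Compute deltas that move the blocked/offline buckets to the target
 * values while leaving state_entry_time == sum(time[]) intact. */
static struct runstate_deltas
make_runstate_adjust(uint64_t cur_blocked, uint64_t cur_offline,
		     uint64_t want_blocked, uint64_t want_offline)
{
	struct runstate_deltas d = {
		.blocked = (int64_t)(want_blocked - cur_blocked),
		.offline = (int64_t)(want_offline - cur_offline),
	};

	/* Whatever is added to (or taken from) blocked and offline is
	 * taken from (or given back to) runnable, keeping the total
	 * constant. */
	d.runnable = -(d.blocked + d.offline);
	return d;
}

This mirrors how case 4 lands the guest on exactly 0x5a blocked and 0x6b6b offline nanoseconds.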
diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c
index 57b20802e71b..b69de9263ee6 100644
--- a/tools/tracing/latency/latency-collector.c
+++ b/tools/tracing/latency/latency-collector.c
@@ -1650,7 +1650,7 @@ static void start_printthread(void)
if (ufd < 0 ||
read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
printf(
-"Warning! Using trivial random nummer seed, since %s not available\n",
+"Warning! Using trivial random number seed, since %s not available\n",
DEV_URANDOM);
fflush(stdout);
*seed = i;
@@ -1711,8 +1711,8 @@ static void show_usage(void)
"\t\t\tbeginning, end, and backtrace.\n\n"
"-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
-"\t\t\toption causes ftrace to show the functionph of how\n"
-"\t\t\tfunctions are calling other functions.\n\n"
+"\t\t\toption causes ftrace to show the graph of how functions\n"
+"\t\t\tare calling other functions.\n\n"
"-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
"\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 001b9de4e727..383df23514b9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -486,6 +486,24 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
+ if (likely(kvm->mmu_notifier_count == 1)) {
+ kvm->mmu_notifier_range_start = range->start;
+ kvm->mmu_notifier_range_end = range->end;
+ } else {
+ /*
+ * Fully tracking multiple concurrent ranges has diminishing
+ * returns. Keep things simple and just find the minimal range
+ * which includes the current and new ranges. As there won't be
+ * enough information to subtract a range after its invalidate
+ * completes, any ranges invalidated concurrently will
+ * accumulate and persist until all outstanding invalidates
+ * complete.
+ */
+ kvm->mmu_notifier_range_start =
+ min(kvm->mmu_notifier_range_start, range->start);
+ kvm->mmu_notifier_range_end =
+ max(kvm->mmu_notifier_range_end, range->end);
+ }
need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
range->flags);
/* we have to flush the TLB before the pages can be freed */
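
The bookkeeping added above collapses concurrent invalidations into a single covering interval: the first invalidate records its range verbatim, and each overlapping one widens [start, end) to the smallest interval containing both, since nothing can be subtracted until the count drops back to zero. Reduced to its essentials (types and names are illustrative):

struct inval_window {
	unsigned long start, end;	/* covering range, [start, end) */
	unsigned int count;		/* invalidates in progress */
};

static void track_invalidate(struct inval_window *w,
			     unsigned long start, unsigned long end)
{
	if (++w->count == 1) {
		/* First invalidate: take the range as-is. */
		w->start = start;
		w->end = end;
	} else {
		/* Overlapping invalidate: grow to the union. */
		if (start < w->start)
			w->start = start;
		if (end > w->end)
			w->end = end;
	}
}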
@@ -2023,10 +2041,13 @@ exit:
kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
bool atomic, bool *async, bool write_fault,
- bool *writable)
+ bool *writable, hva_t *hva)
{
unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
+ if (hva)
+ *hva = addr;
+
if (addr == KVM_HVA_ERR_RO_BAD) {
if (writable)
*writable = false;
@@ -2054,19 +2075,19 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
bool *writable)
{
return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
- write_fault, writable);
+ write_fault, writable, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
{
- return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
+ return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL);
}
EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
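
The new hva argument to __gfn_to_pfn_memslot() is an optional out-pointer: callers that also want the host virtual address backing the gfn pass a hva_t *, and everyone else passes NULL, as the three updated wrappers do. A sketch of a caller using it (the wrapper name is illustrative):

static kvm_pfn_t lookup_pfn_and_hva(struct kvm_memory_slot *slot,
				    gfn_t gfn, hva_t *hva)
{
	/* atomic=false, async=NULL, write_fault=true, writable=NULL;
	 * *hva is filled in before any error checking, so on failure it
	 * may hold an error cookie such as KVM_HVA_ERR_RO_BAD. */
	return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true,
				    NULL, hva);
}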