summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap102
-rw-r--r--Documentation/ABI/testing/sysfs-bus-cxl15
-rw-r--r--Documentation/ABI/testing/sysfs-class-led-trigger-netdev20
-rw-r--r--Documentation/ABI/testing/sysfs-devices-system-cpu13
-rw-r--r--Documentation/ABI/testing/sysfs-module11
-rw-r--r--Documentation/ABI/testing/sysfs-platform-hidma2
-rw-r--r--Documentation/ABI/testing/sysfs-platform-hidma-mgmt20
-rw-r--r--Documentation/admin-guide/devices.txt2
-rw-r--r--Documentation/admin-guide/hw-vuln/gather_data_sampling.rst109
-rw-r--r--Documentation/admin-guide/hw-vuln/index.rst14
-rw-r--r--Documentation/admin-guide/hw-vuln/spectre.rst11
-rw-r--r--Documentation/admin-guide/hw-vuln/srso.rst150
-rw-r--r--Documentation/admin-guide/kdump/vmcoreinfo.rst6
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt62
-rw-r--r--Documentation/arch/arm64/silicon-errata.rst3
-rw-r--r--Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml6
-rw-r--r--Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml3
-rw-r--r--Documentation/devicetree/bindings/net/mediatek,net.yaml7
-rw-r--r--Documentation/devicetree/bindings/net/rockchip-dwmac.yaml10
-rw-r--r--Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml2
-rw-r--r--Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml4
-rw-r--r--Documentation/devicetree/bindings/serial/cavium-uart.txt19
-rw-r--r--Documentation/devicetree/bindings/serial/nxp,lpc1850-uart.txt28
-rw-r--r--Documentation/devicetree/bindings/sound/audio-graph-card2.yaml20
-rw-r--r--Documentation/devicetree/bindings/sound/google,sc7180-trogdor.yaml2
-rw-r--r--Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml2
-rw-r--r--Documentation/filesystems/fscrypt.rst164
-rw-r--r--Documentation/filesystems/idmappings.rst14
-rw-r--r--Documentation/filesystems/locking.rst28
-rw-r--r--Documentation/filesystems/porting.rst25
-rw-r--r--Documentation/filesystems/tmpfs.rst85
-rw-r--r--Documentation/filesystems/vfs.rst12
-rw-r--r--Documentation/i2c/writing-clients.rst2
-rw-r--r--Documentation/networking/napi.rst13
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.rst4
-rw-r--r--Documentation/process/embargoed-hardware-issues.rst3
-rw-r--r--Documentation/process/security-bugs.rst39
-rw-r--r--MAINTAINERS148
-rw-r--r--Makefile24
-rw-r--r--arch/alpha/include/asm/processor.h13
-rw-r--r--arch/alpha/kernel/setup.c3
-rw-r--r--arch/alpha/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/arm/boot/dts/arm/integratorap.dts2
-rw-r--r--arch/arm/boot/dts/microchip/sam9x60.dtsi26
-rw-r--r--arch/arm/boot/dts/nspire/nspire.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts10
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6sll.dtsi2
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx6sx.dtsi14
-rw-r--r--arch/arm/boot/dts/nxp/imx/imx7s.dtsi6
-rw-r--r--arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi9
-rw-r--r--arch/arm/configs/axm55xx_defconfig2
-rw-r--r--arch/arm/configs/davinci_all_defconfig2
-rw-r--r--arch/arm/configs/exynos_defconfig2
-rw-r--r--arch/arm/configs/footbridge_defconfig2
-rw-r--r--arch/arm/configs/imx_v6_v7_defconfig2
-rw-r--r--arch/arm/configs/keystone_defconfig2
-rw-r--r--arch/arm/configs/lpc32xx_defconfig2
-rw-r--r--arch/arm/configs/milbeaut_m10v_defconfig2
-rw-r--r--arch/arm/configs/multi_v7_defconfig2
-rw-r--r--arch/arm/configs/omap1_defconfig2
-rw-r--r--arch/arm/configs/omap2plus_defconfig2
-rw-r--r--arch/arm/configs/pxa_defconfig2
-rw-r--r--arch/arm/configs/rpc_defconfig2
-rw-r--r--arch/arm/configs/s5pv210_defconfig2
-rw-r--r--arch/arm/configs/socfpga_defconfig2
-rw-r--r--arch/arm/configs/spear13xx_defconfig2
-rw-r--r--arch/arm/configs/spear3xx_defconfig2
-rw-r--r--arch/arm/configs/spear6xx_defconfig2
-rw-r--r--arch/arm/include/asm/syscall.h3
-rw-r--r--arch/arm/kernel/entry-common.S1
-rw-r--r--arch/arm/kernel/ptrace.c5
-rw-r--r--arch/arm/mach-pxa/sharpsl_pm.h1
-rw-r--r--arch/arm/mach-pxa/spitz_pm.c1
-rw-r--r--arch/arm/mach-zynq/pm.c2
-rw-r--r--arch/arm/tools/syscall.tbl1
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts2
-rw-r--r--arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts2
l---------arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi1
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts4
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mm.dtsi7
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mn.dtsi6
-rw-r--r--arch/arm64/boot/dts/freescale/imx8mq.dtsi2
-rw-r--r--arch/arm64/boot/dts/freescale/imx93.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/qrb5165-rb5.dts2
-rw-r--r--arch/arm64/boot/dts/qcom/sa8775p-ride.dts4
-rw-r--r--arch/arm64/boot/dts/qcom/sc7180.dtsi4
-rw-r--r--arch/arm64/boot/dts/qcom/sc8180x.dtsi2
-rw-r--r--arch/arm64/boot/dts/qcom/sm8150.dtsi18
-rw-r--r--arch/arm64/boot/dts/qcom/sm8250.dtsi18
-rw-r--r--arch/arm64/boot/dts/qcom/sm8350.dtsi4
-rw-r--r--arch/arm64/boot/dts/renesas/r9a07g044.dtsi16
-rw-r--r--arch/arm64/boot/dts/renesas/r9a07g054.dtsi16
-rw-r--r--arch/arm64/boot/dts/rockchip/px30.dtsi16
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi6
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts2
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi4
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts3
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts1
-rw-r--r--arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts8
-rw-r--r--arch/arm64/configs/defconfig2
-rw-r--r--arch/arm64/include/asm/el2_setup.h44
-rw-r--r--arch/arm64/include/asm/fpsimd.h4
-rw-r--r--arch/arm64/include/asm/kvm_asm.h2
-rw-r--r--arch/arm64/include/asm/kvm_emulate.h21
-rw-r--r--arch/arm64/include/asm/kvm_host.h2
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h26
-rw-r--r--arch/arm64/include/asm/processor.h8
-rw-r--r--arch/arm64/include/asm/unistd.h2
-rw-r--r--arch/arm64/include/asm/unistd32.h2
-rw-r--r--arch/arm64/include/asm/virt.h1
-rw-r--r--arch/arm64/include/uapi/asm/bitsperlong.h24
-rw-r--r--arch/arm64/kernel/fpsimd.c51
-rw-r--r--arch/arm64/kernel/ptrace.c30
-rw-r--r--arch/arm64/kernel/signal.c2
-rw-r--r--arch/arm64/kernel/vdso/vgettimeofday.c4
-rw-r--r--arch/arm64/kvm/arch_timer.c6
-rw-r--r--arch/arm64/kvm/arm.c83
-rw-r--r--arch/arm64/kvm/hyp/hyp-entry.S8
-rw-r--r--arch/arm64/kvm/hyp/include/hyp/switch.h1
-rw-r--r--arch/arm64/kvm/hyp/nvhe/ffa.c15
-rw-r--r--arch/arm64/kvm/hyp/nvhe/host.S10
-rw-r--r--arch/arm64/kvm/hyp/nvhe/psci-relay.c2
-rw-r--r--arch/arm64/kvm/hyp/nvhe/switch.c2
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c47
-rw-r--r--arch/arm64/kvm/mmu.c18
-rw-r--r--arch/arm64/kvm/pkvm.c2
-rw-r--r--arch/arm64/kvm/sys_regs.c42
-rw-r--r--arch/arm64/kvm/vgic/vgic-v3.c2
-rw-r--r--arch/arm64/kvm/vgic/vgic-v4.c7
-rw-r--r--arch/arm64/mm/trans_pgd.c4
-rw-r--r--arch/arm64/net/bpf_jit_comp.c8
-rw-r--r--arch/arm64/tools/sysreg12
-rw-r--r--arch/ia64/configs/bigsur_defconfig2
-rw-r--r--arch/ia64/configs/generic_defconfig2
-rw-r--r--arch/ia64/configs/gensparse_defconfig2
-rw-r--r--arch/ia64/configs/tiger_defconfig2
-rw-r--r--arch/ia64/include/asm/processor.h3
-rw-r--r--arch/ia64/kernel/sys_ia64.c2
-rw-r--r--arch/ia64/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/loongarch/Kconfig3
-rw-r--r--arch/loongarch/Makefile6
-rw-r--r--arch/loongarch/configs/loongson3_defconfig2
-rw-r--r--arch/loongarch/include/asm/Kbuild1
-rw-r--r--arch/loongarch/include/asm/fpu.h37
-rw-r--r--arch/loongarch/include/asm/ptrace.h2
-rw-r--r--arch/loongarch/include/asm/smp.h2
-rw-r--r--arch/loongarch/kernel/fpu.S2
-rw-r--r--arch/loongarch/kernel/hw_breakpoint.c3
-rw-r--r--arch/loongarch/kernel/mcount.S2
-rw-r--r--arch/loongarch/kernel/mcount_dyn.S1
-rw-r--r--arch/loongarch/kernel/process.c7
-rw-r--r--arch/loongarch/kernel/ptrace.c4
-rw-r--r--arch/loongarch/kernel/setup.c16
-rw-r--r--arch/loongarch/kernel/smp.c2
-rw-r--r--arch/loongarch/kernel/traps.c14
-rw-r--r--arch/loongarch/lib/clear_user.S5
-rw-r--r--arch/loongarch/lib/copy_user.S5
-rw-r--r--arch/loongarch/lib/memcpy.S2
-rw-r--r--arch/loongarch/lib/memmove.S2
-rw-r--r--arch/loongarch/lib/memset.S2
-rw-r--r--arch/loongarch/lib/unaligned.S1
-rw-r--r--arch/loongarch/mm/page.S2
-rw-r--r--arch/loongarch/mm/tlbex.S1
-rw-r--r--arch/loongarch/net/bpf_jit.h2
-rw-r--r--arch/m68k/fpsp040/skeleton.S4
-rw-r--r--arch/m68k/ifpsp060/os.S4
-rw-r--r--arch/m68k/kernel/relocate_kernel.S4
-rw-r--r--arch/m68k/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/microblaze/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/mips/configs/bigsur_defconfig2
-rw-r--r--arch/mips/configs/fuloong2e_defconfig2
-rw-r--r--arch/mips/configs/ip22_defconfig2
-rw-r--r--arch/mips/configs/ip32_defconfig2
-rw-r--r--arch/mips/configs/jazz_defconfig2
-rw-r--r--arch/mips/configs/lemote2f_defconfig2
-rw-r--r--arch/mips/configs/loongson2k_defconfig2
-rw-r--r--arch/mips/configs/loongson3_defconfig2
-rw-r--r--arch/mips/configs/mtx1_defconfig2
-rw-r--r--arch/mips/configs/pic32mzda_defconfig2
-rw-r--r--arch/mips/configs/rm200_defconfig2
-rw-r--r--arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h2
-rw-r--r--arch/mips/kernel/syscalls/syscall_n32.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_n64.tbl1
-rw-r--r--arch/mips/kernel/syscalls/syscall_o32.tbl1
-rw-r--r--arch/parisc/Kconfig.debug2
-rw-r--r--arch/parisc/boot/compressed/misc.c10
-rw-r--r--arch/parisc/configs/generic-32bit_defconfig2
-rw-r--r--arch/parisc/configs/generic-64bit_defconfig2
-rw-r--r--arch/parisc/include/asm/dma.h2
-rw-r--r--arch/parisc/include/asm/ftrace.h4
-rw-r--r--arch/parisc/include/asm/spinlock.h2
-rw-r--r--arch/parisc/include/asm/spinlock_types.h6
-rw-r--r--arch/parisc/kernel/entry.S47
-rw-r--r--arch/parisc/kernel/firmware.c8
-rw-r--r--arch/parisc/kernel/ftrace.c2
-rw-r--r--arch/parisc/kernel/parisc_ksyms.c7
-rw-r--r--arch/parisc/kernel/pci-dma.c12
-rw-r--r--arch/parisc/kernel/pdt.c4
-rw-r--r--arch/parisc/kernel/perf.c2
-rw-r--r--arch/parisc/kernel/processor.c1
-rw-r--r--arch/parisc/kernel/setup.c48
-rw-r--r--arch/parisc/kernel/signal.c2
-rw-r--r--arch/parisc/kernel/syscall.S23
-rw-r--r--arch/parisc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/parisc/kernel/unaligned.c19
-rw-r--r--arch/parisc/lib/ucmpdi2.c3
-rw-r--r--arch/parisc/mm/fault.c50
-rw-r--r--arch/parisc/mm/fixmap.c3
-rw-r--r--arch/parisc/mm/init.c38
-rw-r--r--arch/parisc/mm/ioremap.c9
-rw-r--r--arch/powerpc/configs/44x/sam440ep_defconfig2
-rw-r--r--arch/powerpc/configs/85xx/stx_gp3_defconfig2
-rw-r--r--arch/powerpc/configs/cell_defconfig2
-rw-r--r--arch/powerpc/configs/ep8248e_defconfig2
-rw-r--r--arch/powerpc/configs/mgcoge_defconfig2
-rw-r--r--arch/powerpc/configs/pasemi_defconfig2
-rw-r--r--arch/powerpc/configs/pmac32_defconfig2
-rw-r--r--arch/powerpc/configs/powernv_defconfig2
-rw-r--r--arch/powerpc/configs/ppc64_defconfig2
-rw-r--r--arch/powerpc/configs/ppc64e_defconfig2
-rw-r--r--arch/powerpc/configs/ppc6xx_defconfig2
-rw-r--r--arch/powerpc/configs/ps3_defconfig2
-rw-r--r--arch/powerpc/crypto/.gitignore3
-rw-r--r--arch/powerpc/include/asm/bug.h69
-rw-r--r--arch/powerpc/include/asm/elf.h6
-rw-r--r--arch/powerpc/include/asm/processor.h3
-rw-r--r--arch/powerpc/include/asm/thread_info.h6
-rw-r--r--arch/powerpc/include/asm/word-at-a-time.h2
-rw-r--r--arch/powerpc/kernel/head_64.S3
-rw-r--r--arch/powerpc/kernel/rtas_flash.c6
-rw-r--r--arch/powerpc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/powerpc/kernel/trace/ftrace_mprofile.S9
-rw-r--r--arch/powerpc/kernel/traps.c9
-rw-r--r--arch/powerpc/mm/book3s64/subpage_prot.c1
-rw-r--r--arch/powerpc/mm/init_64.c3
-rw-r--r--arch/powerpc/mm/kasan/Makefile1
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_lpbfifo.c6
-rw-r--r--arch/powerpc/platforms/85xx/smp.c2
-rw-r--r--arch/powerpc/platforms/cell/spufs/inode.c2
-rw-r--r--arch/powerpc/platforms/powermac/time.c6
-rw-r--r--arch/powerpc/platforms/pseries/vas.c9
-rw-r--r--arch/riscv/Kconfig32
-rw-r--r--arch/riscv/configs/defconfig2
-rw-r--r--arch/riscv/configs/rv32_defconfig2
-rw-r--r--arch/riscv/include/asm/acpi.h2
-rw-r--r--arch/riscv/include/asm/cacheflush.h4
-rw-r--r--arch/riscv/include/asm/insn.h15
-rw-r--r--arch/riscv/include/asm/mmio.h16
-rw-r--r--arch/riscv/include/asm/pgtable.h2
-rw-r--r--arch/riscv/include/asm/vector.h3
-rw-r--r--arch/riscv/include/asm/vmalloc.h4
-rw-r--r--arch/riscv/include/uapi/asm/bitsperlong.h14
-rw-r--r--arch/riscv/include/uapi/asm/ptrace.h1
-rw-r--r--arch/riscv/kernel/acpi.c4
-rw-r--r--arch/riscv/kernel/compat_vdso/Makefile8
-rw-r--r--arch/riscv/kernel/cpu.c5
-rw-r--r--arch/riscv/kernel/crash_core.c2
-rw-r--r--arch/riscv/kernel/elf_kexec.c3
-rw-r--r--arch/riscv/kernel/irq.c3
-rw-r--r--arch/riscv/kernel/ptrace.c69
-rw-r--r--arch/riscv/kernel/smp.c5
-rw-r--r--arch/riscv/kernel/traps.c9
-rw-r--r--arch/riscv/lib/uaccess.S11
-rw-r--r--arch/riscv/mm/init.c16
-rw-r--r--arch/riscv/mm/kasan_init.c1
-rw-r--r--arch/riscv/mm/pageattr.c1
-rw-r--r--arch/s390/configs/debug_defconfig13
-rw-r--r--arch/s390/configs/defconfig11
-rw-r--r--arch/s390/configs/zfcpdump_defconfig1
-rw-r--r--arch/s390/crypto/paes_s390.c2
-rw-r--r--arch/s390/hypfs/inode.c4
-rw-r--r--arch/s390/include/uapi/asm/ptrace.h2
-rw-r--r--arch/s390/kernel/sthyi.c6
-rw-r--r--arch/s390/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/s390/kvm/intercept.c9
-rw-r--r--arch/s390/kvm/pv.c8
-rw-r--r--arch/s390/mm/fault.c2
-rw-r--r--arch/s390/mm/gmap.c6
-rw-r--r--arch/s390/mm/vmem.c2
-rw-r--r--arch/sh/configs/espt_defconfig2
-rw-r--r--arch/sh/configs/sdk7780_defconfig2
-rw-r--r--arch/sh/configs/sdk7786_defconfig2
-rw-r--r--arch/sh/configs/sh03_defconfig2
-rw-r--r--arch/sh/configs/sh7763rdp_defconfig2
-rw-r--r--arch/sh/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/sparc/configs/sparc32_defconfig2
-rw-r--r--arch/sparc/include/asm/processor_64.h3
-rw-r--r--arch/sparc/kernel/syscalls/syscall.tbl1
-rw-r--r--arch/um/configs/i386_defconfig2
-rw-r--r--arch/um/configs/x86_64_defconfig2
-rw-r--r--arch/um/os-Linux/sigio.c7
-rw-r--r--arch/x86/Kconfig26
-rw-r--r--arch/x86/boot/compressed/idt_64.c9
-rw-r--r--arch/x86/boot/compressed/sev.c37
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/configs/x86_64_defconfig2
-rw-r--r--arch/x86/entry/entry_64.S16
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--arch/x86/entry/vdso/vma.c4
-rw-r--r--arch/x86/hyperv/hv_apic.c4
-rw-r--r--arch/x86/hyperv/hv_init.c21
-rw-r--r--arch/x86/hyperv/hv_vtl.c4
-rw-r--r--arch/x86/hyperv/ivm.c7
-rw-r--r--arch/x86/hyperv/mmu.c12
-rw-r--r--arch/x86/hyperv/nested.c11
-rw-r--r--arch/x86/include/asm/acpi.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h14
-rw-r--r--arch/x86/include/asm/entry-common.h1
-rw-r--r--arch/x86/include/asm/kvm-x86-ops.h1
-rw-r--r--arch/x86/include/asm/kvm_host.h3
-rw-r--r--arch/x86/include/asm/linkage.h2
-rw-r--r--arch/x86/include/asm/microcode.h1
-rw-r--r--arch/x86/include/asm/microcode_amd.h2
-rw-r--r--arch/x86/include/asm/mshyperv.h2
-rw-r--r--arch/x86/include/asm/msr-index.h13
-rw-r--r--arch/x86/include/asm/nospec-branch.h54
-rw-r--r--arch/x86/include/asm/processor.h12
-rw-r--r--arch/x86/include/asm/segment.h2
-rw-r--r--arch/x86/kernel/acpi/boot.c4
-rw-r--r--arch/x86/kernel/alternative.c4
-rw-r--r--arch/x86/kernel/cpu/amd.c239
-rw-r--r--arch/x86/kernel/cpu/bugs.c371
-rw-r--r--arch/x86/kernel/cpu/common.c46
-rw-r--r--arch/x86/kernel/cpu/cpu.h1
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c4
-rw-r--r--arch/x86/kernel/fpu/context.h3
-rw-r--r--arch/x86/kernel/fpu/core.c2
-rw-r--r--arch/x86/kernel/fpu/xstate.c7
-rw-r--r--arch/x86/kernel/kprobes/opt.c40
-rw-r--r--arch/x86/kernel/static_call.c13
-rw-r--r--arch/x86/kernel/traps.c18
-rw-r--r--arch/x86/kernel/vmlinux.lds.S37
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/lapic.c25
-rw-r--r--arch/x86/kvm/svm/sev.c124
-rw-r--r--arch/x86/kvm/svm/svm.c22
-rw-r--r--arch/x86/kvm/svm/svm.h26
-rw-r--r--arch/x86/kvm/svm/vmenter.S4
-rw-r--r--arch/x86/kvm/vmx/vmenter.S8
-rw-r--r--arch/x86/kvm/vmx/vmx.c62
-rw-r--r--arch/x86/kvm/vmx/vmx_ops.h12
-rw-r--r--arch/x86/kvm/x86.c55
-rw-r--r--arch/x86/lib/retpoline.S160
-rw-r--r--arch/xtensa/kernel/syscalls/syscall.tbl1
-rw-r--r--block/bdev.c69
-rw-r--r--block/blk-cgroup.c2
-rw-r--r--block/blk-core.c9
-rw-r--r--block/blk-crypto-fallback.c36
-rw-r--r--block/blk-iocost.c9
-rw-r--r--block/blk-mq.c32
-rw-r--r--block/disk-events.c23
-rw-r--r--block/elevator.c3
-rw-r--r--block/fops.c7
-rw-r--r--block/genhd.c45
-rw-r--r--block/ioctl.c9
-rw-r--r--block/partitions/core.c5
-rw-r--r--crypto/af_alg.c4
-rw-r--r--drivers/accel/habanalabs/common/habanalabs.h9
-rw-r--r--drivers/accel/ivpu/ivpu_gem.c8
-rw-r--r--drivers/accel/qaic/qaic_control.c65
-rw-r--r--drivers/accel/qaic/qaic_data.c1
-rw-r--r--drivers/acpi/arm64/iort.c3
-rw-r--r--drivers/acpi/resource.c68
-rw-r--r--drivers/acpi/scan.c1
-rw-r--r--drivers/android/binder.c1
-rw-r--r--drivers/android/binder_alloc.c6
-rw-r--r--drivers/android/binder_alloc.h1
-rw-r--r--drivers/android/binderfs.c8
-rw-r--r--drivers/ata/libata-core.c4
-rw-r--r--drivers/ata/libata-scsi.c7
-rw-r--r--drivers/ata/pata_arasan_cf.c3
-rw-r--r--drivers/ata/pata_ns87415.c2
-rw-r--r--drivers/ata/pata_octeon_cf.c6
-rw-r--r--drivers/ata/pata_parport/aten.c2
-rw-r--r--drivers/ata/pata_parport/bpck.c2
-rw-r--r--drivers/ata/pata_parport/bpck6.c3
-rw-r--r--drivers/ata/pata_parport/comm.c2
-rw-r--r--drivers/ata/pata_parport/dstr.c2
-rw-r--r--drivers/ata/pata_parport/epat.c3
-rw-r--r--drivers/ata/pata_parport/epia.c3
-rw-r--r--drivers/ata/pata_parport/fit2.c3
-rw-r--r--drivers/ata/pata_parport/fit3.c3
-rw-r--r--drivers/ata/pata_parport/friq.c2
-rw-r--r--drivers/ata/pata_parport/frpw.c2
-rw-r--r--drivers/ata/pata_parport/kbic.c3
-rw-r--r--drivers/ata/pata_parport/ktti.c2
-rw-r--r--drivers/ata/pata_parport/on20.c2
-rw-r--r--drivers/ata/pata_parport/on26.c2
-rw-r--r--drivers/base/cpu.c85
-rw-r--r--drivers/base/power/power.h1
-rw-r--r--drivers/base/power/wakeirq.c61
-rw-r--r--drivers/base/regmap/regcache-rbtree.c4
-rw-r--r--drivers/base/regmap/regcache.c3
-rw-r--r--drivers/base/regmap/regmap-i2c.c8
-rw-r--r--drivers/base/regmap/regmap-kunit.c5
-rw-r--r--drivers/base/regmap/regmap-spi-avmm.c2
-rw-r--r--drivers/base/regmap/regmap.c6
-rw-r--r--drivers/block/amiflop.c1
-rw-r--r--drivers/block/floppy.c2
-rw-r--r--drivers/block/loop.c46
-rw-r--r--drivers/block/nbd.c8
-rw-r--r--drivers/block/rbd.c150
-rw-r--r--drivers/block/rnbd/rnbd-clt-sysfs.c2
-rw-r--r--drivers/block/ublk_drv.c11
-rw-r--r--drivers/block/zram/zram_drv.c32
-rw-r--r--drivers/bluetooth/btusb.c1
-rw-r--r--drivers/bus/ti-sysc.c2
-rw-r--r--drivers/char/tpm/st33zp24/i2c.c2
-rw-r--r--drivers/char/tpm/tpm-chip.c76
-rw-r--r--drivers/char/tpm/tpm_crb.c49
-rw-r--r--drivers/char/tpm/tpm_i2c_atmel.c2
-rw-r--r--drivers/char/tpm/tpm_i2c_infineon.c2
-rw-r--r--drivers/char/tpm/tpm_i2c_nuvoton.c2
-rw-r--r--drivers/char/tpm/tpm_tis.c43
-rw-r--r--drivers/char/tpm/tpm_tis_core.c112
-rw-r--r--drivers/char/tpm/tpm_tis_core.h4
-rw-r--r--drivers/char/tpm/tpm_tis_i2c.c61
-rw-r--r--drivers/char/tpm/tpm_tis_i2c_cr50.c2
-rw-r--r--drivers/char/tpm/tpm_tis_spi_main.c8
-rw-r--r--drivers/char/tpm/tpm_vtpm_proxy.c30
-rw-r--r--drivers/clk/Kconfig1
-rw-r--r--drivers/clk/clk-devres.c13
-rw-r--r--drivers/clk/imx/clk-imx93.c2
-rw-r--r--drivers/clk/keystone/syscon-clk.c6
-rw-r--r--drivers/clk/mediatek/clk-mt8183.c27
-rw-r--r--drivers/clk/meson/clk-pll.c4
-rw-r--r--drivers/counter/Kconfig14
-rw-r--r--drivers/cpufreq/amd-pstate.c10
-rw-r--r--drivers/cpuidle/cpuidle-psci-domain.c39
-rw-r--r--drivers/cpuidle/dt_idle_genpd.c24
-rw-r--r--drivers/cpuidle/dt_idle_genpd.h7
-rw-r--r--drivers/crypto/caam/ctrl.c4
-rw-r--r--drivers/cxl/Kconfig3
-rw-r--r--drivers/cxl/acpi.c5
-rw-r--r--drivers/cxl/core/mbox.c45
-rw-r--r--drivers/cxl/core/memdev.c19
-rw-r--r--drivers/cxl/cxlmem.h18
-rw-r--r--drivers/dma-buf/dma-resv.c13
-rw-r--r--drivers/dma-buf/sw_sync.c18
-rw-r--r--drivers/dma/Kconfig2
-rw-r--r--drivers/dma/idxd/device.c4
-rw-r--r--drivers/dma/mcf-edma.c13
-rw-r--r--drivers/dma/owl-dma.c2
-rw-r--r--drivers/dma/pl330.c18
-rw-r--r--drivers/dma/xilinx/xdma.c6
-rw-r--r--drivers/firmware/arm_scmi/mailbox.c4
-rw-r--r--drivers/firmware/arm_scmi/raw_mode.c5
-rw-r--r--drivers/firmware/arm_scmi/smc.c21
-rw-r--r--drivers/firmware/smccc/soc_id.c5
-rw-r--r--drivers/gpio/gpio-mvebu.c26
-rw-r--r--drivers/gpio/gpio-sim.c16
-rw-r--r--drivers/gpio/gpio-tps68470.c6
-rw-r--r--drivers/gpio/gpio-ws16c48.c2
-rw-r--r--drivers/gpio/gpiolib-sysfs.c7
-rw-r--r--drivers/gpio/gpiolib.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h1
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c36
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c41
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c17
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/psp_v13_0.c16
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_crat.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c5
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.h6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c13
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c9
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c261
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h7
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c12
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c112
-rw-r--r--drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h11
-rw-r--r--drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c6
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c7
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/Makefile3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c185
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h36
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c4
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c2
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c3
-rw-r--r--drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c5
-rw-r--r--drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c6
-rw-r--r--drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c14
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c4
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c22
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c26
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c24
-rw-r--r--drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c14
-rw-r--r--drivers/gpu/drm/bridge/ite-it6505.c4
-rw-r--r--drivers/gpu/drm/bridge/lontium-lt9611.c4
-rw-r--r--drivers/gpu/drm/bridge/samsung-dsim.c27
-rw-r--r--drivers/gpu/drm/drm_atomic.c11
-rw-r--r--drivers/gpu/drm/drm_client_modeset.c6
-rw-r--r--drivers/gpu/drm/drm_edid.c29
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c6
-rw-r--r--drivers/gpu/drm/drm_probe_helper.c68
-rw-r--r--drivers/gpu/drm/i915/Makefile5
-rw-r--r--drivers/gpu/drm/i915/display/intel_display_device.c27
-rw-r--r--drivers/gpu/drm/i915/display/intel_dpt.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_fbdev.c5
-rw-r--r--drivers/gpu/drm/i915/display/intel_hotplug.c4
-rw-r--r--drivers/gpu/drm/i915/display/intel_sdvo.c2
-rw-r--r--drivers/gpu/drm/i915/gem/selftests/huge_pages.c6
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.c140
-rw-r--r--drivers/gpu/drm/i915/gt/gen8_engine_cs.h21
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gpu_commands.h2
-rw-r--r--drivers/gpu/drm/i915/gt/intel_gt_regs.h16
-rw-r--r--drivers/gpu/drm/i915/gt/intel_lrc.c17
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c22
-rw-r--r--drivers/gpu/drm/i915/gt/uc/intel_huc.c2
-rw-r--r--drivers/gpu/drm/i915/gvt/edid.c2
-rw-r--r--drivers/gpu/drm/i915/i915_active.c99
-rw-r--r--drivers/gpu/drm/i915/i915_driver.c33
-rw-r--r--drivers/gpu/drm/i915/i915_pci.c5
-rw-r--r--drivers/gpu/drm/i915/i915_perf.c1
-rw-r--r--drivers/gpu/drm/i915/i915_request.c11
-rw-r--r--drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/a5xx_gpu.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h2
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_device.c2
-rw-r--r--drivers/gpu/drm/msm/adreno/adreno_gpu.h12
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h13
-rw-r--r--drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c8
-rw-r--r--drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c2
-rw-r--r--drivers/gpu/drm/msm/msm_fence.c6
-rw-r--r--drivers/gpu/drm/msm/msm_gem_submit.c16
-rw-r--r--drivers/gpu/drm/msm/msm_mdss.c19
-rw-r--r--drivers/gpu/drm/nouveau/dispnv50/disp.c4
-rw-r--r--drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h4
-rw-r--r--drivers/gpu/drm/nouveau/nouveau_connector.c13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c48
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c27
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c4
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c10
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c1
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c13
-rw-r--r--drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c11
-rw-r--r--drivers/gpu/drm/panel/panel-jdi-lt070me05000.c36
-rw-r--r--drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c1
-rw-r--r--drivers/gpu/drm/panel/panel-simple.c24
-rw-r--r--drivers/gpu/drm/panfrost/panfrost_devfreq.c2
-rw-r--r--drivers/gpu/drm/qxl/qxl_drv.h2
-rw-r--r--drivers/gpu/drm/qxl/qxl_dumb.c5
-rw-r--r--drivers/gpu/drm/qxl/qxl_gem.c25
-rw-r--r--drivers/gpu/drm/qxl/qxl_ioctl.c6
-rw-r--r--drivers/gpu/drm/rockchip/rockchip_drm_vop.c17
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo.c3
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c6
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.h8
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h12
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c35
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_kms.c6
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c3
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_shader.c3
-rw-r--r--drivers/hv/connection.c13
-rw-r--r--drivers/hv/hv_balloon.c2
-rw-r--r--drivers/hv/hv_common.c10
-rw-r--r--drivers/hwmon/aquacomputer_d5next.c39
-rw-r--r--drivers/hwmon/k10temp.c17
-rw-r--r--drivers/hwmon/nct6775-core.c28
-rw-r--r--drivers/hwmon/nct6775-platform.c2
-rw-r--r--drivers/hwmon/nct6775.h1
-rw-r--r--drivers/hwmon/nct7802.c2
-rw-r--r--drivers/hwmon/oxp-sensors.c38
-rw-r--r--drivers/hwmon/pmbus/bel-pfe.c16
-rw-r--r--drivers/hwmon/pmbus/pmbus_core.c20
-rw-r--r--drivers/i2c/busses/i2c-bcm-iproc.c11
-rw-r--r--drivers/i2c/busses/i2c-designware-master.c16
-rw-r--r--drivers/i2c/busses/i2c-hisi.c8
-rw-r--r--drivers/i2c/busses/i2c-imx-lpi2c.c3
-rw-r--r--drivers/i2c/busses/i2c-sun6i-p2wi.c3
-rw-r--r--drivers/i2c/busses/i2c-tegra.c3
-rw-r--r--drivers/idle/intel_idle.c172
-rw-r--r--drivers/iio/adc/ad7192.c16
-rw-r--r--drivers/iio/adc/ina2xx-adc.c9
-rw-r--r--drivers/iio/adc/meson_saradc.c23
-rw-r--r--drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c2
-rw-r--r--drivers/iio/frequency/admv1013.c5
-rw-r--r--drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c2
-rw-r--r--drivers/iio/industrialio-core.c5
-rw-r--r--drivers/iio/light/rohm-bu27008.c22
-rw-r--r--drivers/iio/light/rohm-bu27034.c22
-rw-r--r--drivers/infiniband/core/cma.c2
-rw-r--r--drivers/infiniband/core/umem.c3
-rw-r--r--drivers/infiniband/hw/bnxt_re/ib_verbs.c12
-rw-r--r--drivers/infiniband/hw/bnxt_re/main.c4
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.c28
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_fp.h1
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_rcfw.c9
-rw-r--r--drivers/infiniband/hw/bnxt_re/qplib_res.c1
-rw-r--r--drivers/infiniband/hw/hfi1/chip.c1
-rw-r--r--drivers/infiniband/hw/irdma/ctrl.c31
-rw-r--r--drivers/infiniband/hw/irdma/defs.h46
-rw-r--r--drivers/infiniband/hw/irdma/hw.c3
-rw-r--r--drivers/infiniband/hw/irdma/main.h2
-rw-r--r--drivers/infiniband/hw/irdma/puda.c6
-rw-r--r--drivers/infiniband/hw/irdma/type.h2
-rw-r--r--drivers/infiniband/hw/irdma/uk.c5
-rw-r--r--drivers/infiniband/hw/irdma/utils.c8
-rw-r--r--drivers/infiniband/hw/mlx4/qp.c18
-rw-r--r--drivers/infiniband/hw/mthca/mthca_qp.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_fs.c3
-rw-r--r--drivers/infiniband/sw/rxe/rxe_mw.c3
-rw-r--r--drivers/interconnect/qcom/bcm-voter.c5
-rw-r--r--drivers/interconnect/qcom/icc-rpmh.h2
-rw-r--r--drivers/interconnect/qcom/sa8775p.c1
-rw-r--r--drivers/interconnect/qcom/sm8450.c9
-rw-r--r--drivers/interconnect/qcom/sm8550.c17
-rw-r--r--drivers/iommu/iommufd/device.c12
-rw-r--r--drivers/iommu/iommufd/iommufd_private.h15
-rw-r--r--drivers/iommu/iommufd/main.c78
-rw-r--r--drivers/iommu/iommufd/pages.c2
-rw-r--r--drivers/irqchip/irq-bcm6345-l1.c14
-rw-r--r--drivers/irqchip/irq-gic-v3-its.c78
-rw-r--r--drivers/irqchip/irq-gic-v3.c62
-rw-r--r--drivers/isdn/hardware/mISDN/hfcpci.c10
-rw-r--r--drivers/isdn/mISDN/dsp.h2
-rw-r--r--drivers/isdn/mISDN/dsp_cmx.c2
-rw-r--r--drivers/isdn/mISDN/dsp_core.c2
-rw-r--r--drivers/leds/trigger/ledtrig-netdev.c8
-rw-r--r--drivers/md/dm-cache-policy-smq.c28
-rw-r--r--drivers/md/dm-integrity.c1
-rw-r--r--drivers/md/dm-raid.c20
-rw-r--r--drivers/md/md.c2
-rw-r--r--drivers/media/cec/usb/pulse8/pulse8-cec.c7
-rw-r--r--drivers/media/i2c/tc358746.c4
-rw-r--r--drivers/media/pci/cx23885/cx23885-dvb.c12
-rw-r--r--drivers/media/platform/amphion/vpu_core.c4
-rw-r--r--drivers/media/platform/amphion/vpu_mbox.c9
-rw-r--r--drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c10
-rw-r--r--drivers/media/platform/mediatek/jpeg/mtk_jpeg_dec_hw.c4
-rw-r--r--drivers/media/platform/mediatek/jpeg/mtk_jpeg_enc_hw.c4
-rw-r--r--drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c2
-rw-r--r--drivers/media/platform/mediatek/vcodec/vdec_msg_queue.c3
-rw-r--r--drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h1
-rw-r--r--drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c135
-rw-r--r--drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h5
-rw-r--r--drivers/media/platform/nxp/imx7-media-csi.c7
-rw-r--r--drivers/media/platform/qcom/venus/hfi_cmds.c4
-rw-r--r--drivers/media/platform/verisilicon/hantro.h46
-rw-r--r--drivers/media/platform/verisilicon/hantro_postproc.c12
-rw-r--r--drivers/media/usb/uvc/uvc_v4l2.c2
-rw-r--r--drivers/memory/tegra/mc.c37
-rw-r--r--drivers/memory/tegra/tegra194.c1
-rw-r--r--drivers/memory/tegra/tegra234.c27
-rw-r--r--drivers/misc/cardreader/rts5227.c2
-rw-r--r--drivers/misc/cardreader/rts5228.c18
-rw-r--r--drivers/misc/cardreader/rts5249.c3
-rw-r--r--drivers/misc/cardreader/rts5260.c18
-rw-r--r--drivers/misc/cardreader/rts5261.c18
-rw-r--r--drivers/misc/cardreader/rtsx_pcr.c5
-rw-r--r--drivers/misc/ibmasm/ibmasmfs.c2
-rw-r--r--drivers/misc/ibmvmc.c2
-rw-r--r--drivers/misc/sram.c2
-rw-r--r--drivers/misc/tps6594-esm.c19
-rw-r--r--drivers/mmc/core/block.c7
-rw-r--r--drivers/mmc/host/moxart-mmc.c8
-rw-r--r--drivers/mmc/host/sdhci_f_sdh30.c69
-rw-r--r--drivers/mmc/host/sunplus-mmc.c26
-rw-r--r--drivers/mmc/host/wbsd.c2
-rw-r--r--drivers/mtd/nand/raw/fsl_upm.c2
-rw-r--r--drivers/mtd/nand/raw/meson_nand.c3
-rw-r--r--drivers/mtd/nand/raw/omap_elm.c24
-rw-r--r--drivers/mtd/nand/raw/rockchip-nand-controller.c45
-rw-r--r--drivers/mtd/nand/spi/toshiba.c4
-rw-r--r--drivers/mtd/nand/spi/winbond.c4
-rw-r--r--drivers/mtd/spi-nor/spansion.c4
-rw-r--r--drivers/net/bonding/bond_alb.c6
-rw-r--r--drivers/net/bonding/bond_main.c9
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c10
-rw-r--r--drivers/net/can/spi/mcp251xfd/mcp251xfd.h1
-rw-r--r--drivers/net/can/usb/gs_usb.c132
-rw-r--r--drivers/net/can/vxcan.c7
-rw-r--r--drivers/net/dsa/bcm_sf2.c8
-rw-r--r--drivers/net/dsa/microchip/ksz8795.c8
-rw-r--r--drivers/net/dsa/microchip/ksz_common.c43
-rw-r--r--drivers/net/dsa/microchip/ksz_common.h7
-rw-r--r--drivers/net/dsa/mt7530.c4
-rw-r--r--drivers/net/dsa/mt7530.h2
-rw-r--r--drivers/net/dsa/mv88e6xxx/chip.c15
-rw-r--r--drivers/net/dsa/ocelot/felix.c2
-rw-r--r--drivers/net/dsa/ocelot/felix_vsc9959.c3
-rw-r--r--drivers/net/dsa/qca/ar9331.c4
-rw-r--r--drivers/net/dsa/qca/qca8k-8xxx.c7
-rw-r--r--drivers/net/dsa/qca/qca8k-common.c19
-rw-r--r--drivers/net/ethernet/atheros/atl1c/atl1c_main.c7
-rw-r--r--drivers/net/ethernet/atheros/atl1e/atl1e_main.c7
-rw-r--r--drivers/net/ethernet/atheros/atlx/atl1.c7
-rw-r--r--drivers/net/ethernet/broadcom/b44.c8
-rw-r--r--drivers/net/ethernet/broadcom/bgmac.c2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c21
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c32
-rw-r--r--drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c17
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.c85
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt.h2
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c14
-rw-r--r--drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h2
-rw-r--r--drivers/net/ethernet/broadcom/genet/bcmmii.c2
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c5
-rw-r--r--drivers/net/ethernet/brocade/bna/bnad_debugfs.c5
-rw-r--r--drivers/net/ethernet/cadence/macb_main.c9
-rw-r--r--drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_main.c3
-rw-r--r--drivers/net/ethernet/freescale/enetc/enetc_pf.c111
-rw-r--r--drivers/net/ethernet/freescale/fec_main.c18
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hnae3.h7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c22
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h2
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3_enet.c14
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c51
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c7
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c29
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c36
-rw-r--r--drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h5
-rw-r--r--drivers/net/ethernet/ibm/ibmveth.c2
-rw-r--r--drivers/net/ethernet/ibm/ibmvnic.c112
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c5
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c16
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf.h6
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_ethtool.c54
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_fdir.c88
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_fdir.h2
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_main.c248
-rw-r--r--drivers/net/ethernet/intel/iavf/iavf_virtchnl.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_base.c5
-rw-r--r--drivers/net/ethernet/intel/ice/ice_eswitch.c6
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool.c13
-rw-r--r--drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c26
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c27
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c33
-rw-r--r--drivers/net/ethernet/intel/ice/ice_sriov.c8
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.c34
-rw-r--r--drivers/net/ethernet/intel/ice/ice_vf_lib.h1
-rw-r--r--drivers/net/ethernet/intel/ice/ice_virtchnl.c1
-rw-r--r--drivers/net/ethernet/intel/igb/igb_ptp.c24
-rw-r--r--drivers/net/ethernet/intel/igc/igc.h4
-rw-r--r--drivers/net/ethernet/intel/igc/igc_defines.h2
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c78
-rw-r--r--drivers/net/ethernet/intel/ixgbe/ixgbe_main.c2
-rw-r--r--drivers/net/ethernet/korina.c3
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c3
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c2
-rw-r--r--drivers/net/ethernet/marvell/octeon_ep/octep_main.c9
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c43
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h8
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c137
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c5
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_pci.c3
-rw-r--r--drivers/net/ethernet/marvell/prestera/prestera_router.c14
-rw-r--r--drivers/net/ethernet/mediatek/mtk_eth_soc.c29
-rw-r--r--drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c2
-rw-r--r--drivers/net/ethernet/mediatek/mtk_wed.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c116
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c9
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c29
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_main.c48
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_rep.c20
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_tc.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c12
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.c107
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/fs_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/sriov.c3
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c5
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c19
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/pci.c8
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/reg.h9
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c2
-rw-r--r--drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c4
-rw-r--r--drivers/net/ethernet/microsoft/mana/mana_en.c37
-rw-r--r--drivers/net/ethernet/mscc/ocelot_fdma.c3
-rw-r--r--drivers/net/ethernet/pensando/ionic/ionic_lif.c23
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_dev_api.h16
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_fcoe.c19
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_fcoe.h17
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_hw.c26
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iscsi.c19
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_iscsi.h8
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.c19
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_l2.h24
-rw-r--r--drivers/net/ethernet/qlogic/qed/qed_main.c6
-rw-r--r--drivers/net/ethernet/qlogic/qede/qede_main.c10
-rw-r--r--drivers/net/ethernet/qualcomm/emac/emac-mac.c7
-rw-r--r--drivers/net/ethernet/realtek/r8169_main.c45
-rw-r--r--drivers/net/ethernet/sfc/ef100_nic.c2
-rw-r--r--drivers/net/ethernet/sfc/falcon/selftest.c25
-rw-r--r--drivers/net/ethernet/sfc/selftest.c25
-rw-r--r--drivers/net/ethernet/sfc/siena/selftest.c25
-rw-r--r--drivers/net/ethernet/sfc/tc.c2
-rw-r--r--drivers/net/ethernet/socionext/netsec.c11
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c3
-rw-r--r--drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c4
-rw-r--r--drivers/net/ethernet/ti/cpsw_ale.c24
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_hw.c1
-rw-r--r--drivers/net/ethernet/xilinx/ll_temac_main.c12
-rw-r--r--drivers/net/ipa/ipa_table.c20
-rw-r--r--drivers/net/ipvlan/ipvlan_main.c3
-rw-r--r--drivers/net/macsec.c28
-rw-r--r--drivers/net/macvlan.c1
-rw-r--r--drivers/net/mdio/mdio-bitbang.c4
-rw-r--r--drivers/net/pcs/pcs-rzn1-miic.c10
-rw-r--r--drivers/net/phy/at803x.c47
-rw-r--r--drivers/net/phy/broadcom.c13
-rw-r--r--drivers/net/phy/marvell10g.c7
-rw-r--r--drivers/net/phy/phy.c11
-rw-r--r--drivers/net/phy/phy_device.c34
-rw-r--r--drivers/net/phy/sfp-bus.c10
-rw-r--r--drivers/net/tap.c2
-rw-r--r--drivers/net/team/team.c13
-rw-r--r--drivers/net/tun.c4
-rw-r--r--drivers/net/usb/cdc_ether.c21
-rw-r--r--drivers/net/usb/lan78xx.c7
-rw-r--r--drivers/net/usb/qmi_wwan.c1
-rw-r--r--drivers/net/usb/usbnet.c6
-rw-r--r--drivers/net/usb/zaurus.c21
-rw-r--r--drivers/net/veth.c8
-rw-r--r--drivers/net/virtio_net.c6
-rw-r--r--drivers/net/vrf.c12
-rw-r--r--drivers/net/vxlan/vxlan_core.c165
-rw-r--r--drivers/net/vxlan/vxlan_vnifilter.c11
-rw-r--r--drivers/net/wireguard/allowedips.c8
-rw-r--r--drivers/net/wireguard/selftest/allowedips.c16
-rw-r--r--drivers/net/wireless/ath/ath11k/ahb.c1
-rw-r--r--drivers/net/wireless/ath/ath11k/pcic.c1
-rw-r--r--drivers/net/wireless/ath/ath12k/wmi.c3
-rw-r--r--drivers/net/wireless/ath/ath6kl/Makefile5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c5
-rw-r--r--drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h7
-rw-r--r--drivers/net/wireless/intel/iwlwifi/Kconfig1
-rw-r--r--drivers/net/wireless/legacy/rayctl.h2
-rw-r--r--drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c6
-rw-r--r--drivers/net/wireless/realtek/rtw89/mac.c2
-rw-r--r--drivers/net/xen-netback/netback.c15
-rw-r--r--drivers/nvme/host/core.c10
-rw-r--r--drivers/nvme/host/ioctl.c2
-rw-r--r--drivers/nvme/host/pci.c3
-rw-r--r--drivers/nvme/host/rdma.c3
-rw-r--r--drivers/nvme/host/tcp.c3
-rw-r--r--drivers/of/Kconfig2
-rw-r--r--drivers/of/dynamic.c31
-rw-r--r--drivers/of/kexec.c3
-rw-r--r--drivers/of/platform.c6
-rw-r--r--drivers/of/unittest.c4
-rw-r--r--drivers/parisc/sba_iommu.c6
-rw-r--r--drivers/parport/parport_gsc.c28
-rw-r--r--drivers/parport/parport_gsc.h7
-rw-r--r--drivers/pci/bus.c4
-rw-r--r--drivers/pci/controller/Kconfig1
-rw-r--r--drivers/pci/controller/dwc/pcie-designware-host.c13
-rw-r--r--drivers/pci/controller/dwc/pcie-designware.c20
-rw-r--r--drivers/pci/controller/dwc/pcie-designware.h1
-rw-r--r--drivers/pci/hotplug/acpiphp_glue.c8
-rw-r--r--drivers/pci/of.c5
-rw-r--r--drivers/phy/hisilicon/phy-hisi-inno-usb2.c2
-rw-r--r--drivers/phy/mediatek/phy-mtk-dp.c2
-rw-r--r--drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c2
-rw-r--r--drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c78
-rw-r--r--drivers/pinctrl/pinctrl-amd.c30
-rw-r--r--drivers/pinctrl/qcom/pinctrl-msm.c9
-rw-r--r--drivers/pinctrl/qcom/pinctrl-msm.h2
-rw-r--r--drivers/pinctrl/qcom/pinctrl-sa8775p.c1
-rw-r--r--drivers/pinctrl/renesas/pinctrl-rza2.c17
-rw-r--r--drivers/pinctrl/renesas/pinctrl-rzg2l.c15
-rw-r--r--drivers/pinctrl/renesas/pinctrl-rzv2m.c13
-rw-r--r--drivers/platform/mellanox/mlxbf-tmfifo.c1
-rw-r--r--drivers/platform/x86/amd/pmc-quirks.c8
-rw-r--r--drivers/platform/x86/amd/pmf/acpi.c23
-rw-r--r--drivers/platform/x86/amd/pmf/core.c9
-rw-r--r--drivers/platform/x86/amd/pmf/pmf.h16
-rw-r--r--drivers/platform/x86/amd/pmf/sps.c75
-rw-r--r--drivers/platform/x86/asus-wmi.c14
-rw-r--r--drivers/platform/x86/huawei-wmi.c2
-rw-r--r--drivers/platform/x86/ideapad-laptop.c5
-rw-r--r--drivers/platform/x86/intel/hid.c27
-rw-r--r--drivers/platform/x86/intel/speed_select_if/isst_if_common.c4
-rw-r--r--drivers/platform/x86/lenovo-ymc.c32
-rw-r--r--drivers/platform/x86/mlx-platform.c23
-rw-r--r--drivers/platform/x86/msi-ec.c18
-rw-r--r--drivers/platform/x86/msi-laptop.c8
-rw-r--r--drivers/platform/x86/serial-multi-instantiate.c35
-rw-r--r--drivers/platform/x86/think-lmi.c4
-rw-r--r--drivers/platform/x86/touchscreen_dmi.c7
-rw-r--r--drivers/powercap/intel_rapl_common.c14
-rw-r--r--drivers/powercap/intel_rapl_msr.c49
-rw-r--r--drivers/powercap/intel_rapl_tpmi.c17
-rw-r--r--drivers/regulator/da9063-regulator.c9
-rw-r--r--drivers/regulator/mt6358-regulator.c10
-rw-r--r--drivers/regulator/qcom-rpmh-regulator.c2
-rw-r--r--drivers/s390/block/dasd.c132
-rw-r--r--drivers/s390/block/dasd_3990_erp.c4
-rw-r--r--drivers/s390/block/dasd_ioctl.c1
-rw-r--r--drivers/s390/crypto/zcrypt_msgtype6.c35
-rw-r--r--drivers/s390/net/qeth_core.h1
-rw-r--r--drivers/s390/net/qeth_core_main.c2
-rw-r--r--drivers/s390/net/qeth_l2_main.c9
-rw-r--r--drivers/s390/net/qeth_l3_main.c8
-rw-r--r--drivers/s390/scsi/zfcp_fc.c6
-rw-r--r--drivers/scsi/53c700.c2
-rw-r--r--drivers/scsi/fnic/fnic.h2
-rw-r--r--drivers/scsi/fnic/fnic_scsi.c6
-rw-r--r--drivers/scsi/lpfc/lpfc_scsi.c20
-rw-r--r--drivers/scsi/pm8001/pm8001_init.c3
-rw-r--r--drivers/scsi/qedf/qedf_main.c18
-rw-r--r--drivers/scsi/qedi/qedi_main.c23
-rw-r--r--drivers/scsi/raid_class.c47
-rw-r--r--drivers/scsi/scsi_proc.c30
-rw-r--r--drivers/scsi/sd.c9
-rw-r--r--drivers/scsi/sg.c7
-rw-r--r--drivers/scsi/snic/snic_disc.c2
-rw-r--r--drivers/scsi/storvsc_drv.c8
-rw-r--r--drivers/soc/aspeed/aspeed-socinfo.c1
-rw-r--r--drivers/soc/aspeed/aspeed-uart-routing.c2
-rw-r--r--drivers/soc/imx/imx8mp-blk-ctrl.c2
-rw-r--r--drivers/soundwire/amd_manager.c4
-rw-r--r--drivers/soundwire/bus.c8
-rw-r--r--drivers/soundwire/qcom.c2
-rw-r--r--drivers/spi/spi-cadence.c19
-rw-r--r--drivers/spi/spi-qcom-qspi.c54
-rw-r--r--drivers/spi/spi-stm32.c6
-rw-r--r--drivers/staging/fbtft/fb_ili9341.c2
-rw-r--r--drivers/staging/ks7010/ks_wlan_net.c6
-rw-r--r--drivers/staging/media/atomisp/Kconfig1
-rw-r--r--drivers/staging/rtl8712/rtl871x_xmit.c43
-rw-r--r--drivers/staging/rtl8712/xmit_linux.c6
-rw-r--r--drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c16
-rw-r--r--drivers/thermal/thermal_core.c4
-rw-r--r--drivers/thermal/thermal_of.c27
-rw-r--r--drivers/thunderbolt/tb.c2
-rw-r--r--drivers/thunderbolt/tmu.c4
-rw-r--r--drivers/tty/Kconfig3
-rw-r--r--drivers/tty/n_gsm.c7
-rw-r--r--drivers/tty/serial/8250/8250_core.c3
-rw-r--r--drivers/tty/serial/8250/8250_dwlib.c6
-rw-r--r--drivers/tty/serial/8250/8250_port.c4
-rw-r--r--drivers/tty/serial/fsl_lpuart.c4
-rw-r--r--drivers/tty/serial/qcom_geni_serial.c7
-rw-r--r--drivers/tty/serial/serial_base.h1
-rw-r--r--drivers/tty/serial/serial_base_bus.c70
-rw-r--r--drivers/tty/serial/sh-sci.c2
-rw-r--r--drivers/tty/serial/sifive.c2
-rw-r--r--drivers/tty/serial/ucc_uart.c2
-rw-r--r--drivers/tty/tty_io.c2
-rw-r--r--drivers/ufs/core/ufs-mcq.c6
-rw-r--r--drivers/ufs/host/ufs-qcom.c2
-rw-r--r--drivers/ufs/host/ufs-renesas.c2
-rw-r--r--drivers/usb/cdns3/cdns3-gadget.c4
-rw-r--r--drivers/usb/common/usb-conn-gpio.c6
-rw-r--r--drivers/usb/core/devio.c16
-rw-r--r--drivers/usb/core/quirks.c4
-rw-r--r--drivers/usb/dwc3/core.c20
-rw-r--r--drivers/usb/dwc3/core.h3
-rw-r--r--drivers/usb/dwc3/dwc3-pci.c6
-rw-r--r--drivers/usb/dwc3/gadget.c9
-rw-r--r--drivers/usb/gadget/composite.c4
-rw-r--r--drivers/usb/gadget/function/f_fs.c3
-rw-r--r--drivers/usb/gadget/legacy/inode.c3
-rw-r--r--drivers/usb/gadget/legacy/raw_gadget.c10
-rw-r--r--drivers/usb/gadget/udc/core.c10
-rw-r--r--drivers/usb/gadget/udc/tegra-xudc.c8
-rw-r--r--drivers/usb/host/ohci-at91.c8
-rw-r--r--drivers/usb/host/xhci-mtk.c1
-rw-r--r--drivers/usb/host/xhci-pci.c4
-rw-r--r--drivers/usb/host/xhci-ring.c25
-rw-r--r--drivers/usb/host/xhci-tegra.c8
-rw-r--r--drivers/usb/misc/ehset.c8
-rw-r--r--drivers/usb/serial/option.c6
-rw-r--r--drivers/usb/serial/usb-serial-simple.c73
-rw-r--r--drivers/usb/storage/alauda.c12
-rw-r--r--drivers/usb/typec/altmodes/displayport.c18
-rw-r--r--drivers/usb/typec/class.c15
-rw-r--r--drivers/usb/typec/mux/Kconfig1
-rw-r--r--drivers/usb/typec/mux/nb7vpq904m.c25
-rw-r--r--drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c4
-rw-r--r--drivers/usb/typec/tcpm/tcpm.c7
-rw-r--r--drivers/usb/typec/ucsi/ucsi.c4
-rw-r--r--drivers/vdpa/mlx5/core/mlx5_vdpa.h2
-rw-r--r--drivers/vdpa/mlx5/core/mr.c97
-rw-r--r--drivers/vdpa/mlx5/net/mlx5_vnet.c26
-rw-r--r--drivers/vdpa/pds/Makefile3
-rw-r--r--drivers/vdpa/pds/debugfs.c15
-rw-r--r--drivers/vdpa/pds/vdpa_dev.c176
-rw-r--r--drivers/vdpa/pds/vdpa_dev.h5
-rw-r--r--drivers/vdpa/vdpa.c9
-rw-r--r--drivers/vdpa/vdpa_user/vduse_dev.c8
-rw-r--r--drivers/vhost/scsi.c187
-rw-r--r--drivers/video/console/sticon.c12
-rw-r--r--drivers/video/console/vgacon.c74
-rw-r--r--drivers/video/fbdev/amifb.c2
-rw-r--r--drivers/video/fbdev/atmel_lcdfb.c2
-rw-r--r--drivers/video/fbdev/au1200fb.c3
-rw-r--r--drivers/video/fbdev/bw2.c3
-rw-r--r--drivers/video/fbdev/cg14.c3
-rw-r--r--drivers/video/fbdev/cg3.c3
-rw-r--r--drivers/video/fbdev/cg6.c3
-rw-r--r--drivers/video/fbdev/core/fbcon.c7
-rw-r--r--drivers/video/fbdev/ep93xx-fb.c4
-rw-r--r--drivers/video/fbdev/ffb.c3
-rw-r--r--drivers/video/fbdev/goldfishfb.c4
-rw-r--r--drivers/video/fbdev/grvga.c3
-rw-r--r--drivers/video/fbdev/imxfb.c48
-rw-r--r--drivers/video/fbdev/kyro/STG4000InitDevice.c10
-rw-r--r--drivers/video/fbdev/leo.c3
-rw-r--r--drivers/video/fbdev/mb862xx/mb862xxfb_accel.c4
-rw-r--r--drivers/video/fbdev/mb862xx/mb862xxfbdrv.c6
-rw-r--r--drivers/video/fbdev/mmp/hw/mmp_ctrl.c4
-rw-r--r--drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c2
-rw-r--r--drivers/video/fbdev/p9100.c3
-rw-r--r--drivers/video/fbdev/platinumfb.c4
-rw-r--r--drivers/video/fbdev/sbuslib.c2
-rw-r--r--drivers/video/fbdev/ssd1307fb.c4
-rw-r--r--drivers/video/fbdev/sunxvr1000.c3
-rw-r--r--drivers/video/fbdev/sunxvr2500.c2
-rw-r--r--drivers/video/fbdev/sunxvr500.c2
-rw-r--r--drivers/video/fbdev/tcx.c3
-rw-r--r--drivers/video/fbdev/xilinxfb.c5
-rw-r--r--drivers/virtio/virtio_mem.c168
-rw-r--r--drivers/virtio/virtio_mmio.c5
-rw-r--r--drivers/virtio/virtio_pci_common.c2
-rw-r--r--drivers/virtio/virtio_pci_legacy.c1
-rw-r--r--drivers/virtio/virtio_vdpa.c2
-rw-r--r--drivers/xen/events/events_base.c16
-rw-r--r--drivers/xen/evtchn.c35
-rw-r--r--drivers/xen/grant-table.c40
-rw-r--r--drivers/xen/xenbus/xenbus_probe.c3
-rw-r--r--fs/9p/fid.h6
-rw-r--r--fs/9p/v9fs.c2
-rw-r--r--fs/9p/v9fs.h2
-rw-r--r--fs/9p/vfs_dir.c5
-rw-r--r--fs/9p/vfs_file.c5
-rw-r--r--fs/9p/vfs_inode.c14
-rw-r--r--fs/9p/vfs_inode_dotl.c13
-rw-r--r--fs/Kconfig16
-rw-r--r--fs/adfs/inode.c4
-rw-r--r--fs/affs/amigaffs.c6
-rw-r--r--fs/affs/file.c14
-rw-r--r--fs/affs/inode.c16
-rw-r--r--fs/affs/namei.c20
-rw-r--r--fs/afs/dynroot.c2
-rw-r--r--fs/afs/inode.c8
-rw-r--r--fs/aio.c20
-rw-r--r--fs/attr.c22
-rw-r--r--fs/autofs/Kconfig12
-rw-r--r--fs/autofs/inode.c2
-rw-r--r--fs/autofs/root.c6
-rw-r--r--fs/autofs/waitq.c5
-rw-r--r--fs/bad_inode.c6
-rw-r--r--fs/befs/linuxvfs.c2
-rw-r--r--fs/bfs/dir.c16
-rw-r--r--fs/bfs/inode.c5
-rw-r--r--fs/binfmt_misc.c3
-rw-r--r--fs/btrfs/Kconfig4
-rw-r--r--fs/btrfs/accessors.h23
-rw-r--r--fs/btrfs/backref.c29
-rw-r--r--fs/btrfs/block-group.c135
-rw-r--r--fs/btrfs/block-group.h11
-rw-r--r--fs/btrfs/block-rsv.c5
-rw-r--r--fs/btrfs/btrfs_inode.h6
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/delayed-inode.c16
-rw-r--r--fs/btrfs/delayed-inode.h1
-rw-r--r--fs/btrfs/dev-replace.c6
-rw-r--r--fs/btrfs/disk-io.c195
-rw-r--r--fs/btrfs/disk-io.h1
-rw-r--r--fs/btrfs/extent-io-tree.c14
-rw-r--r--fs/btrfs/extent-io-tree.h6
-rw-r--r--fs/btrfs/extent-tree.c293
-rw-r--r--fs/btrfs/extent-tree.h16
-rw-r--r--fs/btrfs/extent_io.c718
-rw-r--r--fs/btrfs/extent_io.h35
-rw-r--r--fs/btrfs/extent_map.c6
-rw-r--r--fs/btrfs/file-item.c34
-rw-r--r--fs/btrfs/file-item.h6
-rw-r--r--fs/btrfs/file.c46
-rw-r--r--fs/btrfs/free-space-cache.c18
-rw-r--r--fs/btrfs/free-space-tree.c27
-rw-r--r--fs/btrfs/fs.h15
-rw-r--r--fs/btrfs/inode.c1068
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/messages.c16
-rw-r--r--fs/btrfs/messages.h2
-rw-r--r--fs/btrfs/ordered-data.c8
-rw-r--r--fs/btrfs/print-tree.c10
-rw-r--r--fs/btrfs/qgroup.c20
-rw-r--r--fs/btrfs/raid56.c40
-rw-r--r--fs/btrfs/raid56.h1
-rw-r--r--fs/btrfs/reflink.c3
-rw-r--r--fs/btrfs/relocation.c78
-rw-r--r--fs/btrfs/scrub.c243
-rw-r--r--fs/btrfs/send.c6
-rw-r--r--fs/btrfs/space-info.c85
-rw-r--r--fs/btrfs/super.c11
-rw-r--r--fs/btrfs/sysfs.c7
-rw-r--r--fs/btrfs/tests/extent-io-tests.c302
-rw-r--r--fs/btrfs/tests/extent-map-tests.c412
-rw-r--r--fs/btrfs/transaction.c52
-rw-r--r--fs/btrfs/tree-checker.c14
-rw-r--r--fs/btrfs/tree-log.c12
-rw-r--r--fs/btrfs/volumes.c118
-rw-r--r--fs/btrfs/volumes.h3
-rw-r--r--fs/btrfs/xattr.c4
-rw-r--r--fs/btrfs/zoned.c295
-rw-r--r--fs/btrfs/zoned.h28
-rw-r--r--fs/buffer.c18
-rw-r--r--fs/cachefiles/io.c16
-rw-r--r--fs/ceph/acl.c2
-rw-r--r--fs/ceph/caps.c2
-rw-r--r--fs/ceph/dir.c5
-rw-r--r--fs/ceph/inode.c18
-rw-r--r--fs/ceph/mds_client.c4
-rw-r--r--fs/ceph/mds_client.h5
-rw-r--r--fs/ceph/metric.c2
-rw-r--r--fs/ceph/snap.c2
-rw-r--r--fs/ceph/super.c10
-rw-r--r--fs/ceph/xattr.c2
-rw-r--r--fs/coda/coda_linux.c3
-rw-r--r--fs/coda/dir.c22
-rw-r--r--fs/coda/file.c2
-rw-r--r--fs/coda/inode.c5
-rw-r--r--fs/configfs/inode.c7
-rw-r--r--fs/cramfs/inode.c11
-rw-r--r--fs/dcache.c5
-rw-r--r--fs/debugfs/inode.c3
-rw-r--r--fs/devpts/inode.c16
-rw-r--r--fs/ecryptfs/crypto.c8
-rw-r--r--fs/ecryptfs/inode.c7
-rw-r--r--fs/ecryptfs/mmap.c5
-rw-r--r--fs/ecryptfs/read_write.c12
-rw-r--r--fs/efivarfs/file.c2
-rw-r--r--fs/efivarfs/inode.c2
-rw-r--r--fs/efs/inode.c4
-rw-r--r--fs/erofs/Kconfig16
-rw-r--r--fs/erofs/Makefile1
-rw-r--r--fs/erofs/compress.h2
-rw-r--r--fs/erofs/decompressor.c6
-rw-r--r--fs/erofs/decompressor_deflate.c247
-rw-r--r--fs/erofs/erofs_fs.h17
-rw-r--r--fs/erofs/inode.c14
-rw-r--r--fs/erofs/internal.h23
-rw-r--r--fs/erofs/super.c46
-rw-r--r--fs/erofs/xattr.c14
-rw-r--r--fs/erofs/zdata.c281
-rw-r--r--fs/erofs/zmap.c5
-rw-r--r--fs/eventfd.c2
-rw-r--r--fs/eventpoll.c12
-rw-r--r--fs/exfat/balloc.c6
-rw-r--r--fs/exfat/dir.c39
-rw-r--r--fs/exfat/exfat_fs.h2
-rw-r--r--fs/exfat/file.c6
-rw-r--r--fs/exfat/inode.c6
-rw-r--r--fs/exfat/namei.c26
-rw-r--r--fs/exfat/super.c42
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/acl.c2
-rw-r--r--fs/ext2/dir.c6
-rw-r--r--fs/ext2/ialloc.c2
-rw-r--r--fs/ext2/inode.c12
-rw-r--r--fs/ext2/ioctl.c4
-rw-r--r--fs/ext2/namei.c8
-rw-r--r--fs/ext2/super.c2
-rw-r--r--fs/ext2/xattr.c2
-rw-r--r--fs/ext4/acl.c2
-rw-r--r--fs/ext4/ext4.h90
-rw-r--r--fs/ext4/ext4_jbd2.c3
-rw-r--r--fs/ext4/extents.c12
-rw-r--r--fs/ext4/ialloc.c2
-rw-r--r--fs/ext4/inline.c4
-rw-r--r--fs/ext4/inode-test.c6
-rw-r--r--fs/ext4/inode.c18
-rw-r--r--fs/ext4/ioctl.c9
-rw-r--r--fs/ext4/mballoc.c172
-rw-r--r--fs/ext4/namei.c26
-rw-r--r--fs/ext4/super.c73
-rw-r--r--fs/ext4/xattr.c20
-rw-r--r--fs/f2fs/compress.c2
-rw-r--r--fs/f2fs/dir.c8
-rw-r--r--fs/f2fs/f2fs.h6
-rw-r--r--fs/f2fs/file.c22
-rw-r--r--fs/f2fs/gc.c8
-rw-r--r--fs/f2fs/inline.c2
-rw-r--r--fs/f2fs/inode.c10
-rw-r--r--fs/f2fs/namei.c12
-rw-r--r--fs/f2fs/recovery.c4
-rw-r--r--fs/f2fs/super.c9
-rw-r--r--fs/f2fs/xattr.c2
-rw-r--r--fs/fat/fat.h3
-rw-r--r--fs/fat/file.c2
-rw-r--r--fs/fat/inode.c5
-rw-r--r--fs/fat/misc.c10
-rw-r--r--fs/fcntl.c29
-rw-r--r--fs/file.c54
-rw-r--r--fs/file_table.c5
-rw-r--r--fs/freevxfs/vxfs_inode.c3
-rw-r--r--fs/fs-writeback.c4
-rw-r--r--fs/fs_context.c36
-rw-r--r--fs/fsopen.c106
-rw-r--r--fs/fuse/control.c2
-rw-r--r--fs/fuse/dir.c14
-rw-r--r--fs/fuse/inode.c24
-rw-r--r--fs/fuse/ioctl.c21
-rw-r--r--fs/gfs2/acl.c2
-rw-r--r--fs/gfs2/aops.c2
-rw-r--r--fs/gfs2/bmap.c13
-rw-r--r--fs/gfs2/dir.c15
-rw-r--r--fs/gfs2/file.c6
-rw-r--r--fs/gfs2/glops.c4
-rw-r--r--fs/gfs2/inode.c16
-rw-r--r--fs/gfs2/quota.c2
-rw-r--r--fs/gfs2/super.c16
-rw-r--r--fs/gfs2/sys.c4
-rw-r--r--fs/gfs2/trans.c14
-rw-r--r--fs/gfs2/xattr.c8
-rw-r--r--fs/hfs/catalog.c8
-rw-r--r--fs/hfs/dir.c2
-rw-r--r--fs/hfs/inode.c13
-rw-r--r--fs/hfs/sysdep.c4
-rw-r--r--fs/hfsplus/catalog.c8
-rw-r--r--fs/hfsplus/dir.c6
-rw-r--r--fs/hfsplus/inode.c18
-rw-r--r--fs/hostfs/hostfs_kern.c3
-rw-r--r--fs/hpfs/dir.c8
-rw-r--r--fs/hpfs/inode.c6
-rw-r--r--fs/hpfs/namei.c29
-rw-r--r--fs/hpfs/super.c5
-rw-r--r--fs/hugetlbfs/inode.c12
-rw-r--r--fs/inode.c236
-rw-r--r--fs/internal.h4
-rw-r--r--fs/ioctl.c18
-rw-r--r--fs/iomap/buffered-io.c469
-rw-r--r--fs/iomap/direct-io.c163
-rw-r--r--fs/isofs/inode.c9
-rw-r--r--fs/isofs/rock.c16
-rw-r--r--fs/jbd2/checkpoint.c277
-rw-r--r--fs/jbd2/commit.c3
-rw-r--r--fs/jbd2/transaction.c40
-rw-r--r--fs/jffs2/dir.c24
-rw-r--r--fs/jffs2/file.c3
-rw-r--r--fs/jffs2/fs.c10
-rw-r--r--fs/jffs2/os-linux.h2
-rw-r--r--fs/jfs/acl.c2
-rw-r--r--fs/jfs/inode.c2
-rw-r--r--fs/jfs/ioctl.c2
-rw-r--r--fs/jfs/jfs_imap.c8
-rw-r--r--fs/jfs/jfs_inode.c4
-rw-r--r--fs/jfs/namei.c27
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/jfs/xattr.c2
-rw-r--r--fs/kernel_read_file.c12
-rw-r--r--fs/kernfs/dir.c2
-rw-r--r--fs/kernfs/inode.c53
-rw-r--r--fs/libfs.c349
-rw-r--r--fs/locks.c47
-rw-r--r--fs/minix/bitmap.c2
-rw-r--r--fs/minix/dir.c6
-rw-r--r--fs/minix/inode.c12
-rw-r--r--fs/minix/itree_common.c4
-rw-r--r--fs/minix/namei.c6
-rw-r--r--fs/namei.c5
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/direct.c26
-rw-r--r--fs/nfs/fscache.h4
-rw-r--r--fs/nfs/inode.c22
-rw-r--r--fs/nfs/namespace.c3
-rw-r--r--fs/nfs/nfs42proc.c5
-rw-r--r--fs/nfs/nfs4_fs.h2
-rw-r--r--fs/nfs/nfs4file.c2
-rw-r--r--fs/nfs/nfs4proc.c18
-rw-r--r--fs/nfs/sysfs.c4
-rw-r--r--fs/nfsd/nfs4state.c4
-rw-r--r--fs/nfsd/nfsctl.c3
-rw-r--r--fs/nfsd/vfs.c11
-rw-r--r--fs/nilfs2/dir.c6
-rw-r--r--fs/nilfs2/inode.c20
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/nilfs2/namei.c8
-rw-r--r--fs/nilfs2/segment.c7
-rw-r--r--fs/nilfs2/super.c81
-rw-r--r--fs/nilfs2/the_nilfs.h2
-rw-r--r--fs/nls/nls_base.c4
-rw-r--r--fs/notify/dnotify/dnotify.c4
-rw-r--r--fs/nsfs.c2
-rw-r--r--fs/ntfs/dir.c3
-rw-r--r--fs/ntfs/inode.c15
-rw-r--r--fs/ntfs/mft.c3
-rw-r--r--fs/ntfs3/file.c8
-rw-r--r--fs/ntfs3/frecord.c3
-rw-r--r--fs/ntfs3/inode.c14
-rw-r--r--fs/ntfs3/namei.c11
-rw-r--r--fs/ntfs3/super.c33
-rw-r--r--fs/ntfs3/xattr.c4
-rw-r--r--fs/ocfs2/acl.c6
-rw-r--r--fs/ocfs2/alloc.c6
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/dir.c8
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/dlmglue.c7
-rw-r--r--fs/ocfs2/file.c23
-rw-r--r--fs/ocfs2/inode.c12
-rw-r--r--fs/ocfs2/journal.c6
-rw-r--r--fs/ocfs2/move_extents.c6
-rw-r--r--fs/ocfs2/namei.c21
-rw-r--r--fs/ocfs2/refcounttree.c14
-rw-r--r--fs/ocfs2/xattr.c6
-rw-r--r--fs/omfs/dir.c4
-rw-r--r--fs/omfs/inode.c9
-rw-r--r--fs/open.c56
-rw-r--r--fs/openpromfs/inode.c5
-rw-r--r--fs/orangefs/inode.c7
-rw-r--r--fs/orangefs/namei.c2
-rw-r--r--fs/orangefs/orangefs-kernel.h2
-rw-r--r--fs/orangefs/orangefs-utils.c6
-rw-r--r--fs/overlayfs/file.c17
-rw-r--r--fs/overlayfs/inode.c2
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/overlayfs/readdir.c3
-rw-r--r--fs/overlayfs/super.c2
-rw-r--r--fs/overlayfs/util.c2
-rw-r--r--fs/pipe.c10
-rw-r--r--fs/posix_acl.c2
-rw-r--r--fs/proc/base.c11
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/generic.c2
-rw-r--r--fs/proc/inode.c2
-rw-r--r--fs/proc/kcore.c30
-rw-r--r--fs/proc/proc_net.c3
-rw-r--r--fs/proc/proc_sysctl.c4
-rw-r--r--fs/proc/root.c3
-rw-r--r--fs/proc/self.c2
-rw-r--r--fs/proc/task_mmu.c8
-rw-r--r--fs/proc/thread_self.c2
-rw-r--r--fs/proc/vmcore.c2
-rw-r--r--fs/pstore/Kconfig100
-rw-r--r--fs/pstore/inode.c6
-rw-r--r--fs/pstore/platform.c353
-rw-r--r--fs/pstore/ram.c11
-rw-r--r--fs/pstore/ram_core.c17
-rw-r--r--fs/qnx4/inode.c3
-rw-r--r--fs/qnx6/inode.c3
-rw-r--r--fs/quota/dquot.c2
-rw-r--r--fs/ramfs/inode.c6
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/readdir.c68
-rw-r--r--fs/reiserfs/inode.c12
-rw-r--r--fs/reiserfs/ioctl.c4
-rw-r--r--fs/reiserfs/journal.c4
-rw-r--r--fs/reiserfs/namei.c18
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/super.c2
-rw-r--r--fs/reiserfs/xattr.c5
-rw-r--r--fs/reiserfs/xattr_acl.c2
-rw-r--r--fs/romfs/super.c13
-rw-r--r--fs/smb/client/cifs_debug.c10
-rw-r--r--fs/smb/client/cifsfs.c2
-rw-r--r--fs/smb/client/cifsfs.h4
-rw-r--r--fs/smb/client/cifsglob.h1
-rw-r--r--fs/smb/client/cifssmb.c3
-rw-r--r--fs/smb/client/connect.c5
-rw-r--r--fs/smb/client/dfs.c6
-rw-r--r--fs/smb/client/file.c29
-rw-r--r--fs/smb/client/fs_context.c4
-rw-r--r--fs/smb/client/fscache.h5
-rw-r--r--fs/smb/client/inode.c16
-rw-r--r--fs/smb/client/ioctl.c22
-rw-r--r--fs/smb/client/misc.c1
-rw-r--r--fs/smb/client/sess.c4
-rw-r--r--fs/smb/client/smb2ops.c3
-rw-r--r--fs/smb/client/smb2pdu.c3
-rw-r--r--fs/smb/server/ksmbd_netlink.h3
-rw-r--r--fs/smb/server/server.c7
-rw-r--r--fs/smb/server/smb2misc.c10
-rw-r--r--fs/smb/server/smb2pdu.c86
-rw-r--r--fs/smb/server/smb_common.c19
-rw-r--r--fs/smb/server/smb_common.h2
-rw-r--r--fs/smb/server/vfs.c68
-rw-r--r--fs/smb/server/vfs.h4
-rw-r--r--fs/splice.c66
-rw-r--r--fs/squashfs/inode.c2
-rw-r--r--fs/stack.c2
-rw-r--r--fs/stat.c65
-rw-r--r--fs/super.c829
-rw-r--r--fs/sysv/dir.c6
-rw-r--r--fs/sysv/ialloc.c2
-rw-r--r--fs/sysv/inode.c5
-rw-r--r--fs/sysv/itree.c7
-rw-r--r--fs/sysv/namei.c6
-rw-r--r--fs/tracefs/inode.c2
-rw-r--r--fs/ubifs/debug.c4
-rw-r--r--fs/ubifs/dir.c41
-rw-r--r--fs/ubifs/file.c31
-rw-r--r--fs/ubifs/ioctl.c2
-rw-r--r--fs/ubifs/journal.c4
-rw-r--r--fs/ubifs/super.c4
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/ubifs/xattr.c6
-rw-r--r--fs/udf/ialloc.c2
-rw-r--r--fs/udf/inode.c17
-rw-r--r--fs/udf/namei.c24
-rw-r--r--fs/udf/symlink.c2
-rw-r--r--fs/ufs/dir.c6
-rw-r--r--fs/ufs/ialloc.c2
-rw-r--r--fs/ufs/inode.c23
-rw-r--r--fs/ufs/namei.c8
-rw-r--r--fs/vboxsf/dir.c3
-rw-r--r--fs/vboxsf/shfl_hostintf.h6
-rw-r--r--fs/vboxsf/utils.c6
-rw-r--r--fs/verity/fsverity_private.h12
-rw-r--r--fs/verity/hash_algs.c8
-rw-r--r--fs/verity/init.c56
-rw-r--r--fs/verity/open.c18
-rw-r--r--fs/verity/signature.c77
-rw-r--r--fs/verity/verify.c11
-rw-r--r--fs/xattr.c83
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h75
-rw-r--r--fs/xfs/libxfs/xfs_fs.h4
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c5
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c6
-rw-r--r--fs/xfs/scrub/fscounters.c188
-rw-r--r--fs/xfs/scrub/scrub.c6
-rw-r--r--fs/xfs/scrub/scrub.h1
-rw-r--r--fs/xfs/scrub/trace.h26
-rw-r--r--fs/xfs/xfs_acl.c2
-rw-r--r--fs/xfs/xfs_aops.c2
-rw-r--r--fs/xfs/xfs_bmap_util.c6
-rw-r--r--fs/xfs/xfs_buf.c7
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_inode_item.c2
-rw-r--r--fs/xfs/xfs_iops.c25
-rw-r--r--fs/xfs/xfs_itable.c4
-rw-r--r--fs/xfs/xfs_ondisk.h5
-rw-r--r--fs/xfs/xfs_super.c138
-rw-r--r--fs/zonefs/file.c113
-rw-r--r--fs/zonefs/super.c17
-rw-r--r--fs/zonefs/zonefs.h2
-rw-r--r--include/asm-generic/mshyperv.h2
-rw-r--r--include/asm-generic/word-at-a-time.h2
-rw-r--r--include/drm/display/drm_dp.h2
-rw-r--r--include/drm/drm_edid.h12
-rw-r--r--include/drm/drm_fb_helper.h5
-rw-r--r--include/drm/drm_probe_helper.h1
-rw-r--r--include/kvm/arm_vgic.h2
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blk-mq.h2
-rw-r--r--include/linux/blk_types.h1
-rw-r--r--include/linux/blkdev.h16
-rw-r--r--include/linux/clk.h80
-rw-r--r--include/linux/completion.h1
-rw-r--r--include/linux/cpu.h4
-rw-r--r--include/linux/cpumask.h8
-rw-r--r--include/linux/dnotify.h4
-rw-r--r--include/linux/filelock.h12
-rw-r--r--include/linux/fs.h239
-rw-r--r--include/linux/fs_context.h6
-rw-r--r--include/linux/fs_stack.h2
-rw-r--r--include/linux/ftrace.h4
-rw-r--r--include/linux/huge_mm.h3
-rw-r--r--include/linux/hyperv.h3
-rw-r--r--include/linux/intel_rapl.h14
-rw-r--r--include/linux/iomap.h3
-rw-r--r--include/linux/jbd2.h7
-rw-r--r--include/linux/lsm_hook_defs.h1
-rw-r--r--include/linux/mm.h50
-rw-r--r--include/linux/mm_types.h37
-rw-r--r--include/linux/mmap_lock.h10
-rw-r--r--include/linux/nls.h2
-rw-r--r--include/linux/pagemap.h82
-rw-r--r--include/linux/pagewalk.h11
-rw-r--r--include/linux/pipe_fs_i.h4
-rw-r--r--include/linux/pm_wakeirq.h10
-rw-r--r--include/linux/prefetch.h7
-rw-r--r--include/linux/raid_class.h4
-rw-r--r--include/linux/security.h6
-rw-r--r--include/linux/serial_core.h3
-rw-r--r--include/linux/shmem_fs.h31
-rw-r--r--include/linux/skmsg.h1
-rw-r--r--include/linux/spi/corgi_lcd.h2
-rw-r--r--include/linux/spi/spi-mem.h4
-rw-r--r--include/linux/swait.h2
-rw-r--r--include/linux/syscalls.h4
-rw-r--r--include/linux/tcp.h2
-rw-r--r--include/linux/thermal.h6
-rw-r--r--include/linux/tpm.h1
-rw-r--r--include/linux/trace_events.h11
-rw-r--r--include/linux/uio.h9
-rw-r--r--include/linux/virtio_net.h4
-rw-r--r--include/linux/wait.h3
-rw-r--r--include/linux/writeback.h5
-rw-r--r--include/linux/xattr.h10
-rw-r--r--include/net/bluetooth/hci_core.h7
-rw-r--r--include/net/bonding.h13
-rw-r--r--include/net/cfg80211.h3
-rw-r--r--include/net/cfg802154.h3
-rw-r--r--include/net/codel.h4
-rw-r--r--include/net/devlink.h28
-rw-r--r--include/net/gro.h43
-rw-r--r--include/net/inet_frag.h2
-rw-r--r--include/net/inet_sock.h9
-rw-r--r--include/net/ip.h17
-rw-r--r--include/net/ipv6.h8
-rw-r--r--include/net/llc_conn.h2
-rw-r--r--include/net/llc_pdu.h6
-rw-r--r--include/net/mac80211.h1
-rw-r--r--include/net/netfilter/nf_tables.h127
-rw-r--r--include/net/nsh.h2
-rw-r--r--include/net/pie.h2
-rw-r--r--include/net/route.h4
-rw-r--r--include/net/rsi_91x.h2
-rw-r--r--include/net/rtnetlink.h4
-rw-r--r--include/net/sock.h11
-rw-r--r--include/net/tcp.h31
-rw-r--r--include/net/vxlan.h17
-rw-r--r--include/net/xfrm.h1
-rw-r--r--include/scsi/scsi_device.h1
-rw-r--r--include/soc/tegra/mc.h3
-rw-r--r--include/trace/events/btrfs.h30
-rw-r--r--include/trace/events/erofs.h16
-rw-r--r--include/trace/events/jbd2.h12
-rw-r--r--include/trace/events/tcp.h5
-rw-r--r--include/uapi/asm-generic/unistd.h5
-rw-r--r--include/uapi/linux/blkzoned.h10
-rw-r--r--include/uapi/linux/btrfs_tree.h6
-rw-r--r--include/uapi/linux/elf.h1
-rw-r--r--include/uapi/linux/fuse.h3
-rw-r--r--include/uapi/linux/if_packet.h6
-rw-r--r--include/uapi/linux/mount.h3
-rw-r--r--include/uapi/linux/pkt_cls.h4
-rw-r--r--include/uapi/linux/quota.h1
-rw-r--r--include/uapi/linux/seccomp.h4
-rw-r--r--include/uapi/xen/evtchn.h9
-rw-r--r--include/video/kyro.h12
-rw-r--r--include/xen/events.h11
-rw-r--r--init/do_mounts.c38
-rw-r--r--io_uring/io_uring.c78
-rw-r--r--io_uring/openclose.c6
-rw-r--r--io_uring/rw.c58
-rw-r--r--ipc/mqueue.c23
-rw-r--r--kernel/bpf/cpumap.c35
-rw-r--r--kernel/bpf/inode.c6
-rw-r--r--kernel/bpf/verifier.c32
-rw-r--r--kernel/irq/resend.c7
-rw-r--r--kernel/kprobes.c14
-rw-r--r--kernel/locking/rtmutex.c170
-rw-r--r--kernel/locking/rtmutex_api.c2
-rw-r--r--kernel/locking/rtmutex_common.h47
-rw-r--r--kernel/locking/ww_mutex.h12
-rw-r--r--kernel/power/hibernate.c2
-rw-r--r--kernel/sched/completion.c26
-rw-r--r--kernel/sched/core.c5
-rw-r--r--kernel/sched/fair.c4
-rw-r--r--kernel/sched/sched.h13
-rw-r--r--kernel/sched/swait.c8
-rw-r--r--kernel/sched/wait.c5
-rw-r--r--kernel/seccomp.c84
-rw-r--r--kernel/signal.c4
-rw-r--r--kernel/sys.c10
-rw-r--r--kernel/trace/bpf_trace.c17
-rw-r--r--kernel/trace/ring_buffer.c39
-rw-r--r--kernel/trace/trace.c73
-rw-r--r--kernel/trace/trace.h10
-rw-r--r--kernel/trace/trace_events.c14
-rw-r--r--kernel/trace/trace_events_hist.c3
-rw-r--r--kernel/trace/trace_events_synth.c104
-rw-r--r--kernel/trace/trace_events_trigger.c2
-rw-r--r--kernel/trace/trace_irqsoff.c3
-rw-r--r--kernel/trace/trace_probe.c8
-rw-r--r--kernel/trace/trace_sched_wakeup.c2
-rw-r--r--kernel/trace/trace_seq.c1
-rw-r--r--kernel/trace/tracing_map.h4
-rw-r--r--kernel/workqueue.c43
-rw-r--r--lib/Kconfig.debug2
-rw-r--r--lib/Makefile6
-rw-r--r--lib/clz_ctz.c32
-rw-r--r--lib/cpumask.c5
-rw-r--r--lib/genalloc.c2
-rw-r--r--lib/iov_iter.c43
-rw-r--r--lib/maple_tree.c10
-rw-r--r--lib/radix-tree.c1
-rw-r--r--lib/sbitmap.c15
-rw-r--r--lib/scatterlist.c2
-rw-r--r--lib/test_bitmap.c8
-rw-r--r--lib/test_maple_tree.c5
-rw-r--r--mm/Makefile2
-rw-r--r--mm/compaction.c8
-rw-r--r--mm/damon/core-test.h10
-rw-r--r--mm/damon/core.c1
-rw-r--r--mm/damon/vaddr.c2
-rw-r--r--mm/filemap.c65
-rw-r--r--mm/folio-compat.c2
-rw-r--r--mm/gup.c30
-rw-r--r--mm/hmm.c1
-rw-r--r--mm/huge_memory.c11
-rw-r--r--mm/hugetlb.c75
-rw-r--r--mm/internal.h17
-rw-r--r--mm/khugepaged.c13
-rw-r--r--mm/ksm.c27
-rw-r--r--mm/madvise.c9
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memory-failure.c43
-rw-r--r--mm/memory.c36
-rw-r--r--mm/mempolicy.c29
-rw-r--r--mm/migrate_device.c1
-rw-r--r--mm/mincore.c1
-rw-r--r--mm/mlock.c10
-rw-r--r--mm/mmap.c1
-rw-r--r--mm/mprotect.c1
-rw-r--r--mm/page-writeback.c49
-rw-r--r--mm/pagewalk.c41
-rw-r--r--mm/readahead.c13
-rw-r--r--mm/shmem.c866
-rw-r--r--mm/shmem_quota.c350
-rw-r--r--mm/swapfile.c8
-rw-r--r--mm/truncate.c4
-rw-r--r--mm/vmalloc.c4
-rw-r--r--mm/vmscan.c14
-rw-r--r--mm/zsmalloc.c14
-rw-r--r--net/9p/client.c46
-rw-r--r--net/9p/trans_virtio.c8
-rw-r--r--net/batman-adv/bat_v_elp.c3
-rw-r--r--net/batman-adv/bat_v_ogm.c7
-rw-r--r--net/batman-adv/hard-interface.c14
-rw-r--r--net/batman-adv/netlink.c3
-rw-r--r--net/batman-adv/soft-interface.c3
-rw-r--r--net/batman-adv/translation-table.c1
-rw-r--r--net/batman-adv/types.h6
-rw-r--r--net/bluetooth/hci_conn.c14
-rw-r--r--net/bluetooth/hci_core.c42
-rw-r--r--net/bluetooth/hci_event.c15
-rw-r--r--net/bluetooth/hci_sync.c117
-rw-r--r--net/bluetooth/iso.c53
-rw-r--r--net/bluetooth/mgmt.c28
-rw-r--r--net/bluetooth/sco.c23
-rw-r--r--net/can/bcm.c12
-rw-r--r--net/can/isotp.c22
-rw-r--r--net/can/raw.c79
-rw-r--r--net/ceph/messenger.c1
-rw-r--r--net/ceph/osd_client.c20
-rw-r--r--net/core/bpf_sk_storage.c5
-rw-r--r--net/core/filter.c6
-rw-r--r--net/core/rtnetlink.c33
-rw-r--r--net/core/skmsg.c10
-rw-r--r--net/core/sock.c73
-rw-r--r--net/core/sock_map.c12
-rw-r--r--net/dcb/dcbnl.c2
-rw-r--r--net/dccp/ipv4.c4
-rw-r--r--net/dccp/ipv6.c4
-rw-r--r--net/dccp/output.c2
-rw-r--r--net/dccp/proto.c30
-rw-r--r--net/devlink/leftover.c3
-rw-r--r--net/dsa/port.c9
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/esp4.c2
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/inet_diag.c4
-rw-r--r--net/ipv4/inet_hashtables.c17
-rw-r--r--net/ipv4/inet_timewait_sock.c8
-rw-r--r--net/ipv4/ip_gre.c6
-rw-r--r--net/ipv4/ip_output.c17
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ip_vti.c4
-rw-r--r--net/ipv4/nexthop.c28
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv4/tcp.c57
-rw-r--r--net/ipv4/tcp_fastopen.c6
-rw-r--r--net/ipv4/tcp_ipv4.c18
-rw-r--r--net/ipv4/tcp_metrics.c70
-rw-r--r--net/ipv4/tcp_minisocks.c11
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c4
-rw-r--r--net/ipv4/udp.c8
-rw-r--r--net/ipv4/udp_offload.c23
-rw-r--r--net/ipv6/Kconfig2
-rw-r--r--net/ipv6/addrconf.c14
-rw-r--r--net/ipv6/ip6_gre.c3
-rw-r--r--net/ipv6/ip6_vti.c4
-rw-r--r--net/ipv6/ip6mr.c2
-rw-r--r--net/ipv6/ndisc.c3
-rw-r--r--net/ipv6/ping.c2
-rw-r--r--net/ipv6/raw.c6
-rw-r--r--net/ipv6/route.c7
-rw-r--r--net/ipv6/tcp_ipv6.c13
-rw-r--r--net/ipv6/udp.c12
-rw-r--r--net/ipv6/udp_offload.c10
-rw-r--r--net/key/af_key.c4
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/llc/llc_conn.c49
-rw-r--r--net/llc/llc_if.c2
-rw-r--r--net/llc/llc_input.c3
-rw-r--r--net/llc/llc_sap.c18
-rw-r--r--net/mac80211/rx.c12
-rw-r--r--net/mptcp/protocol.c7
-rw-r--r--net/mptcp/protocol.h1
-rw-r--r--net/mptcp/sockopt.c2
-rw-r--r--net/mptcp/subflow.c58
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c6
-rw-r--r--net/netfilter/nf_tables_api.c377
-rw-r--r--net/netfilter/nft_dynset.c3
-rw-r--r--net/netfilter/nft_immediate.c27
-rw-r--r--net/netfilter/nft_set_hash.c88
-rw-r--r--net/netfilter/nft_set_pipapo.c101
-rw-r--r--net/netfilter/nft_set_rbtree.c167
-rw-r--r--net/netfilter/nft_socket.c2
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/openvswitch/datapath.c8
-rw-r--r--net/packet/af_packet.c30
-rw-r--r--net/sched/cls_bpf.c99
-rw-r--r--net/sched/cls_flower.c104
-rw-r--r--net/sched/cls_fw.c1
-rw-r--r--net/sched/cls_matchall.c35
-rw-r--r--net/sched/cls_route.c1
-rw-r--r--net/sched/cls_u32.c105
-rw-r--r--net/sched/em_meta.c4
-rw-r--r--net/sched/sch_api.c53
-rw-r--r--net/sched/sch_mqprio.c14
-rw-r--r--net/sched/sch_taprio.c15
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/smc/af_smc.c79
-rw-r--r--net/smc/smc.h2
-rw-r--r--net/smc/smc_clc.c4
-rw-r--r--net/smc/smc_core.c25
-rw-r--r--net/smc/smc_sysctl.c10
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/svcsock.c3
-rw-r--r--net/sunrpc/xprtrdma/verbs.c9
-rw-r--r--net/tipc/crypto.c3
-rw-r--r--net/tipc/node.c2
-rw-r--r--net/tls/tls_device.c64
-rw-r--r--net/tls/tls_main.c3
-rw-r--r--net/unix/af_unix.c25
-rw-r--r--net/wireless/nl80211.c5
-rw-r--r--net/wireless/scan.c2
-rw-r--r--net/xdp/xsk.c3
-rw-r--r--net/xfrm/xfrm_compat.c2
-rw-r--r--net/xfrm/xfrm_input.c22
-rw-r--r--net/xfrm/xfrm_interface_core.c4
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_state.c8
-rw-r--r--net/xfrm/xfrm_user.c15
-rw-r--r--rust/Makefile2
-rw-r--r--rust/bindings/bindings_helper.h1
-rw-r--r--rust/kernel/allocator.rs74
-rw-r--r--rust/kernel/sync/arc.rs3
-rw-r--r--rust/kernel/types.rs22
-rw-r--r--rust/macros/vtable.rs1
-rw-r--r--samples/ftrace/ftrace-direct-modify.c4
-rw-r--r--samples/ftrace/ftrace-direct-multi-modify.c4
-rw-r--r--samples/ftrace/ftrace-direct-multi.c2
-rw-r--r--samples/ftrace/ftrace-direct-too.c2
-rw-r--r--samples/ftrace/ftrace-direct.c2
-rw-r--r--scripts/Makefile.build5
-rw-r--r--scripts/Makefile.host6
-rwxr-xr-xscripts/clang-tools/gen_compile_commands.py2
-rw-r--r--scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci68
-rw-r--r--scripts/kallsyms.c1
-rw-r--r--scripts/kconfig/gconf.c11
-rw-r--r--scripts/spelling.txt1
-rw-r--r--security/apparmor/apparmorfs.c11
-rw-r--r--security/apparmor/policy_unpack.c11
-rw-r--r--security/inode.c2
-rw-r--r--security/keys/keyctl.c11
-rw-r--r--security/keys/request_key.c35
-rw-r--r--security/keys/sysctl.c2
-rw-r--r--security/keys/trusted-keys/trusted_tpm2.c2
-rw-r--r--security/security.c14
-rw-r--r--security/selinux/hooks.c22
-rw-r--r--security/selinux/selinuxfs.c2
-rw-r--r--security/selinux/ss/policydb.c2
-rw-r--r--security/smack/smack_lsm.c51
-rw-r--r--sound/core/seq/seq_ports.c1
-rw-r--r--sound/core/seq/seq_ump_client.c3
-rw-r--r--sound/drivers/pcmtest.c12
-rw-r--r--sound/pci/hda/patch_cs8409-tables.c4
-rw-r--r--sound/pci/hda/patch_realtek.c74
-rw-r--r--sound/pci/ymfpci/ymfpci.c10
-rw-r--r--sound/soc/amd/acp/amd.h7
-rw-r--r--sound/soc/amd/ps/acp63.h22
-rw-r--r--sound/soc/amd/ps/pci-ps.c4
-rw-r--r--sound/soc/amd/ps/ps-sdw-dma.c16
-rw-r--r--sound/soc/amd/yc/acp6x-mach.c9
-rw-r--r--sound/soc/atmel/atmel-i2s.c5
-rw-r--r--sound/soc/codecs/Kconfig2
-rw-r--r--sound/soc/codecs/cs35l41.c2
-rw-r--r--sound/soc/codecs/cs35l56-i2c.c9
-rw-r--r--sound/soc/codecs/cs35l56-spi.c9
-rw-r--r--sound/soc/codecs/cs35l56.c31
-rw-r--r--sound/soc/codecs/cs42l51-i2c.c6
-rw-r--r--sound/soc/codecs/cs42l51.c7
-rw-r--r--sound/soc/codecs/cs42l51.h1
-rw-r--r--sound/soc/codecs/da7219-aad.c12
-rw-r--r--sound/soc/codecs/es8316.c2
-rw-r--r--sound/soc/codecs/max98363.c9
-rw-r--r--sound/soc/codecs/nau8821.c41
-rw-r--r--sound/soc/codecs/rt1308-sdw.c13
-rw-r--r--sound/soc/codecs/rt5640.c13
-rw-r--r--sound/soc/codecs/rt5645.c6
-rw-r--r--sound/soc/codecs/rt5665.c2
-rw-r--r--sound/soc/codecs/rt5682-sdw.c9
-rw-r--r--sound/soc/codecs/rt711-sdca-sdw.c10
-rw-r--r--sound/soc/codecs/rt711-sdw.c9
-rw-r--r--sound/soc/codecs/rt712-sdca-sdw.c10
-rw-r--r--sound/soc/codecs/rt722-sdca-sdw.c10
-rw-r--r--sound/soc/codecs/tas2781-comlib.c19
-rw-r--r--sound/soc/codecs/wcd-mbhc-v2.c57
-rw-r--r--sound/soc/codecs/wcd934x.c20
-rw-r--r--sound/soc/codecs/wcd938x.c99
-rw-r--r--sound/soc/codecs/wm8904.c3
-rw-r--r--sound/soc/fsl/fsl_micfil.c4
-rw-r--r--sound/soc/fsl/fsl_micfil.h2
-rw-r--r--sound/soc/fsl/fsl_sai.c8
-rw-r--r--sound/soc/fsl/fsl_sai.h1
-rw-r--r--sound/soc/fsl/fsl_spdif.c2
-rw-r--r--sound/soc/intel/boards/sof_sdw.c2
-rw-r--r--sound/soc/intel/boards/sof_sdw_cs42l42.c6
-rw-r--r--sound/soc/meson/axg-tdm-formatter.c42
-rw-r--r--sound/soc/qcom/qdsp6/q6afe-dai.c2
-rw-r--r--sound/soc/qcom/qdsp6/q6apm-dai.c1
-rw-r--r--sound/soc/qcom/qdsp6/q6apm.c7
-rw-r--r--sound/soc/qcom/qdsp6/topology.c4
-rw-r--r--sound/soc/soc-core.c6
-rw-r--r--sound/soc/soc-pcm.c8
-rw-r--r--sound/soc/soc-topology.c10
-rw-r--r--sound/soc/sof/amd/acp.c10
-rw-r--r--sound/soc/sof/intel/hda-dai-ops.c11
-rw-r--r--sound/soc/sof/intel/hda-dai.c5
-rw-r--r--sound/soc/sof/intel/hda.h2
-rw-r--r--sound/soc/sof/ipc3-dtrace.c9
-rw-r--r--sound/soc/sof/ipc3.c2
-rw-r--r--sound/soc/sof/ipc4-pcm.c3
-rw-r--r--sound/soc/sof/ipc4-topology.c6
-rw-r--r--sound/soc/tegra/tegra210_adx.c34
-rw-r--r--sound/soc/tegra/tegra210_amx.c40
-rw-r--r--sound/usb/mixer_maps.c14
-rw-r--r--sound/usb/quirks-table.h29
-rw-r--r--sound/usb/quirks.c37
-rw-r--r--tools/arch/arm64/include/asm/cputype.h8
-rw-r--r--tools/arch/arm64/include/uapi/asm/bitsperlong.h24
-rw-r--r--tools/arch/riscv/include/uapi/asm/bitsperlong.h14
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h2
-rw-r--r--tools/arch/x86/include/asm/msr-index.h1
-rw-r--r--tools/arch/x86/include/uapi/asm/unistd_32.h3
-rw-r--r--tools/arch/x86/include/uapi/asm/unistd_64.h3
-rw-r--r--tools/build/feature/Makefile2
-rw-r--r--tools/counter/Makefile3
-rwxr-xr-xtools/hv/vmbus_testing4
-rw-r--r--tools/include/uapi/asm-generic/unistd.h5
-rw-r--r--tools/include/uapi/drm/i915_drm.h95
-rw-r--r--tools/include/uapi/linux/fcntl.h5
-rw-r--r--tools/include/uapi/linux/kvm.h6
-rw-r--r--tools/include/uapi/linux/mman.h14
-rw-r--r--tools/include/uapi/linux/mount.h3
-rw-r--r--tools/include/uapi/linux/prctl.h11
-rw-r--r--tools/include/uapi/linux/vhost.h31
-rw-r--r--tools/include/uapi/sound/asound.h81
-rw-r--r--tools/lib/subcmd/help.c18
-rw-r--r--tools/net/ynl/lib/ynl.py16
-rw-r--r--tools/objtool/arch/x86/decode.c6
-rw-r--r--tools/objtool/check.c45
-rw-r--r--tools/objtool/include/objtool/arch.h1
-rw-r--r--tools/objtool/include/objtool/elf.h1
-rw-r--r--tools/perf/Makefile.config4
-rw-r--r--tools/perf/arch/arm64/util/pmu.c7
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl1
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/powerpc/util/skip-callchain-idx.c4
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl1
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--tools/perf/bench/Build1
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/sched-seccomp-notify.c178
-rw-r--r--tools/perf/builtin-bench.c1
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/recommended.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/recommended.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen3/recommended.json3
-rw-r--r--tools/perf/tests/parse-events.c12
-rwxr-xr-xtools/perf/tests/shell/test_uprobe_from_different_cu.sh83
-rw-r--r--tools/perf/tests/task-exit.c4
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h5
-rwxr-xr-xtools/perf/trace/beauty/move_mount_flags.sh2
-rw-r--r--tools/perf/trace/beauty/msg_flags.c8
-rw-r--r--tools/perf/util/dwarf-aux.c4
-rw-r--r--tools/perf/util/machine.c5
-rw-r--r--tools/perf/util/parse-events.c66
-rw-r--r--tools/perf/util/pmu.c11
-rw-r--r--tools/perf/util/pmu.h1
-rw-r--r--tools/perf/util/pmus.c16
-rw-r--r--tools/perf/util/stat-display.c5
-rw-r--r--tools/perf/util/thread-stack.c4
-rw-r--r--tools/testing/cxl/test/cxl.c4
-rw-r--r--tools/testing/radix-tree/maple.c6
-rw-r--r--tools/testing/radix-tree/regression1.c2
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/alsa/.gitignore1
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c4
-rw-r--r--tools/testing/selftests/arm64/Makefile2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c74
-rw-r--r--tools/testing/selftests/bpf/progs/async_stack_depth.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c14
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c80
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c8
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile4
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan.sh99
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh3
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh158
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh118
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh16
-rw-r--r--tools/testing/selftests/fchmodat2/.gitignore2
-rw-r--r--tools/testing/selftests/fchmodat2/Makefile6
-rw-r--r--tools/testing/selftests/fchmodat2/fchmodat2_test.c142
-rw-r--r--tools/testing/selftests/filelock/Makefile5
-rw-r--r--tools/testing/selftests/filelock/ofdlocks.c132
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc31
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc2
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h6
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c68
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c70
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c4
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/charge_reserved_hugetlb.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/check_config.sh0
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c7
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/hugetlb_reparenting_test.sh0
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c1
-rw-r--r--tools/testing/selftests/mm/mkdirty.c2
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/run_vmtests.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/test_hmm.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/test_vmalloc.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/va_high_addr_switch.sh0
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/write_hugetlb_memory.sh0
-rw-r--r--tools/testing/selftests/net/.gitignore2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh59
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_max.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_mm.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib.sh17
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh3
-rw-r--r--tools/testing/selftests/net/forwarding/settings1
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh13
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh9
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh10
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh35
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c2
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c1
-rw-r--r--tools/testing/selftests/riscv/Makefile2
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c2
-rw-r--r--tools/testing/selftests/rseq/Makefile4
-rw-r--r--tools/testing/selftests/rseq/rseq.c30
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c67
-rw-r--r--tools/testing/selftests/tc-testing/config2
-rw-r--r--tools/testing/selftests/tc-testing/settings1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json25
-rw-r--r--tools/testing/selftests/timers/raw_skew.c3
-rw-r--r--tools/testing/vsock/Makefile2
-rw-r--r--virt/kvm/kvm_main.c24
2037 files changed, 25376 insertions, 13766 deletions
diff --git a/.mailmap b/.mailmap
index 89b7f33cd330..e50662536c48 100644
--- a/.mailmap
+++ b/.mailmap
@@ -13,7 +13,9 @@
Aaron Durbin <adurbin@google.com>
Abel Vesa <abelvesa@kernel.org> <abel.vesa@nxp.com>
Abel Vesa <abelvesa@kernel.org> <abelvesa@gmail.com>
+Abhijeet Dharmapurikar <quic_adharmap@quicinc.com> <adharmap@codeaurora.org>
Abhinav Kumar <quic_abhinavk@quicinc.com> <abhinavk@codeaurora.org>
+Ahmad Masri <quic_amasri@quicinc.com> <amasri@codeaurora.org>
Adam Oldham <oldhamca@gmail.com>
Adam Radford <aradford@gmail.com>
Adriana Reus <adi.reus@gmail.com> <adriana.reus@intel.com>
@@ -30,6 +32,7 @@ Alexander Mikhalitsyn <alexander@mihalicyn.com> <alexander.mikhalitsyn@virtuozzo
Alexander Mikhalitsyn <alexander@mihalicyn.com> <aleksandr.mikhalitsyn@canonical.com>
Alexandre Belloni <alexandre.belloni@bootlin.com> <alexandre.belloni@free-electrons.com>
Alexandre Ghiti <alex@ghiti.fr> <alexandre.ghiti@canonical.com>
+Alexei Avshalom Lazar <quic_ailizaro@quicinc.com> <ailizaro@codeaurora.org>
Alexei Starovoitov <ast@kernel.org> <alexei.starovoitov@gmail.com>
Alexei Starovoitov <ast@kernel.org> <ast@fb.com>
Alexei Starovoitov <ast@kernel.org> <ast@plumgrid.com>
@@ -37,8 +40,11 @@ Alex Hung <alexhung@gmail.com> <alex.hung@canonical.com>
Alex Shi <alexs@kernel.org> <alex.shi@intel.com>
Alex Shi <alexs@kernel.org> <alex.shi@linaro.org>
Alex Shi <alexs@kernel.org> <alex.shi@linux.alibaba.com>
+Aloka Dixit <quic_alokad@quicinc.com> <alokad@codeaurora.org>
Al Viro <viro@ftp.linux.org.uk>
Al Viro <viro@zenIV.linux.org.uk>
+Amit Blay <quic_ablay@quicinc.com> <ablay@codeaurora.org>
+Amit Nischal <quic_anischal@quicinc.com> <anischal@codeaurora.org>
Andi Kleen <ak@linux.intel.com> <ak@suse.de>
Andi Shyti <andi@etezian.org> <andi.shyti@samsung.com>
Andreas Herrmann <aherrman@de.ibm.com>
@@ -54,6 +60,8 @@ Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com>
Andrzej Hajda <andrzej.hajda@intel.com> <a.hajda@samsung.com>
André Almeida <andrealmeid@igalia.com> <andrealmeid@collabora.com>
Andy Adamson <andros@citi.umich.edu>
+Anilkumar Kolli <quic_akolli@quicinc.com> <akolli@codeaurora.org>
+Anirudh Ghayal <quic_aghayal@quicinc.com> <aghayal@codeaurora.org>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@bootlin.com>
Antoine Tenart <atenart@kernel.org> <antoine.tenart@free-electrons.com>
Antonio Ospite <ao2@ao2.it> <ao2@amarulasolutions.com>
@@ -62,9 +70,17 @@ Archit Taneja <archit@ti.com>
Ard Biesheuvel <ardb@kernel.org> <ard.biesheuvel@linaro.org>
Arnaud Patard <arnaud.patard@rtp-net.org>
Arnd Bergmann <arnd@arndb.de>
+Arun Kumar Neelakantam <quic_aneela@quicinc.com> <aneela@codeaurora.org>
+Ashok Raj Nagarajan <quic_arnagara@quicinc.com> <arnagara@codeaurora.org>
+Ashwin Chaugule <quic_ashwinc@quicinc.com> <ashwinc@codeaurora.org>
+Asutosh Das <quic_asutoshd@quicinc.com> <asutoshd@codeaurora.org>
Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
+Avaneesh Kumar Dwivedi <quic_akdwived@quicinc.com> <akdwived@codeaurora.org>
Axel Dyks <xl@xlsigned.net>
Axel Lin <axel.lin@gmail.com>
+Balakrishna Godavarthi <quic_bgodavar@quicinc.com> <bgodavar@codeaurora.org>
+Banajit Goswami <quic_bgoswami@quicinc.com> <bgoswami@codeaurora.org>
+Baochen Qiang <quic_bqiang@quicinc.com> <bqiang@codeaurora.org>
Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@linaro.org>
Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@spreadtrum.com>
Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
@@ -93,12 +109,15 @@ Brian Avery <b.avery@hp.com>
Brian King <brking@us.ibm.com>
Brian Silverman <bsilver16384@gmail.com> <brian.silverman@bluerivertech.com>
Cai Huoqing <cai.huoqing@linux.dev> <caihuoqing@baidu.com>
+Can Guo <quic_cang@quicinc.com> <cang@codeaurora.org>
+Carl Huang <quic_cjhuang@quicinc.com> <cjhuang@codeaurora.org>
Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
Chao Yu <chao@kernel.org> <yuchao0@huawei.com>
Chris Chiu <chris.chiu@canonical.com> <chiu@endlessm.com>
Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
+Chris Lew <quic_clew@quicinc.com> <clew@codeaurora.org>
Christian Borntraeger <borntraeger@linux.ibm.com> <borntraeger@de.ibm.com>
Christian Borntraeger <borntraeger@linux.ibm.com> <cborntra@de.ibm.com>
Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
@@ -119,7 +138,10 @@ Daniel Borkmann <daniel@iogearbox.net> <dborkmann@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dborkman@redhat.com>
Daniel Borkmann <daniel@iogearbox.net> <dxchgb@gmail.com>
David Brownell <david-b@pacbell.net>
+David Collins <quic_collinsd@quicinc.com> <collinsd@codeaurora.org>
David Woodhouse <dwmw2@shinybook.infradead.org>
+Dedy Lansky <quic_dlansky@quicinc.com> <dlansky@codeaurora.org>
+Deepak Kumar Singh <quic_deesin@quicinc.com> <deesin@codeaurora.org>
Dengcheng Zhu <dzhu@wavecomp.com> <dczhu@mips.com>
Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@gmail.com>
Dengcheng Zhu <dzhu@wavecomp.com> <dengcheng.zhu@imgtec.com>
@@ -136,6 +158,7 @@ Dmitry Safonov <0x7f454c46@gmail.com> <dsafonov@virtuozzo.com>
Domen Puncer <domen@coderock.org>
Douglas Gilbert <dougg@torque.net>
Ed L. Cashin <ecashin@coraid.com>
+Elliot Berman <quic_eberman@quicinc.com> <eberman@codeaurora.org>
Enric Balletbo i Serra <eballetbo@kernel.org> <enric.balletbo@collabora.com>
Enric Balletbo i Serra <eballetbo@kernel.org> <eballetbo@iseebcn.com>
Erik Kaneda <erik.kaneda@intel.com> <erik.schmauss@intel.com>
@@ -148,6 +171,7 @@ Faith Ekstrand <faith.ekstrand@collabora.com> <jason.ekstrand@collabora.com>
Felipe W Damasio <felipewd@terra.com.br>
Felix Kuhling <fxkuehl@gmx.de>
Felix Moeller <felix@derklecks.de>
+Fenglin Wu <quic_fenglinw@quicinc.com> <fenglinw@codeaurora.org>
Filipe Lautert <filipe@icewall.org>
Finn Thain <fthain@linux-m68k.org> <fthain@telegraphics.com.au>
Franck Bui-Huu <vagabon.xyz@gmail.com>
@@ -171,8 +195,11 @@ Greg Kurz <groug@kaod.org> <gkurz@linux.vnet.ibm.com>
Gregory CLEMENT <gregory.clement@bootlin.com> <gregory.clement@free-electrons.com>
Guilherme G. Piccoli <kernel@gpiccoli.net> <gpiccoli@linux.vnet.ibm.com>
Guilherme G. Piccoli <kernel@gpiccoli.net> <gpiccoli@canonical.com>
+Gokul Sriram Palanisamy <quic_gokulsri@quicinc.com> <gokulsri@codeaurora.org>
+Govindaraj Saminathan <quic_gsamin@quicinc.com> <gsamin@codeaurora.org>
Guo Ren <guoren@kernel.org> <guoren@linux.alibaba.com>
Guo Ren <guoren@kernel.org> <ren_guo@c-sky.com>
+Guru Das Srinagesh <quic_gurus@quicinc.com> <gurus@codeaurora.org>
Gustavo Padovan <gustavo@las.ic.unicamp.br>
Gustavo Padovan <padovan@profusion.mobi>
Hanjun Guo <guohanjun@huawei.com> <hanjun.guo@linaro.org>
@@ -190,6 +217,7 @@ Huacai Chen <chenhuacai@kernel.org> <chenhuacai@loongson.cn>
J. Bruce Fields <bfields@fieldses.org> <bfields@redhat.com>
J. Bruce Fields <bfields@fieldses.org> <bfields@citi.umich.edu>
Jacob Shin <Jacob.Shin@amd.com>
+Jack Pham <quic_jackp@quicinc.com> <jackp@codeaurora.org>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@google.com>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk.kim@samsung.com>
Jaegeuk Kim <jaegeuk@kernel.org> <jaegeuk@motorola.com>
@@ -217,10 +245,12 @@ Jayachandran C <c.jayachandran@gmail.com> <jchandra@digeo.com>
Jayachandran C <c.jayachandran@gmail.com> <jnair@caviumnetworks.com>
<jean-philippe@linaro.org> <jean-philippe.brucker@arm.com>
Jean Tourrilhes <jt@hpl.hp.com>
+Jeevan Shriram <quic_jshriram@quicinc.com> <jshriram@codeaurora.org>
Jeff Garzik <jgarzik@pretzel.yyz.us>
Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net>
Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
+Jeffrey Hugo <quic_jhugo@quicinc.com> <jhugo@codeaurora.org>
Jens Axboe <axboe@kernel.dk> <axboe@suse.de>
Jens Axboe <axboe@kernel.dk> <jens.axboe@oracle.com>
Jens Axboe <axboe@kernel.dk> <axboe@fb.com>
@@ -228,6 +258,7 @@ Jens Axboe <axboe@kernel.dk> <axboe@meta.com>
Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
Jessica Zhang <quic_jesszhan@quicinc.com> <jesszhan@codeaurora.org>
+Jilai Wang <quic_jilaiw@quicinc.com> <jilaiw@codeaurora.org>
Jiri Pirko <jiri@resnulli.us> <jiri@nvidia.com>
Jiri Pirko <jiri@resnulli.us> <jiri@mellanox.com>
Jiri Pirko <jiri@resnulli.us> <jpirko@redhat.com>
@@ -238,9 +269,11 @@ Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.cz>
Jiri Slaby <jirislaby@kernel.org> <xslaby@fi.muni.cz>
Jisheng Zhang <jszhang@kernel.org> <jszhang@marvell.com>
Jisheng Zhang <jszhang@kernel.org> <Jisheng.Zhang@synaptics.com>
+Jishnu Prakash <quic_jprakash@quicinc.com> <jprakash@codeaurora.org>
Johan Hovold <johan@kernel.org> <jhovold@gmail.com>
Johan Hovold <johan@kernel.org> <johan@hovoldconsulting.com>
John Crispin <john@phrozen.org> <blogic@openwrt.org>
+John Fastabend <john.fastabend@gmail.com> <john.r.fastabend@intel.com>
John Keeping <john@keeping.me.uk> <john@metanate.com>
John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
John Stultz <johnstul@us.ibm.com>
@@ -255,6 +288,7 @@ Jordan Crouse <jordan@cosmicpenguin.net> <jcrouse@codeaurora.org>
<josh@joshtriplett.org> <josht@vnet.ibm.com>
Josh Poimboeuf <jpoimboe@kernel.org> <jpoimboe@redhat.com>
Josh Poimboeuf <jpoimboe@kernel.org> <jpoimboe@us.ibm.com>
+Jouni Malinen <quic_jouni@quicinc.com> <jouni@codeaurora.org>
Juha Yrjola <at solidboot.com>
Juha Yrjola <juha.yrjola@nokia.com>
Juha Yrjola <juha.yrjola@solidboot.com>
@@ -262,6 +296,8 @@ Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
Iskren Chernev <me@iskren.info> <iskren.chernev@gmail.com>
Kalle Valo <kvalo@kernel.org> <kvalo@codeaurora.org>
Kalyan Thota <quic_kalyant@quicinc.com> <kalyan_t@codeaurora.org>
+Karthikeyan Periyasamy <quic_periyasa@quicinc.com> <periyasa@codeaurora.org>
+Kathiravan T <quic_kathirav@quicinc.com> <kathirav@codeaurora.org>
Kay Sievers <kay.sievers@vrfy.org>
Kees Cook <keescook@chromium.org> <kees.cook@canonical.com>
Kees Cook <keescook@chromium.org> <keescook@google.com>
@@ -270,6 +306,8 @@ Kees Cook <keescook@chromium.org> <kees@ubuntu.com>
Keith Busch <kbusch@kernel.org> <keith.busch@intel.com>
Keith Busch <kbusch@kernel.org> <keith.busch@linux.intel.com>
Kenneth W Chen <kenneth.w.chen@intel.com>
+Kenneth Westfield <quic_kwestfie@quicinc.com> <kwestfie@codeaurora.org>
+Kiran Gunda <quic_kgunda@quicinc.com> <kgunda@codeaurora.org>
Kirill Tkhai <tkhai@ya.ru> <ktkhai@virtuozzo.com>
Konstantin Khlebnikov <koct9i@gmail.com> <khlebnikov@yandex-team.ru>
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
@@ -278,6 +316,7 @@ Krishna Manikandan <quic_mkrishn@quicinc.com> <mkrishn@codeaurora.org>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski.k@gmail.com>
Krzysztof Kozlowski <krzk@kernel.org> <k.kozlowski@samsung.com>
Krzysztof Kozlowski <krzk@kernel.org> <krzysztof.kozlowski@canonical.com>
+Kshitiz Godara <quic_kgodara@quicinc.com> <kgodara@codeaurora.org>
Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
Kuogee Hsieh <quic_khsieh@quicinc.com> <khsieh@codeaurora.org>
Lee Jones <lee@kernel.org> <joneslee@google.com>
@@ -291,19 +330,27 @@ Leonid I Ananiev <leonid.i.ananiev@intel.com>
Leon Romanovsky <leon@kernel.org> <leon@leon.nu>
Leon Romanovsky <leon@kernel.org> <leonro@mellanox.com>
Leon Romanovsky <leon@kernel.org> <leonro@nvidia.com>
+Liam Mark <quic_lmark@quicinc.com> <lmark@codeaurora.org>
Linas Vepstas <linas@austin.ibm.com>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@ascom.ch>
Linus Lüssing <linus.luessing@c0d3.blue> <linus.luessing@web.de>
<linux-hardening@vger.kernel.org> <kernel-hardening@lists.openwall.com>
Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
+Lior David <quic_liord@quicinc.com> <liord@codeaurora.org>
Lorenzo Pieralisi <lpieralisi@kernel.org> <lorenzo.pieralisi@arm.com>
Luca Ceresoli <luca.ceresoli@bootlin.com> <luca@lucaceresoli.net>
Lukasz Luba <lukasz.luba@arm.com> <l.luba@partner.samsung.com>
+Luo Jie <quic_luoj@quicinc.com> <luoj@codeaurora.org>
Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
Maciej W. Rozycki <macro@orcam.me.uk> <macro@linux-mips.org>
+Maharaja Kennadyrajan <quic_mkenna@quicinc.com> <mkenna@codeaurora.org>
+Maheshwar Ajja <quic_majja@quicinc.com> <majja@codeaurora.org>
+Malathi Gottam <quic_mgottam@quicinc.com> <mgottam@codeaurora.org>
+Manikanta Pubbisetty <quic_mpubbise@quicinc.com> <mpubbise@codeaurora.org>
Manivannan Sadhasivam <mani@kernel.org> <manivannanece23@gmail.com>
Manivannan Sadhasivam <mani@kernel.org> <manivannan.sadhasivam@linaro.org>
+Manoj Basapathi <quic_manojbm@quicinc.com> <manojbm@codeaurora.org>
Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
Marek Behún <kabel@kernel.org> <marek.behun@nic.cz>
@@ -333,6 +380,7 @@ Matt Ranostay <matt.ranostay@konsulko.com> <matt@ranostay.consulting>
Matt Ranostay <mranostay@gmail.com> Matthew Ranostay <mranostay@embeddedalley.com>
Matt Ranostay <mranostay@gmail.com> <matt.ranostay@intel.com>
Matt Redfearn <matt.redfearn@mips.com> <matt.redfearn@imgtec.com>
+Maulik Shah <quic_mkshah@quicinc.com> <mkshah@codeaurora.org>
Mauro Carvalho Chehab <mchehab@kernel.org> <maurochehab@gmail.com>
Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@brturbo.com.br>
Mauro Carvalho Chehab <mchehab@kernel.org> <mchehab@infradead.org>
@@ -345,7 +393,10 @@ Maxim Mikityanskiy <maxtram95@gmail.com> <maximmi@nvidia.com>
Maxime Ripard <mripard@kernel.org> <maxime@cerno.tech>
Maxime Ripard <mripard@kernel.org> <maxime.ripard@bootlin.com>
Maxime Ripard <mripard@kernel.org> <maxime.ripard@free-electrons.com>
+Maya Erez <quic_merez@quicinc.com> <merez@codeaurora.org>
Mayuresh Janorkar <mayur@ti.com>
+Md Sadre Alam <quic_mdalam@quicinc.com> <mdalam@codeaurora.org>
+Miaoqing Pan <quic_miaoqing@quicinc.com> <miaoqing@codeaurora.org>
Michael Buesch <m@bues.ch>
Michal Simek <michal.simek@amd.com> <michal.simek@xilinx.com>
Michel Dänzer <michel@tungstengraphics.com>
@@ -356,6 +407,7 @@ Miguel Ojeda <ojeda@kernel.org> <miguel.ojeda.sandonis@gmail.com>
Mike Rapoport <rppt@kernel.org> <mike@compulab.co.il>
Mike Rapoport <rppt@kernel.org> <mike.rapoport@gmail.com>
Mike Rapoport <rppt@kernel.org> <rppt@linux.ibm.com>
+Mike Tipton <quic_mdtipton@quicinc.com> <mdtipton@codeaurora.org>
Miodrag Dinic <miodrag.dinic@mips.com> <miodrag.dinic@imgtec.com>
Miquel Raynal <miquel.raynal@bootlin.com> <miquel.raynal@free-electrons.com>
Mitesh shah <mshah@teja.com>
@@ -364,9 +416,13 @@ Morten Welinder <terra@gnome.org>
Morten Welinder <welinder@anemone.rentec.com>
Morten Welinder <welinder@darter.rentec.com>
Morten Welinder <welinder@troll.com>
+Mukesh Ojha <quic_mojha@quicinc.com> <mojha@codeaurora.org>
+Muna Sinada <quic_msinada@quicinc.com> <msinada@codeaurora.org>
+Murali Nalajala <quic_mnalajal@quicinc.com> <mnalajal@codeaurora.org>
Mythri P K <mythripk@ti.com>
Nadia Yvette Chambers <nyc@holomorphy.com> William Lee Irwin III <wli@holomorphy.com>
Nathan Chancellor <nathan@kernel.org> <natechancellor@gmail.com>
+Neeraj Upadhyay <quic_neeraju@quicinc.com> <neeraju@codeaurora.org>
Neil Armstrong <neil.armstrong@linaro.org> <narmstrong@baylibre.com>
Nguyen Anh Quynh <aquynh@gmail.com>
Nicholas Piggin <npiggin@gmail.com> <npiggen@suse.de>
@@ -385,6 +441,7 @@ Nikolay Aleksandrov <razor@blackwall.org> <nikolay@redhat.com>
Nikolay Aleksandrov <razor@blackwall.org> <nikolay@cumulusnetworks.com>
Nikolay Aleksandrov <razor@blackwall.org> <nikolay@nvidia.com>
Nikolay Aleksandrov <razor@blackwall.org> <nikolay@isovalent.com>
+Odelu Kukatla <quic_okukatla@quicinc.com> <okukatla@codeaurora.org>
Oleksandr Natalenko <oleksandr@natalenko.name> <oleksandr@redhat.com>
Oleksij Rempel <linux@rempel-privat.de> <bug-track@fisher-privat.net>
Oleksij Rempel <linux@rempel-privat.de> <external.Oleksij.Rempel@de.bosch.com>
@@ -392,6 +449,7 @@ Oleksij Rempel <linux@rempel-privat.de> <fixed-term.Oleksij.Rempel@de.bosch.com>
Oleksij Rempel <linux@rempel-privat.de> <o.rempel@pengutronix.de>
Oleksij Rempel <linux@rempel-privat.de> <ore@pengutronix.de>
Oliver Upton <oliver.upton@linux.dev> <oupton@google.com>
+Oza Pawandeep <quic_poza@quicinc.com> <poza@codeaurora.org>
Pali Rohár <pali@kernel.org> <pali.rohar@gmail.com>
Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Patrick Mochel <mochel@digitalimplant.org>
@@ -403,11 +461,14 @@ Paul E. McKenney <paulmck@kernel.org> <paulmck@linux.vnet.ibm.com>
Paul E. McKenney <paulmck@kernel.org> <paulmck@us.ibm.com>
Paul Mackerras <paulus@ozlabs.org> <paulus@samba.org>
Paul Mackerras <paulus@ozlabs.org> <paulus@au1.ibm.com>
+Pavankumar Kondeti <quic_pkondeti@quicinc.com> <pkondeti@codeaurora.org>
Peter A Jonsson <pj@ludd.ltu.se>
Peter Oruba <peter.oruba@amd.com>
Peter Oruba <peter@oruba.de>
Pratyush Anand <pratyush.anand@gmail.com> <pratyush.anand@st.com>
Praveen BP <praveenbp@ti.com>
+Pradeep Kumar Chitrapu <quic_pradeepc@quicinc.com> <pradeepc@codeaurora.org>
+Prasad Sodagudi <quic_psodagud@quicinc.com> <psodagud@codeaurora.org>
Punit Agrawal <punitagrawal@gmail.com> <punit.agrawal@arm.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@imgtec.com>
Qais Yousef <qyousef@layalina.io> <qais.yousef@arm.com>
@@ -416,10 +477,16 @@ Quentin Perret <qperret@qperret.net> <quentin.perret@arm.com>
Rafael J. Wysocki <rjw@rjwysocki.net> <rjw@sisk.pl>
Rajeev Nandan <quic_rajeevny@quicinc.com> <rajeevny@codeaurora.org>
Rajendra Nayak <quic_rjendra@quicinc.com> <rnayak@codeaurora.org>
+Rajeshwari Ravindra Kamble <quic_rkambl@quicinc.com> <rkambl@codeaurora.org>
+Raju P.L.S.S.S.N <quic_rplsssn@quicinc.com> <rplsssn@codeaurora.org>
Rajesh Shah <rajesh.shah@intel.com>
+Rakesh Pillai <quic_pillair@quicinc.com> <pillair@codeaurora.org>
Ralf Baechle <ralf@linux-mips.org>
Ralf Wildenhues <Ralf.Wildenhues@gmx.de>
+Ram Chandra Jangir <quic_rjangir@quicinc.com> <rjangir@codeaurora.org>
Randy Dunlap <rdunlap@infradead.org> <rdunlap@xenotime.net>
+Ravi Kumar Bokka <quic_rbokka@quicinc.com> <rbokka@codeaurora.org>
+Ravi Kumar Siddojigari <quic_rsiddoji@quicinc.com> <rsiddoji@codeaurora.org>
Rémi Denis-Courmont <rdenis@simphalempin.com>
Ricardo Ribalda <ribalda@kernel.org> <ricardo@ribalda.com>
Ricardo Ribalda <ribalda@kernel.org> Ricardo Ribalda Delgado <ribalda@kernel.org>
@@ -428,6 +495,7 @@ Richard Leitner <richard.leitner@linux.dev> <dev@g0hl1n.net>
Richard Leitner <richard.leitner@linux.dev> <me@g0hl1n.net>
Richard Leitner <richard.leitner@linux.dev> <richard.leitner@skidata.com>
Robert Foss <rfoss@kernel.org> <robert.foss@linaro.org>
+Rocky Liao <quic_rjliao@quicinc.com> <rjliao@codeaurora.org>
Roman Gushchin <roman.gushchin@linux.dev> <guro@fb.com>
Roman Gushchin <roman.gushchin@linux.dev> <guroan@gmail.com>
Roman Gushchin <roman.gushchin@linux.dev> <klamm@yandex-team.ru>
@@ -445,22 +513,37 @@ Santosh Shilimkar <santosh.shilimkar@oracle.org>
Santosh Shilimkar <ssantosh@kernel.org>
Sarangdhar Joshi <spjoshi@codeaurora.org>
Sascha Hauer <s.hauer@pengutronix.de>
+Sahitya Tummala <quic_stummala@quicinc.com> <stummala@codeaurora.org>
+Sathishkumar Muruganandam <quic_murugana@quicinc.com> <murugana@codeaurora.org>
Satya Priya <quic_c_skakit@quicinc.com> <skakit@codeaurora.org>
S.Çağlar Onur <caglar@pardus.org.tr>
+Sayali Lokhande <quic_sayalil@quicinc.com> <sayalil@codeaurora.org>
Sean Christopherson <seanjc@google.com> <sean.j.christopherson@intel.com>
Sean Nyekjaer <sean@geanix.com> <sean.nyekjaer@prevas.dk>
+Sean Tranchetti <quic_stranche@quicinc.com> <stranche@codeaurora.org>
Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
Sebastian Reichel <sre@kernel.org> <sre@debian.org>
Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
+Senthilkumar N L <quic_snlakshm@quicinc.com> <snlakshm@codeaurora.org>
Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
Shannon Nelson <shannon.nelson@amd.com> <snelson@pensando.io>
+Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@intel.com>
+Shannon Nelson <shannon.nelson@amd.com> <shannon.nelson@oracle.com>
+Sharath Chandra Vurukala <quic_sharathv@quicinc.com> <sharathv@codeaurora.org>
Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
Shuah Khan <shuah@kernel.org> <shuahkh@osg.samsung.com>
Shuah Khan <shuah@kernel.org> <shuah.kh@samsung.com>
+Sibi Sankar <quic_sibis@quicinc.com> <sibis@codeaurora.org>
+Sid Manning <quic_sidneym@quicinc.com> <sidneym@codeaurora.org>
Simon Arlott <simon@octiron.net> <simon@fire.lp0.eu>
+Simon Horman <horms@kernel.org> <simon.horman@corigine.com>
+Simon Horman <horms@kernel.org> <simon.horman@netronome.com>
Simon Kelley <simon@thekelleys.org.uk>
+Sricharan Ramabadhran <quic_srichara@quicinc.com> <sricharan@codeaurora.org>
+Srinivas Ramana <quic_sramana@quicinc.com> <sramana@codeaurora.org>
+Sriram R <quic_srirrama@quicinc.com> <srirrama@codeaurora.org>
Stéphane Witzmann <stephane.witzmann@ubpmes.univ-bpclermont.fr>
Stephen Hemminger <stephen@networkplumber.org> <shemminger@linux-foundation.org>
Stephen Hemminger <stephen@networkplumber.org> <shemminger@osdl.org>
@@ -468,22 +551,30 @@ Stephen Hemminger <stephen@networkplumber.org> <sthemmin@microsoft.com>
Stephen Hemminger <stephen@networkplumber.org> <sthemmin@vyatta.com>
Steve Wise <larrystevenwise@gmail.com> <swise@chelsio.com>
Steve Wise <larrystevenwise@gmail.com> <swise@opengridcomputing.com>
-Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com> <subashab@codeaurora.org>
+Subbaraman Narayanamurthy <quic_subbaram@quicinc.com> <subbaram@codeaurora.org>
Subhash Jadavani <subhashj@codeaurora.org>
+Sudarshan Rajagopalan <quic_sudaraja@quicinc.com> <sudaraja@codeaurora.org>
Sudeep Holla <sudeep.holla@arm.com> Sudeep KarkadaNagesha <sudeep.karkadanagesha@arm.com>
Sumit Semwal <sumit.semwal@ti.com>
+Surabhi Vishnoi <quic_svishnoi@quicinc.com> <svishnoi@codeaurora.org>
Takashi YOSHII <takashi.yoshii.zj@renesas.com>
+Tamizh Chelvam Raja <quic_tamizhr@quicinc.com> <tamizhr@codeaurora.org>
+Taniya Das <quic_tdas@quicinc.com> <tdas@codeaurora.org>
Tejun Heo <htejun@gmail.com>
Thomas Graf <tgraf@suug.ch>
Thomas Körper <socketcan@esd.eu> <thomas.koerper@esd.eu>
Thomas Pedersen <twp@codeaurora.org>
Tiezhu Yang <yangtiezhu@loongson.cn> <kernelpatch@126.com>
+Tingwei Zhang <quic_tingwei@quicinc.com> <tingwei@codeaurora.org>
+Tirupathi Reddy <quic_tirupath@quicinc.com> <tirupath@codeaurora.org>
Tobias Klauser <tklauser@distanz.ch> <tobias.klauser@gmail.com>
Tobias Klauser <tklauser@distanz.ch> <klto@zhaw.ch>
Tobias Klauser <tklauser@distanz.ch> <tklauser@nuerscht.ch>
Tobias Klauser <tklauser@distanz.ch> <tklauser@xenon.tklauser.home>
Todor Tomov <todor.too@gmail.com> <todor.tomov@linaro.org>
Tony Luck <tony.luck@intel.com>
+Trilok Soni <quic_tsoni@quicinc.com> <tsoni@codeaurora.org>
TripleX Chung <xxx.phy@gmail.com> <triplex@zh-kernel.org>
TripleX Chung <xxx.phy@gmail.com> <zhongyu@18mail.cn>
Tsuneo Yoshioka <Tsuneo.Yoshioka@f-secure.com>
@@ -496,11 +587,17 @@ Uwe Kleine-König <ukleinek@strlen.de>
Uwe Kleine-König <ukl@pengutronix.de>
Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
+Vara Reddy <quic_varar@quicinc.com> <varar@codeaurora.org>
+Varadarajan Narayanan <quic_varada@quicinc.com> <varada@codeaurora.org>
+Vasanthakumar Thiagarajan <quic_vthiagar@quicinc.com> <vthiagar@codeaurora.org>
Vasily Averin <vasily.averin@linux.dev> <vvs@virtuozzo.com>
Vasily Averin <vasily.averin@linux.dev> <vvs@openvz.org>
Vasily Averin <vasily.averin@linux.dev> <vvs@parallels.com>
Vasily Averin <vasily.averin@linux.dev> <vvs@sw.ru>
Valentin Schneider <vschneid@redhat.com> <valentin.schneider@arm.com>
+Veera Sundaram Sankaran <quic_veeras@quicinc.com> <veeras@codeaurora.org>
+Veerabhadrarao Badiganti <quic_vbadigan@quicinc.com> <vbadigan@codeaurora.org>
+Venkateswara Naralasetty <quic_vnaralas@quicinc.com> <vnaralas@codeaurora.org>
Vikash Garodia <quic_vgarodia@quicinc.com> <vgarodia@codeaurora.org>
Vinod Koul <vkoul@kernel.org> <vinod.koul@intel.com>
Vinod Koul <vkoul@kernel.org> <vinod.koul@linux.intel.com>
@@ -510,11 +607,14 @@ Viresh Kumar <vireshk@kernel.org> <viresh.kumar@st.com>
Viresh Kumar <vireshk@kernel.org> <viresh.linux@gmail.com>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.org>
Viresh Kumar <viresh.kumar@linaro.org> <viresh.kumar@linaro.com>
+Vivek Aknurwar <quic_viveka@quicinc.com> <viveka@codeaurora.org>
Vivien Didelot <vivien.didelot@gmail.com> <vivien.didelot@savoirfairelinux.com>
Vlad Dogaru <ddvlad@gmail.com> <vlad.dogaru@intel.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@parallels.com>
Vladimir Davydov <vdavydov.dev@gmail.com> <vdavydov@virtuozzo.com>
WeiXiong Liao <gmpy.liaowx@gmail.com> <liaoweixiong@allwinnertech.com>
+Wen Gong <quic_wgong@quicinc.com> <wgong@codeaurora.org>
+Wesley Cheng <quic_wcheng@quicinc.com> <wcheng@codeaurora.org>
Will Deacon <will@kernel.org> <will.deacon@arm.com>
Wolfram Sang <wsa@kernel.org> <w.sang@pengutronix.de>
Wolfram Sang <wsa@kernel.org> <wsa@the-dreams.de>
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 6350dd82b9a9..087f762ebfd5 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -82,7 +82,12 @@ Description:
whether it resides in persistent capacity, volatile capacity,
or the LSA, is made permanently unavailable by whatever means
is appropriate for the media type. This functionality requires
- the device to be not be actively decoding any HPA ranges.
+ the device to be disabled, that is, not actively decoding any
+ HPA ranges. This permits avoiding explicit global CPU cache
+ management, relying instead for it to be done when a region
+ transitions between software programmed and hardware committed
+ states. If this file is not present, then there is no hardware
+ support for the operation.
What /sys/bus/cxl/devices/memX/security/erase
@@ -92,7 +97,13 @@ Contact: linux-cxl@vger.kernel.org
Description:
(WO) Write a boolean 'true' string value to this attribute to
secure erase user data by changing the media encryption keys for
- all user data areas of the device.
+ all user data areas of the device. This functionality requires
+ the device to be disabled, that is, not actively decoding any
+ HPA ranges. This permits avoiding explicit global CPU cache
+ management, relying instead for it to be done when a region
+ transitions between software programmed and hardware committed
+ states. If this file is not present, then there is no hardware
+ support for the operation.
What: /sys/bus/cxl/devices/memX/firmware/
diff --git a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
index 78b62a23b14a..f6d9d72ce77b 100644
--- a/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
+++ b/Documentation/ABI/testing/sysfs-class-led-trigger-netdev
@@ -13,7 +13,7 @@ Description:
Specifies the duration of the LED blink in milliseconds.
Defaults to 50 ms.
- With hw_control ON, the interval value MUST be set to the
+ When offloaded is true, the interval value MUST be set to the
default value and cannot be changed.
Trying to set any value in this specific mode will return
an EINVAL error.
@@ -44,8 +44,8 @@ Description:
If set to 1, the LED will blink for the milliseconds specified
in interval to signal transmission.
- With hw_control ON, the blink interval is controlled by hardware
- and won't reflect the value set in interval.
+ When offloaded is true, the blink interval is controlled by
+ hardware and won't reflect the value set in interval.
What: /sys/class/leds/<led>/rx
Date: Dec 2017
@@ -59,21 +59,21 @@ Description:
If set to 1, the LED will blink for the milliseconds specified
in interval to signal reception.
- With hw_control ON, the blink interval is controlled by hardware
- and won't reflect the value set in interval.
+ When offloaded is true, the blink interval is controlled by
+ hardware and won't reflect the value set in interval.
-What: /sys/class/leds/<led>/hw_control
+What: /sys/class/leds/<led>/offloaded
Date: Jun 2023
KernelVersion: 6.5
Contact: linux-leds@vger.kernel.org
Description:
- Communicate whether the LED trigger modes are driven by hardware
- or software fallback is used.
+ Communicate whether the LED trigger modes are offloaded to
+ hardware or whether software fallback is used.
If 0, the LED is using software fallback to blink.
- If 1, the LED is using hardware control to blink and signal the
- requested modes.
+ If 1, the LED blinking in requested mode is offloaded to
+ hardware.
What: /sys/class/leds/<led>/link_10
Date: Jun 2023
diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu
index ecd585ca2d50..77942eedf4f6 100644
--- a/Documentation/ABI/testing/sysfs-devices-system-cpu
+++ b/Documentation/ABI/testing/sysfs-devices-system-cpu
@@ -513,17 +513,18 @@ Description: information about CPUs heterogeneity.
cpu_capacity: capacity of cpuX.
What: /sys/devices/system/cpu/vulnerabilities
+ /sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+ /sys/devices/system/cpu/vulnerabilities/itlb_multihit
+ /sys/devices/system/cpu/vulnerabilities/l1tf
+ /sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/meltdown
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+ /sys/devices/system/cpu/vulnerabilities/retbleed
+ /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
/sys/devices/system/cpu/vulnerabilities/spectre_v1
/sys/devices/system/cpu/vulnerabilities/spectre_v2
- /sys/devices/system/cpu/vulnerabilities/spec_store_bypass
- /sys/devices/system/cpu/vulnerabilities/l1tf
- /sys/devices/system/cpu/vulnerabilities/mds
/sys/devices/system/cpu/vulnerabilities/srbds
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
- /sys/devices/system/cpu/vulnerabilities/itlb_multihit
- /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
- /sys/devices/system/cpu/vulnerabilities/retbleed
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module
index 08886367d047..62addab47d0c 100644
--- a/Documentation/ABI/testing/sysfs-module
+++ b/Documentation/ABI/testing/sysfs-module
@@ -60,3 +60,14 @@ Description: Module taint flags:
C staging driver module
E unsigned module
== =====================
+
+What: /sys/module/grant_table/parameters/free_per_iteration
+Date: July 2023
+KernelVersion: 6.5 but backported to all supported stable branches
+Contact: Xen developer discussion <xen-devel@lists.xenproject.org>
+Description: Read and write number of grant entries to attempt to free per iteration.
+
+ Note: Future versions of Xen and Linux may provide a better
+ interface for controlling the rate of deferred grant reclaim
+ or may not need it at all.
+Users: Qubes OS (https://www.qubes-os.org)
diff --git a/Documentation/ABI/testing/sysfs-platform-hidma b/Documentation/ABI/testing/sysfs-platform-hidma
index fca40a54df59..a80aeda85ef6 100644
--- a/Documentation/ABI/testing/sysfs-platform-hidma
+++ b/Documentation/ABI/testing/sysfs-platform-hidma
@@ -2,7 +2,7 @@ What: /sys/devices/platform/hidma-*/chid
/sys/devices/platform/QCOM8061:*/chid
Date: Dec 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains the ID of the channel within the HIDMA instance.
It is used to associate a given HIDMA channel with the
diff --git a/Documentation/ABI/testing/sysfs-platform-hidma-mgmt b/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
index 3b6c5c9eabdc..0373745b4e18 100644
--- a/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
+++ b/Documentation/ABI/testing/sysfs-platform-hidma-mgmt
@@ -2,7 +2,7 @@ What: /sys/devices/platform/hidma-mgmt*/chanops/chan*/priority
/sys/devices/platform/QCOM8060:*/chanops/chan*/priority
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains either 0 or 1 and indicates if the DMA channel is a
low priority (0) or high priority (1) channel.
@@ -11,7 +11,7 @@ What: /sys/devices/platform/hidma-mgmt*/chanops/chan*/weight
/sys/devices/platform/QCOM8060:*/chanops/chan*/weight
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains 0..15 and indicates the weight of the channel among
equal priority channels during round robin scheduling.
@@ -20,7 +20,7 @@ What: /sys/devices/platform/hidma-mgmt*/chreset_timeout_cycles
/sys/devices/platform/QCOM8060:*/chreset_timeout_cycles
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains the platform specific cycle value to wait after a
reset command is issued. If the value is chosen too short,
@@ -32,7 +32,7 @@ What: /sys/devices/platform/hidma-mgmt*/dma_channels
/sys/devices/platform/QCOM8060:*/dma_channels
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains the number of dma channels supported by one instance
of HIDMA hardware. The value may change from chip to chip.
@@ -41,7 +41,7 @@ What: /sys/devices/platform/hidma-mgmt*/hw_version_major
/sys/devices/platform/QCOM8060:*/hw_version_major
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Version number major for the hardware.
@@ -49,7 +49,7 @@ What: /sys/devices/platform/hidma-mgmt*/hw_version_minor
/sys/devices/platform/QCOM8060:*/hw_version_minor
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Version number minor for the hardware.
@@ -57,7 +57,7 @@ What: /sys/devices/platform/hidma-mgmt*/max_rd_xactions
/sys/devices/platform/QCOM8060:*/max_rd_xactions
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains a value between 0 and 31. Maximum number of
read transactions that can be issued back to back.
@@ -69,7 +69,7 @@ What: /sys/devices/platform/hidma-mgmt*/max_read_request
/sys/devices/platform/QCOM8060:*/max_read_request
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Size of each read request. The value needs to be a power
of two and can be between 128 and 1024.
@@ -78,7 +78,7 @@ What: /sys/devices/platform/hidma-mgmt*/max_wr_xactions
/sys/devices/platform/QCOM8060:*/max_wr_xactions
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Contains a value between 0 and 31. Maximum number of
write transactions that can be issued back to back.
@@ -91,7 +91,7 @@ What: /sys/devices/platform/hidma-mgmt*/max_write_request
/sys/devices/platform/QCOM8060:*/max_write_request
Date: Nov 2015
KernelVersion: 4.4
-Contact: "Sinan Kaya <okaya@codeaurora.org>"
+Contact: "Sinan Kaya <okaya@kernel.org>"
Description:
Size of each write request. The value needs to be a power
of two and can be between 128 and 1024.
diff --git a/Documentation/admin-guide/devices.txt b/Documentation/admin-guide/devices.txt
index 06c525e01ea5..b1b57f638b94 100644
--- a/Documentation/admin-guide/devices.txt
+++ b/Documentation/admin-guide/devices.txt
@@ -2691,7 +2691,7 @@
45 = /dev/ttyMM1 Marvell MPSC - port 1 (obsolete unused)
46 = /dev/ttyCPM0 PPC CPM (SCC or SMC) - port 0
...
- 47 = /dev/ttyCPM5 PPC CPM (SCC or SMC) - port 5
+ 49 = /dev/ttyCPM5 PPC CPM (SCC or SMC) - port 3
50 = /dev/ttyIOC0 Altix serial card
...
81 = /dev/ttyIOC31 Altix serial card
diff --git a/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
new file mode 100644
index 000000000000..264bfa937f7d
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/gather_data_sampling.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+GDS - Gather Data Sampling
+==========================
+
+Gather Data Sampling is a hardware vulnerability which allows unprivileged
+speculative access to data which was previously stored in vector registers.
+
+Problem
+-------
+When a gather instruction performs loads from memory, different data elements
+are merged into the destination vector register. However, when a gather
+instruction that is transiently executed encounters a fault, stale data from
+architectural or internal vector registers may get transiently forwarded to the
+destination vector register instead. This will allow a malicious attacker to
+infer stale data using typical side channel techniques like cache timing
+attacks. GDS is a purely sampling-based attack.
+
+The attacker uses gather instructions to infer the stale vector register data.
+The victim does not need to do anything special other than use the vector
+registers. The victim does not need to use gather instructions to be
+vulnerable.
+
+Because the buffers are shared between Hyper-Threads cross Hyper-Thread attacks
+are possible.
+
+Attack scenarios
+----------------
+Without mitigation, GDS can infer stale data across virtually all
+permission boundaries:
+
+ Non-enclaves can infer SGX enclave data
+ Userspace can infer kernel data
+ Guests can infer data from hosts
+ Guest can infer guest from other guests
+ Users can infer data from other users
+
+Because of this, it is important to ensure that the mitigation stays enabled in
+lower-privilege contexts like guests and when running outside SGX enclaves.
+
+The hardware enforces the mitigation for SGX. Likewise, VMMs should ensure
+that guests are not allowed to disable the GDS mitigation. If a host erred and
+allowed this, a guest could theoretically disable GDS mitigation, mount an
+attack, and re-enable it.
+
+Mitigation mechanism
+--------------------
+This issue is mitigated in microcode. The microcode defines the following new
+bits:
+
+ ================================ === ============================
+ IA32_ARCH_CAPABILITIES[GDS_CTRL] R/O Enumerates GDS vulnerability
+ and mitigation support.
+ IA32_ARCH_CAPABILITIES[GDS_NO] R/O Processor is not vulnerable.
+ IA32_MCU_OPT_CTRL[GDS_MITG_DIS] R/W Disables the mitigation
+ 0 by default.
+ IA32_MCU_OPT_CTRL[GDS_MITG_LOCK] R/W Locks GDS_MITG_DIS=0. Writes
+ to GDS_MITG_DIS are ignored
+ Can't be cleared once set.
+ ================================ === ============================
+
+GDS can also be mitigated on systems that don't have updated microcode by
+disabling AVX. This can be done by setting gather_data_sampling="force" or
+"clearcpuid=avx" on the kernel command-line.
+
+If used, these options will disable AVX use by turning off XSAVE YMM support.
+However, the processor will still enumerate AVX support. Userspace that
+does not follow proper AVX enumeration to check both AVX *and* XSAVE YMM
+support will break.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The mitigation can be disabled by setting "gather_data_sampling=off" or
+"mitigations=off" on the kernel command line. Not specifying either will default
+to the mitigation being enabled. Specifying "gather_data_sampling=force" will
+use the microcode mitigation when available or disable AVX on affected systems
+where the microcode hasn't been updated to include the mitigation.
+
+GDS System Information
+------------------------
+The kernel provides vulnerability status information through sysfs. For
+GDS this can be accessed by the following sysfs file:
+
+/sys/devices/system/cpu/vulnerabilities/gather_data_sampling
+
+The possible values contained in this file are:
+
+ ============================== =============================================
+ Not affected Processor not vulnerable.
+ Vulnerable Processor vulnerable and mitigation disabled.
+ Vulnerable: No microcode Processor vulnerable and microcode is missing
+ mitigation.
+ Mitigation: AVX disabled,
+ no microcode Processor is vulnerable and microcode is missing
+ mitigation. AVX disabled as mitigation.
+ Mitigation: Microcode Processor is vulnerable and mitigation is in
+ effect.
+ Mitigation: Microcode (locked) Processor is vulnerable and mitigation is in
+ effect and cannot be disabled.
+ Unknown: Dependent on
+ hypervisor status Running on a virtual guest processor that is
+ affected but with no way to know if host
+ processor is mitigated or vulnerable.
+ ============================== =============================================
+
+GDS Default mitigation
+----------------------
+The updated microcode will enable the mitigation by default. The kernel's
+default action is to leave the mitigation enabled.
diff --git a/Documentation/admin-guide/hw-vuln/index.rst b/Documentation/admin-guide/hw-vuln/index.rst
index e0614760a99e..de99caabf65a 100644
--- a/Documentation/admin-guide/hw-vuln/index.rst
+++ b/Documentation/admin-guide/hw-vuln/index.rst
@@ -13,9 +13,11 @@ are configurable at compile, boot or run time.
l1tf
mds
tsx_async_abort
- multihit.rst
- special-register-buffer-data-sampling.rst
- core-scheduling.rst
- l1d_flush.rst
- processor_mmio_stale_data.rst
- cross-thread-rsb.rst
+ multihit
+ special-register-buffer-data-sampling
+ core-scheduling
+ l1d_flush
+ processor_mmio_stale_data
+ cross-thread-rsb
+ srso
+ gather_data_sampling
diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index 4d186f599d90..32a8893e5617 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -484,11 +484,14 @@ Spectre variant 2
Systems which support enhanced IBRS (eIBRS) enable IBRS protection once at
boot, by setting the IBRS bit, and they're automatically protected against
- Spectre v2 variant attacks, including cross-thread branch target injections
- on SMT systems (STIBP). In other words, eIBRS enables STIBP too.
+ Spectre v2 variant attacks.
- Legacy IBRS systems clear the IBRS bit on exit to userspace and
- therefore explicitly enable STIBP for that
+ On Intel's enhanced IBRS systems, this includes cross-thread branch target
+ injections on SMT systems (STIBP). In other words, Intel eIBRS enables
+ STIBP, too.
+
+ AMD Automatic IBRS does not protect userspace, and Legacy IBRS systems clear
+ the IBRS bit on exit to userspace, therefore both explicitly enable STIBP.
The retpoline mitigation is turned on by default on vulnerable
CPUs. It can be forced on or off by the administrator
diff --git a/Documentation/admin-guide/hw-vuln/srso.rst b/Documentation/admin-guide/hw-vuln/srso.rst
new file mode 100644
index 000000000000..b6cfb51cb0b4
--- /dev/null
+++ b/Documentation/admin-guide/hw-vuln/srso.rst
@@ -0,0 +1,150 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Speculative Return Stack Overflow (SRSO)
+========================================
+
+This is a mitigation for the speculative return stack overflow (SRSO)
+vulnerability found on AMD processors. The mechanism is by now the well
+known scenario of poisoning CPU functional units - the Branch Target
+Buffer (BTB) and Return Address Predictor (RAP) in this case - and then
+tricking the elevated privilege domain (the kernel) into leaking
+sensitive data.
+
+AMD CPUs predict RET instructions using a Return Address Predictor (aka
+Return Address Stack/Return Stack Buffer). In some cases, a non-architectural
+CALL instruction (i.e., an instruction predicted to be a CALL but is
+not actually a CALL) can create an entry in the RAP which may be used
+to predict the target of a subsequent RET instruction.
+
+The specific circumstances that lead to this varies by microarchitecture
+but the concern is that an attacker can mis-train the CPU BTB to predict
+non-architectural CALL instructions in kernel space and use this to
+control the speculative target of a subsequent kernel RET, potentially
+leading to information disclosure via a speculative side-channel.
+
+The issue is tracked under CVE-2023-20569.
+
+Affected processors
+-------------------
+
+AMD Zen, generations 1-4. That is, all families 0x17 and 0x19. Older
+processors have not been investigated.
+
+System information and options
+------------------------------
+
+First of all, it is required that the latest microcode be loaded for
+mitigations to be effective.
+
+The sysfs file showing SRSO mitigation status is:
+
+ /sys/devices/system/cpu/vulnerabilities/spec_rstack_overflow
+
+The possible values in this file are:
+
+ * 'Not affected':
+
+ The processor is not vulnerable
+
+ * 'Vulnerable: no microcode':
+
+ The processor is vulnerable, no microcode extending IBPB
+ functionality to address the vulnerability has been applied.
+
+ * 'Mitigation: microcode':
+
+ Extended IBPB functionality microcode patch has been applied. It does
+ not address User->Kernel and Guest->Host transitions protection but it
+ does address User->User and VM->VM attack vectors.
+
+ Note that User->User mitigation is controlled by how the IBPB aspect in
+ the Spectre v2 mitigation is selected:
+
+ * conditional IBPB:
+
+ where each process can select whether it needs an IBPB issued
+ around it PR_SPEC_DISABLE/_ENABLE etc, see :doc:`spectre`
+
+ * strict:
+
+ i.e., always on - by supplying spectre_v2_user=on on the kernel
+ command line
+
+ (spec_rstack_overflow=microcode)
+
+ * 'Mitigation: safe RET':
+
+ Software-only mitigation. It complements the extended IBPB microcode
+ patch functionality by addressing User->Kernel and Guest->Host
+ transitions protection.
+
+ Selected by default or by spec_rstack_overflow=safe-ret
+
+ * 'Mitigation: IBPB':
+
+ Similar protection as "safe RET" above but employs an IBPB barrier on
+ privilege domain crossings (User->Kernel, Guest->Host).
+
+ (spec_rstack_overflow=ibpb)
+
+ * 'Mitigation: IBPB on VMEXIT':
+
+ Mitigation addressing the cloud provider scenario - the Guest->Host
+ transitions only.
+
+ (spec_rstack_overflow=ibpb-vmexit)
+
+
+
+In order to exploit vulnerability, an attacker needs to:
+
+ - gain local access on the machine
+
+ - break kASLR
+
+ - find gadgets in the running kernel in order to use them in the exploit
+
+ - potentially create and pin an additional workload on the sibling
+ thread, depending on the microarchitecture (not necessary on fam 0x19)
+
+ - run the exploit
+
+Considering the performance implications of each mitigation type, the
+default one is 'Mitigation: safe RET' which should take care of most
+attack vectors, including the local User->Kernel one.
+
+As always, the user is advised to keep her/his system up-to-date by
+applying software updates regularly.
+
+The default setting will be reevaluated when needed and especially when
+new attack vectors appear.
+
+As one can surmise, 'Mitigation: safe RET' does come at the cost of some
+performance depending on the workload. If one trusts her/his userspace
+and does not want to suffer the performance impact, one can always
+disable the mitigation with spec_rstack_overflow=off.
+
+Similarly, 'Mitigation: IBPB' is another full mitigation type employing
+an indrect branch prediction barrier after having applied the required
+microcode patch for one's system. This mitigation comes also at
+a performance cost.
+
+Mitigation: safe RET
+--------------------
+
+The mitigation works by ensuring all RET instructions speculate to
+a controlled location, similar to how speculation is controlled in the
+retpoline sequence. To accomplish this, the __x86_return_thunk forces
+the CPU to mispredict every function return using a 'safe return'
+sequence.
+
+To ensure the safety of this mitigation, the kernel must ensure that the
+safe return sequence is itself free from attacker interference. In Zen3
+and Zen4, this is accomplished by creating a BTB alias between the
+untraining function srso_alias_untrain_ret() and the safe return
+function srso_alias_safe_ret() which results in evicting a potentially
+poisoned BTB entry and using that safe one for all function returns.
+
+In older Zen1 and Zen2, this is accomplished using a reinterpretation
+technique similar to Retbleed one: srso_untrain_ret() and
+srso_safe_ret().
diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index c18d94fa6470..f8ebb63b6c5d 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -624,3 +624,9 @@ Used to get the correct ranges:
* VMALLOC_START ~ VMALLOC_END : vmalloc() / ioremap() space.
* VMEMMAP_START ~ VMEMMAP_END : vmemmap space, used for struct page array.
* KERNEL_LINK_ADDR : start address of Kernel link and BPF
+
+va_kernel_pa_offset
+-------------------
+
+Indicates the offset between the kernel virtual and physical mappings.
+Used to translate virtual to physical addresses.
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a1457995fd41..6ff63638ed82 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1623,6 +1623,26 @@
Format: off | on
default: on
+ gather_data_sampling=
+ [X86,INTEL] Control the Gather Data Sampling (GDS)
+ mitigation.
+
+ Gather Data Sampling is a hardware vulnerability which
+ allows unprivileged speculative access to data which was
+ previously stored in vector registers.
+
+ This issue is mitigated by default in updated microcode.
+ The mitigation may have a performance impact but can be
+ disabled. On systems without the microcode mitigation
+ disabling AVX serves as a mitigation.
+
+ force: Disable AVX to mitigate systems without
+ microcode mitigation. No effect if the microcode
+ mitigation is present. Known to cause crashes in
+ userspace with buggy AVX enumeration.
+
+ off: Disable GDS mitigation.
+
gcov_persist= [GCOV] When non-zero (default), profiling data for
kernel modules is saved and remains accessible via
debugfs, even when the module is unloaded/reloaded.
@@ -3273,24 +3293,25 @@
Disable all optional CPU mitigations. This
improves system performance, but it may also
expose users to several CPU vulnerabilities.
- Equivalent to: nopti [X86,PPC]
- if nokaslr then kpti=0 [ARM64]
- nospectre_v1 [X86,PPC]
- nobp=0 [S390]
- nospectre_v2 [X86,PPC,S390,ARM64]
- spectre_v2_user=off [X86]
- spec_store_bypass_disable=off [X86,PPC]
- ssbd=force-off [ARM64]
- nospectre_bhb [ARM64]
+ Equivalent to: if nokaslr then kpti=0 [ARM64]
+ gather_data_sampling=off [X86]
+ kvm.nx_huge_pages=off [X86]
l1tf=off [X86]
mds=off [X86]
- tsx_async_abort=off [X86]
- kvm.nx_huge_pages=off [X86]
- srbds=off [X86,INTEL]
+ mmio_stale_data=off [X86]
no_entry_flush [PPC]
no_uaccess_flush [PPC]
- mmio_stale_data=off [X86]
+ nobp=0 [S390]
+ nopti [X86,PPC]
+ nospectre_bhb [ARM64]
+ nospectre_v1 [X86,PPC]
+ nospectre_v2 [X86,PPC,S390,ARM64]
retbleed=off [X86]
+ spec_store_bypass_disable=off [X86,PPC]
+ spectre_v2_user=off [X86]
+ srbds=off [X86,INTEL]
+ ssbd=force-off [ARM64]
+ tsx_async_abort=off [X86]
Exceptions:
This does not have any effect on
@@ -5501,6 +5522,10 @@
Useful for devices that are detected asynchronously
(e.g. USB and MMC devices).
+ rootwait= [KNL] Maximum time (in seconds) to wait for root device
+ to show up before attempting to mount the root
+ filesystem.
+
rproc_mem=nn[KMG][@address]
[KNL,ARM,CMA] Remoteproc physical memory block.
Memory area to be used by remote processor image,
@@ -5875,6 +5900,17 @@
Not specifying this option is equivalent to
spectre_v2_user=auto.
+ spec_rstack_overflow=
+ [X86] Control RAS overflow mitigation on AMD Zen CPUs
+
+ off - Disable mitigation
+ microcode - Enable microcode mitigation only
+ safe-ret - Enable sw-only safe RET mitigation (default)
+ ibpb - Enable mitigation by issuing IBPB on
+ kernel entry
+ ibpb-vmexit - Issue IBPB only on VMEXIT
+ (cloud-specific mitigation)
+
spec_store_bypass_disable=
[HW] Control Speculative Store Bypass (SSB) Disable mitigation
(Speculative Store Bypass vulnerability)
diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst
index 496cdca5cb99..bedd3a1d7b42 100644
--- a/Documentation/arch/arm64/silicon-errata.rst
+++ b/Documentation/arch/arm64/silicon-errata.rst
@@ -148,6 +148,9 @@ stable kernels.
| ARM | MMU-700 | #2268618,2812531| N/A |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+
+| ARM | GIC-700 | #2941627 | ARM64_ERRATUM_2941627 |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_845719 |
+----------------+-----------------+-----------------+-----------------------------+
| Broadcom | Brahma-B53 | N/A | ARM64_ERRATUM_843419 |
diff --git a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
index ae4f68d4e696..bd67cfee6d19 100644
--- a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
+++ b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
@@ -105,7 +105,7 @@ properties:
G coefficient for temperature equation.
Default for series 5 = 60000
Default for series 6 = 57400
- multipleOf: 1000
+ multipleOf: 100
minimum: 1000
$ref: /schemas/types.yaml#/definitions/uint32
@@ -114,7 +114,7 @@ properties:
H coefficient for temperature equation.
Default for series 5 = 200000
Default for series 6 = 249400
- multipleOf: 1000
+ multipleOf: 100
minimum: 1000
$ref: /schemas/types.yaml#/definitions/uint32
@@ -131,7 +131,7 @@ properties:
J coefficient for temperature equation.
Default for series 5 = -100
Default for series 6 = 0
- multipleOf: 1000
+ multipleOf: 100
maximum: 0
$ref: /schemas/types.yaml#/definitions/int32
diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
index 72d2e910f206..2594fa192f93 100644
--- a/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
+++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
@@ -216,7 +216,6 @@ properties:
description: Whether to enable burnout current for EXT1.
adi,ext1-burnout-current-nanoamp:
- $ref: /schemas/types.yaml#/definitions/uint32
description:
Burnout current in nanoamps to be applied to EXT1.
enum: [0, 50, 500, 1000, 10000]
@@ -233,7 +232,6 @@ properties:
description: Whether to enable burnout current for EXT2.
adi,ext2-burnout-current-nanoamp:
- $ref: /schemas/types.yaml#/definitions/uint32
description: Burnout current in nanoamps to be applied to EXT2.
enum: [0, 50, 500, 1000, 10000]
default: 0
@@ -249,7 +247,6 @@ properties:
description: Whether to enable burnout current for VIOUT.
adi,viout-burnout-current-nanoamp:
- $ref: /schemas/types.yaml#/definitions/uint32
description: Burnout current in nanoamps to be applied to VIOUT.
enum: [0, 1000, 10000]
default: 0
diff --git a/Documentation/devicetree/bindings/net/mediatek,net.yaml b/Documentation/devicetree/bindings/net/mediatek,net.yaml
index acb2b2ac4fe1..31cc0c412805 100644
--- a/Documentation/devicetree/bindings/net/mediatek,net.yaml
+++ b/Documentation/devicetree/bindings/net/mediatek,net.yaml
@@ -293,7 +293,7 @@ allOf:
patternProperties:
"^mac@[0-1]$":
type: object
- additionalProperties: false
+ unevaluatedProperties: false
allOf:
- $ref: ethernet-controller.yaml#
description:
@@ -305,14 +305,9 @@ patternProperties:
reg:
maxItems: 1
- phy-handle: true
-
- phy-mode: true
-
required:
- reg
- compatible
- - phy-handle
required:
- compatible
diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
index 176ea5f90251..7f324c6da915 100644
--- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml
@@ -91,12 +91,18 @@ properties:
$ref: /schemas/types.yaml#/definitions/phandle
tx_delay:
- description: Delay value for TXD timing. Range value is 0~0x7F, 0x30 as default.
+ description: Delay value for TXD timing.
$ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 0x7F
+ default: 0x30
rx_delay:
- description: Delay value for RXD timing. Range value is 0~0x7F, 0x10 as default.
+ description: Delay value for RXD timing.
$ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 0x7F
+ default: 0x10
phy-supply:
description: PHY regulator
diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml
index e608a4f1bcae..e119a226a4b1 100644
--- a/Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/qcom,sa8775p-tlmm.yaml
@@ -87,7 +87,7 @@ $defs:
emac0_mdc, emac0_mdio, emac0_ptp_aux, emac0_ptp_pps, emac1_mcg0,
emac1_mcg1, emac1_mcg2, emac1_mcg3, emac1_mdc, emac1_mdio,
emac1_ptp_aux, emac1_ptp_pps, gcc_gp1, gcc_gp2, gcc_gp3,
- gcc_gp4, gcc_gp5, hs0_mi2s, hs1_mi2s, hs2_mi2s, ibi_i3c,
+ gcc_gp4, gcc_gp5, gpio, hs0_mi2s, hs1_mi2s, hs2_mi2s, ibi_i3c,
jitter_bist, mdp0_vsync0, mdp0_vsync1, mdp0_vsync2, mdp0_vsync3,
mdp0_vsync4, mdp0_vsync5, mdp0_vsync6, mdp0_vsync7, mdp0_vsync8,
mdp1_vsync0, mdp1_vsync1, mdp1_vsync2, mdp1_vsync3, mdp1_vsync4,
diff --git a/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml b/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
index 30b2131b5860..65cb2e5c5eee 100644
--- a/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
+++ b/Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
@@ -16,7 +16,6 @@ properties:
- enum:
- atmel,at91rm9200-usart
- atmel,at91sam9260-usart
- - microchip,sam9x60-usart
- items:
- const: atmel,at91rm9200-dbgu
- const: atmel,at91rm9200-usart
@@ -24,6 +23,9 @@ properties:
- const: atmel,at91sam9260-dbgu
- const: atmel,at91sam9260-usart
- items:
+ - const: microchip,sam9x60-usart
+ - const: atmel,at91sam9260-usart
+ - items:
- const: microchip,sam9x60-dbgu
- const: microchip,sam9x60-usart
- const: atmel,at91sam9260-dbgu
diff --git a/Documentation/devicetree/bindings/serial/cavium-uart.txt b/Documentation/devicetree/bindings/serial/cavium-uart.txt
deleted file mode 100644
index 87a6c375cd44..000000000000
--- a/Documentation/devicetree/bindings/serial/cavium-uart.txt
+++ /dev/null
@@ -1,19 +0,0 @@
-* Universal Asynchronous Receiver/Transmitter (UART)
-
-- compatible: "cavium,octeon-3860-uart"
-
- Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
-
-- reg: The base address of the UART register bank.
-
-- interrupts: A single interrupt specifier.
-
-- current-speed: Optional, the current bit rate in bits per second.
-
-Example:
- uart1: serial@1180000000c00 {
- compatible = "cavium,octeon-3860-uart","ns16550";
- reg = <0x11800 0x00000c00 0x0 0x400>;
- current-speed = <115200>;
- interrupts = <0 35>;
- };
diff --git a/Documentation/devicetree/bindings/serial/nxp,lpc1850-uart.txt b/Documentation/devicetree/bindings/serial/nxp,lpc1850-uart.txt
deleted file mode 100644
index 04e23e63ee4f..000000000000
--- a/Documentation/devicetree/bindings/serial/nxp,lpc1850-uart.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* NXP LPC1850 UART
-
-Required properties:
-- compatible : "nxp,lpc1850-uart", "ns16550a".
-- reg : offset and length of the register set for the device.
-- interrupts : should contain uart interrupt.
-- clocks : phandle to the input clocks.
-- clock-names : required elements: "uartclk", "reg".
-
-Optional properties:
-- dmas : Two or more DMA channel specifiers following the
- convention outlined in bindings/dma/dma.txt
-- dma-names : Names for the dma channels, if present. There must
- be at least one channel named "tx" for transmit
- and named "rx" for receive.
-
-Since it's also possible to also use the of_serial.c driver all
-parameters from 8250.txt also apply but are optional.
-
-Example:
-uart0: serial@40081000 {
- compatible = "nxp,lpc1850-uart", "ns16550a";
- reg = <0x40081000 0x1000>;
- reg-shift = <2>;
- interrupts = <24>;
- clocks = <&ccu2 CLK_APB0_UART0>, <&ccu1 CLK_CPU_UART0>;
- clock-names = "uartclk", "reg";
-};
diff --git a/Documentation/devicetree/bindings/sound/audio-graph-card2.yaml b/Documentation/devicetree/bindings/sound/audio-graph-card2.yaml
index 3de7b36829da..d3ce4de449d5 100644
--- a/Documentation/devicetree/bindings/sound/audio-graph-card2.yaml
+++ b/Documentation/devicetree/bindings/sound/audio-graph-card2.yaml
@@ -39,22 +39,4 @@ required:
additionalProperties: false
-examples:
- - |
- sound {
- compatible = "audio-graph-card2";
-
- links = <&cpu_port>;
- };
-
- cpu {
- compatible = "cpu-driver";
-
- cpu_port: port { cpu_ep: endpoint { remote-endpoint = <&codec_ep>; }; };
- };
-
- codec {
- compatible = "codec-driver";
-
- port { codec_ep: endpoint { remote-endpoint = <&cpu_ep>; }; };
- };
+...
diff --git a/Documentation/devicetree/bindings/sound/google,sc7180-trogdor.yaml b/Documentation/devicetree/bindings/sound/google,sc7180-trogdor.yaml
index 666a95ac22c8..ba5b7728cf33 100644
--- a/Documentation/devicetree/bindings/sound/google,sc7180-trogdor.yaml
+++ b/Documentation/devicetree/bindings/sound/google,sc7180-trogdor.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
title: Google SC7180-Trogdor ASoC sound card driver
maintainers:
- - Rohit kumar <rohitkr@codeaurora.org>
+ - Rohit kumar <quic_rohkumar@quicinc.com>
- Cheng-Yi Chiang <cychiang@chromium.org>
description:
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
index 6cc8f86c7531..3a559bd07a79 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
@@ -8,7 +8,7 @@ title: Qualcomm Technologies Inc. LPASS CPU dai driver
maintainers:
- Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
- - Rohit kumar <rohitkr@codeaurora.org>
+ - Rohit kumar <quic_rohkumar@quicinc.com>
description: |
Qualcomm Technologies Inc. SOC Low-Power Audio SubSystem (LPASS) that consist
diff --git a/Documentation/filesystems/fscrypt.rst b/Documentation/filesystems/fscrypt.rst
index eccd327e6df5..a624e92f2687 100644
--- a/Documentation/filesystems/fscrypt.rst
+++ b/Documentation/filesystems/fscrypt.rst
@@ -332,54 +332,121 @@ Encryption modes and usage
fscrypt allows one encryption mode to be specified for file contents
and one encryption mode to be specified for filenames. Different
directory trees are permitted to use different encryption modes.
+
+Supported modes
+---------------
+
Currently, the following pairs of encryption modes are supported:
- AES-256-XTS for contents and AES-256-CTS-CBC for filenames
-- AES-128-CBC for contents and AES-128-CTS-CBC for filenames
+- AES-256-XTS for contents and AES-256-HCTR2 for filenames
- Adiantum for both contents and filenames
-- AES-256-XTS for contents and AES-256-HCTR2 for filenames (v2 policies only)
-- SM4-XTS for contents and SM4-CTS-CBC for filenames (v2 policies only)
-
-If unsure, you should use the (AES-256-XTS, AES-256-CTS-CBC) pair.
-
-AES-128-CBC was added only for low-powered embedded devices with
-crypto accelerators such as CAAM or CESA that do not support XTS. To
-use AES-128-CBC, CONFIG_CRYPTO_ESSIV and CONFIG_CRYPTO_SHA256 (or
-another SHA-256 implementation) must be enabled so that ESSIV can be
-used.
-
-Adiantum is a (primarily) stream cipher-based mode that is fast even
-on CPUs without dedicated crypto instructions. It's also a true
-wide-block mode, unlike XTS. It can also eliminate the need to derive
-per-file encryption keys. However, it depends on the security of two
-primitives, XChaCha12 and AES-256, rather than just one. See the
-paper "Adiantum: length-preserving encryption for entry-level
-processors" (https://eprint.iacr.org/2018/720.pdf) for more details.
-To use Adiantum, CONFIG_CRYPTO_ADIANTUM must be enabled. Also, fast
-implementations of ChaCha and NHPoly1305 should be enabled, e.g.
-CONFIG_CRYPTO_CHACHA20_NEON and CONFIG_CRYPTO_NHPOLY1305_NEON for ARM.
-
-AES-256-HCTR2 is another true wide-block encryption mode that is intended for
-use on CPUs with dedicated crypto instructions. AES-256-HCTR2 has the property
-that a bitflip in the plaintext changes the entire ciphertext. This property
-makes it desirable for filename encryption since initialization vectors are
-reused within a directory. For more details on AES-256-HCTR2, see the paper
-"Length-preserving encryption with HCTR2"
-(https://eprint.iacr.org/2021/1441.pdf). To use AES-256-HCTR2,
-CONFIG_CRYPTO_HCTR2 must be enabled. Also, fast implementations of XCTR and
-POLYVAL should be enabled, e.g. CRYPTO_POLYVAL_ARM64_CE and
-CRYPTO_AES_ARM64_CE_BLK for ARM64.
-
-SM4 is a Chinese block cipher that is an alternative to AES. It has
-not seen as much security review as AES, and it only has a 128-bit key
-size. It may be useful in cases where its use is mandated.
-Otherwise, it should not be used. For SM4 support to be available, it
-also needs to be enabled in the kernel crypto API.
-
-New encryption modes can be added relatively easily, without changes
-to individual filesystems. However, authenticated encryption (AE)
-modes are not currently supported because of the difficulty of dealing
-with ciphertext expansion.
+- AES-128-CBC-ESSIV for contents and AES-128-CTS-CBC for filenames
+- SM4-XTS for contents and SM4-CTS-CBC for filenames
+
+Authenticated encryption modes are not currently supported because of
+the difficulty of dealing with ciphertext expansion. Therefore,
+contents encryption uses a block cipher in `XTS mode
+<https://en.wikipedia.org/wiki/Disk_encryption_theory#XTS>`_ or
+`CBC-ESSIV mode
+<https://en.wikipedia.org/wiki/Disk_encryption_theory#Encrypted_salt-sector_initialization_vector_(ESSIV)>`_,
+or a wide-block cipher. Filenames encryption uses a
+block cipher in `CTS-CBC mode
+<https://en.wikipedia.org/wiki/Ciphertext_stealing>`_ or a wide-block
+cipher.
+
+The (AES-256-XTS, AES-256-CTS-CBC) pair is the recommended default.
+It is also the only option that is *guaranteed* to always be supported
+if the kernel supports fscrypt at all; see `Kernel config options`_.
+
+The (AES-256-XTS, AES-256-HCTR2) pair is also a good choice that
+upgrades the filenames encryption to use a wide-block cipher. (A
+*wide-block cipher*, also called a tweakable super-pseudorandom
+permutation, has the property that changing one bit scrambles the
+entire result.) As described in `Filenames encryption`_, a wide-block
+cipher is the ideal mode for the problem domain, though CTS-CBC is the
+"least bad" choice among the alternatives. For more information about
+HCTR2, see `the HCTR2 paper <https://eprint.iacr.org/2021/1441.pdf>`_.
+
+Adiantum is recommended on systems where AES is too slow due to lack
+of hardware acceleration for AES. Adiantum is a wide-block cipher
+that uses XChaCha12 and AES-256 as its underlying components. Most of
+the work is done by XChaCha12, which is much faster than AES when AES
+acceleration is unavailable. For more information about Adiantum, see
+`the Adiantum paper <https://eprint.iacr.org/2018/720.pdf>`_.
+
+The (AES-128-CBC-ESSIV, AES-128-CTS-CBC) pair exists only to support
+systems whose only form of AES acceleration is an off-CPU crypto
+accelerator such as CAAM or CESA that does not support XTS.
+
+The remaining mode pairs are the "national pride ciphers":
+
+- (SM4-XTS, SM4-CTS-CBC)
+
+Generally speaking, these ciphers aren't "bad" per se, but they
+receive limited security review compared to the usual choices such as
+AES and ChaCha. They also don't bring much new to the table. It is
+suggested to only use these ciphers where their use is mandated.
+
+Kernel config options
+---------------------
+
+Enabling fscrypt support (CONFIG_FS_ENCRYPTION) automatically pulls in
+only the basic support from the crypto API needed to use AES-256-XTS
+and AES-256-CTS-CBC encryption. For optimal performance, it is
+strongly recommended to also enable any available platform-specific
+kconfig options that provide acceleration for the algorithm(s) you
+wish to use. Support for any "non-default" encryption modes typically
+requires extra kconfig options as well.
+
+Below, some relevant options are listed by encryption mode. Note,
+acceleration options not listed below may be available for your
+platform; refer to the kconfig menus. File contents encryption can
+also be configured to use inline encryption hardware instead of the
+kernel crypto API (see `Inline encryption support`_); in that case,
+the file contents mode doesn't need to supported in the kernel crypto
+API, but the filenames mode still does.
+
+- AES-256-XTS and AES-256-CTS-CBC
+ - Recommended:
+ - arm64: CONFIG_CRYPTO_AES_ARM64_CE_BLK
+ - x86: CONFIG_CRYPTO_AES_NI_INTEL
+
+- AES-256-HCTR2
+ - Mandatory:
+ - CONFIG_CRYPTO_HCTR2
+ - Recommended:
+ - arm64: CONFIG_CRYPTO_AES_ARM64_CE_BLK
+ - arm64: CONFIG_CRYPTO_POLYVAL_ARM64_CE
+ - x86: CONFIG_CRYPTO_AES_NI_INTEL
+ - x86: CONFIG_CRYPTO_POLYVAL_CLMUL_NI
+
+- Adiantum
+ - Mandatory:
+ - CONFIG_CRYPTO_ADIANTUM
+ - Recommended:
+ - arm32: CONFIG_CRYPTO_CHACHA20_NEON
+ - arm32: CONFIG_CRYPTO_NHPOLY1305_NEON
+ - arm64: CONFIG_CRYPTO_CHACHA20_NEON
+ - arm64: CONFIG_CRYPTO_NHPOLY1305_NEON
+ - x86: CONFIG_CRYPTO_CHACHA20_X86_64
+ - x86: CONFIG_CRYPTO_NHPOLY1305_SSE2
+ - x86: CONFIG_CRYPTO_NHPOLY1305_AVX2
+
+- AES-128-CBC-ESSIV and AES-128-CTS-CBC:
+ - Mandatory:
+ - CONFIG_CRYPTO_ESSIV
+ - CONFIG_CRYPTO_SHA256 or another SHA-256 implementation
+ - Recommended:
+ - AES-CBC acceleration
+
+fscrypt also uses HMAC-SHA512 for key derivation, so enabling SHA-512
+acceleration is recommended:
+
+- SHA-512
+ - Recommended:
+ - arm64: CONFIG_CRYPTO_SHA512_ARM64_CE
+ - x86: CONFIG_CRYPTO_SHA512_SSSE3
Contents encryption
-------------------
@@ -493,7 +560,14 @@ This structure must be initialized as follows:
be set to constants from ``<linux/fscrypt.h>`` which identify the
encryption modes to use. If unsure, use FSCRYPT_MODE_AES_256_XTS
(1) for ``contents_encryption_mode`` and FSCRYPT_MODE_AES_256_CTS
- (4) for ``filenames_encryption_mode``.
+ (4) for ``filenames_encryption_mode``. For details, see `Encryption
+ modes and usage`_.
+
+ v1 encryption policies only support three combinations of modes:
+ (FSCRYPT_MODE_AES_256_XTS, FSCRYPT_MODE_AES_256_CTS),
+ (FSCRYPT_MODE_AES_128_CBC, FSCRYPT_MODE_AES_128_CTS), and
+ (FSCRYPT_MODE_ADIANTUM, FSCRYPT_MODE_ADIANTUM). v2 policies support
+ all combinations documented in `Supported modes`_.
- ``flags`` contains optional flags from ``<linux/fscrypt.h>``:
diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst
index ad6d21640576..d095c5838f94 100644
--- a/Documentation/filesystems/idmappings.rst
+++ b/Documentation/filesystems/idmappings.rst
@@ -146,9 +146,10 @@ For the rest of this document we will prefix all userspace ids with ``u`` and
all kernel ids with ``k``. Ranges of idmappings will be prefixed with ``r``. So
an idmapping will be written as ``u0:k10000:r10000``.
-For example, the id ``u1000`` is an id in the upper idmapset or "userspace
-idmapset" starting with ``u1000``. And it is mapped to ``k11000`` which is a
-kernel id in the lower idmapset or "kernel idmapset" starting with ``k10000``.
+For example, within this idmapping, the id ``u1000`` is an id in the upper
+idmapset or "userspace idmapset" starting with ``u0``. And it is mapped to
+``k11000`` which is a kernel id in the lower idmapset or "kernel idmapset"
+starting with ``k10000``.
A kernel id is always created by an idmapping. Such idmappings are associated
with user namespaces. Since we mainly care about how idmappings work we're not
@@ -373,6 +374,13 @@ kernel maps the caller's userspace id down into a kernel id according to the
caller's idmapping and then maps that kernel id up according to the
filesystem's idmapping.
+From the implementation point it's worth mentioning how idmappings are represented.
+All idmappings are taken from the corresponding user namespace.
+
+ - caller's idmapping (usually taken from ``current_user_ns()``)
+ - filesystem's idmapping (``sb->s_user_ns``)
+ - mount's idmapping (``mnt_idmap(vfsmnt)``)
+
Let's see some examples with caller/filesystem idmapping but without mount
idmappings. This will exhibit some problems we can hit. After that we will
revisit/reconsider these examples, this time using mount idmappings, to see how
diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index ed148919e11a..b7e5a3841aa4 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -85,13 +85,14 @@ prototypes::
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
struct posix_acl * (*get_acl)(struct mnt_idmap *, struct dentry *, int);
+ struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
locking rules:
all may block
-============== =============================================
+============== ==================================================
ops i_rwsem(inode)
-============== =============================================
+============== ==================================================
lookup: shared
create: exclusive
link: exclusive (both)
@@ -115,7 +116,8 @@ atomic_open: shared (exclusive if O_CREAT is set in open flags)
tmpfile: no
fileattr_get: no or exclusive
fileattr_set: exclusive
-============== =============================================
+get_offset_ctx no
+============== ==================================================
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_rwsem
@@ -374,10 +376,17 @@ invalidate_lock before invalidating page cache in truncate / hole punch
path (and thus calling into ->invalidate_folio) to block races between page
cache invalidation and page cache filling functions (fault, read, ...).
-->release_folio() is called when the kernel is about to try to drop the
-buffers from the folio in preparation for freeing it. It returns false to
-indicate that the buffers are (or may be) freeable. If ->release_folio is
-NULL, the kernel assumes that the fs has no private interest in the buffers.
+->release_folio() is called when the MM wants to make a change to the
+folio that would invalidate the filesystem's private data. For example,
+it may be about to be removed from the address_space or split. The folio
+is locked and not under writeback. It may be dirty. The gfp parameter
+is not usually used for allocation, but rather to indicate what the
+filesystem may do to attempt to free the private data. The filesystem may
+return false to indicate that the folio's private data cannot be freed.
+If it returns true, it should have already removed the private data from
+the folio. If a filesystem does not provide a ->release_folio method,
+the pagecache will assume that private data is buffer_heads and call
+try_to_free_buffers().
->free_folio() is called when the kernel has dropped the folio
from the page cache.
@@ -551,9 +560,8 @@ mutex or just to use i_size_read() instead.
Note: this does not protect the file->f_pos against concurrent modifications
since this is something the userspace has to take care about.
-->iterate() is called with i_rwsem exclusive.
-
-->iterate_shared() is called with i_rwsem at least shared.
+->iterate_shared() is called with i_rwsem held for reading, and with the
+file f_pos_lock held exclusively
->fasync() is responsible for maintaining the FASYNC bit in filp->f_flags.
Most instances call fasync_helper(), which does that maintenance, so it's
diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst
index d2d684ae7798..0f5da78ef4f9 100644
--- a/Documentation/filesystems/porting.rst
+++ b/Documentation/filesystems/porting.rst
@@ -537,7 +537,7 @@ vfs_readdir() is gone; switch to iterate_dir() instead
**mandatory**
-->readdir() is gone now; switch to ->iterate()
+->readdir() is gone now; switch to ->iterate_shared()
**mandatory**
@@ -693,24 +693,19 @@ parallel now.
---
-**recommended**
+**mandatory**
-->iterate_shared() is added; it's a parallel variant of ->iterate().
+->iterate_shared() is added.
Exclusion on struct file level is still provided (as well as that
between it and lseek on the same struct file), but if your directory
has been opened several times, you can get these called in parallel.
Exclusion between that method and all directory-modifying ones is
still provided, of course.
-Often enough ->iterate() can serve as ->iterate_shared() without any
-changes - it is a read-only operation, after all. If you have any
-per-inode or per-dentry in-core data structures modified by ->iterate(),
-you might need something to serialize the access to them. If you
-do dcache pre-seeding, you'll need to switch to d_alloc_parallel() for
-that; look for in-tree examples.
-
-Old method is only used if the new one is absent; eventually it will
-be removed. Switch while you still can; the old one won't stay.
+If you have any per-inode or per-dentry in-core data structures modified
+by ->iterate_shared(), you might need something to serialize the access
+to them. If you do dcache pre-seeding, you'll need to switch to
+d_alloc_parallel() for that; look for in-tree examples.
---
@@ -930,9 +925,9 @@ should be done by looking at FMODE_LSEEK in file->f_mode.
filldir_t (readdir callbacks) calling conventions have changed. Instead of
returning 0 or -E... it returns bool now. false means "no more" (as -E... used
to) and true - "keep going" (as 0 in old calling conventions). Rationale:
-callers never looked at specific -E... values anyway. ->iterate() and
-->iterate_shared() instance require no changes at all, all filldir_t ones in
-the tree converted.
+callers never looked at specific -E... values anyway. -> iterate_shared()
+instances require no changes at all, all filldir_t ones in the tree
+converted.
---
diff --git a/Documentation/filesystems/tmpfs.rst b/Documentation/filesystems/tmpfs.rst
index f18f46be5c0c..56a26c843dbe 100644
--- a/Documentation/filesystems/tmpfs.rst
+++ b/Documentation/filesystems/tmpfs.rst
@@ -21,8 +21,8 @@ explained further below, some of which can be reconfigured dynamically on the
fly using a remount ('mount -o remount ...') of the filesystem. A tmpfs
filesystem can be resized but it cannot be resized to a size below its current
usage. tmpfs also supports POSIX ACLs, and extended attributes for the
-trusted.* and security.* namespaces. ramfs does not use swap and you cannot
-modify any parameter for a ramfs filesystem. The size limit of a ramfs
+trusted.*, security.* and user.* namespaces. ramfs does not use swap and you
+cannot modify any parameter for a ramfs filesystem. The size limit of a ramfs
filesystem is how much memory you have available, and so care must be taken if
used so to not run out of memory.
@@ -84,8 +84,6 @@ nr_inodes The maximum number of inodes for this instance. The default
is half of the number of your physical RAM pages, or (on a
machine with highmem) the number of lowmem RAM pages,
whichever is the lower.
-noswap Disables swap. Remounts must respect the original settings.
- By default swap is enabled.
========= ============================================================
These parameters accept a suffix k, m or g for kilo, mega and giga and
@@ -99,36 +97,65 @@ mount with such options, since it allows any user with write access to
use up all the memory on the machine; but enhances the scalability of
that instance in a system with many CPUs making intensive use of it.
+If nr_inodes is not 0, that limited space for inodes is also used up by
+extended attributes: "df -i"'s IUsed and IUse% increase, IFree decreases.
+
+tmpfs blocks may be swapped out, when there is a shortage of memory.
+tmpfs has a mount option to disable its use of swap:
+
+====== ===========================================================
+noswap Disables swap. Remounts must respect the original settings.
+ By default swap is enabled.
+====== ===========================================================
+
tmpfs also supports Transparent Huge Pages which requires a kernel
configured with CONFIG_TRANSPARENT_HUGEPAGE and with huge supported for
your system (has_transparent_hugepage(), which is architecture specific).
The mount options for this are:
-====== ============================================================
-huge=0 never: disables huge pages for the mount
-huge=1 always: enables huge pages for the mount
-huge=2 within_size: only allocate huge pages if the page will be
- fully within i_size, also respect fadvise()/madvise() hints.
-huge=3 advise: only allocate huge pages if requested with
- fadvise()/madvise()
-====== ============================================================
-
-There is a sysfs file which you can also use to control system wide THP
-configuration for all tmpfs mounts, the file is:
-
-/sys/kernel/mm/transparent_hugepage/shmem_enabled
-
-This sysfs file is placed on top of THP sysfs directory and so is registered
-by THP code. It is however only used to control all tmpfs mounts with one
-single knob. Since it controls all tmpfs mounts it should only be used either
-for emergency or testing purposes. The values you can set for shmem_enabled are:
-
-== ============================================================
--1 deny: disables huge on shm_mnt and all mounts, for
- emergency use
--2 force: enables huge on shm_mnt and all mounts, w/o needing
- option, for testing
-== ============================================================
+================ ==============================================================
+huge=never Do not allocate huge pages. This is the default.
+huge=always Attempt to allocate huge page every time a new page is needed.
+huge=within_size Only allocate huge page if it will be fully within i_size.
+ Also respect madvise(2) hints.
+huge=advise Only allocate huge page if requested with madvise(2).
+================ ==============================================================
+
+See also Documentation/admin-guide/mm/transhuge.rst, which describes the
+sysfs file /sys/kernel/mm/transparent_hugepage/shmem_enabled: which can
+be used to deny huge pages on all tmpfs mounts in an emergency, or to
+force huge pages on all tmpfs mounts for testing.
+
+tmpfs also supports quota with the following mount options
+
+======================== =================================================
+quota User and group quota accounting and enforcement
+ is enabled on the mount. Tmpfs is using hidden
+ system quota files that are initialized on mount.
+usrquota User quota accounting and enforcement is enabled
+ on the mount.
+grpquota Group quota accounting and enforcement is enabled
+ on the mount.
+usrquota_block_hardlimit Set global user quota block hard limit.
+usrquota_inode_hardlimit Set global user quota inode hard limit.
+grpquota_block_hardlimit Set global group quota block hard limit.
+grpquota_inode_hardlimit Set global group quota inode hard limit.
+======================== =================================================
+
+None of the quota related mount options can be set or changed on remount.
+
+Quota limit parameters accept a suffix k, m or g for kilo, mega and giga
+and can't be changed on remount. Default global quota limits are taking
+effect for any and all user/group/project except root the first time the
+quota entry for user/group/project id is being accessed - typically the
+first time an inode with a particular id ownership is being created after
+the mount. In other words, instead of the limits being initialized to zero,
+they are initialized with the particular value provided with these mount
+options. The limits can be changed for any user/group id at any time as they
+normally can be.
+
+Note that tmpfs quotas do not support user namespaces so no uid/gid
+translation is done if quotas are enabled inside user namespaces.
tmpfs has a mount option to set the NUMA memory allocation policy for
all files in that instance (if CONFIG_NUMA is enabled) - which can be
diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst
index cb2a97e49872..f8fe815ab1f3 100644
--- a/Documentation/filesystems/vfs.rst
+++ b/Documentation/filesystems/vfs.rst
@@ -260,9 +260,11 @@ filesystem. The following members are defined:
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_super) (struct super_block *);
+ int (*freeze_super) (struct super_block *sb,
+ enum freeze_holder who);
int (*freeze_fs) (struct super_block *);
- int (*thaw_super) (struct super_block *);
+ int (*thaw_super) (struct super_block *sb,
+ enum freeze_wholder who);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
@@ -515,6 +517,7 @@ As of kernel 2.6.22, the following members are defined:
int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+ struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
};
Again, all methods are called without any locks being held, unless
@@ -675,7 +678,10 @@ otherwise noted.
called on ioctl(FS_IOC_SETFLAGS) and ioctl(FS_IOC_FSSETXATTR) to
change miscellaneous file flags and attributes. Callers hold
i_rwsem exclusive. If unset, then fall back to f_op->ioctl().
-
+``get_offset_ctx``
+ called to get the offset context for a directory inode. A
+ filesystem must define this operation to use
+ simple_offset_dir_operations.
The Address Space Object
========================
diff --git a/Documentation/i2c/writing-clients.rst b/Documentation/i2c/writing-clients.rst
index b7d3ae7458f8..41ddc10f1ac7 100644
--- a/Documentation/i2c/writing-clients.rst
+++ b/Documentation/i2c/writing-clients.rst
@@ -46,7 +46,7 @@ driver model device node, and its I2C address.
},
.id_table = foo_idtable,
- .probe_new = foo_probe,
+ .probe = foo_probe,
.remove = foo_remove,
/* if device autodetection is needed: */
.class = I2C_CLASS_SOMETHING,
diff --git a/Documentation/networking/napi.rst b/Documentation/networking/napi.rst
index a7a047742e93..7bf7b95c4f7a 100644
--- a/Documentation/networking/napi.rst
+++ b/Documentation/networking/napi.rst
@@ -65,15 +65,16 @@ argument - drivers can process completions for any number of Tx
packets but should only process up to ``budget`` number of
Rx packets. Rx processing is usually much more expensive.
-In other words, it is recommended to ignore the budget argument when
-performing TX buffer reclamation to ensure that the reclamation is not
-arbitrarily bounded; however, it is required to honor the budget argument
-for RX processing.
+In other words for Rx processing the ``budget`` argument limits how many
+packets driver can process in a single poll. Rx specific APIs like page
+pool or XDP cannot be used at all when ``budget`` is 0.
+skb Tx processing should happen regardless of the ``budget``, but if
+the argument is 0 driver cannot call any XDP (or page pool) APIs.
.. warning::
- The ``budget`` argument may be 0 if core tries to only process Tx completions
- and no Rx packets.
+ The ``budget`` argument may be 0 if core tries to only process
+ skb Tx completions and no Rx or XDP packets.
The poll method returns the amount of work done. If the driver still
has outstanding work to do (e.g. ``budget`` was exhausted)
diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index 8b1045c3b59e..c383a394c665 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -178,10 +178,10 @@ nf_conntrack_sctp_timeout_established - INTEGER (seconds)
Default is set to (hb_interval * path_max_retrans + rto_max)
nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
- default 0.3
+ default 3
nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
- default 0.3
+ default 3
nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
default 3
diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst
index df978127f2d7..cb686238f21d 100644
--- a/Documentation/process/embargoed-hardware-issues.rst
+++ b/Documentation/process/embargoed-hardware-issues.rst
@@ -254,7 +254,6 @@ an involved disclosed party. The current ambassadors list:
Samsung Javier González <javier.gonz@samsung.com>
Microsoft James Morris <jamorris@linux.microsoft.com>
- VMware
Xen Andrew Cooper <andrew.cooper3@citrix.com>
Canonical John Johansen <john.johansen@canonical.com>
@@ -263,10 +262,8 @@ an involved disclosed party. The current ambassadors list:
Red Hat Josh Poimboeuf <jpoimboe@redhat.com>
SUSE Jiri Kosina <jkosina@suse.cz>
- Amazon
Google Kees Cook <keescook@chromium.org>
- GCC
LLVM Nick Desaulniers <ndesaulniers@google.com>
============= ========================================================
diff --git a/Documentation/process/security-bugs.rst b/Documentation/process/security-bugs.rst
index 82e29837d589..5a6993795bd2 100644
--- a/Documentation/process/security-bugs.rst
+++ b/Documentation/process/security-bugs.rst
@@ -63,31 +63,28 @@ information submitted to the security list and any followup discussions
of the report are treated confidentially even after the embargo has been
lifted, in perpetuity.
-Coordination
-------------
-
-Fixes for sensitive bugs, such as those that might lead to privilege
-escalations, may need to be coordinated with the private
-<linux-distros@vs.openwall.org> mailing list so that distribution vendors
-are well prepared to issue a fixed kernel upon public disclosure of the
-upstream fix. Distros will need some time to test the proposed patch and
-will generally request at least a few days of embargo, and vendor update
-publication prefers to happen Tuesday through Thursday. When appropriate,
-the security team can assist with this coordination, or the reporter can
-include linux-distros from the start. In this case, remember to prefix
-the email Subject line with "[vs]" as described in the linux-distros wiki:
-<http://oss-security.openwall.org/wiki/mailing-lists/distros#how-to-use-the-lists>
+Coordination with other groups
+------------------------------
+
+The kernel security team strongly recommends that reporters of potential
+security issues NEVER contact the "linux-distros" mailing list until
+AFTER discussing it with the kernel security team. Do not Cc: both
+lists at once. You may contact the linux-distros mailing list after a
+fix has been agreed on and you fully understand the requirements that
+doing so will impose on you and the kernel community.
+
+The different lists have different goals and the linux-distros rules do
+not contribute to actually fixing any potential security problems.
CVE assignment
--------------
-The security team does not normally assign CVEs, nor do we require them
-for reports or fixes, as this can needlessly complicate the process and
-may delay the bug handling. If a reporter wishes to have a CVE identifier
-assigned ahead of public disclosure, they will need to contact the private
-linux-distros list, described above. When such a CVE identifier is known
-before a patch is provided, it is desirable to mention it in the commit
-message if the reporter agrees.
+The security team does not assign CVEs, nor do we require them for
+reports or fixes, as this can needlessly complicate the process and may
+delay the bug handling. If a reporter wishes to have a CVE identifier
+assigned, they should find one by themselves, for example by contacting
+MITRE directly. However under no circumstances will a patch inclusion
+be delayed to wait for a CVE identifier to arrive.
Non-disclosure agreements
-------------------------
diff --git a/MAINTAINERS b/MAINTAINERS
index aee340630eca..4cc6bf79fdd8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1865,9 +1865,11 @@ M: Martin Povišer <povik+lin@cutebit.org>
L: asahi@lists.linux.dev
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Maintained
+F: Documentation/devicetree/bindings/sound/adi,ssm3515.yaml
F: Documentation/devicetree/bindings/sound/apple,*
F: sound/soc/apple/*
F: sound/soc/codecs/cs42l83-i2c.c
+F: sound/soc/codecs/ssm3515.c
ARM/APPLE MACHINE SUPPORT
M: Hector Martin <marcan@marcan.st>
@@ -2337,7 +2339,7 @@ F: drivers/phy/mediatek/
ARM/MICROCHIP (ARM64) SoC support
M: Conor Dooley <conor@kernel.org>
M: Nicolas Ferre <nicolas.ferre@microchip.com>
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
T: git https://git.kernel.org/pub/scm/linux/kernel/git/at91/linux.git
@@ -2346,7 +2348,7 @@ F: arch/arm64/boot/dts/microchip/
ARM/Microchip (AT91) SoC support
M: Nicolas Ferre <nicolas.ferre@microchip.com>
M: Alexandre Belloni <alexandre.belloni@bootlin.com>
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
W: http://www.linux4sam.org
@@ -3248,7 +3250,7 @@ F: include/uapi/linux/atm*
ATMEL MACB ETHERNET DRIVER
M: Nicolas Ferre <nicolas.ferre@microchip.com>
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
S: Supported
F: drivers/net/ethernet/cadence/
@@ -3260,9 +3262,8 @@ F: Documentation/devicetree/bindings/input/atmel,maxtouch.yaml
F: drivers/input/touchscreen/atmel_mxt_ts.c
ATMEL WIRELESS DRIVER
-M: Simon Kelley <simon@thekelleys.org.uk>
L: linux-wireless@vger.kernel.org
-S: Maintained
+S: Orphan
W: http://www.thekelleys.org.uk/atmel
W: http://atmelwlandriver.sourceforge.net/
F: drivers/net/wireless/atmel/atmel*
@@ -3392,7 +3393,7 @@ F: drivers/media/radio/radio-aztech*
B43 WIRELESS DRIVER
L: linux-wireless@vger.kernel.org
L: b43-dev@lists.infradead.org
-S: Odd Fixes
+S: Orphan
W: https://wireless.wiki.kernel.org/en/users/Drivers/b43
F: drivers/net/wireless/broadcom/b43/
@@ -4461,7 +4462,6 @@ CADENCE USB3 DRD IP DRIVER
M: Peter Chen <peter.chen@kernel.org>
M: Pawel Laszczak <pawell@cadence.com>
R: Roger Quadros <rogerq@kernel.org>
-R: Aswath Govindraju <a-govindraju@ti.com>
L: linux-usb@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/peter.chen/usb.git
@@ -5147,10 +5147,12 @@ S: Maintained
F: include/linux/compiler_attributes.h
COMPUTE EXPRESS LINK (CXL)
+M: Davidlohr Bueso <dave@stgolabs.net>
+M: Jonathan Cameron <jonathan.cameron@huawei.com>
+M: Dave Jiang <dave.jiang@intel.com>
M: Alison Schofield <alison.schofield@intel.com>
M: Vishal Verma <vishal.l.verma@intel.com>
M: Ira Weiny <ira.weiny@intel.com>
-M: Ben Widawsky <bwidawsk@kernel.org>
M: Dan Williams <dan.j.williams@intel.com>
L: linux-cxl@vger.kernel.org
S: Maintained
@@ -5459,8 +5461,7 @@ F: Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml
F: drivers/net/can/ctucanfd/
CW1200 WLAN driver
-M: Solomon Peachy <pizza@shaftnet.org>
-S: Maintained
+S: Orphan
F: drivers/net/wireless/st/cw1200/
CX18 VIDEO4LINUX DRIVER
@@ -8811,6 +8812,7 @@ R: Michael Walle <michael@walle.cc>
S: Maintained
F: drivers/gpio/gpio-regmap.c
F: include/linux/gpio/regmap.h
+K: (devm_)?gpio_regmap_(un)?register
GPIO SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org>
@@ -9374,7 +9376,6 @@ F: drivers/crypto/hisilicon/sgl.c
F: include/linux/hisi_acc_qm.h
HISILICON ROCE DRIVER
-M: Haoyue Xu <xuhaoyue1@hisilicon.com>
M: Junxian Huang <huangjunxian6@hisilicon.com>
L: linux-rdma@vger.kernel.org
S: Maintained
@@ -9659,6 +9660,7 @@ F: tools/hv/
HYPERBUS SUPPORT
M: Vignesh Raghavendra <vigneshr@ti.com>
+R: Tudor Ambarus <tudor.ambarus@linaro.org>
L: linux-mtd@lists.infradead.org
S: Supported
Q: http://patchwork.ozlabs.org/project/linux-mtd/list/
@@ -12478,6 +12480,7 @@ F: net/mctp/
MAPLE TREE
M: Liam R. Howlett <Liam.Howlett@oracle.com>
+L: maple-tree@lists.infradead.org
L: linux-mm@kvack.org
S: Supported
F: Documentation/core-api/maple_tree.rst
@@ -12589,18 +12592,14 @@ F: Documentation/devicetree/bindings/net/marvell,pp2.yaml
F: drivers/net/ethernet/marvell/mvpp2/
MARVELL MWIFIEX WIRELESS DRIVER
-M: Amitkumar Karwar <amitkarwar@gmail.com>
-M: Ganapathi Bhat <ganapathi017@gmail.com>
-M: Sharvari Harisangam <sharvari.harisangam@nxp.com>
-M: Xinming Hu <huxinming820@gmail.com>
+M: Brian Norris <briannorris@chromium.org>
L: linux-wireless@vger.kernel.org
-S: Maintained
+S: Odd Fixes
F: drivers/net/wireless/marvell/mwifiex/
MARVELL MWL8K WIRELESS DRIVER
-M: Lennert Buytenhek <buytenh@wantstofly.org>
L: linux-wireless@vger.kernel.org
-S: Odd Fixes
+S: Orphan
F: drivers/net/wireless/marvell/mwl8k.c
MARVELL NAND CONTROLLER DRIVER
@@ -13788,7 +13787,7 @@ F: Documentation/devicetree/bindings/serial/atmel,at91-usart.yaml
F: drivers/spi/spi-at91-usart.c
MICROCHIP AUDIO ASOC DRIVERS
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/sound/atmel*
@@ -13811,7 +13810,7 @@ S: Maintained
F: drivers/crypto/atmel-ecc.*
MICROCHIP EIC DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/interrupt-controller/microchip,sama7g5-eic.yaml
@@ -13884,7 +13883,7 @@ F: drivers/video/fbdev/atmel_lcdfb.c
F: include/video/atmel_lcdc.h
MICROCHIP MCP16502 PMIC DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/regulator/mcp16502-regulator.txt
@@ -13911,7 +13910,7 @@ F: Documentation/devicetree/bindings/mtd/atmel-nand.txt
F: drivers/mtd/nand/raw/atmel/*
MICROCHIP OTPC DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/nvmem/microchip,sama7g5-otpc.yaml
@@ -13950,7 +13949,7 @@ F: Documentation/devicetree/bindings/fpga/microchip,mpf-spi-fpga-mgr.yaml
F: drivers/fpga/microchip-spi.c
MICROCHIP PWM DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-pwm@vger.kernel.org
S: Supported
@@ -13966,7 +13965,7 @@ F: drivers/iio/adc/at91-sama5d2_adc.c
F: include/dt-bindings/iio/adc/at91-sama5d2_adc.h
MICROCHIP SAMA5D2-COMPATIBLE SHUTDOWN CONTROLLER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
S: Supported
F: Documentation/devicetree/bindings/power/reset/atmel,sama5d2-shdwc.yaml
F: drivers/power/reset/at91-sama5d2_shdwc.c
@@ -13983,7 +13982,7 @@ S: Supported
F: drivers/spi/spi-atmel.*
MICROCHIP SSC DRIVER
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/misc/atmel-ssc.txt
@@ -14012,7 +14011,7 @@ F: drivers/usb/gadget/udc/atmel_usba_udc.*
MICROCHIP WILC1000 WIFI DRIVER
M: Ajay Singh <ajay.kathat@microchip.com>
-M: Claudiu Beznea <claudiu.beznea@microchip.com>
+M: Claudiu Beznea <claudiu.beznea@tuxon.dev>
L: linux-wireless@vger.kernel.org
S: Supported
F: drivers/net/wireless/microchip/wilc1000/
@@ -14804,6 +14803,16 @@ F: net/netfilter/xt_CONNSECMARK.c
F: net/netfilter/xt_SECMARK.c
F: net/netlabel/
+NETWORKING [MACSEC]
+M: Sabrina Dubroca <sd@queasysnail.net>
+L: netdev@vger.kernel.org
+S: Maintained
+F: drivers/net/macsec.c
+F: include/net/macsec.h
+F: include/uapi/linux/if_macsec.h
+K: macsec
+K: \bmdo_
+
NETWORKING [MPTCP]
M: Matthieu Baerts <matthieu.baerts@tessares.net>
M: Mat Martineau <martineau@kernel.org>
@@ -16295,6 +16304,7 @@ F: drivers/pci/controller/dwc/pci-exynos.c
PCI DRIVER FOR SYNOPSYS DESIGNWARE
M: Jingoo Han <jingoohan1@gmail.com>
M: Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+M: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
L: linux-pci@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -17446,6 +17456,7 @@ F: drivers/media/tuners/qt1010*
QUALCOMM ATH12K WIRELESS DRIVER
M: Kalle Valo <kvalo@kernel.org>
+M: Jeff Johnson <quic_jjohnson@quicinc.com>
L: ath12k@lists.infradead.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git
@@ -17453,6 +17464,7 @@ F: drivers/net/wireless/ath/ath12k/
QUALCOMM ATHEROS ATH10K WIRELESS DRIVER
M: Kalle Valo <kvalo@kernel.org>
+M: Jeff Johnson <quic_jjohnson@quicinc.com>
L: ath10k@lists.infradead.org
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath10k
@@ -17462,6 +17474,7 @@ F: drivers/net/wireless/ath/ath10k/
QUALCOMM ATHEROS ATH11K WIRELESS DRIVER
M: Kalle Valo <kvalo@kernel.org>
+M: Jeff Johnson <quic_jjohnson@quicinc.com>
L: ath11k@lists.infradead.org
S: Supported
W: https://wireless.wiki.kernel.org/en/users/Drivers/ath11k
@@ -17982,7 +17995,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.g
F: drivers/net/wireless/realtek/rtlwifi/
REALTEK WIRELESS DRIVER (rtw88)
-M: Yan-Hsuan Chuang <tony0620emma@gmail.com>
+M: Ping-Ke Shih <pkshih@realtek.com>
L: linux-wireless@vger.kernel.org
S: Maintained
F: drivers/net/wireless/realtek/rtw88/
@@ -18507,17 +18520,14 @@ RTL8180 WIRELESS DRIVER
L: linux-wireless@vger.kernel.org
S: Orphan
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
F: drivers/net/wireless/realtek/rtl818x/rtl8180/
RTL8187 WIRELESS DRIVER
-M: Herton Ronaldo Krzesinski <herton@canonical.com>
-M: Hin-Tak Leung <htl10@users.sourceforge.net>
+M: Hin-Tak Leung <hintak.leung@gmail.com>
M: Larry Finger <Larry.Finger@lwfinger.net>
L: linux-wireless@vger.kernel.org
S: Maintained
W: https://wireless.wiki.kernel.org/
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.git
F: drivers/net/wireless/realtek/rtl818x/rtl8187/
RTL8XXXU WIRELESS DRIVER (rtl8xxxu)
@@ -19224,13 +19234,6 @@ F: Documentation/devicetree/bindings/serial/serial.yaml
F: drivers/tty/serdev/
F: include/linux/serdev.h
-SERIAL DRIVERS
-M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L: linux-serial@vger.kernel.org
-S: Maintained
-F: Documentation/devicetree/bindings/serial/
-F: drivers/tty/serial/
-
SERIAL IR RECEIVER
M: Sean Young <sean@mess.org>
L: linux-media@vger.kernel.org
@@ -20401,7 +20404,6 @@ F: drivers/pwm/pwm-stm32*
F: include/linux/*/stm32-*tim*
STMMAC ETHERNET DRIVER
-M: Giuseppe Cavallaro <peppe.cavallaro@st.com>
M: Alexandre Torgue <alexandre.torgue@foss.st.com>
M: Jose Abreu <joabreu@synopsys.com>
L: netdev@vger.kernel.org
@@ -21060,6 +21062,39 @@ S: Maintained
F: Documentation/devicetree/bindings/sound/davinci-mcasp-audio.yaml
F: sound/soc/ti/
+TEXAS INSTRUMENTS AUDIO (ASoC/HDA) DRIVERS
+M: Shenghao Ding <shenghao-ding@ti.com>
+M: Kevin Lu <kevin-lu@ti.com>
+M: Baojun Xu <x1077012@ti.com>
+L: alsa-devel@alsa-project.org (moderated for non-subscribers)
+S: Maintained
+F: Documentation/devicetree/bindings/sound/tas2552.txt
+F: Documentation/devicetree/bindings/sound/tas2562.yaml
+F: Documentation/devicetree/bindings/sound/tas2770.yaml
+F: Documentation/devicetree/bindings/sound/tas27xx.yaml
+F: Documentation/devicetree/bindings/sound/ti,pcm1681.txt
+F: Documentation/devicetree/bindings/sound/ti,pcm3168a.yaml
+F: Documentation/devicetree/bindings/sound/ti,tlv320*.yaml
+F: Documentation/devicetree/bindings/sound/tlv320adcx140.yaml
+F: Documentation/devicetree/bindings/sound/tlv320aic31xx.txt
+F: Documentation/devicetree/bindings/sound/tpa6130a2.txt
+F: include/sound/tas2*.h
+F: include/sound/tlv320*.h
+F: include/sound/tpa6130a2-plat.h
+F: sound/pci/hda/tas2781_hda_i2c.c
+F: sound/soc/codecs/pcm1681.c
+F: sound/soc/codecs/pcm1789*.*
+F: sound/soc/codecs/pcm179x*.*
+F: sound/soc/codecs/pcm186x*.*
+F: sound/soc/codecs/pcm3008.*
+F: sound/soc/codecs/pcm3060*.*
+F: sound/soc/codecs/pcm3168a*.*
+F: sound/soc/codecs/pcm5102a.c
+F: sound/soc/codecs/pcm512x*.*
+F: sound/soc/codecs/tas2*.*
+F: sound/soc/codecs/tlv320*.*
+F: sound/soc/codecs/tpa6130a2.*
+
TEXAS INSTRUMENTS DMA DRIVERS
M: Peter Ujfalusi <peter.ujfalusi@gmail.com>
L: dmaengine@vger.kernel.org
@@ -21636,14 +21671,16 @@ W: https://github.com/srcres258/linux-doc
T: git git://github.com/srcres258/linux-doc.git doc-zh-tw
F: Documentation/translations/zh_TW/
-TTY LAYER
+TTY LAYER AND SERIAL DRIVERS
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
M: Jiri Slaby <jirislaby@kernel.org>
+L: linux-kernel@vger.kernel.org
+L: linux-serial@vger.kernel.org
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git
+F: Documentation/devicetree/bindings/serial/
F: Documentation/driver-api/serial/
F: drivers/tty/
-F: drivers/tty/serial/serial_core.c
F: include/linux/selection.h
F: include/linux/serial.h
F: include/linux/serial_core.h
@@ -21672,11 +21709,14 @@ S: Orphan
F: drivers/net/ethernet/dec/tulip/
TUN/TAP driver
-M: Maxim Krasnyansky <maxk@qti.qualcomm.com>
+M: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
+M: Jason Wang <jasowang@redhat.com>
S: Maintained
W: http://vtun.sourceforge.net/tun
F: Documentation/networking/tuntap.rst
F: arch/um/os-Linux/drivers/
+F: drivers/net/tap.c
+F: drivers/net/tun.c
TURBOCHANNEL SUBSYSTEM
M: "Maciej W. Rozycki" <macro@orcam.me.uk>
@@ -21899,9 +21939,8 @@ S: Maintained
F: drivers/usb/misc/apple-mfi-fastcharge.c
USB AR5523 WIRELESS DRIVER
-M: Pontus Fuchs <pontus.fuchs@gmail.com>
L: linux-wireless@vger.kernel.org
-S: Maintained
+S: Orphan
F: drivers/net/wireless/ath/ar5523/
USB ATTACHED SCSI
@@ -22178,9 +22217,8 @@ F: drivers/usb/gadget/legacy/webcam.c
F: include/uapi/linux/usb/g_uvc.h
USB WIRELESS RNDIS DRIVER (rndis_wlan)
-M: Jussi Kivilinna <jussi.kivilinna@iki.fi>
L: linux-wireless@vger.kernel.org
-S: Maintained
+S: Orphan
F: drivers/net/wireless/legacy/rndis_wlan.c
USB XHCI DRIVER
@@ -22469,7 +22507,6 @@ L: virtualization@lists.linux-foundation.org
S: Maintained
F: drivers/block/virtio_blk.c
F: drivers/scsi/virtio_scsi.c
-F: drivers/vhost/scsi.c
F: include/uapi/linux/virtio_blk.h
F: include/uapi/linux/virtio_scsi.h
@@ -22568,6 +22605,16 @@ F: include/linux/vhost_iotlb.h
F: include/uapi/linux/vhost.h
F: kernel/vhost_task.c
+VIRTIO HOST (VHOST-SCSI)
+M: "Michael S. Tsirkin" <mst@redhat.com>
+M: Jason Wang <jasowang@redhat.com>
+M: Mike Christie <michael.christie@oracle.com>
+R: Paolo Bonzini <pbonzini@redhat.com>
+R: Stefan Hajnoczi <stefanha@redhat.com>
+L: virtualization@lists.linux-foundation.org
+S: Maintained
+F: drivers/vhost/scsi.c
+
VIRTIO I2C DRIVER
M: Conghui Chen <conghui.chen@intel.com>
M: Viresh Kumar <viresh.kumar@linaro.org>
@@ -22955,7 +23002,7 @@ F: drivers/input/misc/wistron_btns.c
WL3501 WIRELESS PCMCIA CARD DRIVER
L: linux-wireless@vger.kernel.org
-S: Odd fixes
+S: Orphan
F: drivers/net/wireless/legacy/wl3501*
WMI BINARY MOF DRIVER
@@ -23526,11 +23573,8 @@ S: Maintained
F: mm/zbud.c
ZD1211RW WIRELESS DRIVER
-M: Ulrich Kunitz <kune@deine-taler.de>
L: linux-wireless@vger.kernel.org
-L: zd1211-devs@lists.sourceforge.net (subscribers-only)
-S: Maintained
-W: http://zd1211.ath.cx/wiki/DriverRewrite
+S: Orphan
F: drivers/net/wireless/zydas/zd1211rw/
ZD1301 MEDIA DRIVER
diff --git a/Makefile b/Makefile
index 658ec2b8aa74..2fdd8b40b7e0 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
VERSION = 6
PATCHLEVEL = 5
SUBLEVEL = 0
-EXTRAVERSION = -rc2
+EXTRAVERSION =
NAME = Hurr durr I'ma ninja sloth
# *DOCUMENTATION*
@@ -555,11 +555,23 @@ LINUXINCLUDE := \
$(USERINCLUDE)
KBUILD_AFLAGS := -D__ASSEMBLY__ -fno-PIE
-KBUILD_CFLAGS := -Wall -Wundef -Werror=strict-prototypes -Wno-trigraphs \
- -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE \
- -Werror=implicit-function-declaration -Werror=implicit-int \
- -Werror=return-type -Wno-format-security -funsigned-char \
- -std=gnu11
+
+KBUILD_CFLAGS :=
+KBUILD_CFLAGS += -std=gnu11
+KBUILD_CFLAGS += -fshort-wchar
+KBUILD_CFLAGS += -funsigned-char
+KBUILD_CFLAGS += -fno-common
+KBUILD_CFLAGS += -fno-PIE
+KBUILD_CFLAGS += -fno-strict-aliasing
+KBUILD_CFLAGS += -Wall
+KBUILD_CFLAGS += -Wundef
+KBUILD_CFLAGS += -Werror=implicit-function-declaration
+KBUILD_CFLAGS += -Werror=implicit-int
+KBUILD_CFLAGS += -Werror=return-type
+KBUILD_CFLAGS += -Werror=strict-prototypes
+KBUILD_CFLAGS += -Wno-format-security
+KBUILD_CFLAGS += -Wno-trigraphs
+
KBUILD_CPPFLAGS := -D__KERNEL__
KBUILD_RUSTFLAGS := $(rust_common_flags) \
--target=$(objtree)/scripts/target.json \
diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h
index 714abe494e5f..55bb1c09fd39 100644
--- a/arch/alpha/include/asm/processor.h
+++ b/arch/alpha/include/asm/processor.h
@@ -47,12 +47,6 @@ unsigned long __get_wchan(struct task_struct *p);
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
-
-#ifndef CONFIG_SMP
-/* Nothing to prefetch. */
-#define spin_lock_prefetch(lock) do { } while (0)
-#endif
extern inline void prefetch(const void *ptr)
{
@@ -64,11 +58,4 @@ extern inline void prefetchw(const void *ptr)
__builtin_prefetch(ptr, 1, 3);
}
-#ifdef CONFIG_SMP
-extern inline void spin_lock_prefetch(const void *ptr)
-{
- __builtin_prefetch(ptr, 1, 3);
-}
-#endif
-
#endif /* __ASM_ALPHA_PROCESSOR_H */
diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c
index b650ff1cb022..3d7473531ab1 100644
--- a/arch/alpha/kernel/setup.c
+++ b/arch/alpha/kernel/setup.c
@@ -385,8 +385,7 @@ setup_memory(void *kernel_end)
#endif /* CONFIG_BLK_DEV_INITRD */
}
-int __init
-page_is_ram(unsigned long pfn)
+int page_is_ram(unsigned long pfn)
{
struct memclust_struct * cluster;
struct memdesc_struct * memdesc;
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl
index 1f13995d00d7..ad37569d0507 100644
--- a/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@ -491,3 +491,4 @@
559 common futex_waitv sys_futex_waitv
560 common set_mempolicy_home_node sys_ni_syscall
561 common cachestat sys_cachestat
+562 common fchmodat2 sys_fchmodat2
diff --git a/arch/arm/boot/dts/arm/integratorap.dts b/arch/arm/boot/dts/arm/integratorap.dts
index 5b52d75bc6be..d9927d3181dc 100644
--- a/arch/arm/boot/dts/arm/integratorap.dts
+++ b/arch/arm/boot/dts/arm/integratorap.dts
@@ -158,7 +158,7 @@
valid-mask = <0x003fffff>;
};
- pci: pciv3@62000000 {
+ pci: pci@62000000 {
compatible = "arm,integrator-ap-pci", "v3,v360epc-pci";
device_type = "pci";
#interrupt-cells = <1>;
diff --git a/arch/arm/boot/dts/microchip/sam9x60.dtsi b/arch/arm/boot/dts/microchip/sam9x60.dtsi
index 8b53997675e7..73d570a17269 100644
--- a/arch/arm/boot/dts/microchip/sam9x60.dtsi
+++ b/arch/arm/boot/dts/microchip/sam9x60.dtsi
@@ -172,7 +172,7 @@
status = "disabled";
uart4: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <13 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -240,7 +240,7 @@
status = "disabled";
uart5: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
atmel,usart-mode = <AT91_USART_MODE_SERIAL>;
interrupts = <14 IRQ_TYPE_LEVEL_HIGH 7>;
@@ -370,7 +370,7 @@
status = "disabled";
uart11: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <32 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -419,7 +419,7 @@
status = "disabled";
uart12: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <33 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -576,7 +576,7 @@
status = "disabled";
uart6: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <9 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -625,7 +625,7 @@
status = "disabled";
uart7: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <10 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -674,7 +674,7 @@
status = "disabled";
uart8: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <11 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -723,7 +723,7 @@
status = "disabled";
uart0: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <5 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -791,7 +791,7 @@
status = "disabled";
uart1: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <6 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -859,7 +859,7 @@
status = "disabled";
uart2: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <7 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -927,7 +927,7 @@
status = "disabled";
uart3: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <8 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -1050,7 +1050,7 @@
status = "disabled";
uart9: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <15 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
@@ -1099,7 +1099,7 @@
status = "disabled";
uart10: serial@200 {
- compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
+ compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
reg = <0x200 0x200>;
interrupts = <16 IRQ_TYPE_LEVEL_HIGH 7>;
dmas = <&dma0
diff --git a/arch/arm/boot/dts/nspire/nspire.dtsi b/arch/arm/boot/dts/nspire/nspire.dtsi
index bb240e6a3a6f..088bcc38589f 100644
--- a/arch/arm/boot/dts/nspire/nspire.dtsi
+++ b/arch/arm/boot/dts/nspire/nspire.dtsi
@@ -161,7 +161,7 @@
};
watchdog: watchdog@90060000 {
- compatible = "arm,amba-primecell";
+ compatible = "arm,primecell";
reg = <0x90060000 0x1000>;
interrupts = <3>;
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts b/arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts
index 103e73176e47..1a00d290092a 100644
--- a/arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts
+++ b/arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts
@@ -60,6 +60,16 @@
status = "okay";
};
+&cpu0 {
+ /* CPU rated to 800 MHz, not the default 1.2GHz. */
+ operating-points = <
+ /* kHz uV */
+ 166666 850000
+ 400000 900000
+ 800000 1050000
+ >;
+};
+
&ecspi1 {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_ecspi1>;
diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi
index 1a599c294ab8..1ca4d219609f 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi
@@ -182,7 +182,7 @@
pinctrl-0 = <&pinctrl_rtc_int>;
reg = <0x68>;
interrupt-parent = <&gpio7>;
- interrupts = <8 IRQ_TYPE_LEVEL_HIGH>;
+ interrupts = <8 IRQ_TYPE_LEVEL_LOW>;
status = "disabled";
};
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6sll.dtsi b/arch/arm/boot/dts/nxp/imx/imx6sll.dtsi
index 2873369a57c0..3659fd5ecfa6 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6sll.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6sll.dtsi
@@ -552,7 +552,7 @@
reg = <0x020ca000 0x1000>;
interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clks IMX6SLL_CLK_USBPHY2>;
- phy-reg_3p0-supply = <&reg_3p0>;
+ phy-3p0-supply = <&reg_3p0>;
fsl,anatop = <&anatop>;
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi b/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi
index 3a4308666552..a05069d49cb8 100644
--- a/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi
@@ -863,7 +863,6 @@
reg = <0>;
ldb_from_lcdif1: endpoint {
- remote-endpoint = <&lcdif1_to_ldb>;
};
};
@@ -1010,6 +1009,8 @@
<&clks IMX6SX_CLK_USDHC1>;
clock-names = "ipg", "ahb", "per";
bus-width = <4>;
+ fsl,tuning-start-tap = <20>;
+ fsl,tuning-step= <2>;
status = "disabled";
};
@@ -1022,6 +1023,8 @@
<&clks IMX6SX_CLK_USDHC2>;
clock-names = "ipg", "ahb", "per";
bus-width = <4>;
+ fsl,tuning-start-tap = <20>;
+ fsl,tuning-step= <2>;
status = "disabled";
};
@@ -1034,6 +1037,8 @@
<&clks IMX6SX_CLK_USDHC3>;
clock-names = "ipg", "ahb", "per";
bus-width = <4>;
+ fsl,tuning-start-tap = <20>;
+ fsl,tuning-step= <2>;
status = "disabled";
};
@@ -1309,11 +1314,8 @@
power-domains = <&pd_disp>;
status = "disabled";
- ports {
- port {
- lcdif1_to_ldb: endpoint {
- remote-endpoint = <&ldb_from_lcdif1>;
- };
+ port {
+ lcdif1_to_ldb: endpoint {
};
};
};
diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
index 54026c2c93fa..6ffb428dc939 100644
--- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
+++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi
@@ -1184,6 +1184,8 @@
<&clks IMX7D_USDHC1_ROOT_CLK>;
clock-names = "ipg", "ahb", "per";
bus-width = <4>;
+ fsl,tuning-step = <2>;
+ fsl,tuning-start-tap = <20>;
status = "disabled";
};
@@ -1196,6 +1198,8 @@
<&clks IMX7D_USDHC2_ROOT_CLK>;
clock-names = "ipg", "ahb", "per";
bus-width = <4>;
+ fsl,tuning-step = <2>;
+ fsl,tuning-start-tap = <20>;
status = "disabled";
};
@@ -1208,6 +1212,8 @@
<&clks IMX7D_USDHC3_ROOT_CLK>;
clock-names = "ipg", "ahb", "per";
bus-width = <4>;
+ fsl,tuning-step = <2>;
+ fsl,tuning-start-tap = <20>;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
index b958607c71dc..96451c8a815c 100644
--- a/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi
@@ -145,6 +145,8 @@
/* MDIO */
AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLUP | SLEWCTRL_FAST, MUX_MODE0)
AM33XX_PADCONF(AM335X_PIN_MDC, PIN_OUTPUT_PULLUP, MUX_MODE0)
+ /* Added to support GPIO controlled PHY reset */
+ AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_OUTPUT_PULLUP, MUX_MODE7)
>;
};
@@ -153,6 +155,8 @@
/* MDIO reset value */
AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLDOWN, MUX_MODE7)
AM33XX_PADCONF(AM335X_PIN_MDC, PIN_INPUT_PULLDOWN, MUX_MODE7)
+ /* Added to support GPIO controlled PHY reset */
+ AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_INPUT_PULLDOWN, MUX_MODE7)
>;
};
@@ -215,6 +219,7 @@
baseboard_eeprom: baseboard_eeprom@50 {
compatible = "atmel,24c256";
reg = <0x50>;
+ vcc-supply = <&ldo4_reg>;
#address-cells = <1>;
#size-cells = <1>;
@@ -377,6 +382,10 @@
ethphy0: ethernet-phy@0 {
reg = <0>;
+ /* Support GPIO reset on revision C3 boards */
+ reset-gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+ reset-assert-us = <300>;
+ reset-deassert-us = <6500>;
};
};
diff --git a/arch/arm/configs/axm55xx_defconfig b/arch/arm/configs/axm55xx_defconfig
index bfbaa2df3be5..d1c550894a65 100644
--- a/arch/arm/configs/axm55xx_defconfig
+++ b/arch/arm/configs/axm55xx_defconfig
@@ -197,7 +197,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=y
CONFIG_CUSE=y
CONFIG_FSCACHE=y
diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index 821d966c95a5..05ea71778ef8 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig
@@ -232,7 +232,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_XFS_FS=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index b0f0baa3a6c4..53b1d41b4a8b 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -327,7 +327,7 @@ CONFIG_PWM_SAMSUNG=y
CONFIG_PHY_EXYNOS5250_SATA=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/footbridge_defconfig b/arch/arm/configs/footbridge_defconfig
index 87c489337d0e..c9f4594b7ca9 100644
--- a/arch/arm/configs/footbridge_defconfig
+++ b/arch/arm/configs/footbridge_defconfig
@@ -94,7 +94,7 @@ CONFIG_LEDS_CLASS=y
CONFIG_LEDS_TRIGGERS=y
CONFIG_LEDS_TRIGGER_TIMER=y
CONFIG_EXT2_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_MSDOS_FS=m
diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index 05706696a5fb..0a90583f9f01 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig
@@ -442,7 +442,7 @@ CONFIG_EXT3_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index d7a0bca641eb..1cb145633a91 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig
@@ -207,7 +207,7 @@ CONFIG_RESET_TI_SYSCON=m
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_FANOTIFY=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/arm/configs/lpc32xx_defconfig b/arch/arm/configs/lpc32xx_defconfig
index d7df0486850f..e2b0ff0b253f 100644
--- a/arch/arm/configs/lpc32xx_defconfig
+++ b/arch/arm/configs/lpc32xx_defconfig
@@ -162,7 +162,7 @@ CONFIG_MAX517=y
CONFIG_PWM=y
CONFIG_PWM_LPC32XX=y
CONFIG_EXT2_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/milbeaut_m10v_defconfig
index 385ad0f391a8..7d4284502325 100644
--- a/arch/arm/configs/milbeaut_m10v_defconfig
+++ b/arch/arm/configs/milbeaut_m10v_defconfig
@@ -81,7 +81,7 @@ CONFIG_SOC_BRCMSTB=y
CONFIG_MEMORY=y
# CONFIG_ARM_PMU is not set
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index f0800f806b5f..c7b2550d706c 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -1226,7 +1226,7 @@ CONFIG_COUNTER=m
CONFIG_STM32_TIMER_CNT=m
CONFIG_STM32_LPTIMER_CNT=m
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 53dd0717cea5..7c2cc7a89511 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig
@@ -188,7 +188,7 @@ CONFIG_RTC_DRV_OMAP=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_DNOTIFY is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_MSDOS_FS=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 9bd36dd39bd0..b685018dcf54 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -678,7 +678,7 @@ CONFIG_EXT4_FS_SECURITY=y
CONFIG_FANOTIFY=y
CONFIG_QUOTA=y
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/pxa_defconfig b/arch/arm/configs/pxa_defconfig
index b46e39369dbb..b0c3355e2599 100644
--- a/arch/arm/configs/pxa_defconfig
+++ b/arch/arm/configs/pxa_defconfig
@@ -589,7 +589,7 @@ CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_XFS_FS=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
CONFIG_FSCACHE=y
diff --git a/arch/arm/configs/rpc_defconfig b/arch/arm/configs/rpc_defconfig
index 65a3fded55be..b1d12a2c2ef8 100644
--- a/arch/arm/configs/rpc_defconfig
+++ b/arch/arm/configs/rpc_defconfig
@@ -79,7 +79,7 @@ CONFIG_RTC_CLASS=y
CONFIG_RTC_DRV_PCF8583=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_MSDOS_FS=m
diff --git a/arch/arm/configs/s5pv210_defconfig b/arch/arm/configs/s5pv210_defconfig
index 4c1e480b5bbd..72df854878f8 100644
--- a/arch/arm/configs/s5pv210_defconfig
+++ b/arch/arm/configs/s5pv210_defconfig
@@ -103,7 +103,7 @@ CONFIG_PHY_SAMSUNG_USB2=m
CONFIG_PHY_S5PV210_USB2=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig
index 70739e09d0f4..d6dfae196f84 100644
--- a/arch/arm/configs/socfpga_defconfig
+++ b/arch/arm/configs/socfpga_defconfig
@@ -136,7 +136,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT3_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_VFAT_FS=y
CONFIG_NTFS_FS=y
CONFIG_NTFS_RW=y
diff --git a/arch/arm/configs/spear13xx_defconfig b/arch/arm/configs/spear13xx_defconfig
index bfde0c86cdc5..c8128a6180e7 100644
--- a/arch/arm/configs/spear13xx_defconfig
+++ b/arch/arm/configs/spear13xx_defconfig
@@ -85,7 +85,7 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=y
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
diff --git a/arch/arm/configs/spear3xx_defconfig b/arch/arm/configs/spear3xx_defconfig
index a96ed5cf778e..97ea2e9a6f07 100644
--- a/arch/arm/configs/spear3xx_defconfig
+++ b/arch/arm/configs/spear3xx_defconfig
@@ -68,7 +68,7 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
diff --git a/arch/arm/configs/spear6xx_defconfig b/arch/arm/configs/spear6xx_defconfig
index 3e2c2abae5ba..a7a3413ac968 100644
--- a/arch/arm/configs/spear6xx_defconfig
+++ b/arch/arm/configs/spear6xx_defconfig
@@ -54,7 +54,7 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_SECURITY=y
CONFIG_EXT3_FS=y
CONFIG_EXT3_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_FAT_DEFAULT_IOCHARSET="ascii"
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index dfeed440254a..fe4326d938c1 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -25,6 +25,9 @@ static inline int syscall_get_nr(struct task_struct *task,
if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT))
return task_thread_info(task)->abi_syscall;
+ if (task_thread_info(task)->abi_syscall == -1)
+ return -1;
+
return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
}
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index bcc4c9ec3aa4..5c31e9de7a60 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -90,6 +90,7 @@ slow_work_pending:
cmp r0, #0
beq no_work_pending
movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE)
+ str scno, [tsk, #TI_ABI_SYSCALL] @ make sure tracers see update
ldmia sp, {r0 - r6} @ have to reload r0 - r6
b local_restart @ ... and off we go
ENDPROC(ret_fast_syscall)
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index 2d8e2516906b..fef32d73f912 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -783,8 +783,9 @@ long arch_ptrace(struct task_struct *child, long request,
break;
case PTRACE_SET_SYSCALL:
- task_thread_info(child)->abi_syscall = data &
- __NR_SYSCALL_MASK;
+ if (data != -1)
+ data &= __NR_SYSCALL_MASK;
+ task_thread_info(child)->abi_syscall = data;
ret = 0;
break;
diff --git a/arch/arm/mach-pxa/sharpsl_pm.h b/arch/arm/mach-pxa/sharpsl_pm.h
index 20e4cab64d85..623167f30ec2 100644
--- a/arch/arm/mach-pxa/sharpsl_pm.h
+++ b/arch/arm/mach-pxa/sharpsl_pm.h
@@ -105,5 +105,4 @@ void sharpsl_pm_led(int val);
#define MAX1111_ACIN_VOLT 6u
int sharpsl_pm_pxa_read_max1111(int channel);
-void corgi_lcd_limit_intensity(int limit);
#endif
diff --git a/arch/arm/mach-pxa/spitz_pm.c b/arch/arm/mach-pxa/spitz_pm.c
index 1c021cef965f..8bc4ea51a0c1 100644
--- a/arch/arm/mach-pxa/spitz_pm.c
+++ b/arch/arm/mach-pxa/spitz_pm.c
@@ -15,6 +15,7 @@
#include <linux/interrupt.h>
#include <linux/platform_device.h>
#include <linux/apm-emulation.h>
+#include <linux/spi/corgi_lcd.h>
#include <asm/irq.h>
#include <asm/mach-types.h>
diff --git a/arch/arm/mach-zynq/pm.c b/arch/arm/mach-zynq/pm.c
index 8ba450ab559c..61ad965ef3ac 100644
--- a/arch/arm/mach-zynq/pm.c
+++ b/arch/arm/mach-zynq/pm.c
@@ -8,8 +8,8 @@
*/
#include <linux/io.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
#include "common.h"
/* register offsets */
diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl
index 8ebed8a13874..c572d6c3dee0 100644
--- a/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@ -465,3 +465,4 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
index 38ae674f2f02..3037f58057c9 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
@@ -145,7 +145,7 @@
status = "okay";
clock-frequency = <100000>;
i2c-sda-falling-time-ns = <890>; /* hcnt */
- i2c-sdl-falling-time-ns = <890>; /* lcnt */
+ i2c-scl-falling-time-ns = <890>; /* lcnt */
pinctrl-names = "default", "gpio";
pinctrl-0 = <&i2c1_pmx_func>;
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
index ede99dcc0558..f4cf30bac557 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
@@ -141,7 +141,7 @@
status = "okay";
clock-frequency = <100000>;
i2c-sda-falling-time-ns = <890>; /* hcnt */
- i2c-sdl-falling-time-ns = <890>; /* lcnt */
+ i2c-scl-falling-time-ns = <890>; /* lcnt */
adc@14 {
compatible = "lltc,ltc2497";
diff --git a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi
deleted file mode 120000
index 68fd0f8f1dee..000000000000
--- a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi
+++ /dev/null
@@ -1 +0,0 @@
-../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi \ No newline at end of file
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
index 03e7679217b2..479948f8a4b7 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
@@ -141,7 +141,7 @@
};
&gpio1 {
- gpio-line-names = "nINT_ETHPHY", "LED_RED", "WDOG_INT", "X_RTC_INT",
+ gpio-line-names = "", "LED_RED", "WDOG_INT", "X_RTC_INT",
"", "", "", "RESET_ETHPHY",
"CAN_nINT", "CAN_EN", "nENABLE_FLATLINK", "",
"USB_OTG_VBUS_EN", "", "LED_GREEN", "LED_BLUE";
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
index 92616bc4f71f..847f08537b48 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
@@ -111,7 +111,7 @@
};
&gpio1 {
- gpio-line-names = "nINT_ETHPHY", "", "WDOG_INT", "X_RTC_INT",
+ gpio-line-names = "", "", "WDOG_INT", "X_RTC_INT",
"", "", "", "RESET_ETHPHY",
"", "", "nENABLE_FLATLINK";
};
@@ -210,7 +210,7 @@
};
};
- reg_vdd_gpu: buck3 {
+ reg_vdd_vpu: buck3 {
regulator-always-on;
regulator-boot-on;
regulator-max-microvolt = <1000000>;
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
index 6f26914602c8..07b07dc954fd 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
@@ -567,6 +567,10 @@
status = "okay";
};
+&disp_blk_ctrl {
+ status = "disabled";
+};
+
&pgc_mipi {
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
index 93088fa1c3b9..d5b716855812 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
+++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
@@ -628,6 +628,10 @@
status = "okay";
};
+&disp_blk_ctrl {
+ status = "disabled";
+};
+
&pgc_mipi {
status = "disabled";
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
index d6b36f04f3dc..1a647d4072ba 100644
--- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi
@@ -1221,10 +1221,9 @@
compatible = "fsl,imx8mm-mipi-csi2";
reg = <0x32e30000 0x1000>;
interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
- assigned-clocks = <&clk IMX8MM_CLK_CSI1_CORE>,
- <&clk IMX8MM_CLK_CSI1_PHY_REF>;
- assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_1000M>,
- <&clk IMX8MM_SYS_PLL2_1000M>;
+ assigned-clocks = <&clk IMX8MM_CLK_CSI1_CORE>;
+ assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_1000M>;
+
clock-frequency = <333000000>;
clocks = <&clk IMX8MM_CLK_DISP_APB_ROOT>,
<&clk IMX8MM_CLK_CSI1_ROOT>,
diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
index d3a67109d55b..b8946edf317b 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
@@ -358,7 +358,7 @@
MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x91
MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x91
MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x1f
- MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x19
+ MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x159
>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
index 9869fe7652fc..aa38dd6dc9ba 100644
--- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi
@@ -1175,10 +1175,8 @@
compatible = "fsl,imx8mm-mipi-csi2";
reg = <0x32e30000 0x1000>;
interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>;
- assigned-clocks = <&clk IMX8MN_CLK_CAMERA_PIXEL>,
- <&clk IMX8MN_CLK_CSI1_PHY_REF>;
- assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_1000M>,
- <&clk IMX8MN_SYS_PLL2_1000M>;
+ assigned-clocks = <&clk IMX8MN_CLK_CAMERA_PIXEL>;
+ assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_1000M>;
assigned-clock-rates = <333000000>;
clock-frequency = <333000000>;
clocks = <&clk IMX8MN_CLK_DISP_APB_ROOT>,
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 1a2d2c04db32..01eec424f7f7 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -772,7 +772,7 @@
<&clk IMX8MQ_SYS1_PLL_800M>,
<&clk IMX8MQ_VPU_PLL>;
assigned-clock-rates = <600000000>,
- <600000000>,
+ <300000000>,
<800000000>,
<0>;
};
diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi
index 8643612ace8c..1d8dd14b65cf 100644
--- a/arch/arm64/boot/dts/freescale/imx93.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx93.dtsi
@@ -340,7 +340,7 @@
anatop: anatop@44480000 {
compatible = "fsl,imx93-anatop", "syscon";
- reg = <0x44480000 0x10000>;
+ reg = <0x44480000 0x2000>;
};
adc1: adc@44530000 {
diff --git a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
index 9022ad726741..a9e7b832c18c 100644
--- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
+++ b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts
@@ -121,7 +121,7 @@
};
};
- pm8150l-thermal {
+ pm8150l-pcb-thermal {
polling-delay-passive = <0>;
polling-delay = <0>;
thermal-sensors = <&pm8150l_adc_tm 1>;
diff --git a/arch/arm64/boot/dts/qcom/sa8775p-ride.dts b/arch/arm64/boot/dts/qcom/sa8775p-ride.dts
index ab767cfa51ff..26f5a4e0ffed 100644
--- a/arch/arm64/boot/dts/qcom/sa8775p-ride.dts
+++ b/arch/arm64/boot/dts/qcom/sa8775p-ride.dts
@@ -153,8 +153,8 @@
vreg_l4c: ldo4 {
regulator-name = "vreg_l4c";
- regulator-min-microvolt = <1100000>;
- regulator-max-microvolt = <1300000>;
+ regulator-min-microvolt = <1200000>;
+ regulator-max-microvolt = <1200000>;
regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>;
/*
* FIXME: This should have regulator-allow-set-load but
diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi
index e25dc2bb52a7..06df931d8cad 100644
--- a/arch/arm64/boot/dts/qcom/sc7180.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi
@@ -3120,8 +3120,8 @@
reg = <0 0x0ae94400 0 0x200>,
<0 0x0ae94600 0 0x280>,
<0 0x0ae94a00 0 0x1e0>;
- reg-names = "dsi0_phy",
- "dsi0_phy_lane",
+ reg-names = "dsi_phy",
+ "dsi_phy_lane",
"dsi_pll";
#clock-cells = <1>;
diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi
index d3ae18535636..be78a933d8eb 100644
--- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi
+++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi
@@ -3561,7 +3561,7 @@
};
osm_l3: interconnect@18321000 {
- compatible = "qcom,sc8180x-osm-l3";
+ compatible = "qcom,sc8180x-osm-l3", "qcom,osm-l3";
reg = <0 0x18321000 0 0x1400>;
clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi
index 18c822abdb88..b46e55bb8bde 100644
--- a/arch/arm64/boot/dts/qcom/sm8150.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi
@@ -56,7 +56,7 @@
qcom,freq-domain = <&cpufreq_hw 0>;
operating-points-v2 = <&cpu0_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
power-domains = <&CPU_PD0>;
power-domain-names = "psci";
#cooling-cells = <2>;
@@ -85,7 +85,7 @@
qcom,freq-domain = <&cpufreq_hw 0>;
operating-points-v2 = <&cpu0_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
power-domains = <&CPU_PD1>;
power-domain-names = "psci";
#cooling-cells = <2>;
@@ -109,7 +109,7 @@
qcom,freq-domain = <&cpufreq_hw 0>;
operating-points-v2 = <&cpu0_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
power-domains = <&CPU_PD2>;
power-domain-names = "psci";
#cooling-cells = <2>;
@@ -133,7 +133,7 @@
qcom,freq-domain = <&cpufreq_hw 0>;
operating-points-v2 = <&cpu0_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
power-domains = <&CPU_PD3>;
power-domain-names = "psci";
#cooling-cells = <2>;
@@ -157,7 +157,7 @@
qcom,freq-domain = <&cpufreq_hw 1>;
operating-points-v2 = <&cpu4_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
power-domains = <&CPU_PD4>;
power-domain-names = "psci";
#cooling-cells = <2>;
@@ -181,7 +181,7 @@
qcom,freq-domain = <&cpufreq_hw 1>;
operating-points-v2 = <&cpu4_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
power-domains = <&CPU_PD5>;
power-domain-names = "psci";
#cooling-cells = <2>;
@@ -205,7 +205,7 @@
qcom,freq-domain = <&cpufreq_hw 1>;
operating-points-v2 = <&cpu4_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
power-domains = <&CPU_PD6>;
power-domain-names = "psci";
#cooling-cells = <2>;
@@ -229,7 +229,7 @@
qcom,freq-domain = <&cpufreq_hw 2>;
operating-points-v2 = <&cpu7_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>;
+ <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>;
power-domains = <&CPU_PD7>;
power-domain-names = "psci";
#cooling-cells = <2>;
@@ -4342,7 +4342,7 @@
clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
clock-names = "xo", "alternate";
- #interconnect-cells = <2>;
+ #interconnect-cells = <1>;
};
cpufreq_hw: cpufreq@18323000 {
diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi
index 83ab6de459bc..1efa07f2caff 100644
--- a/arch/arm64/boot/dts/qcom/sm8250.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi
@@ -107,7 +107,7 @@
qcom,freq-domain = <&cpufreq_hw 0>;
operating-points-v2 = <&cpu0_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+ <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
#cooling-cells = <2>;
L2_0: l2-cache {
compatible = "cache";
@@ -138,7 +138,7 @@
qcom,freq-domain = <&cpufreq_hw 0>;
operating-points-v2 = <&cpu0_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+ <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
#cooling-cells = <2>;
L2_100: l2-cache {
compatible = "cache";
@@ -163,7 +163,7 @@
qcom,freq-domain = <&cpufreq_hw 0>;
operating-points-v2 = <&cpu0_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+ <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
#cooling-cells = <2>;
L2_200: l2-cache {
compatible = "cache";
@@ -188,7 +188,7 @@
qcom,freq-domain = <&cpufreq_hw 0>;
operating-points-v2 = <&cpu0_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+ <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
#cooling-cells = <2>;
L2_300: l2-cache {
compatible = "cache";
@@ -213,7 +213,7 @@
qcom,freq-domain = <&cpufreq_hw 1>;
operating-points-v2 = <&cpu4_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+ <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
#cooling-cells = <2>;
L2_400: l2-cache {
compatible = "cache";
@@ -238,7 +238,7 @@
qcom,freq-domain = <&cpufreq_hw 1>;
operating-points-v2 = <&cpu4_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+ <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
#cooling-cells = <2>;
L2_500: l2-cache {
compatible = "cache";
@@ -263,7 +263,7 @@
qcom,freq-domain = <&cpufreq_hw 1>;
operating-points-v2 = <&cpu4_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+ <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
#cooling-cells = <2>;
L2_600: l2-cache {
compatible = "cache";
@@ -288,7 +288,7 @@
qcom,freq-domain = <&cpufreq_hw 2>;
operating-points-v2 = <&cpu7_opp_table>;
interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>,
- <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>;
+ <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>;
#cooling-cells = <2>;
L2_700: l2-cache {
compatible = "cache";
@@ -5679,7 +5679,7 @@
clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
clock-names = "xo", "alternate";
- #interconnect-cells = <2>;
+ #interconnect-cells = <1>;
};
cpufreq_hw: cpufreq@18591000 {
diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi
index 88ef478cb5cc..ec451c616f3e 100644
--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi
+++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi
@@ -1744,6 +1744,8 @@
qcom,controlled-remotely;
iommus = <&apps_smmu 0x594 0x0011>,
<&apps_smmu 0x596 0x0011>;
+ /* FIXME: Probing BAM DMA causes some abort and system hang */
+ status = "fail";
};
crypto: crypto@1dfa000 {
@@ -1755,6 +1757,8 @@
<&apps_smmu 0x596 0x0011>;
interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>;
interconnect-names = "memory";
+ /* FIXME: dependency BAM DMA is disabled */
+ status = "disabled";
};
ipa: ipa@1e40000 {
diff --git a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi
index 232910e07444..66f68fc2b241 100644
--- a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi
@@ -223,20 +223,20 @@
<GIC_SPI 212 IRQ_TYPE_EDGE_RISING>,
<GIC_SPI 213 IRQ_TYPE_EDGE_RISING>;
interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0",
- "tgiv0", "tgie0", "tgif0",
- "tgia1", "tgib1", "tgiv1", "tgiu1",
- "tgia2", "tgib2", "tgiv2", "tgiu2",
+ "tciv0", "tgie0", "tgif0",
+ "tgia1", "tgib1", "tciv1", "tciu1",
+ "tgia2", "tgib2", "tciv2", "tciu2",
"tgia3", "tgib3", "tgic3", "tgid3",
- "tgiv3",
+ "tciv3",
"tgia4", "tgib4", "tgic4", "tgid4",
- "tgiv4",
+ "tciv4",
"tgiu5", "tgiv5", "tgiw5",
"tgia6", "tgib6", "tgic6", "tgid6",
- "tgiv6",
+ "tciv6",
"tgia7", "tgib7", "tgic7", "tgid7",
- "tgiv7",
+ "tciv7",
"tgia8", "tgib8", "tgic8", "tgid8",
- "tgiv8", "tgiu8";
+ "tciv8", "tciu8";
clocks = <&cpg CPG_MOD R9A07G044_MTU_X_MCK_MTU3>;
power-domains = <&cpg>;
resets = <&cpg R9A07G044_MTU_X_PRESET_MTU3>;
diff --git a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi
index 2eba3a8a100d..1f1d481dc783 100644
--- a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi
+++ b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi
@@ -223,20 +223,20 @@
<GIC_SPI 212 IRQ_TYPE_EDGE_RISING>,
<GIC_SPI 213 IRQ_TYPE_EDGE_RISING>;
interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0",
- "tgiv0", "tgie0", "tgif0",
- "tgia1", "tgib1", "tgiv1", "tgiu1",
- "tgia2", "tgib2", "tgiv2", "tgiu2",
+ "tciv0", "tgie0", "tgif0",
+ "tgia1", "tgib1", "tciv1", "tciu1",
+ "tgia2", "tgib2", "tciv2", "tciu2",
"tgia3", "tgib3", "tgic3", "tgid3",
- "tgiv3",
+ "tciv3",
"tgia4", "tgib4", "tgic4", "tgid4",
- "tgiv4",
+ "tciv4",
"tgiu5", "tgiv5", "tgiw5",
"tgia6", "tgib6", "tgic6", "tgid6",
- "tgiv6",
+ "tciv6",
"tgia7", "tgib7", "tgic7", "tgid7",
- "tgiv7",
+ "tciv7",
"tgia8", "tgib8", "tgic8", "tgid8",
- "tgiv8", "tgiu8";
+ "tciv8", "tciu8";
clocks = <&cpg CPG_MOD R9A07G054_MTU_X_MCK_MTU3>;
power-domains = <&cpg>;
resets = <&cpg R9A07G054_MTU_X_PRESET_MTU3>;
diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi
index 8332c8aaf49b..42ce78beb413 100644
--- a/arch/arm64/boot/dts/rockchip/px30.dtsi
+++ b/arch/arm64/boot/dts/rockchip/px30.dtsi
@@ -291,14 +291,14 @@
};
power-domain@PX30_PD_MMC_NAND {
reg = <PX30_PD_MMC_NAND>;
- clocks = <&cru HCLK_NANDC>,
- <&cru HCLK_EMMC>,
- <&cru HCLK_SDIO>,
- <&cru HCLK_SFC>,
- <&cru SCLK_EMMC>,
- <&cru SCLK_NANDC>,
- <&cru SCLK_SDIO>,
- <&cru SCLK_SFC>;
+ clocks = <&cru HCLK_NANDC>,
+ <&cru HCLK_EMMC>,
+ <&cru HCLK_SDIO>,
+ <&cru HCLK_SFC>,
+ <&cru SCLK_EMMC>,
+ <&cru SCLK_NANDC>,
+ <&cru SCLK_SDIO>,
+ <&cru SCLK_SFC>;
pm_qos = <&qos_emmc>, <&qos_nand>,
<&qos_sdio>, <&qos_sfc>;
#power-domain-cells = <0>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
index 7ea48167747c..9232357f4fec 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts
@@ -106,7 +106,6 @@
regulator-name = "vdd_core";
regulator-min-microvolt = <827000>;
regulator-max-microvolt = <1340000>;
- regulator-init-microvolt = <1015000>;
regulator-settling-time-up-us = <250>;
regulator-always-on;
regulator-boot-on;
diff --git a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
index a71f249ed384..e9810d2f0407 100644
--- a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts
@@ -105,7 +105,6 @@
regulator-name = "vdd_core";
regulator-min-microvolt = <827000>;
regulator-max-microvolt = <1340000>;
- regulator-init-microvolt = <1015000>;
regulator-settling-time-up-us = <250>;
regulator-always-on;
regulator-boot-on;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts
index d1f343345f67..6464ef4d113d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts
@@ -773,7 +773,7 @@
compatible = "brcm,bcm4329-fmac";
reg = <1>;
interrupt-parent = <&gpio0>;
- interrupts = <RK_PA3 GPIO_ACTIVE_HIGH>;
+ interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "host-wake";
pinctrl-names = "default";
pinctrl-0 = <&wifi_host_wake_l>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi
index b6e082f1f6d9..7c5f441a2219 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi
@@ -375,7 +375,6 @@
vcc_sdio: LDO_REG4 {
regulator-always-on;
regulator-boot-on;
- regulator-init-microvolt = <3000000>;
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <3300000>;
regulator-name = "vcc_sdio";
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts
index 028eb508ae30..8bfd5f88d1ef 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts
@@ -548,9 +548,8 @@
&sdhci {
max-frequency = <150000000>;
bus-width = <8>;
- mmc-hs400-1_8v;
+ mmc-hs200-1_8v;
non-removable;
- mmc-hs400-enhanced-strobe;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
index 907071d4fe80..980c4534313a 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi
@@ -45,7 +45,7 @@
sdio_pwrseq: sdio-pwrseq {
compatible = "mmc-pwrseq-simple";
clocks = <&rk808 1>;
- clock-names = "ext_clock";
+ clock-names = "lpo";
pinctrl-names = "default";
pinctrl-0 = <&wifi_enable_h>;
reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>;
@@ -645,9 +645,9 @@
};
&sdhci {
+ max-frequency = <150000000>;
bus-width = <8>;
- mmc-hs400-1_8v;
- mmc-hs400-enhanced-strobe;
+ mmc-hs200-1_8v;
non-removable;
status = "okay";
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts
index cec3b7b1b947..8a17c1eaae15 100644
--- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts
@@ -31,7 +31,7 @@
compatible = "brcm,bcm4329-fmac";
reg = <1>;
interrupt-parent = <&gpio0>;
- interrupts = <RK_PA3 GPIO_ACTIVE_HIGH>;
+ interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "host-wake";
pinctrl-names = "default";
pinctrl-0 = <&wifi_host_wake_l>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi
index a2c31d53b45b..8cbf3d9a4f22 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi
@@ -356,7 +356,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
regulator-name = "vdd_logic";
@@ -371,7 +370,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
regulator-name = "vdd_gpu";
@@ -533,7 +531,6 @@
regulator-boot-on;
regulator-min-microvolt = <712500>;
regulator-max-microvolt = <1390000>;
- regulator-init-microvolt = <900000>;
regulator-name = "vdd_cpu";
regulator-ramp-delay = <2300>;
vin-supply = <&vcc_sys>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts
index 410cd3e5e7bc..0c18406e4c59 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts
@@ -239,7 +239,7 @@
&gmac1 {
assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>;
- assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&gmac1_clkin>;
+ assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&gmac1_clkin>;
phy-mode = "rgmii";
clock_in_out = "input";
pinctrl-names = "default";
@@ -416,7 +416,7 @@
compatible = "brcm,bcm4329-fmac";
reg = <1>;
interrupt-parent = <&gpio2>;
- interrupts = <RK_PB2 GPIO_ACTIVE_HIGH>;
+ interrupts = <RK_PB2 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "host-wake";
pinctrl-names = "default";
pinctrl-0 = <&wifi_host_wake_h>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts
index ff936b713579..1c6d83b47cd2 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts
@@ -218,7 +218,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
@@ -233,7 +232,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
@@ -259,7 +257,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi
index 8d61f824c12d..d899087bf0b5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi
@@ -264,7 +264,6 @@
regulator-always-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
@@ -278,7 +277,6 @@
regulator-name = "vdd_gpu_npu";
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
index 25a8c781f4e7..854d02b46e6f 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts
@@ -366,7 +366,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
regulator-name = "vdd_logic";
@@ -381,7 +380,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
regulator-name = "vdd_gpu";
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
index b276eb0810c7..2d92713be2a0 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts
@@ -277,7 +277,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-state-mem {
@@ -292,7 +291,6 @@
regulator-boot-on;
regulator-min-microvolt = <900000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-state-mem {
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts
index 5e4236af4fcb..1b1c67d5b1ef 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts
@@ -137,8 +137,8 @@
&mdio1 {
rgmii_phy1: ethernet-phy@0 {
- compatible="ethernet-phy-ieee802.3-c22";
- reg= <0x0>;
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <0x0>;
};
};
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts
index 42889c5900bd..938092fce186 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts
@@ -278,7 +278,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-state-mem {
@@ -291,7 +290,6 @@
regulator-name = "vdd_gpu";
regulator-min-microvolt = <900000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-state-mem {
diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
index 31aa2b8efe39..63bae36b8f7e 100644
--- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi
@@ -234,7 +234,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
regulator-state-mem {
@@ -249,7 +248,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
regulator-state-mem {
@@ -272,7 +270,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-name = "vdd_npu";
regulator-state-mem {
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
index ff0bf24cc1a2..f9127ddfbb7d 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts
@@ -308,7 +308,6 @@
regulator-name = "vdd_logic";
regulator-always-on;
regulator-boot-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -322,7 +321,6 @@
vdd_gpu: DCDC_REG2 {
regulator-name = "vdd_gpu";
regulator-always-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -346,7 +344,6 @@
vdd_npu: DCDC_REG4 {
regulator-name = "vdd_npu";
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts
index 674792567fa6..19f8fc369b13 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts
@@ -293,7 +293,6 @@
regulator-name = "vdd_logic";
regulator-always-on;
regulator-boot-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -307,7 +306,6 @@
vdd_gpu: DCDC_REG2 {
regulator-name = "vdd_gpu";
regulator-always-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -331,7 +329,6 @@
vdd_npu: DCDC_REG4 {
regulator-name = "vdd_npu";
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi
index 25e205632a68..89e84e3a9262 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi
@@ -173,7 +173,6 @@
regulator-name = "vdd_logic";
regulator-always-on;
regulator-boot-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -187,7 +186,6 @@
vdd_gpu: DCDC_REG2 {
regulator-name = "vdd_gpu";
regulator-always-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -211,7 +209,6 @@
vdd_npu: DCDC_REG4 {
regulator-name = "vdd_npu";
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -330,7 +327,6 @@
vcca1v8_image: LDO_REG9 {
regulator-name = "vcca1v8_image";
- regulator-init-microvolt = <950000>;
regulator-min-microvolt = <950000>;
regulator-max-microvolt = <1800000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts
index e653b067aa5d..a8a4cc190eb3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts
@@ -243,7 +243,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
@@ -258,7 +257,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
@@ -284,7 +282,6 @@
regulator-boot-on;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
- regulator-init-microvolt = <900000>;
regulator-ramp-delay = <6001>;
regulator-initial-mode = <0x2>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi
index 58ba328ea782..93189f830640 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi
@@ -232,7 +232,6 @@
regulator-name = "vdd_logic";
regulator-always-on;
regulator-boot-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -246,7 +245,6 @@
vdd_gpu: DCDC_REG2 {
regulator-name = "vdd_gpu";
regulator-always-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -270,7 +268,6 @@
vdd_npu: DCDC_REG4 {
regulator-name = "vdd_npu";
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts
index 59ecf868dbd0..a337f547caf5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts
@@ -291,7 +291,6 @@
regulator-name = "vdd_logic";
regulator-always-on;
regulator-boot-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -305,7 +304,6 @@
vdd_gpu: DCDC_REG2 {
regulator-name = "vdd_gpu";
regulator-always-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -329,7 +327,6 @@
vdd_npu: DCDC_REG4 {
regulator-name = "vdd_npu";
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi
index c50fbdd48680..45b03dcbbad4 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi
+++ b/arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi
@@ -163,7 +163,6 @@
regulator-name = "vdd_logic";
regulator-always-on;
regulator-boot-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -177,7 +176,6 @@
vdd_gpu: DCDC_REG2 {
regulator-name = "vdd_gpu";
regulator-always-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -201,7 +199,6 @@
vdd_npu: DCDC_REG4 {
regulator-name = "vdd_npu";
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts
index 917f5b2b8aab..e05ab11981f5 100644
--- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts
@@ -350,7 +350,6 @@
regulator-name = "vdd_logic";
regulator-always-on;
regulator-boot-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -364,7 +363,6 @@
vdd_gpu: DCDC_REG2 {
regulator-name = "vdd_gpu";
regulator-always-on;
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
@@ -388,7 +386,6 @@
vdd_npu: DCDC_REG4 {
regulator-name = "vdd_npu";
- regulator-init-microvolt = <900000>;
regulator-initial-mode = <0x2>;
regulator-min-microvolt = <500000>;
regulator-max-microvolt = <1350000>;
diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
index afda976680bc..51537030f8e3 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts
@@ -337,7 +337,6 @@
regulator-boot-on;
regulator-min-microvolt = <550000>;
regulator-max-microvolt = <950000>;
- regulator-init-microvolt = <750000>;
regulator-ramp-delay = <12500>;
regulator-name = "vdd_vdenc_s0";
diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
index 4d9ed2a02736..1a60a275ddf9 100644
--- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
+++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts
@@ -125,19 +125,19 @@
cpu-supply = <&vdd_cpu_lit_s0>;
};
-&cpu_b0{
+&cpu_b0 {
cpu-supply = <&vdd_cpu_big0_s0>;
};
-&cpu_b1{
+&cpu_b1 {
cpu-supply = <&vdd_cpu_big0_s0>;
};
-&cpu_b2{
+&cpu_b2 {
cpu-supply = <&vdd_cpu_big1_s0>;
};
-&cpu_b3{
+&cpu_b3 {
cpu-supply = <&vdd_cpu_big1_s0>;
};
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index 0777bcae9104..a25d783dfb95 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -1469,7 +1469,7 @@ CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
CONFIG_CUSE=m
CONFIG_OVERLAY_FS=m
diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h
index 8e5ffb58f83e..b7afaa026842 100644
--- a/arch/arm64/include/asm/el2_setup.h
+++ b/arch/arm64/include/asm/el2_setup.h
@@ -31,6 +31,13 @@
.Lskip_hcrx_\@:
.endm
+/* Check if running in host at EL2 mode, i.e., (h)VHE. Jump to fail if not. */
+.macro __check_hvhe fail, tmp
+ mrs \tmp, hcr_el2
+ and \tmp, \tmp, #HCR_E2H
+ cbz \tmp, \fail
+.endm
+
/*
* Allow Non-secure EL1 and EL0 to access physical timer and counter.
* This is not necessary for VHE, since the host kernel runs in EL2,
@@ -43,9 +50,7 @@
*/
.macro __init_el2_timers
mov x0, #3 // Enable EL1 physical timers
- mrs x1, hcr_el2
- and x1, x1, #HCR_E2H
- cbz x1, .LnVHE_\@
+ __check_hvhe .LnVHE_\@, x1
lsl x0, x0, #10
.LnVHE_\@:
msr cnthctl_el2, x0
@@ -139,15 +144,14 @@
/* Coprocessor traps */
.macro __init_el2_cptr
- mrs x1, hcr_el2
- and x1, x1, #HCR_E2H
- cbz x1, .LnVHE_\@
+ __check_hvhe .LnVHE_\@, x1
mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN)
- b .Lset_cptr_\@
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_\@
.LnVHE_\@:
mov x0, #0x33ff
-.Lset_cptr_\@:
msr cptr_el2, x0 // Disable copro. traps to EL2
+.Lskip_set_cptr_\@:
.endm
/* Disable any fine grained traps */
@@ -268,19 +272,19 @@
check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2
.Linit_sve_\@: /* SVE register access */
- mrs x0, cptr_el2 // Disable SVE traps
- mrs x1, hcr_el2
- and x1, x1, #HCR_E2H
- cbz x1, .Lcptr_nvhe_\@
+ __check_hvhe .Lcptr_nvhe_\@, x1
- // VHE case
+ // (h)VHE case
+ mrs x0, cpacr_el1 // Disable SVE traps
orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN)
- b .Lset_cptr_\@
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_\@
.Lcptr_nvhe_\@: // nVHE case
+ mrs x0, cptr_el2 // Disable SVE traps
bic x0, x0, #CPTR_EL2_TZ
-.Lset_cptr_\@:
msr cptr_el2, x0
+.Lskip_set_cptr_\@:
isb
mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector
msr_s SYS_ZCR_EL2, x1 // length for EL1.
@@ -289,9 +293,19 @@
check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2
.Linit_sme_\@: /* SME register access and priority mapping */
+ __check_hvhe .Lcptr_nvhe_sme_\@, x1
+
+ // (h)VHE case
+ mrs x0, cpacr_el1 // Disable SME traps
+ orr x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN)
+ msr cpacr_el1, x0
+ b .Lskip_set_cptr_sme_\@
+
+.Lcptr_nvhe_sme_\@: // nVHE case
mrs x0, cptr_el2 // Disable SME traps
bic x0, x0, #CPTR_EL2_TSM
msr cptr_el2, x0
+.Lskip_set_cptr_sme_\@:
isb
mrs x1, sctlr_el2
diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
index 67f2fb781f59..8df46f186c64 100644
--- a/arch/arm64/include/asm/fpsimd.h
+++ b/arch/arm64/include/asm/fpsimd.h
@@ -356,7 +356,7 @@ static inline int sme_max_virtualisable_vl(void)
return vec_max_virtualisable_vl(ARM64_VEC_SME);
}
-extern void sme_alloc(struct task_struct *task);
+extern void sme_alloc(struct task_struct *task, bool flush);
extern unsigned int sme_get_vl(void);
extern int sme_set_current_vl(unsigned long arg);
extern int sme_get_current_vl(void);
@@ -388,7 +388,7 @@ static inline void sme_smstart_sm(void) { }
static inline void sme_smstop_sm(void) { }
static inline void sme_smstop(void) { }
-static inline void sme_alloc(struct task_struct *task) { }
+static inline void sme_alloc(struct task_struct *task, bool flush) { }
static inline void sme_setup(void) { }
static inline unsigned int sme_get_vl(void) { return 0; }
static inline int sme_max_vl(void) { return 0; }
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 7d170aaa2db4..24e28bb2d95b 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -278,7 +278,7 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void);
asmlinkage void kvm_unexpected_el2_exception(void);
struct kvm_cpu_context;
void handle_trap(struct kvm_cpu_context *host_ctxt);
-asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on);
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on);
void __noreturn __pkvm_init_finalise(void);
void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc);
void kvm_patch_vector_branch(struct alt_instr *alt,
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index efc0b45d79c3..3d6725ff0bf6 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -571,6 +571,14 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature)
return test_bit(feature, vcpu->arch.features);
}
+static __always_inline void kvm_write_cptr_el2(u64 val)
+{
+ if (has_vhe() || has_hvhe())
+ write_sysreg(val, cpacr_el1);
+ else
+ write_sysreg(val, cptr_el2);
+}
+
static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
{
u64 val;
@@ -578,8 +586,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu)
if (has_vhe()) {
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |
CPACR_EL1_ZEN_EL1EN);
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPACR_EL1_SMEN_EL1EN;
} else if (has_hvhe()) {
val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN);
+
+ if (!vcpu_has_sve(vcpu) ||
+ (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED))
+ val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN;
+ if (cpus_have_final_cap(ARM64_SME))
+ val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN;
} else {
val = CPTR_NVHE_EL2_RES1;
@@ -597,9 +613,6 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
{
u64 val = kvm_get_reset_cptr_el2(vcpu);
- if (has_vhe() || has_hvhe())
- write_sysreg(val, cpacr_el1);
- else
- write_sysreg(val, cptr_el2);
+ kvm_write_cptr_el2(val);
}
#endif /* __ARM64_KVM_EMULATE_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8b6096753740..d3dd05bbfe23 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -727,6 +727,8 @@ struct kvm_vcpu_arch {
#define DBG_SS_ACTIVE_PENDING __vcpu_single_flag(sflags, BIT(5))
/* PMUSERENR for the guest EL0 is on physical CPU */
#define PMUSERENR_ON_CPU __vcpu_single_flag(sflags, BIT(6))
+/* WFI instruction trapped */
+#define IN_WFI __vcpu_single_flag(sflags, BIT(7))
/* Pointer to the vcpu's SVE FFR for sve_{save,load}_state() */
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 8294a9a7e566..929d355eae0a 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -608,22 +608,26 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
/**
- * kvm_pgtable_stage2_mkold() - Clear the access flag in a page-table entry.
+ * kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
+ * flag in a page-table entry.
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
* @addr: Intermediate physical address to identify the page-table entry.
+ * @size: Size of the address range to visit.
+ * @mkold: True if the access flag should be cleared.
*
* The offset of @addr within a page is ignored.
*
- * If there is a valid, leaf page-table entry used to translate @addr, then
- * clear the access flag in that entry.
+ * Tests and conditionally clears the access flag for every valid, leaf
+ * page-table entry used to translate the range [@addr, @addr + @size).
*
* Note that it is the caller's responsibility to invalidate the TLB after
* calling this function to ensure that the updated permissions are visible
* to the CPUs.
*
- * Return: The old page-table entry prior to clearing the flag, 0 on failure.
+ * Return: True if any of the visited PTEs had the access flag set.
*/
-kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+ u64 size, bool mkold);
/**
* kvm_pgtable_stage2_relax_perms() - Relax the permissions enforced by a
@@ -646,18 +650,6 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
enum kvm_pgtable_prot prot);
/**
- * kvm_pgtable_stage2_is_young() - Test whether a page-table entry has the
- * access flag set.
- * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr: Intermediate physical address to identify the page-table entry.
- *
- * The offset of @addr within a page is ignored.
- *
- * Return: True if the page-table entry has the access flag set, false otherwise.
- */
-bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
-
-/**
* kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point
* of Coherency for guest stage-2 address
* range.
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 3918f2a67970..e5bc54522e71 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -359,14 +359,6 @@ static inline void prefetchw(const void *ptr)
asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr));
}
-#define ARCH_HAS_SPINLOCK_PREFETCH
-static inline void spin_lock_prefetch(const void *ptr)
-{
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- "prfm pstl1strm, %a0",
- "nop") : : "p" (ptr));
-}
-
extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */
extern void __init minsigstksz_setup(void);
diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h
index 64a514f90131..bd77253b62e0 100644
--- a/arch/arm64/include/asm/unistd.h
+++ b/arch/arm64/include/asm/unistd.h
@@ -39,7 +39,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)
-#define __NR_compat_syscalls 452
+#define __NR_compat_syscalls 453
#endif
#define __ARCH_WANT_SYS_CLONE
diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h
index d952a28463e0..78b68311ec81 100644
--- a/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@ -909,6 +909,8 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
#define __NR_cachestat 451
__SYSCALL(__NR_cachestat, sys_cachestat)
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
/*
* Please add new compat syscalls above this comment and update
diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h
index 5227db7640c8..261d6e9df2e1 100644
--- a/arch/arm64/include/asm/virt.h
+++ b/arch/arm64/include/asm/virt.h
@@ -78,6 +78,7 @@ extern u32 __boot_cpu_mode[2];
void __hyp_set_vectors(phys_addr_t phys_vector_base);
void __hyp_reset_vectors(void);
+bool is_kvm_arm_initialised(void);
DECLARE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
diff --git a/arch/arm64/include/uapi/asm/bitsperlong.h b/arch/arm64/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..485d60bee26c
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_BITSPERLONG_H */
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 7a1aeb95d7c3..087c05aa960e 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -679,7 +679,7 @@ static void fpsimd_to_sve(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -705,7 +705,7 @@ static void sve_to_fpsimd(struct task_struct *task)
unsigned int i;
__uint128_t const *p;
- if (!system_supports_sve())
+ if (!system_supports_sve() && !system_supports_sme())
return;
vl = thread_get_cur_vl(&task->thread);
@@ -835,7 +835,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- if (!test_tsk_thread_flag(task, TIF_SVE))
+ if (!test_tsk_thread_flag(task, TIF_SVE) &&
+ !thread_sm_enabled(&task->thread))
return;
vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
@@ -847,6 +848,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task)
int vec_set_vector_length(struct task_struct *task, enum vec_type type,
unsigned long vl, unsigned long flags)
{
+ bool free_sme = false;
+
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
PR_SVE_SET_VL_ONEXEC))
return -EINVAL;
@@ -897,24 +900,39 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
task->thread.fp_type = FP_STATE_FPSIMD;
}
- if (system_supports_sme() && type == ARM64_VEC_SME) {
- task->thread.svcr &= ~(SVCR_SM_MASK |
- SVCR_ZA_MASK);
- clear_thread_flag(TIF_SME);
+ if (system_supports_sme()) {
+ if (type == ARM64_VEC_SME ||
+ !(task->thread.svcr & (SVCR_SM_MASK | SVCR_ZA_MASK))) {
+ /*
+ * We are changing the SME VL or weren't using
+ * SME anyway, discard the state and force a
+ * reallocation.
+ */
+ task->thread.svcr &= ~(SVCR_SM_MASK |
+ SVCR_ZA_MASK);
+ clear_tsk_thread_flag(task, TIF_SME);
+ free_sme = true;
+ }
}
if (task == current)
put_cpu_fpsimd_context();
+ task_set_vl(task, type, vl);
+
/*
- * Force reallocation of task SVE and SME state to the correct
- * size on next use:
+ * Free the changed states if they are not in use, SME will be
+ * reallocated to the correct size on next use and we just
+ * allocate SVE now in case it is needed for use in streaming
+ * mode.
*/
- sve_free(task);
- if (system_supports_sme() && type == ARM64_VEC_SME)
- sme_free(task);
+ if (system_supports_sve()) {
+ sve_free(task);
+ sve_alloc(task, true);
+ }
- task_set_vl(task, type, vl);
+ if (free_sme)
+ sme_free(task);
out:
update_tsk_thread_flag(task, vec_vl_inherit_flag(type),
@@ -1267,9 +1285,9 @@ void fpsimd_release_task(struct task_struct *dead_task)
* the interest of testability and predictability, the architecture
* guarantees that when ZA is enabled it will be zeroed.
*/
-void sme_alloc(struct task_struct *task)
+void sme_alloc(struct task_struct *task, bool flush)
{
- if (task->thread.sme_state) {
+ if (task->thread.sme_state && flush) {
memset(task->thread.sme_state, 0, sme_state_size(task));
return;
}
@@ -1497,7 +1515,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs)
}
sve_alloc(current, false);
- sme_alloc(current);
+ sme_alloc(current, true);
if (!current->thread.sve_state || !current->thread.sme_state) {
force_sig(SIGKILL);
return;
@@ -1649,7 +1667,6 @@ void fpsimd_flush_thread(void)
fpsimd_flush_thread_vl(ARM64_VEC_SME);
current->thread.svcr = 0;
- sme_smstop();
}
current->thread.fp_type = FP_STATE_FPSIMD;
diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
index d7f4f0d1ae12..187aa2b175b4 100644
--- a/arch/arm64/kernel/ptrace.c
+++ b/arch/arm64/kernel/ptrace.c
@@ -881,6 +881,13 @@ static int sve_set_common(struct task_struct *target,
break;
case ARM64_VEC_SME:
target->thread.svcr |= SVCR_SM_MASK;
+
+ /*
+ * Disable traps and ensure there is SME storage but
+ * preserve any currently set values in ZA/ZT.
+ */
+ sme_alloc(target, false);
+ set_tsk_thread_flag(target, TIF_SME);
break;
default:
WARN_ON_ONCE(1);
@@ -932,11 +939,13 @@ static int sve_set_common(struct task_struct *target,
/*
* Ensure target->thread.sve_state is up to date with target's
* FPSIMD regs, so that a short copyin leaves trailing
- * registers unmodified. Always enable SVE even if going into
- * streaming mode.
+ * registers unmodified. Only enable SVE if we are
+ * configuring normal SVE, a system with streaming SVE may not
+ * have normal SVE.
*/
fpsimd_sync_to_sve(target);
- set_tsk_thread_flag(target, TIF_SVE);
+ if (type == ARM64_VEC_SVE)
+ set_tsk_thread_flag(target, TIF_SVE);
target->thread.fp_type = FP_STATE_SVE;
BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
@@ -1098,7 +1107,7 @@ static int za_set(struct task_struct *target,
}
/* Allocate/reinit ZA storage */
- sme_alloc(target);
+ sme_alloc(target, true);
if (!target->thread.sme_state) {
ret = -ENOMEM;
goto out;
@@ -1168,8 +1177,13 @@ static int zt_set(struct task_struct *target,
if (!system_supports_sme2())
return -EINVAL;
+ /* Ensure SVE storage in case this is first use of SME */
+ sve_alloc(target, false);
+ if (!target->thread.sve_state)
+ return -ENOMEM;
+
if (!thread_za_enabled(&target->thread)) {
- sme_alloc(target);
+ sme_alloc(target, true);
if (!target->thread.sme_state)
return -ENOMEM;
}
@@ -1177,8 +1191,12 @@ static int zt_set(struct task_struct *target,
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
thread_zt_state(&target->thread),
0, ZT_SIG_REG_BYTES);
- if (ret == 0)
+ if (ret == 0) {
target->thread.svcr |= SVCR_ZA_MASK;
+ set_tsk_thread_flag(target, TIF_SME);
+ }
+
+ fpsimd_flush_task_state(target);
return ret;
}
diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
index e304f7ebec2a..c7ebe744c64e 100644
--- a/arch/arm64/kernel/signal.c
+++ b/arch/arm64/kernel/signal.c
@@ -475,7 +475,7 @@ static int restore_za_context(struct user_ctxs *user)
fpsimd_flush_task_state(current);
/* From now, fpsimd_thread_switch() won't touch thread.sve_state */
- sme_alloc(current);
+ sme_alloc(current, true);
if (!current->thread.sme_state) {
current->thread.svcr &= ~SVCR_ZA_MASK;
clear_thread_flag(TIF_SME);
diff --git a/arch/arm64/kernel/vdso/vgettimeofday.c b/arch/arm64/kernel/vdso/vgettimeofday.c
index 4236cf34d7d9..9941c5b04f15 100644
--- a/arch/arm64/kernel/vdso/vgettimeofday.c
+++ b/arch/arm64/kernel/vdso/vgettimeofday.c
@@ -6,6 +6,10 @@
*
*/
+int __kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
+int __kernel_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
+int __kernel_clock_getres(clockid_t clock_id, struct __kernel_timespec *res);
+
int __kernel_clock_gettime(clockid_t clock,
struct __kernel_timespec *ts)
{
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 0696732fa38c..6dcdae4d38cb 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -827,8 +827,8 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
- /* This only happens on VHE, so use the CNTKCTL_EL1 accessor */
- sysreg_clear_set(cntkctl_el1, clr, set);
+ /* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
+ sysreg_clear_set(cnthctl_el2, clr, set);
}
void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
@@ -1563,7 +1563,7 @@ no_vgic:
void kvm_timer_init_vhe(void)
{
if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
- sysreg_clear_set(cntkctl_el1, 0, CNTHCTL_ECV);
+ sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
}
int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index c2c14059f6a8..d1cb298a58a0 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -53,11 +53,16 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt);
-static bool vgic_present;
+static bool vgic_present, kvm_arm_initialised;
-static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled);
+static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized);
DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
+bool is_kvm_arm_initialised(void)
+{
+ return kvm_arm_initialised;
+}
+
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE;
@@ -713,13 +718,15 @@ void kvm_vcpu_wfi(struct kvm_vcpu *vcpu)
*/
preempt_disable();
kvm_vgic_vmcr_sync(vcpu);
- vgic_v4_put(vcpu, true);
+ vcpu_set_flag(vcpu, IN_WFI);
+ vgic_v4_put(vcpu);
preempt_enable();
kvm_vcpu_halt(vcpu);
vcpu_clear_flag(vcpu, IN_WFIT);
preempt_disable();
+ vcpu_clear_flag(vcpu, IN_WFI);
vgic_v4_load(vcpu);
preempt_enable();
}
@@ -787,7 +794,7 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) {
/* The distributor enable bits were changed */
preempt_disable();
- vgic_v4_put(vcpu, false);
+ vgic_v4_put(vcpu);
vgic_v4_load(vcpu);
preempt_enable();
}
@@ -1857,45 +1864,49 @@ static void cpu_hyp_reinit(void)
cpu_hyp_init_features();
}
-static void _kvm_arch_hardware_enable(void *discard)
+static void cpu_hyp_init(void *discard)
{
- if (!__this_cpu_read(kvm_arm_hardware_enabled)) {
+ if (!__this_cpu_read(kvm_hyp_initialized)) {
cpu_hyp_reinit();
- __this_cpu_write(kvm_arm_hardware_enabled, 1);
+ __this_cpu_write(kvm_hyp_initialized, 1);
+ }
+}
+
+static void cpu_hyp_uninit(void *discard)
+{
+ if (__this_cpu_read(kvm_hyp_initialized)) {
+ cpu_hyp_reset();
+ __this_cpu_write(kvm_hyp_initialized, 0);
}
}
int kvm_arch_hardware_enable(void)
{
- int was_enabled = __this_cpu_read(kvm_arm_hardware_enabled);
+ /*
+ * Most calls to this function are made with migration
+ * disabled, but not with preemption disabled. The former is
+ * enough to ensure correctness, but most of the helpers
+ * expect the later and will throw a tantrum otherwise.
+ */
+ preempt_disable();
- _kvm_arch_hardware_enable(NULL);
+ cpu_hyp_init(NULL);
- if (!was_enabled) {
- kvm_vgic_cpu_up();
- kvm_timer_cpu_up();
- }
+ kvm_vgic_cpu_up();
+ kvm_timer_cpu_up();
- return 0;
-}
+ preempt_enable();
-static void _kvm_arch_hardware_disable(void *discard)
-{
- if (__this_cpu_read(kvm_arm_hardware_enabled)) {
- cpu_hyp_reset();
- __this_cpu_write(kvm_arm_hardware_enabled, 0);
- }
+ return 0;
}
void kvm_arch_hardware_disable(void)
{
- if (__this_cpu_read(kvm_arm_hardware_enabled)) {
- kvm_timer_cpu_down();
- kvm_vgic_cpu_down();
- }
+ kvm_timer_cpu_down();
+ kvm_vgic_cpu_down();
if (!is_protected_kvm_enabled())
- _kvm_arch_hardware_disable(NULL);
+ cpu_hyp_uninit(NULL);
}
#ifdef CONFIG_CPU_PM
@@ -1904,16 +1915,16 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
void *v)
{
/*
- * kvm_arm_hardware_enabled is left with its old value over
+ * kvm_hyp_initialized is left with its old value over
* PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should
* re-enable hyp.
*/
switch (cmd) {
case CPU_PM_ENTER:
- if (__this_cpu_read(kvm_arm_hardware_enabled))
+ if (__this_cpu_read(kvm_hyp_initialized))
/*
- * don't update kvm_arm_hardware_enabled here
- * so that the hardware will be re-enabled
+ * don't update kvm_hyp_initialized here
+ * so that the hyp will be re-enabled
* when we resume. See below.
*/
cpu_hyp_reset();
@@ -1921,8 +1932,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self,
return NOTIFY_OK;
case CPU_PM_ENTER_FAILED:
case CPU_PM_EXIT:
- if (__this_cpu_read(kvm_arm_hardware_enabled))
- /* The hardware was enabled before suspend. */
+ if (__this_cpu_read(kvm_hyp_initialized))
+ /* The hyp was enabled before suspend. */
cpu_hyp_reinit();
return NOTIFY_OK;
@@ -2003,7 +2014,7 @@ static int __init init_subsystems(void)
/*
* Enable hardware so that subsystem initialisation can access EL2.
*/
- on_each_cpu(_kvm_arch_hardware_enable, NULL, 1);
+ on_each_cpu(cpu_hyp_init, NULL, 1);
/*
* Register CPU lower-power notifier
@@ -2041,7 +2052,7 @@ out:
hyp_cpu_pm_exit();
if (err || !is_protected_kvm_enabled())
- on_each_cpu(_kvm_arch_hardware_disable, NULL, 1);
+ on_each_cpu(cpu_hyp_uninit, NULL, 1);
return err;
}
@@ -2079,7 +2090,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits)
* The stub hypercalls are now disabled, so set our local flag to
* prevent a later re-init attempt in kvm_arch_hardware_enable().
*/
- __this_cpu_write(kvm_arm_hardware_enabled, 1);
+ __this_cpu_write(kvm_hyp_initialized, 1);
preempt_enable();
return ret;
@@ -2482,6 +2493,8 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;
+ kvm_arm_initialised = true;
+
return 0;
out_subs:
diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S
index 8f3f93fa119e..03f97d71984c 100644
--- a/arch/arm64/kvm/hyp/hyp-entry.S
+++ b/arch/arm64/kvm/hyp/hyp-entry.S
@@ -154,6 +154,12 @@ SYM_CODE_END(\label)
esb
stp x0, x1, [sp, #-16]!
662:
+ /*
+ * spectre vectors __bp_harden_hyp_vecs generate br instructions at runtime
+ * that jump at offset 8 at __kvm_hyp_vector.
+ * As hyp .text is guarded section, it needs bti j.
+ */
+ bti j
b \target
check_preamble_length 661b, 662b
@@ -165,6 +171,8 @@ check_preamble_length 661b, 662b
nop
stp x0, x1, [sp, #-16]!
662:
+ /* Check valid_vect */
+ bti j
b \target
check_preamble_length 661b, 662b
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 4bddb8541bec..34f222af6165 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -457,6 +457,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu)
*/
val &= ~(TCR_HD | TCR_HA);
write_sysreg_el1(val, SYS_TCR);
+ __kvm_skip_instr(vcpu);
return true;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c
index 58dcd92bf346..ab4f5d160c58 100644
--- a/arch/arm64/kvm/hyp/nvhe/ffa.c
+++ b/arch/arm64/kvm/hyp/nvhe/ffa.c
@@ -705,7 +705,20 @@ int hyp_ffa_init(void *pages)
if (res.a0 == FFA_RET_NOT_SUPPORTED)
return 0;
- if (res.a0 != FFA_VERSION_1_0)
+ /*
+ * Firmware returns the maximum supported version of the FF-A
+ * implementation. Check that the returned version is
+ * backwards-compatible with the hyp according to the rules in DEN0077A
+ * v1.1 REL0 13.2.1.
+ *
+ * Of course, things are never simple when dealing with firmware. v1.1
+ * broke ABI with v1.0 on several structures, which is itself
+ * incompatible with the aforementioned versioning scheme. The
+ * expectation is that v1.x implementations that do not support the v1.0
+ * ABI return NOT_SUPPORTED rather than a version number, according to
+ * DEN0077A v1.1 REL0 18.6.4.
+ */
+ if (FFA_MAJOR_VERSION(res.a0) != 1)
return -EOPNOTSUPP;
arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res);
diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S
index c87c63133e10..7693a6757cd7 100644
--- a/arch/arm64/kvm/hyp/nvhe/host.S
+++ b/arch/arm64/kvm/hyp/nvhe/host.S
@@ -297,3 +297,13 @@ SYM_CODE_START(__kvm_hyp_host_forward_smc)
ret
SYM_CODE_END(__kvm_hyp_host_forward_smc)
+
+/*
+ * kvm_host_psci_cpu_entry is called through br instruction, which requires
+ * bti j instruction as compilers (gcc and llvm) doesn't insert bti j for external
+ * functions, but bti c instead.
+ */
+SYM_CODE_START(kvm_host_psci_cpu_entry)
+ bti j
+ b __kvm_host_psci_cpu_entry
+SYM_CODE_END(kvm_host_psci_cpu_entry)
diff --git a/arch/arm64/kvm/hyp/nvhe/psci-relay.c b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
index 08508783ec3d..24543d2a3490 100644
--- a/arch/arm64/kvm/hyp/nvhe/psci-relay.c
+++ b/arch/arm64/kvm/hyp/nvhe/psci-relay.c
@@ -200,7 +200,7 @@ static int psci_system_suspend(u64 func_id, struct kvm_cpu_context *host_ctxt)
__hyp_pa(init_params), 0);
}
-asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on)
+asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on)
{
struct psci_boot_args *boot_args;
struct kvm_cpu_context *host_ctxt;
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 0a6271052def..e89a23153e85 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -63,7 +63,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu)
__activate_traps_fpsimd32(vcpu);
}
- write_sysreg(val, cptr_el2);
+ kvm_write_cptr_el2(val);
write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2);
if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) {
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index aa740a974e02..f7a93ef29250 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1195,25 +1195,54 @@ kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
return pte;
}
-kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
+struct stage2_age_data {
+ bool mkold;
+ bool young;
+};
+
+static int stage2_age_walker(const struct kvm_pgtable_visit_ctx *ctx,
+ enum kvm_pgtable_walk_flags visit)
{
- kvm_pte_t pte = 0;
- stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
- &pte, NULL, 0);
+ kvm_pte_t new = ctx->old & ~KVM_PTE_LEAF_ATTR_LO_S2_AF;
+ struct stage2_age_data *data = ctx->arg;
+
+ if (!kvm_pte_valid(ctx->old) || new == ctx->old)
+ return 0;
+
+ data->young = true;
+
+ /*
+ * stage2_age_walker() is always called while holding the MMU lock for
+ * write, so this will always succeed. Nonetheless, this deliberately
+ * follows the race detection pattern of the other stage-2 walkers in
+ * case the locking mechanics of the MMU notifiers is ever changed.
+ */
+ if (data->mkold && !stage2_try_set_pte(ctx, new))
+ return -EAGAIN;
+
/*
* "But where's the TLBI?!", you scream.
* "Over in the core code", I sigh.
*
* See the '->clear_flush_young()' callback on the KVM mmu notifier.
*/
- return pte;
+ return 0;
}
-bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
+bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
+ u64 size, bool mkold)
{
- kvm_pte_t pte = 0;
- stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0);
- return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
+ struct stage2_age_data data = {
+ .mkold = mkold,
+ };
+ struct kvm_pgtable_walker walker = {
+ .cb = stage2_age_walker,
+ .arg = &data,
+ .flags = KVM_PGTABLE_WALK_LEAF,
+ };
+
+ WARN_ON(kvm_pgtable_walk(pgt, addr, size, &walker));
+ return data.young;
}
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 6db9ef288ec3..d3b4feed460c 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1756,27 +1756,25 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
u64 size = (range->end - range->start) << PAGE_SHIFT;
- kvm_pte_t kpte;
- pte_t pte;
if (!kvm->arch.mmu.pgt)
return false;
- WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE);
-
- kpte = kvm_pgtable_stage2_mkold(kvm->arch.mmu.pgt,
- range->start << PAGE_SHIFT);
- pte = __pte(kpte);
- return pte_valid(pte) && pte_young(pte);
+ return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+ range->start << PAGE_SHIFT,
+ size, true);
}
bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
{
+ u64 size = (range->end - range->start) << PAGE_SHIFT;
+
if (!kvm->arch.mmu.pgt)
return false;
- return kvm_pgtable_stage2_is_young(kvm->arch.mmu.pgt,
- range->start << PAGE_SHIFT);
+ return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+ range->start << PAGE_SHIFT,
+ size, false);
}
phys_addr_t kvm_mmu_get_httbr(void)
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 994a494703c3..6ff3ec18c925 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -244,7 +244,7 @@ static int __init finalize_pkvm(void)
{
int ret;
- if (!is_protected_kvm_enabled())
+ if (!is_protected_kvm_enabled() || !is_kvm_arm_initialised())
return 0;
/*
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index bd3431823ec5..2ca2973abe66 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -986,7 +986,6 @@ static bool access_pmu_evtyper(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
kvm_pmu_set_counter_event_type(vcpu, p->regval, idx);
- __vcpu_sys_reg(vcpu, reg) = p->regval & ARMV8_PMU_EVTYPE_MASK;
kvm_vcpu_pmu_restore_guest(vcpu);
} else {
p->regval = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_MASK;
@@ -1115,18 +1114,19 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
{ SYS_DESC(SYS_DBGWCRn_EL1(n)), \
trap_wcr, reset_wcr, 0, 0, get_wcr, set_wcr }
-#define PMU_SYS_REG(r) \
- SYS_DESC(r), .reset = reset_pmu_reg, .visibility = pmu_visibility
+#define PMU_SYS_REG(name) \
+ SYS_DESC(SYS_##name), .reset = reset_pmu_reg, \
+ .visibility = pmu_visibility
/* Macro to expand the PMEVCNTRn_EL0 register */
#define PMU_PMEVCNTR_EL0(n) \
- { PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \
+ { PMU_SYS_REG(PMEVCNTRn_EL0(n)), \
.reset = reset_pmevcntr, .get_user = get_pmu_evcntr, \
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
/* Macro to expand the PMEVTYPERn_EL0 register */
#define PMU_PMEVTYPER_EL0(n) \
- { PMU_SYS_REG(SYS_PMEVTYPERn_EL0(n)), \
+ { PMU_SYS_REG(PMEVTYPERn_EL0(n)), \
.reset = reset_pmevtyper, \
.access = access_pmu_evtyper, .reg = (PMEVTYPER0_EL0 + n), }
@@ -2115,9 +2115,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_PMBSR_EL1), undef_access },
/* PMBIDR_EL1 is not trapped */
- { PMU_SYS_REG(SYS_PMINTENSET_EL1),
+ { PMU_SYS_REG(PMINTENSET_EL1),
.access = access_pminten, .reg = PMINTENSET_EL1 },
- { PMU_SYS_REG(SYS_PMINTENCLR_EL1),
+ { PMU_SYS_REG(PMINTENCLR_EL1),
.access = access_pminten, .reg = PMINTENSET_EL1 },
{ SYS_DESC(SYS_PMMIR_EL1), trap_raz_wi },
@@ -2164,41 +2164,41 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ SYS_DESC(SYS_CTR_EL0), access_ctr },
{ SYS_DESC(SYS_SVCR), undef_access },
- { PMU_SYS_REG(SYS_PMCR_EL0), .access = access_pmcr,
+ { PMU_SYS_REG(PMCR_EL0), .access = access_pmcr,
.reset = reset_pmcr, .reg = PMCR_EL0 },
- { PMU_SYS_REG(SYS_PMCNTENSET_EL0),
+ { PMU_SYS_REG(PMCNTENSET_EL0),
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
- { PMU_SYS_REG(SYS_PMCNTENCLR_EL0),
+ { PMU_SYS_REG(PMCNTENCLR_EL0),
.access = access_pmcnten, .reg = PMCNTENSET_EL0 },
- { PMU_SYS_REG(SYS_PMOVSCLR_EL0),
+ { PMU_SYS_REG(PMOVSCLR_EL0),
.access = access_pmovs, .reg = PMOVSSET_EL0 },
/*
* PM_SWINC_EL0 is exposed to userspace as RAZ/WI, as it was
* previously (and pointlessly) advertised in the past...
*/
- { PMU_SYS_REG(SYS_PMSWINC_EL0),
+ { PMU_SYS_REG(PMSWINC_EL0),
.get_user = get_raz_reg, .set_user = set_wi_reg,
.access = access_pmswinc, .reset = NULL },
- { PMU_SYS_REG(SYS_PMSELR_EL0),
+ { PMU_SYS_REG(PMSELR_EL0),
.access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 },
- { PMU_SYS_REG(SYS_PMCEID0_EL0),
+ { PMU_SYS_REG(PMCEID0_EL0),
.access = access_pmceid, .reset = NULL },
- { PMU_SYS_REG(SYS_PMCEID1_EL0),
+ { PMU_SYS_REG(PMCEID1_EL0),
.access = access_pmceid, .reset = NULL },
- { PMU_SYS_REG(SYS_PMCCNTR_EL0),
+ { PMU_SYS_REG(PMCCNTR_EL0),
.access = access_pmu_evcntr, .reset = reset_unknown,
.reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr},
- { PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
+ { PMU_SYS_REG(PMXEVTYPER_EL0),
.access = access_pmu_evtyper, .reset = NULL },
- { PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
+ { PMU_SYS_REG(PMXEVCNTR_EL0),
.access = access_pmu_evcntr, .reset = NULL },
/*
* PMUSERENR_EL0 resets as unknown in 64bit mode while it resets as zero
* in 32bit mode. Here we choose to reset it as zero for consistency.
*/
- { PMU_SYS_REG(SYS_PMUSERENR_EL0), .access = access_pmuserenr,
+ { PMU_SYS_REG(PMUSERENR_EL0), .access = access_pmuserenr,
.reset = reset_val, .reg = PMUSERENR_EL0, .val = 0 },
- { PMU_SYS_REG(SYS_PMOVSSET_EL0),
+ { PMU_SYS_REG(PMOVSSET_EL0),
.access = access_pmovs, .reg = PMOVSSET_EL0 },
{ SYS_DESC(SYS_TPIDR_EL0), NULL, reset_unknown, TPIDR_EL0 },
@@ -2354,7 +2354,7 @@ static const struct sys_reg_desc sys_reg_descs[] = {
* PMCCFILTR_EL0 resets as unknown in 64bit mode while it resets as zero
* in 32bit mode. Here we choose to reset it as zero for consistency.
*/
- { PMU_SYS_REG(SYS_PMCCFILTR_EL0), .access = access_pmu_evtyper,
+ { PMU_SYS_REG(PMCCFILTR_EL0), .access = access_pmu_evtyper,
.reset = reset_val, .reg = PMCCFILTR_EL0, .val = 0 },
EL2_REG(VPIDR_EL2, access_rw, reset_unknown, 0),
diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c
index c3b8e132d599..3dfc8b84e03e 100644
--- a/arch/arm64/kvm/vgic/vgic-v3.c
+++ b/arch/arm64/kvm/vgic/vgic-v3.c
@@ -749,7 +749,7 @@ void vgic_v3_put(struct kvm_vcpu *vcpu)
{
struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
- WARN_ON(vgic_v4_put(vcpu, false));
+ WARN_ON(vgic_v4_put(vcpu));
vgic_v3_vmcr_sync(vcpu);
diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c
index c1c28fe680ba..339a55194b2c 100644
--- a/arch/arm64/kvm/vgic/vgic-v4.c
+++ b/arch/arm64/kvm/vgic/vgic-v4.c
@@ -336,14 +336,14 @@ void vgic_v4_teardown(struct kvm *kvm)
its_vm->vpes = NULL;
}
-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
+int vgic_v4_put(struct kvm_vcpu *vcpu)
{
struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
return 0;
- return its_make_vpe_non_resident(vpe, need_db);
+ return its_make_vpe_non_resident(vpe, !!vcpu_get_flag(vcpu, IN_WFI));
}
int vgic_v4_load(struct kvm_vcpu *vcpu)
@@ -354,6 +354,9 @@ int vgic_v4_load(struct kvm_vcpu *vcpu)
if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
return 0;
+ if (vcpu_get_flag(vcpu, IN_WFI))
+ return 0;
+
/*
* Before making the VPE resident, make sure the redistributor
* corresponding to our current CPU expects us here. See the
diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c
index 4ea2eefbc053..e9ad391fc8ea 100644
--- a/arch/arm64/mm/trans_pgd.c
+++ b/arch/arm64/mm/trans_pgd.c
@@ -24,6 +24,7 @@
#include <linux/bug.h>
#include <linux/mm.h>
#include <linux/mmzone.h>
+#include <linux/kfence.h>
static void *trans_alloc(struct trans_pgd_info *info)
{
@@ -41,7 +42,8 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr)
* the temporary mappings we use during restore.
*/
set_pte(dst_ptep, pte_mkwrite(pte));
- } else if (debug_pagealloc_enabled() && !pte_none(pte)) {
+ } else if ((debug_pagealloc_enabled() ||
+ is_kfence_address((void *)addr)) && !pte_none(pte)) {
/*
* debug_pagealloc will removed the PTE_VALID bit if
* the page isn't in use by the resume kernel. It may have
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 145b540ec34f..ec2174838f2a 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -322,7 +322,13 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
*
*/
- emit_bti(A64_BTI_C, ctx);
+ /* bpf function may be invoked by 3 instruction types:
+ * 1. bl, attached via freplace to bpf prog via short jump
+ * 2. br, attached via freplace to bpf prog via long jump
+ * 3. blr, working as a function pointer, used by emit_call.
+ * So BTI_JC should used here to support both br and blr.
+ */
+ emit_bti(A64_BTI_JC, ctx);
emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
emit(A64_NOP, ctx);
diff --git a/arch/arm64/tools/sysreg b/arch/arm64/tools/sysreg
index 1ea4a3dc68f8..65866bf819c3 100644
--- a/arch/arm64/tools/sysreg
+++ b/arch/arm64/tools/sysreg
@@ -2017,7 +2017,7 @@ Field 0 SM
EndSysreg
SysregFields HFGxTR_EL2
-Field 63 nAMIAIR2_EL1
+Field 63 nAMAIR2_EL1
Field 62 nMAIR2_EL1
Field 61 nS2POR_EL1
Field 60 nPOR_EL1
@@ -2032,9 +2032,9 @@ Field 52 nGCS_EL0
Res0 51
Field 50 nACCDATA_EL1
Field 49 ERXADDR_EL1
-Field 48 EXRPFGCDN_EL1
-Field 47 EXPFGCTL_EL1
-Field 46 EXPFGF_EL1
+Field 48 ERXPFGCDN_EL1
+Field 47 ERXPFGCTL_EL1
+Field 46 ERXPFGF_EL1
Field 45 ERXMISCn_EL1
Field 44 ERXSTATUS_EL1
Field 43 ERXCTLR_EL1
@@ -2049,8 +2049,8 @@ Field 35 TPIDR_EL0
Field 34 TPIDRRO_EL0
Field 33 TPIDR_EL1
Field 32 TCR_EL1
-Field 31 SCTXNUM_EL0
-Field 30 SCTXNUM_EL1
+Field 31 SCXTNUM_EL0
+Field 30 SCXTNUM_EL1
Field 29 SCTLR_EL1
Field 28 REVIDR_EL1
Field 27 PAR_EL1
diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig
index 3e1337aceb37..7cb96db9a25d 100644
--- a/arch/ia64/configs/bigsur_defconfig
+++ b/arch/ia64/configs/bigsur_defconfig
@@ -77,7 +77,7 @@ CONFIG_EXT3_FS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig
index f8033bacea89..4581240013dd 100644
--- a/arch/ia64/configs/generic_defconfig
+++ b/arch/ia64/configs/generic_defconfig
@@ -146,7 +146,7 @@ CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_XFS_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig
index ffebe6c503f5..c9e806616544 100644
--- a/arch/ia64/configs/gensparse_defconfig
+++ b/arch/ia64/configs/gensparse_defconfig
@@ -127,7 +127,7 @@ CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_XFS_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
index 45f5d6e2da0a..d7d8fb5c7b71 100644
--- a/arch/ia64/configs/tiger_defconfig
+++ b/arch/ia64/configs/tiger_defconfig
@@ -110,7 +110,7 @@ CONFIG_REISERFS_FS_XATTR=y
CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_XFS_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
index d1978e004054..47e3801b526a 100644
--- a/arch/ia64/include/asm/processor.h
+++ b/arch/ia64/include/asm/processor.h
@@ -634,7 +634,6 @@ ia64_imva (void *addr)
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
#define PREFETCH_STRIDE L1_CACHE_BYTES
static inline void
@@ -649,8 +648,6 @@ prefetchw (const void *x)
ia64_lfetch_excl(ia64_lfhint_none, x);
}
-#define spin_lock_prefetch(x) prefetchw(x)
-
extern unsigned long boot_option_idle_override;
enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT,
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
index 6e948d015332..eb561cc93632 100644
--- a/arch/ia64/kernel/sys_ia64.c
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -63,7 +63,7 @@ arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len
info.low_limit = addr;
info.high_limit = TASK_SIZE;
info.align_mask = align_mask;
- info.align_offset = 0;
+ info.align_offset = pgoff << PAGE_SHIFT;
return vm_unmapped_area(&info);
}
diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl
index f8c74ffeeefb..83d8609aec03 100644
--- a/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@ -372,3 +372,4 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e55511af4c77..465759f6b0ed 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -14,6 +14,7 @@ config LOONGARCH
select ARCH_HAS_CPU_FINALIZE_INIT
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_NMI_SAFE_THIS_CPU_OPS
+ select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_INLINE_READ_LOCK if !PREEMPTION
@@ -661,5 +662,3 @@ source "kernel/power/Kconfig"
source "drivers/acpi/Kconfig"
endmenu
-
-source "drivers/firmware/Kconfig"
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 09ba338a64de..ef87bab46754 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -68,6 +68,8 @@ LDFLAGS_vmlinux += -static -n -nostdlib
ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS
cflags-y += $(call cc-option,-mexplicit-relocs)
KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access)
+KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
+KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax)
else
cflags-y += $(call cc-option,-mno-explicit-relocs)
KBUILD_AFLAGS_KERNEL += -Wa,-mla-global-with-pcrel
@@ -81,8 +83,8 @@ KBUILD_CFLAGS_KERNEL += -fPIE
LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext
endif
-cflags-y += -ffreestanding
cflags-y += $(call cc-option, -mno-check-zero-division)
+cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset
load-y = 0x9000000000200000
bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y)
@@ -111,7 +113,7 @@ KBUILD_CFLAGS += -isystem $(shell $(CC) -print-file-name=include)
KBUILD_LDFLAGS += -m $(ld-emul)
-ifdef CONFIG_LOONGARCH
+ifdef need-compiler
CHECKFLAGS += $(shell $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
grep -E -vw '__GNUC_(MINOR_|PATCHLEVEL_)?_' | \
sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/" -e 's/\$$/&&/g')
diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 6cd26dd3c134..d64849b4cba1 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -769,7 +769,7 @@ CONFIG_QUOTA=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=y
CONFIG_OVERLAY_FS_INDEX=y
diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 6b222f227342..93783fa24f6e 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
generic-y += dma-contiguous.h
-generic-y += export.h
generic-y += mcs_spinlock.h
generic-y += parport.h
generic-y += early_ioremap.h
diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h
index e4193d637f66..c2d8962fda00 100644
--- a/arch/loongarch/include/asm/fpu.h
+++ b/arch/loongarch/include/asm/fpu.h
@@ -173,16 +173,30 @@ static inline void restore_fp(struct task_struct *tsk)
_restore_fp(&tsk->thread.fpu);
}
-static inline union fpureg *get_fpu_regs(struct task_struct *tsk)
+static inline void save_fpu_regs(struct task_struct *tsk)
{
+ unsigned int euen;
+
if (tsk == current) {
preempt_disable();
- if (is_fpu_owner())
+
+ euen = csr_read32(LOONGARCH_CSR_EUEN);
+
+#ifdef CONFIG_CPU_HAS_LASX
+ if (euen & CSR_EUEN_LASXEN)
+ _save_lasx(&current->thread.fpu);
+ else
+#endif
+#ifdef CONFIG_CPU_HAS_LSX
+ if (euen & CSR_EUEN_LSXEN)
+ _save_lsx(&current->thread.fpu);
+ else
+#endif
+ if (euen & CSR_EUEN_FPEN)
_save_fp(&current->thread.fpu);
+
preempt_enable();
}
-
- return tsk->thread.fpu.fpr;
}
static inline int is_simd_owner(void)
@@ -218,15 +232,8 @@ static inline void restore_lsx(struct task_struct *t)
static inline void init_lsx_upper(void)
{
- /*
- * Check cpu_has_lsx only if it's a constant. This will allow the
- * compiler to optimise out code for CPUs without LSX without adding
- * an extra redundant check for CPUs with LSX.
- */
- if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
- return;
-
- _init_lsx_upper();
+ if (cpu_has_lsx)
+ _init_lsx_upper();
}
static inline void restore_lsx_upper(struct task_struct *t)
@@ -294,7 +301,7 @@ static inline void restore_lasx_upper(struct task_struct *t) {}
static inline int thread_lsx_context_live(void)
{
- if (__builtin_constant_p(cpu_has_lsx) && !cpu_has_lsx)
+ if (!cpu_has_lsx)
return 0;
return test_thread_flag(TIF_LSX_CTX_LIVE);
@@ -302,7 +309,7 @@ static inline int thread_lsx_context_live(void)
static inline int thread_lasx_context_live(void)
{
- if (__builtin_constant_p(cpu_has_lasx) && !cpu_has_lasx)
+ if (!cpu_has_lasx)
return 0;
return test_thread_flag(TIF_LASX_CTX_LIVE);
diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h
index 35f0958163ac..f3ddaed9ef7f 100644
--- a/arch/loongarch/include/asm/ptrace.h
+++ b/arch/loongarch/include/asm/ptrace.h
@@ -162,7 +162,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val
#define instruction_pointer(regs) ((regs)->csr_era)
#define profile_pc(regs) instruction_pointer(regs)
-extern void die(const char *, struct pt_regs *) __noreturn;
+extern void die(const char *str, struct pt_regs *regs);
static inline void die_if_kernel(const char *str, struct pt_regs *regs)
{
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index 416b653bccb4..66ecb480c894 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -98,8 +98,6 @@ static inline void __cpu_die(unsigned int cpu)
{
loongson_cpu_die(cpu);
}
-
-extern void __noreturn play_dead(void);
#endif
#endif /* __ASM_SMP_H */
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index f3df5f0a4509..501094a09f5d 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -6,12 +6,12 @@
*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/asm-extable.h>
#include <asm/asm-offsets.h>
#include <asm/errno.h>
-#include <asm/export.h>
#include <asm/fpregdef.h>
#include <asm/loongarch.h>
#include <asm/regdef.h>
diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c
index 021b59c248fa..fc55c4de2a11 100644
--- a/arch/loongarch/kernel/hw_breakpoint.c
+++ b/arch/loongarch/kernel/hw_breakpoint.c
@@ -207,8 +207,7 @@ static int hw_breakpoint_control(struct perf_event *bp,
write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE);
} else {
ctrl = encode_ctrl_reg(info->ctrl);
- write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE |
- 1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn);
+ write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE);
}
enable = csr_read64(LOONGARCH_CSR_CRMD);
csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD);
diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S
index cb8e5803de4b..3015896016a0 100644
--- a/arch/loongarch/kernel/mcount.S
+++ b/arch/loongarch/kernel/mcount.S
@@ -5,7 +5,7 @@
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
-#include <asm/export.h>
+#include <linux/export.h>
#include <asm/ftrace.h>
#include <asm/regdef.h>
#include <asm/stackframe.h>
diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
index e16ab0b98e5a..482aa553aa2d 100644
--- a/arch/loongarch/kernel/mcount_dyn.S
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -3,7 +3,6 @@
* Copyright (C) 2022 Loongson Technology Corporation Limited
*/
-#include <asm/export.h>
#include <asm/ftrace.h>
#include <asm/regdef.h>
#include <asm/stackframe.h>
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index 2e04eb07abb6..4ee1e9d6a65f 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -61,13 +61,6 @@ EXPORT_SYMBOL(__stack_chk_guard);
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
-#ifdef CONFIG_HOTPLUG_CPU
-void __noreturn arch_cpu_idle_dead(void)
-{
- play_dead();
-}
-#endif
-
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c
index a0767c3a0f0a..f72adbf530c6 100644
--- a/arch/loongarch/kernel/ptrace.c
+++ b/arch/loongarch/kernel/ptrace.c
@@ -147,6 +147,8 @@ static int fpr_get(struct task_struct *target,
{
int r;
+ save_fpu_regs(target);
+
if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
r = gfpr_get(target, &to);
else
@@ -278,6 +280,8 @@ static int simd_get(struct task_struct *target,
{
const unsigned int wr_size = NUM_FPU_REGS * regset->size;
+ save_fpu_regs(target);
+
if (!tsk_used_math(target)) {
/* The task hasn't used FP or LSX, fill with 0xff */
copy_pad_fprs(target, regset, &to, 0);
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 78a00359bde3..9d830ab4e302 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -332,9 +332,25 @@ static void __init bootcmdline_init(char **cmdline_p)
strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE);
+ goto out;
}
#endif
+ /*
+ * Append built-in command line to the bootloader command line if
+ * CONFIG_CMDLINE_EXTEND is enabled.
+ */
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) && CONFIG_CMDLINE[0]) {
+ strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+ strlcat(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+ }
+
+ /*
+ * Use built-in command line if the bootloader command line is empty.
+ */
+ if (IS_ENABLED(CONFIG_CMDLINE_BOOTLOADER) && !boot_command_line[0])
+ strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+
out:
*cmdline_p = boot_command_line;
}
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 8ea1bbcf13a7..6667b0a90f81 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -317,7 +317,7 @@ void loongson_cpu_die(unsigned int cpu)
mb();
}
-void play_dead(void)
+void __noreturn arch_cpu_idle_dead(void)
{
register uint64_t addr;
register void (*init_fn)(void);
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 8fb5e7a77145..89699db45cec 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -383,16 +383,15 @@ void show_registers(struct pt_regs *regs)
static DEFINE_RAW_SPINLOCK(die_lock);
-void __noreturn die(const char *str, struct pt_regs *regs)
+void die(const char *str, struct pt_regs *regs)
{
+ int ret;
static int die_counter;
- int sig = SIGSEGV;
oops_enter();
- if (notify_die(DIE_OOPS, str, regs, 0, current->thread.trap_nr,
- SIGSEGV) == NOTIFY_STOP)
- sig = 0;
+ ret = notify_die(DIE_OOPS, str, regs, 0,
+ current->thread.trap_nr, SIGSEGV);
console_verbose();
raw_spin_lock_irq(&die_lock);
@@ -405,6 +404,9 @@ void __noreturn die(const char *str, struct pt_regs *regs)
oops_exit();
+ if (ret == NOTIFY_STOP)
+ return;
+
if (regs && kexec_should_crash(current))
crash_kexec(regs);
@@ -414,7 +416,7 @@ void __noreturn die(const char *str, struct pt_regs *regs)
if (panic_on_oops)
panic("Fatal exception");
- make_task_dead(sig);
+ make_task_dead(SIGSEGV);
}
static inline void setup_vint_size(unsigned int size)
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index fd1d62b244f2..0790eadce166 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -3,12 +3,12 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/asm-extable.h>
#include <asm/cpu.h>
-#include <asm/export.h>
#include <asm/regdef.h>
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
@@ -108,6 +108,7 @@ SYM_FUNC_START(__clear_user_fast)
addi.d a3, a2, -8
bgeu a0, a3, .Llt8
15: st.d zero, a0, 0
+ addi.d a0, a0, 8
.Llt8:
16: st.d zero, a2, -8
@@ -188,7 +189,7 @@ SYM_FUNC_START(__clear_user_fast)
_asm_extable 13b, .L_fixup_handle_0
_asm_extable 14b, .L_fixup_handle_1
_asm_extable 15b, .L_fixup_handle_0
- _asm_extable 16b, .L_fixup_handle_1
+ _asm_extable 16b, .L_fixup_handle_0
_asm_extable 17b, .L_fixup_handle_s0
_asm_extable 18b, .L_fixup_handle_s0
_asm_extable 19b, .L_fixup_handle_s0
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index b21f6d5d38f5..bfe3d2793d00 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -3,12 +3,12 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/asm-extable.h>
#include <asm/cpu.h>
-#include <asm/export.h>
#include <asm/regdef.h>
.irp to, 0, 1, 2, 3, 4, 5, 6, 7
@@ -136,6 +136,7 @@ SYM_FUNC_START(__copy_user_fast)
bgeu a1, a4, .Llt8
30: ld.d t0, a1, 0
31: st.d t0, a0, 0
+ addi.d a0, a0, 8
.Llt8:
32: ld.d t0, a3, -8
@@ -246,7 +247,7 @@ SYM_FUNC_START(__copy_user_fast)
_asm_extable 30b, .L_fixup_handle_0
_asm_extable 31b, .L_fixup_handle_0
_asm_extable 32b, .L_fixup_handle_0
- _asm_extable 33b, .L_fixup_handle_1
+ _asm_extable 33b, .L_fixup_handle_0
_asm_extable 34b, .L_fixup_handle_s0
_asm_extable 35b, .L_fixup_handle_s0
_asm_extable 36b, .L_fixup_handle_s0
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
index 39ce6621c704..cc30b3b6252f 100644
--- a/arch/loongarch/lib/memcpy.S
+++ b/arch/loongarch/lib/memcpy.S
@@ -3,11 +3,11 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
-#include <asm/export.h>
#include <asm/regdef.h>
SYM_FUNC_START(memcpy)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
index 45b725ba7867..7dc76d1484b6 100644
--- a/arch/loongarch/lib/memmove.S
+++ b/arch/loongarch/lib/memmove.S
@@ -3,11 +3,11 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
-#include <asm/export.h>
#include <asm/regdef.h>
SYM_FUNC_START(memmove)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
index b39c6194e3ae..3f20f7996e8e 100644
--- a/arch/loongarch/lib/memset.S
+++ b/arch/loongarch/lib/memset.S
@@ -3,11 +3,11 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/export.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/asmmacro.h>
#include <asm/cpu.h>
-#include <asm/export.h>
#include <asm/regdef.h>
.macro fill_to_64 r0
diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
index 9177fd638f07..185f82d85810 100644
--- a/arch/loongarch/lib/unaligned.S
+++ b/arch/loongarch/lib/unaligned.S
@@ -9,7 +9,6 @@
#include <asm/asmmacro.h>
#include <asm/asm-extable.h>
#include <asm/errno.h>
-#include <asm/export.h>
#include <asm/regdef.h>
.L_fixup_handle_unaligned:
diff --git a/arch/loongarch/mm/page.S b/arch/loongarch/mm/page.S
index 4c874a7af0ad..7ad76551d313 100644
--- a/arch/loongarch/mm/page.S
+++ b/arch/loongarch/mm/page.S
@@ -2,9 +2,9 @@
/*
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
+#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/asm.h>
-#include <asm/export.h>
#include <asm/page.h>
#include <asm/regdef.h>
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index 4ad78703de6f..ca17dd3a1915 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -3,7 +3,6 @@
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#include <asm/asm.h>
-#include <asm/export.h>
#include <asm/loongarch.h>
#include <asm/page.h>
#include <asm/pgtable.h>
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
index c335dc4eed37..68586338ecf8 100644
--- a/arch/loongarch/net/bpf_jit.h
+++ b/arch/loongarch/net/bpf_jit.h
@@ -150,7 +150,7 @@ static inline void move_imm(struct jit_ctx *ctx, enum loongarch_gpr rd, long imm
* no need to call lu32id to do a new filled operation.
*/
imm_51_31 = (imm >> 31) & 0x1fffff;
- if (imm_51_31 != 0 || imm_51_31 != 0x1fffff) {
+ if (imm_51_31 != 0 && imm_51_31 != 0x1fffff) {
/* lu32id rd, imm_51_32 */
imm_51_32 = (imm >> 32) & 0xfffff;
emit_insn(ctx, lu32id, rd, imm_51_32);
diff --git a/arch/m68k/fpsp040/skeleton.S b/arch/m68k/fpsp040/skeleton.S
index 439395aa6fb4..081922c72daa 100644
--- a/arch/m68k/fpsp040/skeleton.S
+++ b/arch/m68k/fpsp040/skeleton.S
@@ -499,13 +499,13 @@ in_ea:
dbf %d0,morein
rts
- .section .fixup,#alloc,#execinstr
+ .section .fixup,"ax"
.even
1:
jbsr fpsp040_die
jbra .Lnotkern
- .section __ex_table,#alloc
+ .section __ex_table,"a"
.align 4
.long in_ea,1b
diff --git a/arch/m68k/ifpsp060/os.S b/arch/m68k/ifpsp060/os.S
index 7a0d6e428066..89e2ec224ab6 100644
--- a/arch/m68k/ifpsp060/os.S
+++ b/arch/m68k/ifpsp060/os.S
@@ -379,11 +379,11 @@ _060_real_access:
| Execption handling for movs access to illegal memory
- .section .fixup,#alloc,#execinstr
+ .section .fixup,"ax"
.even
1: moveq #-1,%d1
rts
-.section __ex_table,#alloc
+.section __ex_table,"a"
.align 4
.long dmrbuae,1b
.long dmrwuae,1b
diff --git a/arch/m68k/kernel/relocate_kernel.S b/arch/m68k/kernel/relocate_kernel.S
index ab0f1e7d4653..f7667079e08e 100644
--- a/arch/m68k/kernel/relocate_kernel.S
+++ b/arch/m68k/kernel/relocate_kernel.S
@@ -26,7 +26,7 @@ ENTRY(relocate_new_kernel)
lea %pc@(.Lcopy),%a4
2: addl #0x00000000,%a4 /* virt_to_phys() */
- .section ".m68k_fixup","aw"
+ .section .m68k_fixup,"aw"
.long M68K_FIXUP_MEMOFFSET, 2b+2
.previous
@@ -49,7 +49,7 @@ ENTRY(relocate_new_kernel)
lea %pc@(.Lcont040),%a4
5: addl #0x00000000,%a4 /* virt_to_phys() */
- .section ".m68k_fixup","aw"
+ .section .m68k_fixup,"aw"
.long M68K_FIXUP_MEMOFFSET, 5b+2
.previous
diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl
index 4f504783371f..259ceb125367 100644
--- a/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@ -451,3 +451,4 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl
index 858d22bf275c..a3798c2637fd 100644
--- a/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@ -457,3 +457,4 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/mips/configs/bigsur_defconfig b/arch/mips/configs/bigsur_defconfig
index b0cbfa781ffc..e463a9acae03 100644
--- a/arch/mips/configs/bigsur_defconfig
+++ b/arch/mips/configs/bigsur_defconfig
@@ -153,7 +153,7 @@ CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/fuloong2e_defconfig b/arch/mips/configs/fuloong2e_defconfig
index 5458573286a2..1843468f84a3 100644
--- a/arch/mips/configs/fuloong2e_defconfig
+++ b/arch/mips/configs/fuloong2e_defconfig
@@ -178,7 +178,7 @@ CONFIG_EXT3_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_REISERFS_FS=m
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig
index 897e55579af0..44821f497261 100644
--- a/arch/mips/configs/ip22_defconfig
+++ b/arch/mips/configs/ip22_defconfig
@@ -245,7 +245,7 @@ CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig
index 418a4181e2fe..930c5f6ed182 100644
--- a/arch/mips/configs/ip32_defconfig
+++ b/arch/mips/configs/ip32_defconfig
@@ -95,7 +95,7 @@ CONFIG_EXT3_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig
index 6b5d7e963ed9..fdf374574105 100644
--- a/arch/mips/configs/jazz_defconfig
+++ b/arch/mips/configs/jazz_defconfig
@@ -76,7 +76,7 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 7e598d338979..83d9a8ff4270 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig
@@ -240,7 +240,7 @@ CONFIG_XFS_POSIX_ACL=y
CONFIG_BTRFS_FS=m
CONFIG_QUOTA=y
CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FSCACHE=m
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=m
diff --git a/arch/mips/configs/loongson2k_defconfig b/arch/mips/configs/loongson2k_defconfig
index 0ab029ecad21..ec3ee8df737d 100644
--- a/arch/mips/configs/loongson2k_defconfig
+++ b/arch/mips/configs/loongson2k_defconfig
@@ -296,7 +296,7 @@ CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_QUOTA=y
# CONFIG_PRINT_QUOTA_WARNING is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 3087e64e6ebe..129426351237 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig
@@ -352,7 +352,7 @@ CONFIG_QUOTA=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
CONFIG_VIRTIO_FS=m
CONFIG_FSCACHE=m
diff --git a/arch/mips/configs/mtx1_defconfig b/arch/mips/configs/mtx1_defconfig
index b64172179160..935585d8bb26 100644
--- a/arch/mips/configs/mtx1_defconfig
+++ b/arch/mips/configs/mtx1_defconfig
@@ -601,7 +601,7 @@ CONFIG_EXT3_FS=m
CONFIG_EXT3_FS_POSIX_ACL=y
CONFIG_EXT3_FS_SECURITY=y
CONFIG_QUOTA=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/configs/pic32mzda_defconfig b/arch/mips/configs/pic32mzda_defconfig
index 8397f28355d5..0e494c24246f 100644
--- a/arch/mips/configs/pic32mzda_defconfig
+++ b/arch/mips/configs/pic32mzda_defconfig
@@ -66,7 +66,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_FSCACHE=m
CONFIG_ISO9660_FS=m
diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig
index 7475c2cbea89..e0e312dd968a 100644
--- a/arch/mips/configs/rm200_defconfig
+++ b/arch/mips/configs/rm200_defconfig
@@ -317,7 +317,7 @@ CONFIG_REISERFS_FS_POSIX_ACL=y
CONFIG_REISERFS_FS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_QUOTA=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
index 9151dcd9d0d5..af9cea21c853 100644
--- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
+++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h
@@ -58,8 +58,6 @@
#define cpu_has_rixi (cpu_data[0].cputype != CPU_CAVIUM_OCTEON)
-#define ARCH_HAS_SPINLOCK_PREFETCH 1
-#define spin_lock_prefetch(x) prefetch(x)
#define PREFETCH_STRIDE 128
#ifdef __OCTEON__
diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl
index 1976317d4e8b..152034b8e0a0 100644
--- a/arch/mips/kernel/syscalls/syscall_n32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n32.tbl
@@ -390,3 +390,4 @@
449 n32 futex_waitv sys_futex_waitv
450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node
451 n32 cachestat sys_cachestat
+452 n32 fchmodat2 sys_fchmodat2
diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl
index cfda2511badf..cb5e757f6621 100644
--- a/arch/mips/kernel/syscalls/syscall_n64.tbl
+++ b/arch/mips/kernel/syscalls/syscall_n64.tbl
@@ -366,3 +366,4 @@
449 n64 futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 n64 cachestat sys_cachestat
+452 n64 fchmodat2 sys_fchmodat2
diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl
index 7692234c3768..1a646813afdc 100644
--- a/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@ -439,3 +439,4 @@
449 o32 futex_waitv sys_futex_waitv
450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node
451 o32 cachestat sys_cachestat
+452 o32 fchmodat2 sys_fchmodat2
diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug
index 1401e4c5fe5f..bf2b21b96f0b 100644
--- a/arch/parisc/Kconfig.debug
+++ b/arch/parisc/Kconfig.debug
@@ -2,7 +2,7 @@
#
config LIGHTWEIGHT_SPINLOCK_CHECK
bool "Enable lightweight spinlock checks"
- depends on SMP && !DEBUG_SPINLOCK
+ depends on DEBUG_KERNEL && SMP && !DEBUG_SPINLOCK
default y
help
Add checks with low performance impact to the spinlock functions
diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c
index 7ee49f5881d1..d389359e22ac 100644
--- a/arch/parisc/boot/compressed/misc.c
+++ b/arch/parisc/boot/compressed/misc.c
@@ -117,7 +117,7 @@ char *strchr(const char *s, int c)
return NULL;
}
-int puts(const char *s)
+static int puts(const char *s)
{
const char *nuline = s;
@@ -172,7 +172,7 @@ static int print_num(unsigned long num, int base)
return 0;
}
-int printf(const char *fmt, ...)
+static int printf(const char *fmt, ...)
{
va_list args;
int i = 0;
@@ -204,13 +204,13 @@ void abort(void)
}
#undef malloc
-void *malloc(size_t size)
+static void *malloc(size_t size)
{
return malloc_gzip(size);
}
#undef free
-void free(void *ptr)
+static void free(void *ptr)
{
return free_gzip(ptr);
}
@@ -278,7 +278,7 @@ static void parse_elf(void *output)
free(phdrs);
}
-unsigned long decompress_kernel(unsigned int started_wide,
+asmlinkage unsigned long __visible decompress_kernel(unsigned int started_wide,
unsigned int command_line,
const unsigned int rd_start,
const unsigned int rd_end)
diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig
index 9651f4390029..ee4febb30386 100644
--- a/arch/parisc/configs/generic-32bit_defconfig
+++ b/arch/parisc/configs/generic-32bit_defconfig
@@ -237,7 +237,7 @@ CONFIG_EXT3_FS_SECURITY=y
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_VFAT_FS=y
diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig
index 6758c030b09d..f6ded7147b4d 100644
--- a/arch/parisc/configs/generic-64bit_defconfig
+++ b/arch/parisc/configs/generic-64bit_defconfig
@@ -259,7 +259,7 @@ CONFIG_BTRFS_FS=m
CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_FUSE_FS=y
CONFIG_CUSE=y
CONFIG_ISO9660_FS=y
diff --git a/arch/parisc/include/asm/dma.h b/arch/parisc/include/asm/dma.h
index 9e8c101de902..582fb5d1a5d5 100644
--- a/arch/parisc/include/asm/dma.h
+++ b/arch/parisc/include/asm/dma.h
@@ -14,6 +14,8 @@
#define dma_outb outb
#define dma_inb inb
+extern unsigned long pcxl_dma_start;
+
/*
** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
** (or rather not merge) DMAs into manageable chunks.
diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h
index a7cf0d05ccf4..f1cc1ee3a647 100644
--- a/arch/parisc/include/asm/ftrace.h
+++ b/arch/parisc/include/asm/ftrace.h
@@ -12,6 +12,10 @@ extern void mcount(void);
extern unsigned long sys_call_table[];
extern unsigned long return_address(unsigned int);
+struct ftrace_regs;
+extern void ftrace_function_trampoline(unsigned long parent,
+ unsigned long self_addr, unsigned long org_sp_gr3,
+ struct ftrace_regs *fregs);
#ifdef CONFIG_DYNAMIC_FTRACE
extern void ftrace_caller(void);
diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h
index edfcb9858bcb..0b326e52255e 100644
--- a/arch/parisc/include/asm/spinlock.h
+++ b/arch/parisc/include/asm/spinlock.h
@@ -7,8 +7,6 @@
#include <asm/processor.h>
#include <asm/spinlock_types.h>
-#define SPINLOCK_BREAK_INSN 0x0000c006 /* break 6,6 */
-
static inline void arch_spin_val_check(int lock_val)
{
if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK))
diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h
index d65934079ebd..efd06a897c6a 100644
--- a/arch/parisc/include/asm/spinlock_types.h
+++ b/arch/parisc/include/asm/spinlock_types.h
@@ -4,6 +4,10 @@
#define __ARCH_SPIN_LOCK_UNLOCKED_VAL 0x1a46
+#define SPINLOCK_BREAK_INSN 0x0000c006 /* break 6,6 */
+
+#ifndef __ASSEMBLY__
+
typedef struct {
#ifdef CONFIG_PA20
volatile unsigned int slock;
@@ -27,6 +31,8 @@ typedef struct {
volatile unsigned int counter;
} arch_rwlock_t;
+#endif /* __ASSEMBLY__ */
+
#define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000
#define __ARCH_RW_LOCK_UNLOCKED { .lock_mutex = __ARCH_SPIN_LOCK_UNLOCKED, \
.counter = __ARCH_RW_LOCK_UNLOCKED__ }
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 0e5ebfe8d9d2..ae03b8679696 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -25,6 +25,7 @@
#include <asm/traps.h>
#include <asm/thread_info.h>
#include <asm/alternative.h>
+#include <asm/spinlock_types.h>
#include <linux/linkage.h>
#include <linux/pgtable.h>
@@ -406,7 +407,7 @@
LDREG 0(\ptp),\pte
bb,<,n \pte,_PAGE_PRESENT_BIT,3f
b \fault
- stw \spc,0(\tmp)
+ stw \tmp1,0(\tmp)
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
2: LDREG 0(\ptp),\pte
@@ -415,24 +416,22 @@
.endm
/* Release page_table_lock without reloading lock address.
- Note that the values in the register spc are limited to
- NR_SPACE_IDS (262144). Thus, the stw instruction always
- stores a nonzero value even when register spc is 64 bits.
We use an ordered store to ensure all prior accesses are
performed prior to releasing the lock. */
- .macro ptl_unlock0 spc,tmp
+ .macro ptl_unlock0 spc,tmp,tmp2
#ifdef CONFIG_TLB_PTLOCK
-98: or,COND(=) %r0,\spc,%r0
- stw,ma \spc,0(\tmp)
+98: ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmp2
+ or,COND(=) %r0,\spc,%r0
+ stw,ma \tmp2,0(\tmp)
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
.endm
/* Release page_table_lock. */
- .macro ptl_unlock1 spc,tmp
+ .macro ptl_unlock1 spc,tmp,tmp2
#ifdef CONFIG_TLB_PTLOCK
98: get_ptl \tmp
- ptl_unlock0 \spc,\tmp
+ ptl_unlock0 \spc,\tmp,\tmp2
99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP)
#endif
.endm
@@ -1125,7 +1124,7 @@ dtlb_miss_20w:
idtlbt pte,prot
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1151,7 +1150,7 @@ nadtlb_miss_20w:
idtlbt pte,prot
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1185,7 +1184,7 @@ dtlb_miss_11:
mtsp t1, %sr1 /* Restore sr1 */
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1218,7 +1217,7 @@ nadtlb_miss_11:
mtsp t1, %sr1 /* Restore sr1 */
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1247,7 +1246,7 @@ dtlb_miss_20:
idtlbt pte,prot
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1275,7 +1274,7 @@ nadtlb_miss_20:
idtlbt pte,prot
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1320,7 +1319,7 @@ itlb_miss_20w:
iitlbt pte,prot
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1344,7 +1343,7 @@ naitlb_miss_20w:
iitlbt pte,prot
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1378,7 +1377,7 @@ itlb_miss_11:
mtsp t1, %sr1 /* Restore sr1 */
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1402,7 +1401,7 @@ naitlb_miss_11:
mtsp t1, %sr1 /* Restore sr1 */
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1432,7 +1431,7 @@ itlb_miss_20:
iitlbt pte,prot
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1452,7 +1451,7 @@ naitlb_miss_20:
iitlbt pte,prot
- ptl_unlock1 spc,t0
+ ptl_unlock1 spc,t0,t1
rfir
nop
@@ -1482,7 +1481,7 @@ dbit_trap_20w:
idtlbt pte,prot
- ptl_unlock0 spc,t0
+ ptl_unlock0 spc,t0,t1
rfir
nop
#else
@@ -1508,7 +1507,7 @@ dbit_trap_11:
mtsp t1, %sr1 /* Restore sr1 */
- ptl_unlock0 spc,t0
+ ptl_unlock0 spc,t0,t1
rfir
nop
@@ -1528,7 +1527,7 @@ dbit_trap_20:
idtlbt pte,prot
- ptl_unlock0 spc,t0
+ ptl_unlock0 spc,t0,t1
rfir
nop
#endif
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 6d1c781eb1db..8f37e75f2fb9 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -74,8 +74,8 @@
static DEFINE_SPINLOCK(pdc_lock);
#endif
-unsigned long pdc_result[NUM_PDC_RESULT] __aligned(8);
-unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
+static unsigned long pdc_result[NUM_PDC_RESULT] __aligned(8);
+static unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
#ifdef CONFIG_64BIT
#define WIDE_FIRMWARE 0x1
@@ -334,7 +334,7 @@ int __pdc_cpu_rendezvous(void)
/**
* pdc_cpu_rendezvous_lock - Lock PDC while transitioning to rendezvous state
*/
-void pdc_cpu_rendezvous_lock(void)
+void pdc_cpu_rendezvous_lock(void) __acquires(&pdc_lock)
{
spin_lock(&pdc_lock);
}
@@ -342,7 +342,7 @@ void pdc_cpu_rendezvous_lock(void)
/**
* pdc_cpu_rendezvous_unlock - Unlock PDC after reaching rendezvous state
*/
-void pdc_cpu_rendezvous_unlock(void)
+void pdc_cpu_rendezvous_unlock(void) __releases(&pdc_lock)
{
spin_unlock(&pdc_lock);
}
diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c
index 4d392e4ed358..d1defb9ede70 100644
--- a/arch/parisc/kernel/ftrace.c
+++ b/arch/parisc/kernel/ftrace.c
@@ -53,7 +53,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent,
static ftrace_func_t ftrace_func;
-void notrace __hot ftrace_function_trampoline(unsigned long parent,
+asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent,
unsigned long self_addr,
unsigned long org_sp_gr3,
struct ftrace_regs *fregs)
diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c
index 00297e8e1c88..6f0c92e8149d 100644
--- a/arch/parisc/kernel/parisc_ksyms.c
+++ b/arch/parisc/kernel/parisc_ksyms.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/syscalls.h>
+#include <linux/libgcc.h>
#include <linux/string.h>
EXPORT_SYMBOL(memset);
@@ -92,12 +93,6 @@ EXPORT_SYMBOL($$divI_12);
EXPORT_SYMBOL($$divI_14);
EXPORT_SYMBOL($$divI_15);
-extern void __ashrdi3(void);
-extern void __ashldi3(void);
-extern void __lshrdi3(void);
-extern void __muldi3(void);
-extern void __ucmpdi2(void);
-
EXPORT_SYMBOL(__ashrdi3);
EXPORT_SYMBOL(__ashldi3);
EXPORT_SYMBOL(__lshrdi3);
diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c
index d818ece23b4a..bf9f192c826e 100644
--- a/arch/parisc/kernel/pci-dma.c
+++ b/arch/parisc/kernel/pci-dma.c
@@ -39,7 +39,7 @@ static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
static unsigned long pcxl_used_bytes __read_mostly;
static unsigned long pcxl_used_pages __read_mostly;
-extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */
+unsigned long pcxl_dma_start __ro_after_init; /* pcxl dma mapping area start */
static DEFINE_SPINLOCK(pcxl_res_lock);
static char *pcxl_res_map;
static int pcxl_res_hint;
@@ -381,7 +381,7 @@ pcxl_dma_init(void)
pcxl_res_map = (char *)__get_free_pages(GFP_KERNEL,
get_order(pcxl_res_size));
memset(pcxl_res_map, 0, pcxl_res_size);
- proc_gsc_root = proc_mkdir("gsc", NULL);
+ proc_gsc_root = proc_mkdir("bus/gsc", NULL);
if (!proc_gsc_root)
printk(KERN_WARNING
"pcxl_dma_init: Unable to create gsc /proc dir entry\n");
@@ -417,14 +417,6 @@ void *arch_dma_alloc(struct device *dev, size_t size,
map_uncached_pages(vaddr, size, paddr);
*dma_handle = (dma_addr_t) paddr;
-#if 0
-/* This probably isn't needed to support EISA cards.
-** ISA cards will certainly only support 24-bit DMA addressing.
-** Not clear if we can, want, or need to support ISA.
-*/
- if (!dev || *dev->coherent_dma_mask < 0xffffffff)
- gfp |= GFP_DMA;
-#endif
return (void *)vaddr;
}
diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index 0d24735bd918..0f9b3b5914cf 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -354,10 +354,8 @@ static int __init pdt_initcall(void)
return -ENODEV;
kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd");
- if (IS_ERR(kpdtd_task))
- return PTR_ERR(kpdtd_task);
- return 0;
+ return PTR_ERR_OR_ZERO(kpdtd_task);
}
late_initcall(pdt_initcall);
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index 90b04d8af212..b0f0816879df 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -57,7 +57,7 @@ struct rdr_tbl_ent {
static int perf_processor_interface __read_mostly = UNKNOWN_INTF;
static int perf_enabled __read_mostly;
static DEFINE_SPINLOCK(perf_lock);
-struct parisc_device *cpu_device __read_mostly;
+static struct parisc_device *cpu_device __read_mostly;
/* RDRs to write for PCX-W */
static const int perf_rdrs_W[] =
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index 00b0df97afb1..762289b9984e 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -26,6 +26,7 @@
#include <asm/processor.h>
#include <asm/page.h>
#include <asm/pdc.h>
+#include <asm/smp.h>
#include <asm/pdcpat.h>
#include <asm/irq.h> /* for struct irq_region */
#include <asm/parisc-device.h>
diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 573f8303e2b0..211a4afdd282 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -40,11 +40,6 @@
static char __initdata command_line[COMMAND_LINE_SIZE];
-/* Intended for ccio/sba/cpu statistics under /proc/bus/{runway|gsc} */
-struct proc_dir_entry * proc_runway_root __read_mostly = NULL;
-struct proc_dir_entry * proc_gsc_root __read_mostly = NULL;
-struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL;
-
static void __init setup_cmdline(char **cmdline_p)
{
extern unsigned int boot_args[];
@@ -196,48 +191,6 @@ const struct seq_operations cpuinfo_op = {
.show = show_cpuinfo
};
-static void __init parisc_proc_mkdir(void)
-{
- /*
- ** Can't call proc_mkdir() until after proc_root_init() has been
- ** called by start_kernel(). In other words, this code can't
- ** live in arch/.../setup.c because start_parisc() calls
- ** start_kernel().
- */
- switch (boot_cpu_data.cpu_type) {
- case pcxl:
- case pcxl2:
- if (NULL == proc_gsc_root)
- {
- proc_gsc_root = proc_mkdir("bus/gsc", NULL);
- }
- break;
- case pcxt_:
- case pcxu:
- case pcxu_:
- case pcxw:
- case pcxw_:
- case pcxw2:
- if (NULL == proc_runway_root)
- {
- proc_runway_root = proc_mkdir("bus/runway", NULL);
- }
- break;
- case mako:
- case mako2:
- if (NULL == proc_mckinley_root)
- {
- proc_mckinley_root = proc_mkdir("bus/mckinley", NULL);
- }
- break;
- default:
- /* FIXME: this was added to prevent the compiler
- * complaining about missing pcx, pcxs and pcxt
- * I'm assuming they have neither gsc nor runway */
- break;
- }
-}
-
static struct resource central_bus = {
.name = "Central Bus",
.start = F_EXTEND(0xfff80000),
@@ -294,7 +247,6 @@ static int __init parisc_init(void)
{
u32 osid = (OS_ID_LINUX << 16);
- parisc_proc_mkdir();
parisc_init_resources();
do_device_inventory(); /* probe for hardware */
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index f886ff0c75df..e8d27def6c52 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -423,7 +423,7 @@ static void check_syscallno_in_delay_branch(struct pt_regs *regs)
regs->gr[31] -= 8; /* delayed branching */
/* Get assembler opcode of code in delay branch */
- uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4);
+ uaddr = (u32 __user *) ((regs->gr[31] & ~3) + 4);
err = get_user(opcode, uaddr);
if (err)
return;
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 1373e5129868..1f51aa9c8230 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -39,6 +39,7 @@ registers).
#include <asm/assembly.h>
#include <asm/processor.h>
#include <asm/cache.h>
+#include <asm/spinlock_types.h>
#include <linux/linkage.h>
@@ -66,6 +67,16 @@ registers).
stw \reg1, 0(%sr2,\reg2)
.endm
+ /* raise exception if spinlock content is not zero or
+ * __ARCH_SPIN_LOCK_UNLOCKED_VAL */
+ .macro spinlock_check spin_val,tmpreg
+#ifdef CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK
+ ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmpreg
+ andcm,= \spin_val, \tmpreg, %r0
+ .word SPINLOCK_BREAK_INSN
+#endif
+ .endm
+
.text
.import syscall_exit,code
@@ -508,7 +519,8 @@ lws_start:
lws_exit_noerror:
lws_pagefault_enable %r1,%r21
- stw,ma %r20, 0(%sr2,%r20)
+ ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21
+ stw,ma %r21, 0(%sr2,%r20)
ssm PSW_SM_I, %r0
b lws_exit
copy %r0, %r21
@@ -521,7 +533,8 @@ lws_wouldblock:
lws_pagefault:
lws_pagefault_enable %r1,%r21
- stw,ma %r20, 0(%sr2,%r20)
+ ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21
+ stw,ma %r21, 0(%sr2,%r20)
ssm PSW_SM_I, %r0
ldo 3(%r0),%r28
b lws_exit
@@ -619,6 +632,7 @@ lws_compare_and_swap:
/* Try to acquire the lock */
LDCW 0(%sr2,%r20), %r28
+ spinlock_check %r28, %r21
comclr,<> %r0, %r28, %r0
b,n lws_wouldblock
@@ -772,6 +786,7 @@ cas2_lock_start:
/* Try to acquire the lock */
LDCW 0(%sr2,%r20), %r28
+ spinlock_check %r28, %r21
comclr,<> %r0, %r28, %r0
b,n lws_wouldblock
@@ -1001,6 +1016,7 @@ atomic_xchg_start:
/* Try to acquire the lock */
LDCW 0(%sr2,%r20), %r28
+ spinlock_check %r28, %r21
comclr,<> %r0, %r28, %r0
b,n lws_wouldblock
@@ -1199,6 +1215,7 @@ atomic_store_start:
/* Try to acquire the lock */
LDCW 0(%sr2,%r20), %r28
+ spinlock_check %r28, %r21
comclr,<> %r0, %r28, %r0
b,n lws_wouldblock
@@ -1330,7 +1347,7 @@ ENTRY(lws_lock_start)
/* lws locks */
.rept 256
/* Keep locks aligned at 16-bytes */
- .word 1
+ .word __ARCH_SPIN_LOCK_UNLOCKED_VAL
.word 0
.word 0
.word 0
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index a0a9145b6dd4..e97c175b56f9 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -450,3 +450,4 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c
index 033b9e50b44a..170d0dda4213 100644
--- a/arch/parisc/kernel/unaligned.c
+++ b/arch/parisc/kernel/unaligned.c
@@ -11,6 +11,7 @@
#include <linux/signal.h>
#include <linux/ratelimit.h>
#include <linux/uaccess.h>
+#include <linux/sysctl.h>
#include <asm/unaligned.h>
#include <asm/hardirq.h>
#include <asm/traps.h>
@@ -337,7 +338,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop)
: "r19", "r20", "r21", "r22", "r1" );
#else
{
- unsigned long valh=(val>>32),vall=(val&0xffffffffl);
+ unsigned long valh = (val >> 32), vall = (val & 0xffffffffl);
__asm__ __volatile__ (
" mtsp %4, %%sr1\n"
" zdep %2, 29, 2, %%r19\n"
@@ -473,7 +474,7 @@ void handle_unaligned(struct pt_regs *regs)
case OPCODE_LDWA_I:
case OPCODE_LDW_S:
case OPCODE_LDWA_S:
- ret = emulate_ldw(regs, R3(regs->iir),0);
+ ret = emulate_ldw(regs, R3(regs->iir), 0);
break;
case OPCODE_STH:
@@ -482,7 +483,7 @@ void handle_unaligned(struct pt_regs *regs)
case OPCODE_STW:
case OPCODE_STWA:
- ret = emulate_stw(regs, R2(regs->iir),0);
+ ret = emulate_stw(regs, R2(regs->iir), 0);
break;
#ifdef CONFIG_64BIT
@@ -490,12 +491,12 @@ void handle_unaligned(struct pt_regs *regs)
case OPCODE_LDDA_I:
case OPCODE_LDD_S:
case OPCODE_LDDA_S:
- ret = emulate_ldd(regs, R3(regs->iir),0);
+ ret = emulate_ldd(regs, R3(regs->iir), 0);
break;
case OPCODE_STD:
case OPCODE_STDA:
- ret = emulate_std(regs, R2(regs->iir),0);
+ ret = emulate_std(regs, R2(regs->iir), 0);
break;
#endif
@@ -503,24 +504,24 @@ void handle_unaligned(struct pt_regs *regs)
case OPCODE_FLDWS:
case OPCODE_FLDWXR:
case OPCODE_FLDWSR:
- ret = emulate_ldw(regs,FR3(regs->iir),1);
+ ret = emulate_ldw(regs, FR3(regs->iir), 1);
break;
case OPCODE_FLDDX:
case OPCODE_FLDDS:
- ret = emulate_ldd(regs,R3(regs->iir),1);
+ ret = emulate_ldd(regs, R3(regs->iir), 1);
break;
case OPCODE_FSTWX:
case OPCODE_FSTWS:
case OPCODE_FSTWXR:
case OPCODE_FSTWSR:
- ret = emulate_stw(regs,FR3(regs->iir),1);
+ ret = emulate_stw(regs, FR3(regs->iir), 1);
break;
case OPCODE_FSTDX:
case OPCODE_FSTDS:
- ret = emulate_std(regs,R3(regs->iir),1);
+ ret = emulate_std(regs, R3(regs->iir), 1);
break;
case OPCODE_LDCD_I:
diff --git a/arch/parisc/lib/ucmpdi2.c b/arch/parisc/lib/ucmpdi2.c
index 8e6014a142ef..9d8b4dbae273 100644
--- a/arch/parisc/lib/ucmpdi2.c
+++ b/arch/parisc/lib/ucmpdi2.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
+#include <linux/libgcc.h>
union ull_union {
unsigned long long ull;
@@ -9,7 +10,7 @@ union ull_union {
} ui;
};
-int __ucmpdi2(unsigned long long a, unsigned long long b)
+word_type __ucmpdi2(unsigned long long a, unsigned long long b)
{
union ull_union au = {.ull = a};
union ull_union bu = {.ull = b};
diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c
index a4c7c7630f48..2fe5b44986e0 100644
--- a/arch/parisc/mm/fault.c
+++ b/arch/parisc/mm/fault.c
@@ -192,31 +192,31 @@ int fixup_exception(struct pt_regs *regs)
* For implementation see handle_interruption() in traps.c
*/
static const char * const trap_description[] = {
- [1] "High-priority machine check (HPMC)",
- [2] "Power failure interrupt",
- [3] "Recovery counter trap",
- [5] "Low-priority machine check",
- [6] "Instruction TLB miss fault",
- [7] "Instruction access rights / protection trap",
- [8] "Illegal instruction trap",
- [9] "Break instruction trap",
- [10] "Privileged operation trap",
- [11] "Privileged register trap",
- [12] "Overflow trap",
- [13] "Conditional trap",
- [14] "FP Assist Exception trap",
- [15] "Data TLB miss fault",
- [16] "Non-access ITLB miss fault",
- [17] "Non-access DTLB miss fault",
- [18] "Data memory protection/unaligned access trap",
- [19] "Data memory break trap",
- [20] "TLB dirty bit trap",
- [21] "Page reference trap",
- [22] "Assist emulation trap",
- [25] "Taken branch trap",
- [26] "Data memory access rights trap",
- [27] "Data memory protection ID trap",
- [28] "Unaligned data reference trap",
+ [1] = "High-priority machine check (HPMC)",
+ [2] = "Power failure interrupt",
+ [3] = "Recovery counter trap",
+ [5] = "Low-priority machine check",
+ [6] = "Instruction TLB miss fault",
+ [7] = "Instruction access rights / protection trap",
+ [8] = "Illegal instruction trap",
+ [9] = "Break instruction trap",
+ [10] = "Privileged operation trap",
+ [11] = "Privileged register trap",
+ [12] = "Overflow trap",
+ [13] = "Conditional trap",
+ [14] = "FP Assist Exception trap",
+ [15] = "Data TLB miss fault",
+ [16] = "Non-access ITLB miss fault",
+ [17] = "Non-access DTLB miss fault",
+ [18] = "Data memory protection/unaligned access trap",
+ [19] = "Data memory break trap",
+ [20] = "TLB dirty bit trap",
+ [21] = "Page reference trap",
+ [22] = "Assist emulation trap",
+ [25] = "Taken branch trap",
+ [26] = "Data memory access rights trap",
+ [27] = "Data memory protection ID trap",
+ [28] = "Unaligned data reference trap",
};
const char *trap_name(unsigned long code)
diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c
index cc15d737fda6..ae3493dae9dc 100644
--- a/arch/parisc/mm/fixmap.c
+++ b/arch/parisc/mm/fixmap.c
@@ -19,9 +19,6 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys)
pmd_t *pmd = pmd_offset(pud, vaddr);
pte_t *pte;
- if (pmd_none(*pmd))
- pte = pte_alloc_kernel(pmd, vaddr);
-
pte = pte_offset_kernel(pmd, vaddr);
set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX));
flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE);
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 406c52fe23d5..a088c243edea 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -523,10 +523,6 @@ void mark_rodata_ro(void)
void *parisc_vmalloc_start __ro_after_init;
EXPORT_SYMBOL(parisc_vmalloc_start);
-#ifdef CONFIG_PA11
-unsigned long pcxl_dma_start __ro_after_init;
-#endif
-
void __init mem_init(void)
{
/* Do sanity checks on IPC (compat) structures */
@@ -669,6 +665,39 @@ static void __init gateway_init(void)
PAGE_SIZE, PAGE_GATEWAY, 1);
}
+static void __init fixmap_init(void)
+{
+ unsigned long addr = FIXMAP_START;
+ unsigned long end = FIXMAP_START + FIXMAP_SIZE;
+ pgd_t *pgd = pgd_offset_k(addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
+ pmd_t *pmd;
+
+ BUILD_BUG_ON(FIXMAP_SIZE > PMD_SIZE);
+
+#if CONFIG_PGTABLE_LEVELS == 3
+ if (pud_none(*pud)) {
+ pmd = memblock_alloc(PAGE_SIZE << PMD_TABLE_ORDER,
+ PAGE_SIZE << PMD_TABLE_ORDER);
+ if (!pmd)
+ panic("fixmap: pmd allocation failed.\n");
+ pud_populate(NULL, pud, pmd);
+ }
+#endif
+
+ pmd = pmd_offset(pud, addr);
+ do {
+ pte_t *pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ if (!pte)
+ panic("fixmap: pte allocation failed.\n");
+
+ pmd_populate_kernel(&init_mm, pmd, pte);
+
+ addr += PAGE_SIZE;
+ } while (addr < end);
+}
+
static void __init parisc_bootmem_free(void)
{
unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, };
@@ -683,6 +712,7 @@ void __init paging_init(void)
setup_bootmem();
pagetable_init();
gateway_init();
+ fixmap_init();
flush_cache_all_local(); /* start with known state */
flush_tlb_all_local(NULL);
diff --git a/arch/parisc/mm/ioremap.c b/arch/parisc/mm/ioremap.c
index 345ff0b66499..d7ee1f43d997 100644
--- a/arch/parisc/mm/ioremap.c
+++ b/arch/parisc/mm/ioremap.c
@@ -27,7 +27,7 @@
*/
void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
{
- void __iomem *addr;
+ uintptr_t addr;
struct vm_struct *area;
unsigned long offset, last_addr;
pgprot_t pgprot;
@@ -79,10 +79,9 @@ void __iomem *ioremap(unsigned long phys_addr, unsigned long size)
if (!area)
return NULL;
- addr = (void __iomem *) area->addr;
- if (ioremap_page_range((unsigned long)addr, (unsigned long)addr + size,
- phys_addr, pgprot)) {
- vunmap(addr);
+ addr = (uintptr_t) area->addr;
+ if (ioremap_page_range(addr, addr + size, phys_addr, pgprot)) {
+ vunmap(area->addr);
return NULL;
}
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index ef09786d49b9..51499ee6366b 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -79,7 +79,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_REISERFS_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/powerpc/configs/85xx/stx_gp3_defconfig b/arch/powerpc/configs/85xx/stx_gp3_defconfig
index ecbcc853307d..e7080497048d 100644
--- a/arch/powerpc/configs/85xx/stx_gp3_defconfig
+++ b/arch/powerpc/configs/85xx/stx_gp3_defconfig
@@ -50,7 +50,7 @@ CONFIG_DRM=m
CONFIG_SOUND=m
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=m
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig
index 06391cc2af3a..53f43a34e1a9 100644
--- a/arch/powerpc/configs/cell_defconfig
+++ b/arch/powerpc/configs/cell_defconfig
@@ -172,7 +172,7 @@ CONFIG_EDAC_CELL=y
CONFIG_UIO=m
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
index b9dfa3a0713b..0d8d3f41f194 100644
--- a/arch/powerpc/configs/ep8248e_defconfig
+++ b/arch/powerpc/configs/ep8248e_defconfig
@@ -47,7 +47,7 @@ CONFIG_SERIAL_CPM_CONSOLE=y
# CONFIG_USB_SUPPORT is not set
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_CRAMFS=y
diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
index ce220ceeb91c..2101bfe6db94 100644
--- a/arch/powerpc/configs/mgcoge_defconfig
+++ b/arch/powerpc/configs/mgcoge_defconfig
@@ -60,7 +60,7 @@ CONFIG_USB_FSL_USB2=y
CONFIG_USB_G_SERIAL=y
CONFIG_UIO=y
CONFIG_EXT2_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_JFFS2_FS=y
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 96aa5355911f..61993944db40 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -143,7 +143,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT4_FS=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=y
CONFIG_MSDOS_FS=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 019163c2571e..05ed58576464 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -254,7 +254,7 @@ CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
CONFIG_EXT2_FS=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index e02ab94a09bf..ee84ade7a033 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -270,7 +270,7 @@ CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
CONFIG_FANOTIFY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
CONFIG_ISO9660_FS=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 268fa361a06d..c0f4bbc2c975 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -327,7 +327,7 @@ CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
CONFIG_FS_DAX=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_OVERLAY_FS=m
CONFIG_ISO9660_FS=y
diff --git a/arch/powerpc/configs/ppc64e_defconfig b/arch/powerpc/configs/ppc64e_defconfig
index 776c32964e12..624c371ffcc3 100644
--- a/arch/powerpc/configs/ppc64e_defconfig
+++ b/arch/powerpc/configs/ppc64e_defconfig
@@ -185,7 +185,7 @@ CONFIG_JFS_SECURITY=y
CONFIG_XFS_FS=m
CONFIG_XFS_POSIX_ACL=y
CONFIG_FS_DAX=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index f21170b8fa11..1034aeabdd6c 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -969,7 +969,7 @@ CONFIG_XFS_POSIX_ACL=y
CONFIG_GFS2_FS=m
CONFIG_FS_DAX=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index ea3ee0719bb2..1ea732c19235 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -129,7 +129,7 @@ CONFIG_EXT2_FS=m
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_UDF_FS=m
diff --git a/arch/powerpc/crypto/.gitignore b/arch/powerpc/crypto/.gitignore
new file mode 100644
index 000000000000..e1094f08f713
--- /dev/null
+++ b/arch/powerpc/crypto/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+aesp10-ppc.S
+ghashp10-ppc.S
diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index ef42adb44aa3..00c6b0b4ede4 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -4,14 +4,13 @@
#ifdef __KERNEL__
#include <asm/asm-compat.h>
-#include <asm/extable.h>
#ifdef CONFIG_BUG
#ifdef __ASSEMBLY__
#include <asm/asm-offsets.h>
#ifdef CONFIG_DEBUG_BUGVERBOSE
-.macro __EMIT_BUG_ENTRY addr,file,line,flags
+.macro EMIT_BUG_ENTRY addr,file,line,flags
.section __bug_table,"aw"
5001: .4byte \addr - .
.4byte 5002f - .
@@ -23,7 +22,7 @@
.previous
.endm
#else
-.macro __EMIT_BUG_ENTRY addr,file,line,flags
+.macro EMIT_BUG_ENTRY addr,file,line,flags
.section __bug_table,"aw"
5001: .4byte \addr - .
.short \flags
@@ -32,18 +31,6 @@
.endm
#endif /* verbose */
-.macro EMIT_WARN_ENTRY addr,file,line,flags
- EX_TABLE(\addr,\addr+4)
- __EMIT_BUG_ENTRY \addr,\file,\line,\flags
-.endm
-
-.macro EMIT_BUG_ENTRY addr,file,line,flags
- .if \flags & 1 /* BUGFLAG_WARNING */
- .err /* Use EMIT_WARN_ENTRY for warnings */
- .endif
- __EMIT_BUG_ENTRY \addr,\file,\line,\flags
-.endm
-
#else /* !__ASSEMBLY__ */
/* _EMIT_BUG_ENTRY expects args %0,%1,%2,%3 to be FILE, LINE, flags and
sizeof(struct bug_entry), respectively */
@@ -73,16 +60,6 @@
"i" (sizeof(struct bug_entry)), \
##__VA_ARGS__)
-#define WARN_ENTRY(insn, flags, label, ...) \
- asm_volatile_goto( \
- "1: " insn "\n" \
- EX_TABLE(1b, %l[label]) \
- _EMIT_BUG_ENTRY \
- : : "i" (__FILE__), "i" (__LINE__), \
- "i" (flags), \
- "i" (sizeof(struct bug_entry)), \
- ##__VA_ARGS__ : : label)
-
/*
* BUG_ON() and WARN_ON() do their best to cooperate with compile-time
* optimisations. However depending on the complexity of the condition
@@ -95,16 +72,7 @@
} while (0)
#define HAVE_ARCH_BUG
-#define __WARN_FLAGS(flags) do { \
- __label__ __label_warn_on; \
- \
- WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \
- barrier_before_unreachable(); \
- __builtin_unreachable(); \
- \
-__label_warn_on: \
- break; \
-} while (0)
+#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
#ifdef CONFIG_PPC64
#define BUG_ON(x) do { \
@@ -117,25 +85,15 @@ __label_warn_on: \
} while (0)
#define WARN_ON(x) ({ \
- bool __ret_warn_on = false; \
- do { \
- if (__builtin_constant_p((x))) { \
- if (!(x)) \
- break; \
+ int __ret_warn_on = !!(x); \
+ if (__builtin_constant_p(__ret_warn_on)) { \
+ if (__ret_warn_on) \
__WARN(); \
- __ret_warn_on = true; \
- } else { \
- __label__ __label_warn_on; \
- \
- WARN_ENTRY(PPC_TLNEI " %4, 0", \
- BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \
- __label_warn_on, \
- "r" ((__force long)(x))); \
- break; \
-__label_warn_on: \
- __ret_warn_on = true; \
- } \
- } while (0); \
+ } else { \
+ BUG_ENTRY(PPC_TLNEI " %4, 0", \
+ BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN), \
+ "r" (__ret_warn_on)); \
+ } \
unlikely(__ret_warn_on); \
})
@@ -148,14 +106,13 @@ __label_warn_on: \
#ifdef __ASSEMBLY__
.macro EMIT_BUG_ENTRY addr,file,line,flags
.endm
-.macro EMIT_WARN_ENTRY addr,file,line,flags
-.endm
#else /* !__ASSEMBLY__ */
#define _EMIT_BUG_ENTRY
-#define _EMIT_WARN_ENTRY
#endif
#endif /* CONFIG_BUG */
+#define EMIT_WARN_ENTRY EMIT_BUG_ENTRY
+
#include <asm-generic/bug.h>
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h
index a26ca097d032..79f1c480b5eb 100644
--- a/arch/powerpc/include/asm/elf.h
+++ b/arch/powerpc/include/asm/elf.h
@@ -12,14 +12,8 @@
/*
* This is used to ensure we don't load something for the wrong architecture.
- * 64le only supports ELFv2 64-bit binaries (64be supports v1 and v2).
*/
-#if defined(CONFIG_PPC64) && defined(CONFIG_CPU_LITTLE_ENDIAN)
-#define elf_check_arch(x) (((x)->e_machine == ELF_ARCH) && \
- (((x)->e_flags & 0x3) == 0x2))
-#else
#define elf_check_arch(x) ((x)->e_machine == ELF_ARCH)
-#endif
#define compat_elf_check_arch(x) ((x)->e_machine == EM_PPC)
#define CORE_DUMP_USE_REGSET
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 8a6754ffdc7e..a6c7069bec5d 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -393,7 +393,6 @@ int validate_sp_size(unsigned long sp, struct task_struct *p,
*/
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
static inline void prefetch(const void *x)
{
@@ -411,8 +410,6 @@ static inline void prefetchw(const void *x)
__asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x));
}
-#define spin_lock_prefetch(x) prefetchw(x)
-
/* asm stubs */
extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index bc5d39a835fe..bf5dde1a4114 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -183,13 +183,9 @@ static inline bool test_thread_local_flags(unsigned int flags)
#define clear_tsk_compat_task(tsk) do { } while (0)
#endif
-#ifdef CONFIG_PPC64
-#ifdef CONFIG_CPU_BIG_ENDIAN
+#if defined(CONFIG_PPC64)
#define is_elf2_task() (test_thread_flag(TIF_ELF2ABI))
#else
-#define is_elf2_task() (1)
-#endif
-#else
#define is_elf2_task() (0)
#endif
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index 46c31fb8748d..30a12d208687 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask)
return leading_zero_bits >> 3;
}
-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
{
unsigned long rhs = val | c->low_bits;
*data = rhs;
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index f132d8704263..6440b1bb332a 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -375,8 +375,7 @@ _GLOBAL(generic_secondary_smp_init)
beq 20f
/* start the specified thread */
- LOAD_REG_ADDR(r5, fsl_secondary_thread_init)
- ld r4, 0(r5)
+ LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init))
bl book3e_start_thread
/* stop the current thread */
diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c
index 4caf5e3079eb..359577ec1680 100644
--- a/arch/powerpc/kernel/rtas_flash.c
+++ b/arch/powerpc/kernel/rtas_flash.c
@@ -709,9 +709,9 @@ static int __init rtas_flash_init(void)
if (!rtas_validate_flash_data.buf)
return -ENOMEM;
- flash_block_cache = kmem_cache_create("rtas_flash_cache",
- RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0,
- NULL);
+ flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache",
+ RTAS_BLK_SIZE, RTAS_BLK_SIZE,
+ 0, 0, RTAS_BLK_SIZE, NULL);
if (!flash_block_cache) {
printk(KERN_ERR "%s: failed to create block cache\n",
__func__);
diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl
index 8c0b08b7a80e..20e50586e8a2 100644
--- a/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@ -538,3 +538,4 @@
449 common futex_waitv sys_futex_waitv
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S
index ffb1db386849..1f7d86de1538 100644
--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S
@@ -33,6 +33,9 @@
* and then arrange for the ftrace function to be called.
*/
.macro ftrace_regs_entry allregs
+ /* Create a minimal stack frame for representing B */
+ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1)
+
/* Create our stack frame + pt_regs */
PPC_STLU r1,-SWITCH_FRAME_SIZE(r1)
@@ -42,7 +45,7 @@
#ifdef CONFIG_PPC64
/* Save the original return address in A's stack frame */
- std r0, LRSAVE+SWITCH_FRAME_SIZE(r1)
+ std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1)
/* Ok to continue? */
lbz r3, PACA_FTRACE_ENABLED(r13)
cmpdi r3, 0
@@ -77,6 +80,8 @@
mflr r7
/* Save it as pt_regs->nip */
PPC_STL r7, _NIP(r1)
+ /* Also save it in B's stackframe header for proper unwind */
+ PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
/* Save the read LR in pt_regs->link */
PPC_STL r0, _LINK(r1)
@@ -142,7 +147,7 @@
#endif
/* Pop our stack frame */
- addi r1, r1, SWITCH_FRAME_SIZE
+ addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
#ifdef CONFIG_LIVEPATCH_64
/* Based on the cmpd above, if the NIP was altered handle livepatch */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index e59ec6d32d37..7ef147e2a20d 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1508,13 +1508,8 @@ static void do_program_check(struct pt_regs *regs)
if (!(regs->msr & MSR_PR) && /* not user-mode */
report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
- const struct exception_table_entry *entry;
-
- entry = search_exception_tables(bugaddr);
- if (entry) {
- regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr);
- return;
- }
+ regs_add_return_ip(regs, 4);
+ return;
}
if (cpu_has_feature(CPU_FTR_DEXCR_NPHIE) && user_mode(regs)) {
diff --git a/arch/powerpc/mm/book3s64/subpage_prot.c b/arch/powerpc/mm/book3s64/subpage_prot.c
index 0dc85556dec5..ec98e526167e 100644
--- a/arch/powerpc/mm/book3s64/subpage_prot.c
+++ b/arch/powerpc/mm/book3s64/subpage_prot.c
@@ -145,6 +145,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
static const struct mm_walk_ops subpage_walk_ops = {
.pmd_entry = subpage_walk_pmd_entry,
+ .walk_lock = PGWALK_WRLOCK_VERIFY,
};
static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index fe1b83020e0d..0ec5b45b1e86 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -314,8 +314,7 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
start = ALIGN_DOWN(start, page_size);
if (altmap) {
alt_start = altmap->base_pfn;
- alt_end = altmap->base_pfn + altmap->reserve +
- altmap->free + altmap->alloc + altmap->align;
+ alt_end = altmap->base_pfn + altmap->reserve + altmap->free;
}
pr_debug("vmemmap_free %lx...%lx\n", start, end);
diff --git a/arch/powerpc/mm/kasan/Makefile b/arch/powerpc/mm/kasan/Makefile
index 699eeffd9f55..f9522fd70b2f 100644
--- a/arch/powerpc/mm/kasan/Makefile
+++ b/arch/powerpc/mm/kasan/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
KASAN_SANITIZE := n
+KCOV_INSTRUMENT := n
obj-$(CONFIG_PPC32) += init_32.o
obj-$(CONFIG_PPC_8xx) += 8xx.o
diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
index 1bfb29574caa..c1e981649bd9 100644
--- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
+++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c
@@ -477,7 +477,7 @@ static int mpc512x_lpbfifo_probe(struct platform_device *pdev)
return ret;
}
-static int mpc512x_lpbfifo_remove(struct platform_device *pdev)
+static void mpc512x_lpbfifo_remove(struct platform_device *pdev)
{
unsigned long flags;
struct dma_device *dma_dev = lpbfifo.chan->device;
@@ -494,8 +494,6 @@ static int mpc512x_lpbfifo_remove(struct platform_device *pdev)
free_irq(lpbfifo.irq, &pdev->dev);
irq_dispose_mapping(lpbfifo.irq);
dma_release_channel(lpbfifo.chan);
-
- return 0;
}
static const struct of_device_id mpc512x_lpbfifo_match[] = {
@@ -506,7 +504,7 @@ MODULE_DEVICE_TABLE(of, mpc512x_lpbfifo_match);
static struct platform_driver mpc512x_lpbfifo_driver = {
.probe = mpc512x_lpbfifo_probe,
- .remove = mpc512x_lpbfifo_remove,
+ .remove_new = mpc512x_lpbfifo_remove,
.driver = {
.name = DRV_NAME,
.of_match_table = mpc512x_lpbfifo_match,
diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c
index 9c43cf32f4c9..40aa58206888 100644
--- a/arch/powerpc/platforms/85xx/smp.c
+++ b/arch/powerpc/platforms/85xx/smp.c
@@ -180,7 +180,7 @@ static void wake_hw_thread(void *info)
unsigned long inia;
int cpu = *(const int *)info;
- inia = *(unsigned long *)fsl_secondary_thread_init;
+ inia = ppc_function_entry(fsl_secondary_thread_init);
book3e_start_thread(cpu_thread_in_core(cpu), inia);
}
#endif
diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
index ea807aa0c31a..38c5be34c895 100644
--- a/arch/powerpc/platforms/cell/spufs/inode.c
+++ b/arch/powerpc/platforms/cell/spufs/inode.c
@@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode)
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
out:
return inode;
}
diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c
index 4c5790aff1b5..8633891b7aa5 100644
--- a/arch/powerpc/platforms/powermac/time.c
+++ b/arch/powerpc/platforms/powermac/time.c
@@ -26,8 +26,8 @@
#include <linux/rtc.h>
#include <linux/of_address.h>
+#include <asm/early_ioremap.h>
#include <asm/sections.h>
-#include <asm/io.h>
#include <asm/machdep.h>
#include <asm/time.h>
#include <asm/nvram.h>
@@ -182,7 +182,7 @@ static int __init via_calibrate_decr(void)
return 0;
}
of_node_put(vias);
- via = ioremap(rsrc.start, resource_size(&rsrc));
+ via = early_ioremap(rsrc.start, resource_size(&rsrc));
if (via == NULL) {
printk(KERN_ERR "Failed to map VIA for timer calibration !\n");
return 0;
@@ -207,7 +207,7 @@ static int __init via_calibrate_decr(void)
ppc_tb_freq = (dstart - dend) * 100 / 6;
- iounmap(via);
+ early_iounmap((void *)via, resource_size(&rsrc));
return 1;
}
diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c
index 9a44a98ba342..3fbc2a6aa319 100644
--- a/arch/powerpc/platforms/pseries/vas.c
+++ b/arch/powerpc/platforms/pseries/vas.c
@@ -744,6 +744,12 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
}
task_ref = &win->vas_win.task_ref;
+ /*
+ * VAS mmap (coproc_mmap()) and its fault handler
+ * (vas_mmap_fault()) are called after holding mmap lock.
+ * So hold mmap mutex after mmap_lock to avoid deadlock.
+ */
+ mmap_write_lock(task_ref->mm);
mutex_lock(&task_ref->mmap_mutex);
vma = task_ref->vma;
/*
@@ -752,7 +758,6 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
*/
win->vas_win.status |= flag;
- mmap_write_lock(task_ref->mm);
/*
* vma is set in the original mapping. But this mapping
* is done with mmap() after the window is opened with ioctl.
@@ -762,8 +767,8 @@ static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
if (vma)
zap_vma_pages(vma);
- mmap_write_unlock(task_ref->mm);
mutex_unlock(&task_ref->mmap_mutex);
+ mmap_write_unlock(task_ref->mm);
/*
* Close VAS window in the hypervisor, but do not
* free vas_window struct since it may be reused
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 4c07b9189c86..bea7b73e895d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -570,24 +570,30 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE
config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
def_bool y
# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
- depends on AS_IS_GNU && AS_VERSION >= 23800
- help
- Newer binutils versions default to ISA spec version 20191213 which
- moves some instructions from the I extension to the Zicsr and Zifencei
- extensions.
+ # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=98416dbb0a62579d4a7a4a76bab51b5b52fec2cd
+ depends on AS_IS_GNU && AS_VERSION >= 23600
+ help
+ Binutils-2.38 and GCC-12.1.0 bumped the default ISA spec to the newer
+ 20191213 version, which moves some instructions from the I extension to
+ the Zicsr and Zifencei extensions. This requires explicitly specifying
+ Zicsr and Zifencei when binutils >= 2.38 or GCC >= 12.1.0. Zicsr
+ and Zifencei are supported in binutils from version 2.36 onwards.
+ To make life easier, and avoid forcing toolchains that default to a
+ newer ISA spec to version 2.2, relax the check to binutils >= 2.36.
+ For clang < 17 or GCC < 11.3.0, for which this is not possible or need
+ special treatment, this is dealt with in TOOLCHAIN_NEEDS_OLD_ISA_SPEC.
config TOOLCHAIN_NEEDS_OLD_ISA_SPEC
def_bool y
depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
# https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16
- depends on CC_IS_CLANG && CLANG_VERSION < 170000
- help
- Certain versions of clang do not support zicsr and zifencei via -march
- but newer versions of binutils require it for the reasons noted in the
- help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This
- option causes an older ISA spec compatible with these older versions
- of clang to be passed to GAS, which has the same result as passing zicsr
- and zifencei to -march.
+ # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671
+ depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110300)
+ help
+ Certain versions of clang and GCC do not support zicsr and zifencei via
+ -march. This option causes an older ISA spec compatible with these older
+ versions of clang and GCC to be passed to GAS, which has the same result
+ as passing zicsr and zifencei to -march.
config FPU
bool "FPU support"
diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 0a0107460a5c..ab86ec3b9eab 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -192,7 +192,7 @@ CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_BTRFS_FS=m
CONFIG_BTRFS_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_OVERLAY_FS=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig
index 38760e4296cf..89b601e253a6 100644
--- a/arch/riscv/configs/rv32_defconfig
+++ b/arch/riscv/configs/rv32_defconfig
@@ -98,7 +98,7 @@ CONFIG_RPMSG_CTRL=y
CONFIG_RPMSG_VIRTIO=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_TMPFS=y
diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h
index f71ce21ff684..d5604d2073bc 100644
--- a/arch/riscv/include/asm/acpi.h
+++ b/arch/riscv/include/asm/acpi.h
@@ -19,7 +19,7 @@ typedef u64 phys_cpuid_t;
#define PHYS_CPUID_INVALID INVALID_HARTID
/* ACPI table mapping after acpi_permanent_mmap is set */
-void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size);
#define acpi_os_ioremap acpi_os_ioremap
#define acpi_strict 1 /* No out-of-spec workarounds on RISC-V */
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 8091b8bf4883..b93ffddf8a61 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -37,6 +37,10 @@ static inline void flush_dcache_page(struct page *page)
#define flush_icache_user_page(vma, pg, addr, len) \
flush_icache_mm(vma->vm_mm, 0)
+#ifdef CONFIG_64BIT
+#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end)
+#endif
+
#ifndef CONFIG_SMP
#define flush_icache_all() local_flush_icache_all()
diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h
index 4e1505cef8aa..fce00400c9bc 100644
--- a/arch/riscv/include/asm/insn.h
+++ b/arch/riscv/include/asm/insn.h
@@ -110,6 +110,7 @@
#define RVC_INSN_FUNCT4_OPOFF 12
#define RVC_INSN_FUNCT3_MASK GENMASK(15, 13)
#define RVC_INSN_FUNCT3_OPOFF 13
+#define RVC_INSN_J_RS1_MASK GENMASK(11, 7)
#define RVC_INSN_J_RS2_MASK GENMASK(6, 2)
#define RVC_INSN_OPCODE_MASK GENMASK(1, 0)
#define RVC_ENCODE_FUNCT3(f_) (RVC_FUNCT3_##f_ << RVC_INSN_FUNCT3_OPOFF)
@@ -245,8 +246,6 @@ __RISCV_INSN_FUNCS(c_jal, RVC_MASK_C_JAL, RVC_MATCH_C_JAL)
__RISCV_INSN_FUNCS(auipc, RVG_MASK_AUIPC, RVG_MATCH_AUIPC)
__RISCV_INSN_FUNCS(jalr, RVG_MASK_JALR, RVG_MATCH_JALR)
__RISCV_INSN_FUNCS(jal, RVG_MASK_JAL, RVG_MATCH_JAL)
-__RISCV_INSN_FUNCS(c_jr, RVC_MASK_C_JR, RVC_MATCH_C_JR)
-__RISCV_INSN_FUNCS(c_jalr, RVC_MASK_C_JALR, RVC_MATCH_C_JALR)
__RISCV_INSN_FUNCS(c_j, RVC_MASK_C_J, RVC_MATCH_C_J)
__RISCV_INSN_FUNCS(beq, RVG_MASK_BEQ, RVG_MATCH_BEQ)
__RISCV_INSN_FUNCS(bne, RVG_MASK_BNE, RVG_MATCH_BNE)
@@ -273,6 +272,18 @@ static __always_inline bool riscv_insn_is_branch(u32 code)
return (code & RV_INSN_OPCODE_MASK) == RVG_OPCODE_BRANCH;
}
+static __always_inline bool riscv_insn_is_c_jr(u32 code)
+{
+ return (code & RVC_MASK_C_JR) == RVC_MATCH_C_JR &&
+ (code & RVC_INSN_J_RS1_MASK) != 0;
+}
+
+static __always_inline bool riscv_insn_is_c_jalr(u32 code)
+{
+ return (code & RVC_MASK_C_JALR) == RVC_MATCH_C_JALR &&
+ (code & RVC_INSN_J_RS1_MASK) != 0;
+}
+
#define RV_IMM_SIGN(x) (-(((x) >> 31) & 1))
#define RVC_IMM_SIGN(x) (-(((x) >> 12) & 1))
#define RV_X(X, s, mask) (((X) >> (s)) & (mask))
diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h
index aff6c33ab0c0..4c58ee7f95ec 100644
--- a/arch/riscv/include/asm/mmio.h
+++ b/arch/riscv/include/asm/mmio.h
@@ -101,9 +101,9 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
* Relaxed I/O memory access primitives. These follow the Device memory
* ordering rules but do not guarantee any ordering relative to Normal memory
* accesses. These are defined to order the indicated access (either a read or
- * write) with all other I/O memory accesses. Since the platform specification
- * defines that all I/O regions are strongly ordered on channel 2, no explicit
- * fences are required to enforce this ordering.
+ * write) with all other I/O memory accesses to the same peripheral. Since the
+ * platform specification defines that all I/O regions are strongly ordered on
+ * channel 0, no explicit fences are required to enforce this ordering.
*/
/* FIXME: These are now the same as asm-generic */
#define __io_rbr() do {} while (0)
@@ -125,14 +125,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr)
#endif
/*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access. The memory barriers here are necessary as RISC-V
+ * I/O memory access primitives. Reads are ordered relative to any following
+ * Normal memory read and delay() loop. Writes are ordered relative to any
+ * prior Normal memory write. The memory barriers here are necessary as RISC-V
* doesn't define any ordering between the memory space and the I/O space.
*/
#define __io_br() do {} while (0)
-#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory")
-#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory")
+#define __io_ar(v) ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); })
+#define __io_bw() ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); })
#define __io_aw() mmiowb_set_pending()
#define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 75970ee2bda2..b5680c940c1e 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -188,6 +188,8 @@ extern struct pt_alloc_ops pt_ops __initdata;
#define PAGE_KERNEL_IO __pgprot(_PAGE_IOREMAP)
extern pgd_t swapper_pg_dir[];
+extern pgd_t trampoline_pg_dir[];
+extern pgd_t early_pg_dir[];
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static inline int pmd_present(pmd_t pmd)
diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h
index 3d78930cab51..c5ee07b3df07 100644
--- a/arch/riscv/include/asm/vector.h
+++ b/arch/riscv/include/asm/vector.h
@@ -70,8 +70,9 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
"csrr %1, " __stringify(CSR_VTYPE) "\n\t"
"csrr %2, " __stringify(CSR_VL) "\n\t"
"csrr %3, " __stringify(CSR_VCSR) "\n\t"
+ "csrr %4, " __stringify(CSR_VLENB) "\n\t"
: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
- "=r" (dest->vcsr) : :);
+ "=r" (dest->vcsr), "=r" (dest->vlenb) : :);
}
static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
index 58d3e447f191..924d01b56c9a 100644
--- a/arch/riscv/include/asm/vmalloc.h
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -3,12 +3,14 @@
#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+extern bool pgtable_l4_enabled, pgtable_l5_enabled;
+
#define IOREMAP_MAX_ORDER (PUD_SHIFT)
#define arch_vmap_pud_supported arch_vmap_pud_supported
static inline bool arch_vmap_pud_supported(pgprot_t prot)
{
- return true;
+ return pgtable_l4_enabled || pgtable_l5_enabled;
}
#define arch_vmap_pmd_supported arch_vmap_pmd_supported
diff --git a/arch/riscv/include/uapi/asm/bitsperlong.h b/arch/riscv/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..7d0b32e3b701
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H
+#define _UAPI_ASM_RISCV_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */
diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h
index e17c550986a6..283800130614 100644
--- a/arch/riscv/include/uapi/asm/ptrace.h
+++ b/arch/riscv/include/uapi/asm/ptrace.h
@@ -97,6 +97,7 @@ struct __riscv_v_ext_state {
unsigned long vl;
unsigned long vtype;
unsigned long vcsr;
+ unsigned long vlenb;
void *datap;
/*
* In signal handler, datap will be set a correct user stack offset
diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c
index 5ee03ebab80e..56cb2c986c48 100644
--- a/arch/riscv/kernel/acpi.c
+++ b/arch/riscv/kernel/acpi.c
@@ -215,9 +215,9 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size)
early_iounmap(map, size);
}
-void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
+void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size)
{
- return memremap(phys, size, MEMREMAP_WB);
+ return (void __iomem *)memremap(phys, size, MEMREMAP_WB);
}
#ifdef CONFIG_PCI
diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile
index 189345773e7e..b86e5e2c3aea 100644
--- a/arch/riscv/kernel/compat_vdso/Makefile
+++ b/arch/riscv/kernel/compat_vdso/Makefile
@@ -11,7 +11,13 @@ compat_vdso-syms += flush_icache
COMPAT_CC := $(CC)
COMPAT_LD := $(LD)
-COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32
+# binutils 2.35 does not support the zifencei extension, but in the ISA
+# spec 20191213, G stands for IMAFD_ZICSR_ZIFENCEI.
+ifdef CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
+ COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32
+else
+ COMPAT_CC_FLAGS := -march=rv32imafd -mabi=ilp32
+endif
COMPAT_LD_FLAGS := -melf32lriscv
# Disable attributes, as they're useless and break the build.
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index a2fc952318e9..35b854cf078e 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -17,6 +17,11 @@
#include <asm/smp.h>
#include <asm/pgtable.h>
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+ return phys_id == cpuid_to_hartid_map(cpu);
+}
+
/*
* Returns the hart ID of the given device tree node, or -ENODEV if the node
* isn't an enabled and valid RISC-V hart node.
diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c
index b351a3c01355..55f1d7856b54 100644
--- a/arch/riscv/kernel/crash_core.c
+++ b/arch/riscv/kernel/crash_core.c
@@ -18,4 +18,6 @@ void arch_crash_save_vmcoreinfo(void)
vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
#endif
vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR);
+ vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n",
+ kernel_map.va_kernel_pa_offset);
}
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
index 5372b708fae2..c08bb5c3b385 100644
--- a/arch/riscv/kernel/elf_kexec.c
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -281,7 +281,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
kbuf.buffer = initrd;
kbuf.bufsz = kbuf.memsz = initrd_len;
kbuf.buf_align = PAGE_SIZE;
- kbuf.top_down = false;
+ kbuf.top_down = true;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
@@ -425,6 +425,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
* sym, instead of searching the whole relsec.
*/
case R_RISCV_PCREL_HI20:
+ case R_RISCV_CALL_PLT:
case R_RISCV_CALL:
*(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) |
ENCODE_UJTYPE_IMM(val - addr);
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index d0577cc6a081..a8efa053c4a5 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -84,6 +84,9 @@ void do_softirq_own_stack(void)
: [sp] "r" (sp)
: "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
"t0", "t1", "t2", "t3", "t4", "t5", "t6",
+#ifndef CONFIG_FRAME_POINTER
+ "s0",
+#endif
"memory");
} else
#endif
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 1d572cf3140f..487303e3ef22 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -25,9 +25,6 @@ enum riscv_regset {
#ifdef CONFIG_FPU
REGSET_F,
#endif
-#ifdef CONFIG_RISCV_ISA_V
- REGSET_V,
-#endif
};
static int riscv_gpr_get(struct task_struct *target,
@@ -84,61 +81,6 @@ static int riscv_fpr_set(struct task_struct *target,
}
#endif
-#ifdef CONFIG_RISCV_ISA_V
-static int riscv_vr_get(struct task_struct *target,
- const struct user_regset *regset,
- struct membuf to)
-{
- struct __riscv_v_ext_state *vstate = &target->thread.vstate;
-
- if (!riscv_v_vstate_query(task_pt_regs(target)))
- return -EINVAL;
-
- /*
- * Ensure the vector registers have been saved to the memory before
- * copying them to membuf.
- */
- if (target == current)
- riscv_v_vstate_save(current, task_pt_regs(current));
-
- /* Copy vector header from vstate. */
- membuf_write(&to, vstate, offsetof(struct __riscv_v_ext_state, datap));
- membuf_zero(&to, sizeof(vstate->datap));
-
- /* Copy all the vector registers from vstate. */
- return membuf_write(&to, vstate->datap, riscv_v_vsize);
-}
-
-static int riscv_vr_set(struct task_struct *target,
- const struct user_regset *regset,
- unsigned int pos, unsigned int count,
- const void *kbuf, const void __user *ubuf)
-{
- int ret, size;
- struct __riscv_v_ext_state *vstate = &target->thread.vstate;
-
- if (!riscv_v_vstate_query(task_pt_regs(target)))
- return -EINVAL;
-
- /* Copy rest of the vstate except datap */
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0,
- offsetof(struct __riscv_v_ext_state, datap));
- if (unlikely(ret))
- return ret;
-
- /* Skip copy datap. */
- size = sizeof(vstate->datap);
- count -= size;
- ubuf += size;
-
- /* Copy all the vector registers. */
- pos = 0;
- ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap,
- 0, riscv_v_vsize);
- return ret;
-}
-#endif
-
static const struct user_regset riscv_user_regset[] = {
[REGSET_X] = {
.core_note_type = NT_PRSTATUS,
@@ -158,17 +100,6 @@ static const struct user_regset riscv_user_regset[] = {
.set = riscv_fpr_set,
},
#endif
-#ifdef CONFIG_RISCV_ISA_V
- [REGSET_V] = {
- .core_note_type = NT_RISCV_VECTOR,
- .align = 16,
- .n = ((32 * RISCV_MAX_VLENB) +
- sizeof(struct __riscv_v_ext_state)) / sizeof(__u32),
- .size = sizeof(__u32),
- .regset_get = riscv_vr_get,
- .set = riscv_vr_set,
- },
-#endif
};
static const struct user_regset_view riscv_user_native_view = {
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 85bbce0f758c..40420afbb1a0 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -61,11 +61,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid)
return -ENOENT;
}
-bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
-{
- return phys_id == cpuid_to_hartid_map(cpu);
-}
-
static void ipi_stop(void)
{
set_cpu_online(smp_processor_id(), false);
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f910dfccbf5d..f798c853bede 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -297,7 +297,7 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
{
if (user_mode(regs)) {
- ulong syscall = regs->a7;
+ long syscall = regs->a7;
regs->epc += 4;
regs->orig_a0 = regs->a0;
@@ -306,9 +306,9 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
syscall = syscall_enter_from_user_mode(regs, syscall);
- if (syscall < NR_syscalls)
+ if (syscall >= 0 && syscall < NR_syscalls)
syscall_handler(regs, syscall);
- else
+ else if (syscall != -1)
regs->a0 = -ENOSYS;
syscall_exit_to_user_mode(regs);
@@ -372,6 +372,9 @@ asmlinkage void noinstr do_irq(struct pt_regs *regs)
: [sp] "r" (sp), [regs] "r" (regs)
: "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7",
"t0", "t1", "t2", "t3", "t4", "t5", "t6",
+#ifndef CONFIG_FRAME_POINTER
+ "s0",
+#endif
"memory");
} else
#endif
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index ec486e5369d9..09b47ebacf2e 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -17,8 +17,11 @@ ENTRY(__asm_copy_from_user)
li t6, SR_SUM
csrs CSR_STATUS, t6
- /* Save for return value */
- mv t5, a2
+ /*
+ * Save the terminal address which will be used to compute the number
+ * of bytes copied in case of a fixup exception.
+ */
+ add t5, a0, a2
/*
* Register allocation for code below:
@@ -176,7 +179,7 @@ ENTRY(__asm_copy_from_user)
10:
/* Disable access to user memory */
csrc CSR_STATUS, t6
- mv a0, t5
+ sub a0, t5, a0
ret
ENDPROC(__asm_copy_to_user)
ENDPROC(__asm_copy_from_user)
@@ -228,7 +231,7 @@ ENTRY(__clear_user)
11:
/* Disable access to user memory */
csrc CSR_STATUS, t6
- mv a0, a1
+ sub a0, a3, a0
ret
ENDPROC(__clear_user)
EXPORT_SYMBOL(__clear_user)
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 9ce504737d18..e4c35ac2357f 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -26,12 +26,13 @@
#include <linux/kfence.h>
#include <asm/fixmap.h>
-#include <asm/tlbflush.h>
-#include <asm/sections.h>
-#include <asm/soc.h>
#include <asm/io.h>
-#include <asm/ptdump.h>
#include <asm/numa.h>
+#include <asm/pgtable.h>
+#include <asm/ptdump.h>
+#include <asm/sections.h>
+#include <asm/soc.h>
+#include <asm/tlbflush.h>
#include "../kernel/head.h"
@@ -214,8 +215,13 @@ static void __init setup_bootmem(void)
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
phys_ram_end = memblock_end_of_DRAM();
+
+ /*
+ * Make sure we align the start of the memory on a PMD boundary so that
+ * at worst, we map the linear mapping with PMD mappings.
+ */
if (!IS_ENABLED(CONFIG_XIP_KERNEL))
- phys_ram_base = memblock_start_of_DRAM();
+ phys_ram_base = memblock_start_of_DRAM() & PMD_MASK;
/*
* In 64-bit, any use of __va/__pa before this point is wrong as we
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 8fc0efcf905c..a01bc15dce24 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -22,7 +22,6 @@
* region is not and then we have to go down to the PUD level.
*/
-extern pgd_t early_pg_dir[PTRS_PER_PGD];
pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss;
pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss;
diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c
index ea3d61de065b..161d0b34c2cb 100644
--- a/arch/riscv/mm/pageattr.c
+++ b/arch/riscv/mm/pageattr.c
@@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr_ops = {
.pmd_entry = pageattr_pmd_entry,
.pte_entry = pageattr_pte_entry,
.pte_hole = pageattr_pte_hole,
+ .walk_lock = PGWALK_RDLOCK,
};
static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index aa95cf6dfabb..3a6a9a88bd38 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -116,7 +116,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_NET_TC_SKB_EXT=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -193,6 +192,7 @@ CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NFT_FIB_INET=m
+CONFIG_NETFILTER_XTABLES_COMPAT=y
CONFIG_NETFILTER_XT_SET=m
CONFIG_NETFILTER_XT_TARGET_AUDIT=m
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -379,6 +379,7 @@ CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_GATE=m
+CONFIG_NET_TC_SKB_EXT=y
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
@@ -395,6 +396,7 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_SAFE=y
+# CONFIG_FW_LOADER is not set
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -502,7 +504,6 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_WANGXUN is not set
# CONFIG_NET_VENDOR_LITEX is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
@@ -542,6 +543,7 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
@@ -624,7 +626,7 @@ CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QUOTA_DEBUG=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
@@ -646,7 +648,6 @@ CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS_INODE64=y
CONFIG_HUGETLBFS=y
-CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
@@ -690,7 +691,6 @@ CONFIG_HARDENED_USERCOPY=y
CONFIG_FORTIFY_SOURCE=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_SECURITY_LOCKDOWN_LSM=y
CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
CONFIG_SECURITY_LANDLOCK=y
@@ -744,7 +744,6 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_WP512=m
@@ -844,6 +843,7 @@ CONFIG_PREEMPT_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_USER_EVENTS=y
CONFIG_HIST_TRIGGERS=y
CONFIG_FTRACE_STARTUP_TEST=y
# CONFIG_EVENT_TRACE_STARTUP_TEST is not set
@@ -866,6 +866,7 @@ CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAIL_FUTEX=y
CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION_CONFIGFS=y
CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y
CONFIG_LKDTM=m
CONFIG_TEST_MIN_HEAP=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index f041945f9148..b13a5a0d99d2 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -107,7 +107,6 @@ CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
-CONFIG_NET_TC_SKB_EXT=y
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
@@ -184,6 +183,7 @@ CONFIG_NFT_REJECT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NFT_FIB_INET=m
+CONFIG_NETFILTER_XTABLES_COMPAT=y
CONFIG_NETFILTER_XT_SET=m
CONFIG_NETFILTER_XT_TARGET_AUDIT=m
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
@@ -369,6 +369,7 @@ CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_GATE=m
+CONFIG_NET_TC_SKB_EXT=y
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
@@ -385,6 +386,7 @@ CONFIG_HOTPLUG_PCI_S390=y
CONFIG_UEVENT_HELPER=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_SAFE=y
+# CONFIG_FW_LOADER is not set
CONFIG_CONNECTOR=y
CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
@@ -492,7 +494,6 @@ CONFIG_NLMON=m
# CONFIG_NET_VENDOR_GOOGLE is not set
# CONFIG_NET_VENDOR_HUAWEI is not set
# CONFIG_NET_VENDOR_INTEL is not set
-# CONFIG_NET_VENDOR_WANGXUN is not set
# CONFIG_NET_VENDOR_LITEX is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
@@ -532,6 +533,7 @@ CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_TI is not set
# CONFIG_NET_VENDOR_VERTEXCOM is not set
# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WANGXUN is not set
# CONFIG_NET_VENDOR_WIZNET is not set
# CONFIG_NET_VENDOR_XILINX is not set
CONFIG_PPP=m
@@ -609,7 +611,7 @@ CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_VIRTIO_FS=m
@@ -673,7 +675,6 @@ CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
-CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_SECURITY_LOCKDOWN_LSM=y
CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y
CONFIG_SECURITY_LANDLOCK=y
@@ -729,7 +730,6 @@ CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MD5=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_SM3_GENERIC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_WP512=m
@@ -793,6 +793,7 @@ CONFIG_STACK_TRACER=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_USER_EVENTS=y
CONFIG_HIST_TRIGGERS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_TRACE_PRINTK=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 6f68b39817ef..e62fb2015102 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -53,7 +53,6 @@ CONFIG_ZFCP=y
# CONFIG_HVC_IUCV is not set
# CONFIG_HW_RANDOM_S390 is not set
# CONFIG_HMC_DRV is not set
-# CONFIG_S390_UV_UAPI is not set
# CONFIG_S390_TAPE is not set
# CONFIG_VMCP is not set
# CONFIG_MONWRITER is not set
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index d29a9d908797..38349150c96e 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -103,7 +103,7 @@ static inline void _free_kb_keybuf(struct key_blob *kb)
{
if (kb->key && kb->key != kb->keybuf
&& kb->keylen > sizeof(kb->keybuf)) {
- kfree(kb->key);
+ kfree_sensitive(kb->key);
kb->key = NULL;
}
}
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index ee919bfc8186..5feef8da406b 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -53,7 +53,7 @@ static void hypfs_update_update(struct super_block *sb)
struct inode *inode = d_inode(sb_info->update_file);
sb_info->last_update = ktime_get_seconds();
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
/* directory tree removal functions */
@@ -101,7 +101,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode)
ret->i_mode = mode;
ret->i_uid = hypfs_info->uid;
ret->i_gid = hypfs_info->gid;
- ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+ ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
if (S_ISDIR(mode))
set_nlink(ret, 2);
}
diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h
index f0fe3bcc78a8..bb0826024bb9 100644
--- a/arch/s390/include/uapi/asm/ptrace.h
+++ b/arch/s390/include/uapi/asm/ptrace.h
@@ -8,6 +8,8 @@
#ifndef _UAPI_S390_PTRACE_H
#define _UAPI_S390_PTRACE_H
+#include <linux/const.h>
+
/*
* Offsets in the user_regs_struct. They are used for the ptrace
* system call and in entry.S
diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
index 4d141e2c132e..2ea7f208f0e7 100644
--- a/arch/s390/kernel/sthyi.c
+++ b/arch/s390/kernel/sthyi.c
@@ -459,9 +459,9 @@ static int sthyi_update_cache(u64 *rc)
*
* Fills the destination with system information returned by the STHYI
* instruction. The data is generated by emulation or execution of STHYI,
- * if available. The return value is the condition code that would be
- * returned, the rc parameter is the return code which is passed in
- * register R2 + 1.
+ * if available. The return value is either a negative error value or
+ * the condition code that would be returned, the rc parameter is the
+ * return code which is passed in register R2 + 1.
*/
int sthyi_fill(void *dst, u64 *rc)
{
diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl
index a6935af2235c..0122cc156952 100644
--- a/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@ -454,3 +454,4 @@
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2 sys_fchmodat2
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 954d39adf85c..341abafb96e4 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -389,8 +389,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
*/
int handle_sthyi(struct kvm_vcpu *vcpu)
{
- int reg1, reg2, r = 0;
- u64 code, addr, cc = 0, rc = 0;
+ int reg1, reg2, cc = 0, r = 0;
+ u64 code, addr, rc = 0;
struct sthyi_sctns *sctns = NULL;
if (!test_kvm_facility(vcpu->kvm, 74))
@@ -421,7 +421,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
return -ENOMEM;
cc = sthyi_fill(sctns, &rc);
-
+ if (cc < 0) {
+ free_page((unsigned long)sctns);
+ return cc;
+ }
out:
if (!cc) {
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 2f34c7c3c5ab..bf1fdc7bf89e 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -411,8 +411,12 @@ int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
u16 _rc, _rrc;
int cc = 0;
- /* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */
- atomic_inc(&kvm->mm->context.protected_count);
+ /*
+ * Nothing to do if the counter was already 0. Otherwise make sure
+ * the counter does not reach 0 before calling s390_uv_destroy_range.
+ */
+ if (!atomic_inc_not_zero(&kvm->mm->context.protected_count))
+ return 0;
*rc = 1;
/* If the current VM is protected, destroy it */
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index dbe8394234e2..2f123429a291 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -421,6 +421,8 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
vma_end_read(vma);
if (!(fault & VM_FAULT_RETRY)) {
count_vm_vma_lock_event(VMA_LOCK_SUCCESS);
+ if (likely(!(fault & VM_FAULT_ERROR)))
+ fault = 0;
goto out;
}
count_vm_vma_lock_event(VMA_LOCK_RETRY);
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 989ebd0912b4..906a7bfc2a78 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2514,6 +2514,7 @@ static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
static const struct mm_walk_ops thp_split_walk_ops = {
.pmd_entry = thp_split_walk_pmd_entry,
+ .walk_lock = PGWALK_WRLOCK_VERIFY,
};
static inline void thp_split_mm(struct mm_struct *mm)
@@ -2565,6 +2566,7 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
static const struct mm_walk_ops zap_zero_walk_ops = {
.pmd_entry = __zap_zero_pages,
+ .walk_lock = PGWALK_WRLOCK,
};
/*
@@ -2655,6 +2657,7 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
.hugetlb_entry = __s390_enable_skey_hugetlb,
.pte_entry = __s390_enable_skey_pte,
.pmd_entry = __s390_enable_skey_pmd,
+ .walk_lock = PGWALK_WRLOCK,
};
int s390_enable_skey(void)
@@ -2692,6 +2695,7 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
static const struct mm_walk_ops reset_cmma_walk_ops = {
.pte_entry = __s390_reset_cmma,
+ .walk_lock = PGWALK_WRLOCK,
};
void s390_reset_cmma(struct mm_struct *mm)
@@ -2728,6 +2732,7 @@ static int s390_gather_pages(pte_t *ptep, unsigned long addr,
static const struct mm_walk_ops gather_pages_ops = {
.pte_entry = s390_gather_pages,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
@@ -2853,6 +2858,7 @@ int s390_replace_asce(struct gmap *gmap)
page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
if (!page)
return -ENOMEM;
+ page->index = 0;
table = page_to_virt(page);
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index b26649233d12..24a66670f5c3 100644
--- a/arch/s390/mm/vmem.c
+++ b/arch/s390/mm/vmem.c
@@ -763,6 +763,8 @@ void __init vmem_map_init(void)
if (static_key_enabled(&cpu_has_bear))
set_memory_nx(0, 1);
set_memory_nx(PAGE_SIZE, 1);
+ if (debug_pagealloc_enabled())
+ set_memory_4k(0, ident_map_size >> PAGE_SHIFT);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);
diff --git a/arch/sh/configs/espt_defconfig b/arch/sh/configs/espt_defconfig
index 623012d9af9d..67716a44463e 100644
--- a/arch/sh/configs/espt_defconfig
+++ b/arch/sh/configs/espt_defconfig
@@ -61,7 +61,7 @@ CONFIG_USB_STORAGE=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig
index 41cb588ca99c..cd24cf08210e 100644
--- a/arch/sh/configs/sdk7780_defconfig
+++ b/arch/sh/configs/sdk7780_defconfig
@@ -105,7 +105,7 @@ CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index a080c5d05a7b..cf59b98446e4 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig
@@ -168,7 +168,7 @@ CONFIG_EXT3_FS=y
CONFIG_EXT4_FS=y
CONFIG_XFS_FS=y
CONFIG_BTRFS_FS=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_FSCACHE=m
diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig
index f661ef816f09..48f38ec236b6 100644
--- a/arch/sh/configs/sh03_defconfig
+++ b/arch/sh/configs/sh03_defconfig
@@ -60,7 +60,7 @@ CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/sh/configs/sh7763rdp_defconfig b/arch/sh/configs/sh7763rdp_defconfig
index ae726745d9a0..57923c3296cc 100644
--- a/arch/sh/configs/sh7763rdp_defconfig
+++ b/arch/sh/configs/sh7763rdp_defconfig
@@ -63,7 +63,7 @@ CONFIG_MMC=y
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_MSDOS_FS=y
CONFIG_VFAT_FS=y
CONFIG_PROC_KCORE=y
diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl
index 97377e8c5025..e90d585c4d3e 100644
--- a/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@ -454,3 +454,4 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/sparc/configs/sparc32_defconfig b/arch/sparc/configs/sparc32_defconfig
index 7c489e7f19b7..5010164de3e4 100644
--- a/arch/sparc/configs/sparc32_defconfig
+++ b/arch/sparc/configs/sparc32_defconfig
@@ -65,7 +65,7 @@ CONFIG_EXT2_FS=y
CONFIG_EXT2_FS_XATTR=y
CONFIG_EXT2_FS_POSIX_ACL=y
CONFIG_EXT2_FS_SECURITY=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_PROC_KCORE=y
CONFIG_ROMFS_FS=m
diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h
index 2667f35d5ea5..0a0d5c3d184c 100644
--- a/arch/sparc/include/asm/processor_64.h
+++ b/arch/sparc/include/asm/processor_64.h
@@ -213,7 +213,6 @@ unsigned long __get_wchan(struct task_struct *task);
*/
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
static inline void prefetch(const void *x)
{
@@ -239,8 +238,6 @@ static inline void prefetchw(const void *x)
: "r" (x));
}
-#define spin_lock_prefetch(x) prefetchw(x)
-
#define HAVE_ARCH_PICK_MMAP_LAYOUT
int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap);
diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl
index faa835f3c54a..4ed06c71c43f 100644
--- a/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@ -497,3 +497,4 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/arch/um/configs/i386_defconfig b/arch/um/configs/i386_defconfig
index 05ae535ba0dc..630be793759e 100644
--- a/arch/um/configs/i386_defconfig
+++ b/arch/um/configs/i386_defconfig
@@ -62,7 +62,7 @@ CONFIG_UML_NET_SLIRP=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=y
CONFIG_QUOTA=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_PROC_KCORE=y
diff --git a/arch/um/configs/x86_64_defconfig b/arch/um/configs/x86_64_defconfig
index 1c757238be7f..8540d3370272 100644
--- a/arch/um/configs/x86_64_defconfig
+++ b/arch/um/configs/x86_64_defconfig
@@ -60,7 +60,7 @@ CONFIG_UML_NET_SLIRP=y
CONFIG_EXT4_FS=y
CONFIG_REISERFS_FS=y
CONFIG_QUOTA=y
-CONFIG_AUTOFS4_FS=m
+CONFIG_AUTOFS_FS=m
CONFIG_ISO9660_FS=m
CONFIG_JOLIET=y
CONFIG_PROC_KCORE=y
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 37d60e72cf26..9e71794839e8 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -3,7 +3,6 @@
* Copyright (C) 2002 - 2008 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
-#include <linux/minmax.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
@@ -51,7 +50,7 @@ static struct pollfds all_sigio_fds;
static int write_sigio_thread(void *unused)
{
- struct pollfds *fds;
+ struct pollfds *fds, tmp;
struct pollfd *p;
int i, n, respond_fd;
char c;
@@ -78,7 +77,9 @@ static int write_sigio_thread(void *unused)
"write_sigio_thread : "
"read on socket failed, "
"err = %d\n", errno);
- swap(current_poll, next_poll);
+ tmp = current_poll;
+ current_poll = next_poll;
+ next_poll = tmp;
respond_fd = sigio_private[1];
}
else {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7422db409770..e36261b4ea14 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2593,6 +2593,13 @@ config CPU_IBRS_ENTRY
This mitigates both spectre_v2 and retbleed at great cost to
performance.
+config CPU_SRSO
+ bool "Mitigate speculative RAS overflow on AMD"
+ depends on CPU_SUP_AMD && X86_64 && RETHUNK
+ default y
+ help
+ Enable the SRSO mitigation needed on AMD Zen1-4 machines.
+
config SLS
bool "Mitigate Straight-Line-Speculation"
depends on CC_HAS_SLS && X86_64
@@ -2603,6 +2610,25 @@ config SLS
against straight line speculation. The kernel image might be slightly
larger.
+config GDS_FORCE_MITIGATION
+ bool "Force GDS Mitigation"
+ depends on CPU_SUP_INTEL
+ default n
+ help
+ Gather Data Sampling (GDS) is a hardware vulnerability which allows
+ unprivileged speculative access to data which was previously stored in
+ vector registers.
+
+ This option is equivalent to setting gather_data_sampling=force on the
+ command line. The microcode mitigation is used if present, otherwise
+ AVX is disabled as a mitigation. On affected systems that are missing
+ the microcode any userspace code that unconditionally uses AVX will
+ break with this option set.
+
+ Setting this option on systems not vulnerable to GDS has no effect.
+
+ If in doubt, say N.
+
endif
config ARCH_HAS_ADD_PAGES
diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c
index 6debb816e83d..3cdf94b41456 100644
--- a/arch/x86/boot/compressed/idt_64.c
+++ b/arch/x86/boot/compressed/idt_64.c
@@ -63,7 +63,14 @@ void load_stage2_idt(void)
set_idt_entry(X86_TRAP_PF, boot_page_fault);
#ifdef CONFIG_AMD_MEM_ENCRYPT
- set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+ /*
+ * Clear the second stage #VC handler in case guest types
+ * needing #VC have not been detected.
+ */
+ if (sev_status & BIT(1))
+ set_idt_entry(X86_TRAP_VC, boot_stage2_vc);
+ else
+ set_idt_entry(X86_TRAP_VC, NULL);
#endif
load_boot_idt(&boot_idt_desc);
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 09dc8c187b3c..c3e343bd4760 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -405,12 +405,45 @@ void sev_enable(struct boot_params *bp)
bp->cc_blob_address = 0;
/*
+ * Do an initial SEV capability check before snp_init() which
+ * loads the CPUID page and the same checks afterwards are done
+ * without the hypervisor and are trustworthy.
+ *
+ * If the HV fakes SEV support, the guest will crash'n'burn
+ * which is good enough.
+ */
+
+ /* Check for the SME/SEV support leaf */
+ eax = 0x80000000;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ if (eax < 0x8000001f)
+ return;
+
+ /*
+ * Check for the SME/SEV feature:
+ * CPUID Fn8000_001F[EAX]
+ * - Bit 0 - Secure Memory Encryption support
+ * - Bit 1 - Secure Encrypted Virtualization support
+ * CPUID Fn8000_001F[EBX]
+ * - Bits 5:0 - Pagetable bit position used to indicate encryption
+ */
+ eax = 0x8000001f;
+ ecx = 0;
+ native_cpuid(&eax, &ebx, &ecx, &edx);
+ /* Check whether SEV is supported */
+ if (!(eax & BIT(1)))
+ return;
+
+ /*
* Setup/preliminary detection of SNP. This will be sanity-checked
* against CPUID/MSR values later.
*/
snp = snp_init(bp);
- /* Check for the SME/SEV support leaf */
+ /* Now repeat the checks with the SNP CPUID table. */
+
+ /* Recheck the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
@@ -418,7 +451,7 @@ void sev_enable(struct boot_params *bp)
return;
/*
- * Check for the SME/SEV feature:
+ * Recheck for the SME/SEV feature:
* CPUID Fn8000_001F[EAX]
* - Bit 0 - Secure Memory Encryption support
* - Bit 1 - Secure Encrypted Virtualization support
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 3cf34912abfe..75a343f10e58 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -245,7 +245,7 @@ CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 27759236fd60..0902518e9b93 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -242,7 +242,7 @@ CONFIG_QUOTA=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
# CONFIG_PRINT_QUOTA_WARNING is not set
CONFIG_QFMT_V2=y
-CONFIG_AUTOFS4_FS=y
+CONFIG_AUTOFS_FS=y
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 91f6818884fa..43606de22511 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -285,7 +285,15 @@ SYM_FUNC_END(__switch_to_asm)
*/
.pushsection .text, "ax"
SYM_CODE_START(ret_from_fork_asm)
- UNWIND_HINT_REGS
+ /*
+ * This is the start of the kernel stack; even through there's a
+ * register set at the top, the regset isn't necessarily coherent
+ * (consider kthreads) and one cannot unwind further.
+ *
+ * This ensures stack unwinds of kernel threads terminate in a known
+ * good state.
+ */
+ UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR // copy_thread
CALL_DEPTH_ACCOUNT
@@ -295,6 +303,12 @@ SYM_CODE_START(ret_from_fork_asm)
movq %r12, %rcx /* fn_arg */
call ret_from_fork
+ /*
+ * Set the stack state to what is expected for the target function
+ * -- at this point the register set should be a valid user set
+ * and unwind should work normally.
+ */
+ UNWIND_HINT_REGS
jmp swapgs_restore_regs_and_return_to_usermode
SYM_CODE_END(ret_from_fork_asm)
.popsection
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index bc0a3c941b35..2d0b1bd866ea 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -456,3 +456,4 @@
449 i386 futex_waitv sys_futex_waitv
450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node
451 i386 cachestat sys_cachestat
+452 i386 fchmodat2 sys_fchmodat2
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 227538b0ce80..814768249eae 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -373,6 +373,7 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 11a5c68d1218..7645730dc228 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -299,8 +299,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len)
/* Round the lowest possible end address up to a PMD boundary. */
end = (start + len + PMD_SIZE - 1) & PMD_MASK;
- if (end >= TASK_SIZE_MAX)
- end = TASK_SIZE_MAX;
+ if (end >= DEFAULT_MAP_WINDOW)
+ end = DEFAULT_MAP_WINDOW;
end -= len;
if (end > start) {
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 1fbda2f94184..b21335e6a210 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -107,7 +107,6 @@ static bool cpu_is_self(int cpu)
static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
bool exclude_self)
{
- struct hv_send_ipi_ex **arg;
struct hv_send_ipi_ex *ipi_arg;
unsigned long flags;
int nr_bank = 0;
@@ -117,9 +116,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector,
return false;
local_irq_save(flags);
- arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg);
+ ipi_arg = *this_cpu_ptr(hyperv_pcpu_input_arg);
- ipi_arg = *arg;
if (unlikely(!ipi_arg))
goto ipi_mask_ex_done;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 6c04b52f139b..953e280c07c3 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -14,6 +14,7 @@
#include <asm/apic.h>
#include <asm/desc.h>
#include <asm/sev.h>
+#include <asm/ibt.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
@@ -472,6 +473,26 @@ void __init hyperv_init(void)
}
/*
+ * Some versions of Hyper-V that provide IBT in guest VMs have a bug
+ * in that there's no ENDBR64 instruction at the entry to the
+ * hypercall page. Because hypercalls are invoked via an indirect call
+ * to the hypercall page, all hypercall attempts fail when IBT is
+ * enabled, and Linux panics. For such buggy versions, disable IBT.
+ *
+ * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall
+ * page, so if future Linux kernel versions enable IBT for 32-bit
+ * builds, additional hypercall page hackery will be required here
+ * to provide an ENDBR32.
+ */
+#ifdef CONFIG_X86_KERNEL_IBT
+ if (cpu_feature_enabled(X86_FEATURE_IBT) &&
+ *(u32 *)hv_hypercall_pg != gen_endbr()) {
+ setup_clear_cpu_cap(X86_FEATURE_IBT);
+ pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n");
+ }
+#endif
+
+ /*
* hyperv_init() is called before LAPIC is initialized: see
* apic_intr_mode_init() -> x86_platform.apic_post_init() and
* apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER
diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c
index 85d38b9f3586..db5d2ea39fc0 100644
--- a/arch/x86/hyperv/hv_vtl.c
+++ b/arch/x86/hyperv/hv_vtl.c
@@ -25,6 +25,10 @@ void __init hv_vtl_init_platform(void)
x86_init.irqs.pre_vector_init = x86_init_noop;
x86_init.timers.timer_init = x86_init_noop;
+ /* Avoid searching for BIOS MP tables */
+ x86_init.mpparse.find_smp_config = x86_init_noop;
+ x86_init.mpparse.get_smp_config = x86_init_uint_noop;
+
x86_platform.get_wallclock = get_rtc_noop;
x86_platform.set_wallclock = set_rtc_noop;
x86_platform.get_nmi_reason = hv_get_nmi_reason;
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
index 14f46ad2ca64..28be6df88063 100644
--- a/arch/x86/hyperv/ivm.c
+++ b/arch/x86/hyperv/ivm.c
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(hv_ghcb_msr_read);
static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
enum hv_mem_host_visibility visibility)
{
- struct hv_gpa_range_for_visibility **input_pcpu, *input;
+ struct hv_gpa_range_for_visibility *input;
u16 pages_processed;
u64 hv_status;
unsigned long flags;
@@ -263,9 +263,8 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
}
local_irq_save(flags);
- input_pcpu = (struct hv_gpa_range_for_visibility **)
- this_cpu_ptr(hyperv_pcpu_input_arg);
- input = *input_pcpu;
+ input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+
if (unlikely(!input)) {
local_irq_restore(flags);
return -EINVAL;
diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c
index 8460bd35e10c..1cc113200ff5 100644
--- a/arch/x86/hyperv/mmu.c
+++ b/arch/x86/hyperv/mmu.c
@@ -61,7 +61,6 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
int cpu, vcpu, gva_n, max_gvas;
- struct hv_tlb_flush **flush_pcpu;
struct hv_tlb_flush *flush;
u64 status;
unsigned long flags;
@@ -74,10 +73,7 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus,
local_irq_save(flags);
- flush_pcpu = (struct hv_tlb_flush **)
- this_cpu_ptr(hyperv_pcpu_input_arg);
-
- flush = *flush_pcpu;
+ flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
if (unlikely(!flush)) {
local_irq_restore(flags);
@@ -178,17 +174,13 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
const struct flush_tlb_info *info)
{
int nr_bank = 0, max_gvas, gva_n;
- struct hv_tlb_flush_ex **flush_pcpu;
struct hv_tlb_flush_ex *flush;
u64 status;
if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
return HV_STATUS_INVALID_PARAMETER;
- flush_pcpu = (struct hv_tlb_flush_ex **)
- this_cpu_ptr(hyperv_pcpu_input_arg);
-
- flush = *flush_pcpu;
+ flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
if (info->mm) {
/*
diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c
index 5d70968c8538..9dc259fa322e 100644
--- a/arch/x86/hyperv/nested.c
+++ b/arch/x86/hyperv/nested.c
@@ -19,7 +19,6 @@
int hyperv_flush_guest_mapping(u64 as)
{
- struct hv_guest_mapping_flush **flush_pcpu;
struct hv_guest_mapping_flush *flush;
u64 status;
unsigned long flags;
@@ -30,10 +29,7 @@ int hyperv_flush_guest_mapping(u64 as)
local_irq_save(flags);
- flush_pcpu = (struct hv_guest_mapping_flush **)
- this_cpu_ptr(hyperv_pcpu_input_arg);
-
- flush = *flush_pcpu;
+ flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
if (unlikely(!flush)) {
local_irq_restore(flags);
@@ -90,7 +86,6 @@ EXPORT_SYMBOL_GPL(hyperv_fill_flush_guest_mapping_list);
int hyperv_flush_guest_mapping_range(u64 as,
hyperv_fill_flush_list_func fill_flush_list_func, void *data)
{
- struct hv_guest_mapping_flush_list **flush_pcpu;
struct hv_guest_mapping_flush_list *flush;
u64 status;
unsigned long flags;
@@ -102,10 +97,8 @@ int hyperv_flush_guest_mapping_range(u64 as,
local_irq_save(flags);
- flush_pcpu = (struct hv_guest_mapping_flush_list **)
- this_cpu_ptr(hyperv_pcpu_input_arg);
+ flush = *this_cpu_ptr(hyperv_pcpu_input_arg);
- flush = *flush_pcpu;
if (unlikely(!flush)) {
local_irq_restore(flags);
goto fault;
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 8eb74cf386db..2888c0ee4df0 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -15,6 +15,7 @@
#include <asm/mpspec.h>
#include <asm/x86_init.h>
#include <asm/cpufeature.h>
+#include <asm/irq_vectors.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
@@ -31,6 +32,7 @@ extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
extern int acpi_fix_pin2_polarity;
extern int acpi_disable_cmcff;
+extern bool acpi_int_src_ovr[NR_IRQS_LEGACY];
extern u8 acpi_sci_flags;
extern u32 acpi_sci_override_gsi;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index cb8ca46213be..b69b0d7756aa 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -14,7 +14,7 @@
* Defines x86 CPU feature bits
*/
#define NCAPINTS 21 /* N 32-bit words worth of info */
-#define NBUGINTS 1 /* N 32-bit bug flags */
+#define NBUGINTS 2 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
@@ -309,6 +309,10 @@
#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
+
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -442,6 +446,10 @@
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */
+#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+
/*
* BUG word(s)
*/
@@ -483,5 +491,9 @@
#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
+/* BUG word 2 */
+#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 117903881fe4..ce8f50192ae3 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -92,6 +92,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
static __always_inline void arch_exit_to_user_mode(void)
{
mds_user_clear_cpu_buffers();
+ amd_clear_divider();
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 13bc212cd4bc..e3054e3e46d5 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -37,6 +37,7 @@ KVM_X86_OP(get_segment)
KVM_X86_OP(get_cpl)
KVM_X86_OP(set_segment)
KVM_X86_OP(get_cs_db_l_bits)
+KVM_X86_OP(is_valid_cr0)
KVM_X86_OP(set_cr0)
KVM_X86_OP_OPTIONAL(post_set_cr3)
KVM_X86_OP(is_valid_cr4)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 28bd38303d70..3bc146dfd38d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1566,9 +1566,10 @@ struct kvm_x86_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
+ bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
- bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
+ bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 0953aa32a324..97a3de7892d3 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -21,7 +21,7 @@
#define FUNCTION_PADDING
#endif
-#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO)
+#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
# define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING
#else
# define __FUNC_ALIGN __ALIGN
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 320566a0443d..66dbba181bd9 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -5,6 +5,7 @@
#include <asm/cpu.h>
#include <linux/earlycpio.h>
#include <linux/initrd.h>
+#include <asm/microcode_amd.h>
struct ucode_patch {
struct list_head plist;
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h
index e6662adf3af4..9675c621c1ca 100644
--- a/arch/x86/include/asm/microcode_amd.h
+++ b/arch/x86/include/asm/microcode_amd.h
@@ -48,11 +48,13 @@ extern void __init load_ucode_amd_bsp(unsigned int family);
extern void load_ucode_amd_ap(unsigned int family);
extern int __init save_microcode_in_initrd_amd(unsigned int family);
void reload_ucode_amd(unsigned int cpu);
+extern void amd_check_microcode(void);
#else
static inline void __init load_ucode_amd_bsp(unsigned int family) {}
static inline void load_ucode_amd_ap(unsigned int family) {}
static inline int __init
save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; }
static inline void reload_ucode_amd(unsigned int cpu) {}
+static inline void amd_check_microcode(void) {}
#endif
#endif /* _ASM_X86_MICROCODE_AMD_H */
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 88d9ef98e087..fa83d88e4c99 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -5,7 +5,7 @@
#include <linux/types.h>
#include <linux/nmi.h>
#include <linux/msi.h>
-#include <asm/io.h>
+#include <linux/io.h>
#include <asm/hyperv-tlfs.h>
#include <asm/nospec-branch.h>
#include <asm/paravirt.h>
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 3aedae61af4f..1d111350197f 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -57,6 +57,7 @@
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
+#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */
#define MSR_PPIN_CTL 0x0000004e
#define MSR_PPIN 0x0000004f
@@ -155,6 +156,15 @@
* Not susceptible to Post-Barrier
* Return Stack Buffer Predictions.
*/
+#define ARCH_CAP_GDS_CTRL BIT(25) /*
+ * CPU is vulnerable to Gather
+ * Data Sampling (GDS) and
+ * has controls for mitigation.
+ */
+#define ARCH_CAP_GDS_NO BIT(26) /*
+ * CPU is not vulnerable to Gather
+ * Data Sampling (GDS).
+ */
#define ARCH_CAP_XAPIC_DISABLE BIT(21) /*
* IA32_XAPIC_DISABLE_STATUS MSR
@@ -178,6 +188,8 @@
#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
#define RTM_ALLOW BIT(1) /* TSX development mode */
#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
+#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */
+#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
@@ -545,6 +557,7 @@
#define MSR_AMD64_DE_CFG 0xc0011029
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 1a65cf4acb2b..c55cc243592e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -211,7 +211,8 @@
* eventually turn into it's own annotation.
*/
.macro VALIDATE_UNRET_END
-#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY)
+#if defined(CONFIG_NOINSTR_VALIDATION) && \
+ (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO))
ANNOTATE_RETPOLINE_SAFE
nop
#endif
@@ -271,9 +272,9 @@
.endm
#ifdef CONFIG_CPU_UNRET_ENTRY
-#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
+#define CALL_UNTRAIN_RET "call entry_untrain_ret"
#else
-#define CALL_ZEN_UNTRAIN_RET ""
+#define CALL_UNTRAIN_RET ""
#endif
/*
@@ -281,7 +282,7 @@
* return thunk isn't mapped into the userspace tables (then again, AMD
* typically has NO_MELTDOWN).
*
- * While zen_untrain_ret() doesn't clobber anything but requires stack,
+ * While retbleed_untrain_ret() doesn't clobber anything but requires stack,
* entry_ibpb() will clobber AX, CX, DX.
*
* As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
@@ -289,21 +290,32 @@
*/
.macro UNTRAIN_RET
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
- defined(CONFIG_CALL_DEPTH_TRACKING)
+ defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
VALIDATE_UNRET_END
ALTERNATIVE_3 "", \
- CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
"call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
__stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
#endif
.endm
+.macro UNTRAIN_RET_VM
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+ defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO)
+ VALIDATE_UNRET_END
+ ALTERNATIVE_3 "", \
+ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \
+ __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
.macro UNTRAIN_RET_FROM_CALL
#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
defined(CONFIG_CALL_DEPTH_TRACKING)
VALIDATE_UNRET_END
ALTERNATIVE_3 "", \
- CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \
"call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \
__stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
#endif
@@ -330,15 +342,24 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[];
extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
+#ifdef CONFIG_RETHUNK
extern void __x86_return_thunk(void);
-extern void zen_untrain_ret(void);
+#else
+static inline void __x86_return_thunk(void) {}
+#endif
+
+extern void retbleed_return_thunk(void);
+extern void srso_return_thunk(void);
+extern void srso_alias_return_thunk(void);
+
+extern void retbleed_untrain_ret(void);
+extern void srso_untrain_ret(void);
+extern void srso_alias_untrain_ret(void);
+
+extern void entry_untrain_ret(void);
extern void entry_ibpb(void);
-#ifdef CONFIG_CALL_THUNKS
extern void (*x86_return_thunk)(void);
-#else
-#define x86_return_thunk (&__x86_return_thunk)
-#endif
#ifdef CONFIG_CALL_DEPTH_TRACKING
extern void __x86_return_skl(void);
@@ -465,9 +486,6 @@ enum ssb_mitigation {
SPEC_STORE_BYPASS_SECCOMP,
};
-extern char __indirect_thunk_start[];
-extern char __indirect_thunk_end[];
-
static __always_inline
void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
{
@@ -479,11 +497,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
: "memory");
}
+extern u64 x86_pred_cmd;
+
static inline void indirect_branch_prediction_barrier(void)
{
- u64 val = PRED_CMD_IBPB;
-
- alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
+ alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB);
}
/* The Intel SPEC CTRL MSR base value cache */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d46300e94f85..fd750247ca89 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -586,7 +586,6 @@ extern char ignore_fpu_irq;
#define HAVE_ARCH_PICK_MMAP_LAYOUT 1
#define ARCH_HAS_PREFETCHW
-#define ARCH_HAS_SPINLOCK_PREFETCH
#ifdef CONFIG_X86_32
# define BASE_PREFETCH ""
@@ -620,11 +619,6 @@ static __always_inline void prefetchw(const void *x)
"m" (*(const char *)x));
}
-static inline void spin_lock_prefetch(const void *x)
-{
- prefetchw(x);
-}
-
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
TOP_OF_KERNEL_STACK_PADDING)
@@ -682,9 +676,13 @@ extern u16 get_llc_id(unsigned int cpu);
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
extern u32 amd_get_highest_perf(void);
+extern bool cpu_has_ibpb_brtype_microcode(void);
+extern void amd_clear_divider(void);
#else
static inline u32 amd_get_nodes_per_socket(void) { return 0; }
static inline u32 amd_get_highest_perf(void) { return 0; }
+static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; }
+static inline void amd_clear_divider(void) { }
#endif
extern unsigned long arch_align_stack(unsigned long sp);
@@ -727,4 +725,6 @@ bool arch_is_platform_page(u64 paddr);
#define arch_is_platform_page arch_is_platform_page
#endif
+extern bool gds_ucode_mitigated(void);
+
#endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index 794f69625780..9d6411c65920 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -56,7 +56,7 @@
#define GDT_ENTRY_INVALID_SEG 0
-#ifdef CONFIG_X86_32
+#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64)
/*
* The layout of the per-CPU GDT under Linux:
*
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 21b542a6866c..53369c57751e 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -52,6 +52,7 @@ int acpi_lapic;
int acpi_ioapic;
int acpi_strict;
int acpi_disable_cmcff;
+bool acpi_int_src_ovr[NR_IRQS_LEGACY];
/* ACPI SCI override configuration */
u8 acpi_sci_flags __initdata;
@@ -588,6 +589,9 @@ acpi_parse_int_src_ovr(union acpi_subtable_headers * header,
acpi_table_print_madt_entry(&header->common);
+ if (intsrc->source_irq < NR_IRQS_LEGACY)
+ acpi_int_src_ovr[intsrc->source_irq] = true;
+
if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) {
acpi_sci_ioapic_setup(intsrc->source_irq,
intsrc->inti_flags & ACPI_MADT_POLARITY_MASK,
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 2dcf3a06af09..099d58d02a26 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -687,10 +687,6 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
#ifdef CONFIG_RETHUNK
-#ifdef CONFIG_CALL_THUNKS
-void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
-#endif
-
/*
* Rewrite the compiler generated return thunk tail-calls.
*
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 571abf808ea3..7eca6a8abbb1 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -27,11 +27,6 @@
#include "cpu.h"
-static const int amd_erratum_383[];
-static const int amd_erratum_400[];
-static const int amd_erratum_1054[];
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
-
/*
* nodes_per_socket: Stores the number of nodes per socket.
* Refer to Fam15h Models 00-0fh BKDG - CPUID Fn8000_001E_ECX
@@ -39,6 +34,83 @@ static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum);
*/
static u32 nodes_per_socket = 1;
+/*
+ * AMD errata checking
+ *
+ * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
+ * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
+ * have an OSVW id assigned, which it takes as first argument. Both take a
+ * variable number of family-specific model-stepping ranges created by
+ * AMD_MODEL_RANGE().
+ *
+ * Example:
+ *
+ * const int amd_erratum_319[] =
+ * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
+ * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
+ * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
+ */
+
+#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
+#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+
+static const int amd_erratum_400[] =
+ AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
+ AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
+
+static const int amd_erratum_383[] =
+ AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
+
+/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
+static const int amd_erratum_1054[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
+
+static const int amd_zenbleed[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf),
+ AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf));
+
+static const int amd_div0[] =
+ AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf),
+ AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf));
+
+static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
+{
+ int osvw_id = *erratum++;
+ u32 range;
+ u32 ms;
+
+ if (osvw_id >= 0 && osvw_id < 65536 &&
+ cpu_has(cpu, X86_FEATURE_OSVW)) {
+ u64 osvw_len;
+
+ rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
+ if (osvw_id < osvw_len) {
+ u64 osvw_bits;
+
+ rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
+ osvw_bits);
+ return osvw_bits & (1ULL << (osvw_id & 0x3f));
+ }
+ }
+
+ /* OSVW unavailable or ID unknown, match family-model-stepping range */
+ ms = (cpu->x86_model << 4) | cpu->x86_stepping;
+ while ((range = *erratum++))
+ if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
+ (ms >= AMD_MODEL_RANGE_START(range)) &&
+ (ms <= AMD_MODEL_RANGE_END(range)))
+ return true;
+
+ return false;
+}
+
static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p)
{
u32 gprs[8] = { 0 };
@@ -916,6 +988,47 @@ static void init_amd_zn(struct cpuinfo_x86 *c)
}
}
+static bool cpu_has_zenbleed_microcode(void)
+{
+ u32 good_rev = 0;
+
+ switch (boot_cpu_data.x86_model) {
+ case 0x30 ... 0x3f: good_rev = 0x0830107a; break;
+ case 0x60 ... 0x67: good_rev = 0x0860010b; break;
+ case 0x68 ... 0x6f: good_rev = 0x08608105; break;
+ case 0x70 ... 0x7f: good_rev = 0x08701032; break;
+ case 0xa0 ... 0xaf: good_rev = 0x08a00008; break;
+
+ default:
+ return false;
+ break;
+ }
+
+ if (boot_cpu_data.microcode < good_rev)
+ return false;
+
+ return true;
+}
+
+static void zenbleed_check(struct cpuinfo_x86 *c)
+{
+ if (!cpu_has_amd_erratum(c, amd_zenbleed))
+ return;
+
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return;
+
+ if (!cpu_has(c, X86_FEATURE_AVX))
+ return;
+
+ if (!cpu_has_zenbleed_microcode()) {
+ pr_notice_once("Zenbleed: please update your microcode for the most optimal fix\n");
+ msr_set_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+ } else {
+ msr_clear_bit(MSR_AMD64_DE_CFG, MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT);
+ }
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
early_init_amd(c);
@@ -1020,6 +1133,13 @@ static void init_amd(struct cpuinfo_x86 *c)
if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
cpu_has(c, X86_FEATURE_AUTOIBRS))
WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS));
+
+ zenbleed_check(c);
+
+ if (cpu_has_amd_erratum(c, amd_div0)) {
+ pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n");
+ setup_force_cpu_bug(X86_BUG_DIV0);
+ }
}
#ifdef CONFIG_X86_32
@@ -1115,73 +1235,6 @@ static const struct cpu_dev amd_cpu_dev = {
cpu_dev_register(amd_cpu_dev);
-/*
- * AMD errata checking
- *
- * Errata are defined as arrays of ints using the AMD_LEGACY_ERRATUM() or
- * AMD_OSVW_ERRATUM() macros. The latter is intended for newer errata that
- * have an OSVW id assigned, which it takes as first argument. Both take a
- * variable number of family-specific model-stepping ranges created by
- * AMD_MODEL_RANGE().
- *
- * Example:
- *
- * const int amd_erratum_319[] =
- * AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0x4, 0x2),
- * AMD_MODEL_RANGE(0x10, 0x8, 0x0, 0x8, 0x0),
- * AMD_MODEL_RANGE(0x10, 0x9, 0x0, 0x9, 0x0));
- */
-
-#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
-#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
-#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
- ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
-#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
-#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
-#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
-
-static const int amd_erratum_400[] =
- AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0xf, 0x41, 0x2, 0xff, 0xf),
- AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));
-
-static const int amd_erratum_383[] =
- AMD_OSVW_ERRATUM(3, AMD_MODEL_RANGE(0x10, 0, 0, 0xff, 0xf));
-
-/* #1054: Instructions Retired Performance Counter May Be Inaccurate */
-static const int amd_erratum_1054[] =
- AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf));
-
-static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum)
-{
- int osvw_id = *erratum++;
- u32 range;
- u32 ms;
-
- if (osvw_id >= 0 && osvw_id < 65536 &&
- cpu_has(cpu, X86_FEATURE_OSVW)) {
- u64 osvw_len;
-
- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, osvw_len);
- if (osvw_id < osvw_len) {
- u64 osvw_bits;
-
- rdmsrl(MSR_AMD64_OSVW_STATUS + (osvw_id >> 6),
- osvw_bits);
- return osvw_bits & (1ULL << (osvw_id & 0x3f));
- }
- }
-
- /* OSVW unavailable or ID unknown, match family-model-stepping range */
- ms = (cpu->x86_model << 4) | cpu->x86_stepping;
- while ((range = *erratum++))
- if ((cpu->x86 == AMD_MODEL_RANGE_FAMILY(range)) &&
- (ms >= AMD_MODEL_RANGE_START(range)) &&
- (ms <= AMD_MODEL_RANGE_END(range)))
- return true;
-
- return false;
-}
-
static DEFINE_PER_CPU_READ_MOSTLY(unsigned long[4], amd_dr_addr_mask);
static unsigned int amd_msr_dr_addr_masks[] = {
@@ -1235,3 +1288,45 @@ u32 amd_get_highest_perf(void)
return 255;
}
EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+
+static void zenbleed_check_cpu(void *unused)
+{
+ struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
+
+ zenbleed_check(c);
+}
+
+void amd_check_microcode(void)
+{
+ on_each_cpu(zenbleed_check_cpu, NULL, 1);
+}
+
+bool cpu_has_ibpb_brtype_microcode(void)
+{
+ switch (boot_cpu_data.x86) {
+ /* Zen1/2 IBPB flushes branch type predictions too. */
+ case 0x17:
+ return boot_cpu_has(X86_FEATURE_AMD_IBPB);
+ case 0x19:
+ /* Poke the MSR bit on Zen3/4 to check its presence. */
+ if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) {
+ setup_force_cpu_cap(X86_FEATURE_SBPB);
+ return true;
+ } else {
+ return false;
+ }
+ default:
+ return false;
+ }
+}
+
+/*
+ * Issue a DIV 0/1 insn to clear any division data from previous DIV
+ * operations.
+ */
+void noinstr amd_clear_divider(void)
+{
+ asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0)
+ :: "a" (0), "d" (0), "r" (1));
+}
+EXPORT_SYMBOL_GPL(amd_clear_divider);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 9e2a91830f72..f081d26616ac 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -47,6 +47,8 @@ static void __init taa_select_mitigation(void);
static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
+static void __init srso_select_mitigation(void);
+static void __init gds_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
@@ -56,8 +58,13 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
+u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB;
+EXPORT_SYMBOL_GPL(x86_pred_cmd);
+
static DEFINE_MUTEX(spec_ctrl_mutex);
+void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+
/* Update SPEC_CTRL MSR and its cached copy unconditionally */
static void update_spec_ctrl(u64 val)
{
@@ -160,6 +167,13 @@ void __init cpu_select_mitigations(void)
md_clear_select_mitigation();
srbds_select_mitigation();
l1d_flush_select_mitigation();
+
+ /*
+ * srso_select_mitigation() depends and must run after
+ * retbleed_select_mitigation().
+ */
+ srso_select_mitigation();
+ gds_select_mitigation();
}
/*
@@ -646,6 +660,149 @@ static int __init l1d_flush_parse_cmdline(char *str)
early_param("l1d_flush", l1d_flush_parse_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "GDS: " fmt
+
+enum gds_mitigations {
+ GDS_MITIGATION_OFF,
+ GDS_MITIGATION_UCODE_NEEDED,
+ GDS_MITIGATION_FORCE,
+ GDS_MITIGATION_FULL,
+ GDS_MITIGATION_FULL_LOCKED,
+ GDS_MITIGATION_HYPERVISOR,
+};
+
+#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION)
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE;
+#else
+static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL;
+#endif
+
+static const char * const gds_strings[] = {
+ [GDS_MITIGATION_OFF] = "Vulnerable",
+ [GDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode",
+ [GDS_MITIGATION_FORCE] = "Mitigation: AVX disabled, no microcode",
+ [GDS_MITIGATION_FULL] = "Mitigation: Microcode",
+ [GDS_MITIGATION_FULL_LOCKED] = "Mitigation: Microcode (locked)",
+ [GDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status",
+};
+
+bool gds_ucode_mitigated(void)
+{
+ return (gds_mitigation == GDS_MITIGATION_FULL ||
+ gds_mitigation == GDS_MITIGATION_FULL_LOCKED);
+}
+EXPORT_SYMBOL_GPL(gds_ucode_mitigated);
+
+void update_gds_msr(void)
+{
+ u64 mcu_ctrl_after;
+ u64 mcu_ctrl;
+
+ switch (gds_mitigation) {
+ case GDS_MITIGATION_OFF:
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ mcu_ctrl |= GDS_MITG_DIS;
+ break;
+ case GDS_MITIGATION_FULL_LOCKED:
+ /*
+ * The LOCKED state comes from the boot CPU. APs might not have
+ * the same state. Make sure the mitigation is enabled on all
+ * CPUs.
+ */
+ case GDS_MITIGATION_FULL:
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ mcu_ctrl &= ~GDS_MITG_DIS;
+ break;
+ case GDS_MITIGATION_FORCE:
+ case GDS_MITIGATION_UCODE_NEEDED:
+ case GDS_MITIGATION_HYPERVISOR:
+ return;
+ };
+
+ wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+
+ /*
+ * Check to make sure that the WRMSR value was not ignored. Writes to
+ * GDS_MITG_DIS will be ignored if this processor is locked but the boot
+ * processor was not.
+ */
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after);
+ WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after);
+}
+
+static void __init gds_select_mitigation(void)
+{
+ u64 mcu_ctrl;
+
+ if (!boot_cpu_has_bug(X86_BUG_GDS))
+ return;
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ gds_mitigation = GDS_MITIGATION_HYPERVISOR;
+ goto out;
+ }
+
+ if (cpu_mitigations_off())
+ gds_mitigation = GDS_MITIGATION_OFF;
+ /* Will verify below that mitigation _can_ be disabled */
+
+ /* No microcode */
+ if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) {
+ if (gds_mitigation == GDS_MITIGATION_FORCE) {
+ /*
+ * This only needs to be done on the boot CPU so do it
+ * here rather than in update_gds_msr()
+ */
+ setup_clear_cpu_cap(X86_FEATURE_AVX);
+ pr_warn("Microcode update needed! Disabling AVX as mitigation.\n");
+ } else {
+ gds_mitigation = GDS_MITIGATION_UCODE_NEEDED;
+ }
+ goto out;
+ }
+
+ /* Microcode has mitigation, use it */
+ if (gds_mitigation == GDS_MITIGATION_FORCE)
+ gds_mitigation = GDS_MITIGATION_FULL;
+
+ rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl);
+ if (mcu_ctrl & GDS_MITG_LOCKED) {
+ if (gds_mitigation == GDS_MITIGATION_OFF)
+ pr_warn("Mitigation locked. Disable failed.\n");
+
+ /*
+ * The mitigation is selected from the boot CPU. All other CPUs
+ * _should_ have the same state. If the boot CPU isn't locked
+ * but others are then update_gds_msr() will WARN() of the state
+ * mismatch. If the boot CPU is locked update_gds_msr() will
+ * ensure the other CPUs have the mitigation enabled.
+ */
+ gds_mitigation = GDS_MITIGATION_FULL_LOCKED;
+ }
+
+ update_gds_msr();
+out:
+ pr_info("%s\n", gds_strings[gds_mitigation]);
+}
+
+static int __init gds_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!boot_cpu_has_bug(X86_BUG_GDS))
+ return 0;
+
+ if (!strcmp(str, "off"))
+ gds_mitigation = GDS_MITIGATION_OFF;
+ else if (!strcmp(str, "force"))
+ gds_mitigation = GDS_MITIGATION_FORCE;
+
+ return 0;
+}
+early_param("gather_data_sampling", gds_parse_cmdline);
+
+#undef pr_fmt
#define pr_fmt(fmt) "Spectre V1 : " fmt
enum spectre_v1_mitigation {
@@ -885,6 +1042,9 @@ do_cmd_auto:
setup_force_cpu_cap(X86_FEATURE_RETHUNK);
setup_force_cpu_cap(X86_FEATURE_UNRET);
+ if (IS_ENABLED(CONFIG_RETHUNK))
+ x86_return_thunk = retbleed_return_thunk;
+
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
pr_err(RETBLEED_UNTRAIN_MSG);
@@ -894,6 +1054,7 @@ do_cmd_auto:
case RETBLEED_MITIGATION_IBPB:
setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
mitigate_smt = true;
break;
@@ -1150,19 +1311,21 @@ spectre_v2_user_select_mitigation(void)
}
/*
- * If no STIBP, enhanced IBRS is enabled, or SMT impossible, STIBP
+ * If no STIBP, Intel enhanced IBRS is enabled, or SMT impossible, STIBP
* is not required.
*
- * Enhanced IBRS also protects against cross-thread branch target
+ * Intel's Enhanced IBRS also protects against cross-thread branch target
* injection in user-mode as the IBRS bit remains always set which
* implicitly enables cross-thread protections. However, in legacy IBRS
* mode, the IBRS bit is set only on kernel entry and cleared on return
- * to userspace. This disables the implicit cross-thread protection,
- * so allow for STIBP to be selected in that case.
+ * to userspace. AMD Automatic IBRS also does not protect userspace.
+ * These modes therefore disable the implicit cross-thread protection,
+ * so allow for STIBP to be selected in those cases.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+ !boot_cpu_has(X86_FEATURE_AUTOIBRS)))
return;
/*
@@ -2186,6 +2349,170 @@ static int __init l1tf_cmdline(char *str)
early_param("l1tf", l1tf_cmdline);
#undef pr_fmt
+#define pr_fmt(fmt) "Speculative Return Stack Overflow: " fmt
+
+enum srso_mitigation {
+ SRSO_MITIGATION_NONE,
+ SRSO_MITIGATION_MICROCODE,
+ SRSO_MITIGATION_SAFE_RET,
+ SRSO_MITIGATION_IBPB,
+ SRSO_MITIGATION_IBPB_ON_VMEXIT,
+};
+
+enum srso_mitigation_cmd {
+ SRSO_CMD_OFF,
+ SRSO_CMD_MICROCODE,
+ SRSO_CMD_SAFE_RET,
+ SRSO_CMD_IBPB,
+ SRSO_CMD_IBPB_ON_VMEXIT,
+};
+
+static const char * const srso_strings[] = {
+ [SRSO_MITIGATION_NONE] = "Vulnerable",
+ [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode",
+ [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET",
+ [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB",
+ [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only"
+};
+
+static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE;
+static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET;
+
+static int __init srso_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off"))
+ srso_cmd = SRSO_CMD_OFF;
+ else if (!strcmp(str, "microcode"))
+ srso_cmd = SRSO_CMD_MICROCODE;
+ else if (!strcmp(str, "safe-ret"))
+ srso_cmd = SRSO_CMD_SAFE_RET;
+ else if (!strcmp(str, "ibpb"))
+ srso_cmd = SRSO_CMD_IBPB;
+ else if (!strcmp(str, "ibpb-vmexit"))
+ srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT;
+ else
+ pr_err("Ignoring unknown SRSO option (%s).", str);
+
+ return 0;
+}
+early_param("spec_rstack_overflow", srso_parse_cmdline);
+
+#define SRSO_NOTICE "WARNING: See https://kernel.org/doc/html/latest/admin-guide/hw-vuln/srso.html for mitigation options."
+
+static void __init srso_select_mitigation(void)
+{
+ bool has_microcode;
+
+ if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off())
+ goto pred_cmd;
+
+ /*
+ * The first check is for the kernel running as a guest in order
+ * for guests to verify whether IBPB is a viable mitigation.
+ */
+ has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode();
+ if (!has_microcode) {
+ pr_warn("IBPB-extending microcode not applied!\n");
+ pr_warn(SRSO_NOTICE);
+ } else {
+ /*
+ * Enable the synthetic (even if in a real CPUID leaf)
+ * flags for guests.
+ */
+ setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE);
+
+ /*
+ * Zen1/2 with SMT off aren't vulnerable after the right
+ * IBPB microcode has been applied.
+ */
+ if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) {
+ setup_force_cpu_cap(X86_FEATURE_SRSO_NO);
+ return;
+ }
+ }
+
+ if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) {
+ if (has_microcode) {
+ pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n");
+ srso_mitigation = SRSO_MITIGATION_IBPB;
+ goto pred_cmd;
+ }
+ }
+
+ switch (srso_cmd) {
+ case SRSO_CMD_OFF:
+ return;
+
+ case SRSO_CMD_MICROCODE:
+ if (has_microcode) {
+ srso_mitigation = SRSO_MITIGATION_MICROCODE;
+ pr_warn(SRSO_NOTICE);
+ }
+ break;
+
+ case SRSO_CMD_SAFE_RET:
+ if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+ /*
+ * Enable the return thunk for generated code
+ * like ftrace, static_call, etc.
+ */
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+ if (boot_cpu_data.x86 == 0x19) {
+ setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS);
+ x86_return_thunk = srso_alias_return_thunk;
+ } else {
+ setup_force_cpu_cap(X86_FEATURE_SRSO);
+ x86_return_thunk = srso_return_thunk;
+ }
+ srso_mitigation = SRSO_MITIGATION_SAFE_RET;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+ goto pred_cmd;
+ }
+ break;
+
+ case SRSO_CMD_IBPB:
+ if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+ if (has_microcode) {
+ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ srso_mitigation = SRSO_MITIGATION_IBPB;
+ }
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+ goto pred_cmd;
+ }
+ break;
+
+ case SRSO_CMD_IBPB_ON_VMEXIT:
+ if (IS_ENABLED(CONFIG_CPU_SRSO)) {
+ if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) {
+ setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT);
+ srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT;
+ }
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_SRSO.\n");
+ goto pred_cmd;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode"));
+
+pred_cmd:
+ if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) &&
+ boot_cpu_has(X86_FEATURE_SBPB))
+ x86_pred_cmd = PRED_CMD_SBPB;
+}
+
+#undef pr_fmt
#define pr_fmt(fmt) fmt
#ifdef CONFIG_SYSFS
@@ -2294,7 +2621,8 @@ static ssize_t mmio_stale_data_show_state(char *buf)
static char *stibp_state(void)
{
- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ if (spectre_v2_in_eibrs_mode(spectre_v2_enabled) &&
+ !boot_cpu_has(X86_FEATURE_AUTOIBRS))
return "";
switch (spectre_v2_user_stibp) {
@@ -2382,6 +2710,21 @@ static ssize_t retbleed_show_state(char *buf)
return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
}
+static ssize_t srso_show_state(char *buf)
+{
+ if (boot_cpu_has(X86_FEATURE_SRSO_NO))
+ return sysfs_emit(buf, "Mitigation: SMT disabled\n");
+
+ return sysfs_emit(buf, "%s%s\n",
+ srso_strings[srso_mitigation],
+ (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode"));
+}
+
+static ssize_t gds_show_state(char *buf)
+{
+ return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
@@ -2431,6 +2774,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
case X86_BUG_RETBLEED:
return retbleed_show_state(buf);
+ case X86_BUG_SRSO:
+ return srso_show_state(buf);
+
+ case X86_BUG_GDS:
+ return gds_show_state(buf);
+
default:
break;
}
@@ -2495,4 +2844,14 @@ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, cha
{
return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
}
+
+ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_SRSO);
+}
+
+ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_GDS);
+}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52683fddafaf..e3a65e9fc750 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1250,6 +1250,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = {
#define RETBLEED BIT(3)
/* CPU is affected by SMT (cross-thread) return predictions */
#define SMT_RSB BIT(4)
+/* CPU is affected by SRSO */
+#define SRSO BIT(5)
+/* CPU is affected by GDS */
+#define GDS BIT(6)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
@@ -1262,27 +1266,30 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
- VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
- VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
- VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS),
+ VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS),
+ VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS),
VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
- VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
VULNBL_AMD(0x15, RETBLEED),
VULNBL_AMD(0x16, RETBLEED),
- VULNBL_AMD(0x17, RETBLEED | SMT_RSB),
+ VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO),
VULNBL_HYGON(0x18, RETBLEED | SMT_RSB),
+ VULNBL_AMD(0x19, SRSO),
{}
};
@@ -1406,6 +1413,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
if (cpu_matches(cpu_vuln_blacklist, SMT_RSB))
setup_force_cpu_bug(X86_BUG_SMT_RSB);
+ if (!cpu_has(c, X86_FEATURE_SRSO_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, SRSO))
+ setup_force_cpu_bug(X86_BUG_SRSO);
+ }
+
+ /*
+ * Check if CPU is vulnerable to GDS. If running in a virtual machine on
+ * an affected processor, the VMM may have disabled the use of GATHER by
+ * disabling AVX2. The only way to do this in HW is to clear XCR0[2],
+ * which means that AVX will be disabled.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) &&
+ boot_cpu_has(X86_FEATURE_AVX))
+ setup_force_cpu_bug(X86_BUG_GDS);
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
@@ -1962,6 +1984,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
validate_apic_and_package_id(c);
x86_spec_ctrl_setup_ap();
update_srbds_msr();
+ if (boot_cpu_has_bug(X86_BUG_GDS))
+ update_gds_msr();
tsx_ap_init();
}
@@ -2287,6 +2311,8 @@ void microcode_check(struct cpuinfo_x86 *prev_info)
perf_check_microcode();
+ amd_check_microcode();
+
store_cpu_caps(&curr_info);
if (!memcmp(&prev_info->x86_capability, &curr_info.x86_capability,
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 1c44630d4789..1dcd7d4e38ef 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -83,6 +83,7 @@ void cpu_select_mitigations(void);
extern void x86_spec_ctrl_setup_ap(void);
extern void update_srbds_msr(void);
+extern void update_gds_msr(void);
extern enum spectre_v2_mitigation spectre_v2_enabled;
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 5e74610b39e7..c4ec4ca47e11 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -1261,10 +1261,10 @@ static void __threshold_remove_blocks(struct threshold_bank *b)
struct threshold_block *pos = NULL;
struct threshold_block *tmp = NULL;
- kobject_del(b->kobj);
+ kobject_put(b->kobj);
list_for_each_entry_safe(pos, tmp, &b->blocks->miscj, miscj)
- kobject_del(&pos->kobj);
+ kobject_put(b->kobj);
}
static void threshold_remove_bank(struct threshold_bank *bank)
diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h
index af5cbdd9bd29..f6d856bd50bc 100644
--- a/arch/x86/kernel/fpu/context.h
+++ b/arch/x86/kernel/fpu/context.h
@@ -19,8 +19,7 @@
* FPU state for a task MUST let the rest of the kernel know that the
* FPU registers are no longer valid for this task.
*
- * Either one of these invalidation functions is enough. Invalidate
- * a resource you control: CPU if using the CPU for something else
+ * Invalidate a resource you control: CPU if using the CPU for something else
* (with preemption disabled), FPU for the current task, or a task that
* is prevented from running by the current task.
*/
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 1015af1ae562..98e507cc7d34 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void)
struct fpu *fpu = &current->thread.fpu;
fpregs_lock();
- fpu__drop(fpu);
+ __fpu_invalidate_fpregs_state(fpu);
/*
* This does not change the actual hardware registers. It just
* resets the memory image and sets TIF_NEED_FPU_LOAD so a
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 0bab497c9436..1afbc4866b10 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
goto out_disable;
}
+ /*
+ * CPU capabilities initialization runs before FPU init. So
+ * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely
+ * functional, set the feature bit so depending code works.
+ */
+ setup_force_cpu_cap(X86_FEATURE_OSXSAVE);
+
print_xstate_offset_size();
pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n",
fpu_kernel_cfg.max_features,
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index 57b0037d0a99..517821b48391 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -226,7 +226,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real)
}
/* Check whether insn is indirect jump */
-static int __insn_is_indirect_jump(struct insn *insn)
+static int insn_is_indirect_jump(struct insn *insn)
{
return ((insn->opcode.bytes[0] == 0xff &&
(X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */
@@ -260,26 +260,6 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len)
return (start <= target && target <= start + len);
}
-static int insn_is_indirect_jump(struct insn *insn)
-{
- int ret = __insn_is_indirect_jump(insn);
-
-#ifdef CONFIG_RETPOLINE
- /*
- * Jump to x86_indirect_thunk_* is treated as an indirect jump.
- * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with
- * older gcc may use indirect jump. So we add this check instead of
- * replace indirect-jump check.
- */
- if (!ret)
- ret = insn_jump_into_range(insn,
- (unsigned long)__indirect_thunk_start,
- (unsigned long)__indirect_thunk_end -
- (unsigned long)__indirect_thunk_start);
-#endif
- return ret;
-}
-
/* Decode whole function to ensure any instructions don't jump into target */
static int can_optimize(unsigned long paddr)
{
@@ -334,9 +314,21 @@ static int can_optimize(unsigned long paddr)
/* Recover address */
insn.kaddr = (void *)addr;
insn.next_byte = (void *)(addr + insn.length);
- /* Check any instructions don't jump into target */
- if (insn_is_indirect_jump(&insn) ||
- insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
+ /*
+ * Check any instructions don't jump into target, indirectly or
+ * directly.
+ *
+ * The indirect case is present to handle a code with jump
+ * tables. When the kernel uses retpolines, the check should in
+ * theory additionally look for jumps to indirect thunks.
+ * However, the kernel built with retpolines or IBT has jump
+ * tables disabled so the check can be skipped altogether.
+ */
+ if (!IS_ENABLED(CONFIG_RETPOLINE) &&
+ !IS_ENABLED(CONFIG_X86_KERNEL_IBT) &&
+ insn_is_indirect_jump(&insn))
+ return 0;
+ if (insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE,
DISP32_SIZE))
return 0;
addr += insn.length;
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index b70670a98597..77a9316da435 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -186,6 +186,19 @@ EXPORT_SYMBOL_GPL(arch_static_call_transform);
*/
bool __static_call_fixup(void *tramp, u8 op, void *dest)
{
+ unsigned long addr = (unsigned long)tramp;
+ /*
+ * Not all .return_sites are a static_call trampoline (most are not).
+ * Check if the 3 bytes after the return are still kernel text, if not,
+ * then this definitely is not a trampoline and we need not worry
+ * further.
+ *
+ * This avoids the memcmp() below tripping over pagefaults etc..
+ */
+ if (((addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT)) &&
+ !kernel_text_address(addr + 7))
+ return false;
+
if (memcmp(tramp+5, tramp_ud, 3)) {
/* Not a trampoline site, not our problem. */
return false;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 58b1f208eff5..4a817d20ce3b 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -697,9 +697,10 @@ static bool try_fixup_enqcmd_gp(void)
}
static bool gp_try_fixup_and_notify(struct pt_regs *regs, int trapnr,
- unsigned long error_code, const char *str)
+ unsigned long error_code, const char *str,
+ unsigned long address)
{
- if (fixup_exception(regs, trapnr, error_code, 0))
+ if (fixup_exception(regs, trapnr, error_code, address))
return true;
current->thread.error_code = error_code;
@@ -759,7 +760,7 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection)
goto exit;
}
- if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc))
+ if (gp_try_fixup_and_notify(regs, X86_TRAP_GP, error_code, desc, 0))
goto exit;
if (error_code)
@@ -1357,17 +1358,20 @@ DEFINE_IDTENTRY(exc_device_not_available)
#define VE_FAULT_STR "VE fault"
-static void ve_raise_fault(struct pt_regs *regs, long error_code)
+static void ve_raise_fault(struct pt_regs *regs, long error_code,
+ unsigned long address)
{
if (user_mode(regs)) {
gp_user_force_sig_segv(regs, X86_TRAP_VE, error_code, VE_FAULT_STR);
return;
}
- if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code, VE_FAULT_STR))
+ if (gp_try_fixup_and_notify(regs, X86_TRAP_VE, error_code,
+ VE_FAULT_STR, address)) {
return;
+ }
- die_addr(VE_FAULT_STR, regs, error_code, 0);
+ die_addr(VE_FAULT_STR, regs, error_code, address);
}
/*
@@ -1431,7 +1435,7 @@ DEFINE_IDTENTRY(exc_virtualization_exception)
* it successfully, treat it as #GP(0) and handle it.
*/
if (!tdx_handle_virt_exception(regs, &ve))
- ve_raise_fault(regs, 0);
+ ve_raise_fault(regs, 0, ve.gla);
cond_local_irq_disable(regs);
}
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 03c885d3640f..83d41c2601d7 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -133,14 +133,26 @@ SECTIONS
KPROBES_TEXT
SOFTIRQENTRY_TEXT
#ifdef CONFIG_RETPOLINE
- __indirect_thunk_start = .;
- *(.text.__x86.*)
- __indirect_thunk_end = .;
+ *(.text..__x86.indirect_thunk)
+ *(.text..__x86.return_thunk)
#endif
STATIC_CALL_TEXT
ALIGN_ENTRY_TEXT_BEGIN
+#ifdef CONFIG_CPU_SRSO
+ *(.text..__x86.rethunk_untrain)
+#endif
+
ENTRY_TEXT
+
+#ifdef CONFIG_CPU_SRSO
+ /*
+ * See the comment above srso_alias_untrain_ret()'s
+ * definition.
+ */
+ . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20);
+ *(.text..__x86.rethunk_safe)
+#endif
ALIGN_ENTRY_TEXT_END
*(.gnu.warning)
@@ -509,7 +521,24 @@ INIT_PER_CPU(irq_stack_backing_store);
#endif
#ifdef CONFIG_RETHUNK
-. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned");
+. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned");
+. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned");
+#endif
+
+#ifdef CONFIG_CPU_SRSO
+/*
+ * GNU ld cannot do XOR until 2.41.
+ * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1
+ *
+ * LLVM lld cannot do XOR until lld-17.
+ * https://github.com/llvm/llvm-project/commit/fae96104d4378166cbe5c875ef8ed808a356f3fb
+ *
+ * Instead do: (A | B) - (A & B) in order to compute the XOR
+ * of the two function addresses:
+ */
+. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) -
+ (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)),
+ "SRSO function pair won't alias");
#endif
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 7f4d13383cf2..d3432687c9e6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -729,6 +729,9 @@ void kvm_set_cpu_caps(void)
F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */
);
+ if (cpu_feature_enabled(X86_FEATURE_SRSO_NO))
+ kvm_cpu_cap_set(X86_FEATURE_SRSO_NO);
+
kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
F(PERFMON_V2)
);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 113ca9661ab2..a983a16163b1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -637,16 +637,22 @@ bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
*max_irr = -1;
for (i = vec = 0; i <= 7; i++, vec += 32) {
+ u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
+
+ irr_val = *p_irr;
pir_val = READ_ONCE(pir[i]);
- irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
+
if (pir_val) {
+ pir_val = xchg(&pir[i], 0);
+
prev_irr_val = irr_val;
- irr_val |= xchg(&pir[i], 0);
- *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
- if (prev_irr_val != irr_val) {
- max_updated_irr =
- __fls(irr_val ^ prev_irr_val) + vec;
- }
+ do {
+ irr_val = prev_irr_val | pir_val;
+ } while (prev_irr_val != irr_val &&
+ !try_cmpxchg(p_irr, &prev_irr_val, irr_val));
+
+ if (prev_irr_val != irr_val)
+ max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
}
if (irr_val)
*max_irr = __fls(irr_val) + vec;
@@ -660,8 +666,11 @@ EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
+ bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
- return __kvm_apic_update_irr(pir, apic->regs, max_irr);
+ if (unlikely(!apic->apicv_active && irr_updated))
+ apic->irr_pending = true;
+ return irr_updated;
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 07756b7348ae..d3aec1f2cad2 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2417,15 +2417,18 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
*/
memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
- vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
- vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
- vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
- vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
- vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);
+ BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap));
+ memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap));
- svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);
+ vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb);
+ vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb);
+ vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb);
+ vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb);
+ vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb);
- if (ghcb_xcr0_is_valid(ghcb)) {
+ svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb);
+
+ if (kvm_ghcb_xcr0_is_valid(svm)) {
vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
kvm_update_cpuid_runtime(vcpu);
}
@@ -2436,84 +2439,88 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
control->exit_code_hi = upper_32_bits(exit_code);
control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);
+ svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb);
/* Clear the valid entries fields */
memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}
+static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control)
+{
+ return (((u64)control->exit_code_hi) << 32) | control->exit_code;
+}
+
static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
- struct kvm_vcpu *vcpu;
- struct ghcb *ghcb;
+ struct vmcb_control_area *control = &svm->vmcb->control;
+ struct kvm_vcpu *vcpu = &svm->vcpu;
u64 exit_code;
u64 reason;
- ghcb = svm->sev_es.ghcb;
-
/*
* Retrieve the exit code now even though it may not be marked valid
* as it could help with debugging.
*/
- exit_code = ghcb_get_sw_exit_code(ghcb);
+ exit_code = kvm_ghcb_get_sw_exit_code(control);
/* Only GHCB Usage code 0 is supported */
- if (ghcb->ghcb_usage) {
+ if (svm->sev_es.ghcb->ghcb_usage) {
reason = GHCB_ERR_INVALID_USAGE;
goto vmgexit_err;
}
reason = GHCB_ERR_MISSING_INPUT;
- if (!ghcb_sw_exit_code_is_valid(ghcb) ||
- !ghcb_sw_exit_info_1_is_valid(ghcb) ||
- !ghcb_sw_exit_info_2_is_valid(ghcb))
+ if (!kvm_ghcb_sw_exit_code_is_valid(svm) ||
+ !kvm_ghcb_sw_exit_info_1_is_valid(svm) ||
+ !kvm_ghcb_sw_exit_info_2_is_valid(svm))
goto vmgexit_err;
- switch (ghcb_get_sw_exit_code(ghcb)) {
+ switch (exit_code) {
case SVM_EXIT_READ_DR7:
break;
case SVM_EXIT_WRITE_DR7:
- if (!ghcb_rax_is_valid(ghcb))
+ if (!kvm_ghcb_rax_is_valid(svm))
goto vmgexit_err;
break;
case SVM_EXIT_RDTSC:
break;
case SVM_EXIT_RDPMC:
- if (!ghcb_rcx_is_valid(ghcb))
+ if (!kvm_ghcb_rcx_is_valid(svm))
goto vmgexit_err;
break;
case SVM_EXIT_CPUID:
- if (!ghcb_rax_is_valid(ghcb) ||
- !ghcb_rcx_is_valid(ghcb))
+ if (!kvm_ghcb_rax_is_valid(svm) ||
+ !kvm_ghcb_rcx_is_valid(svm))
goto vmgexit_err;
- if (ghcb_get_rax(ghcb) == 0xd)
- if (!ghcb_xcr0_is_valid(ghcb))
+ if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd)
+ if (!kvm_ghcb_xcr0_is_valid(svm))
goto vmgexit_err;
break;
case SVM_EXIT_INVD:
break;
case SVM_EXIT_IOIO:
- if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
- if (!ghcb_sw_scratch_is_valid(ghcb))
+ if (control->exit_info_1 & SVM_IOIO_STR_MASK) {
+ if (!kvm_ghcb_sw_scratch_is_valid(svm))
goto vmgexit_err;
} else {
- if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
- if (!ghcb_rax_is_valid(ghcb))
+ if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK))
+ if (!kvm_ghcb_rax_is_valid(svm))
goto vmgexit_err;
}
break;
case SVM_EXIT_MSR:
- if (!ghcb_rcx_is_valid(ghcb))
+ if (!kvm_ghcb_rcx_is_valid(svm))
goto vmgexit_err;
- if (ghcb_get_sw_exit_info_1(ghcb)) {
- if (!ghcb_rax_is_valid(ghcb) ||
- !ghcb_rdx_is_valid(ghcb))
+ if (control->exit_info_1) {
+ if (!kvm_ghcb_rax_is_valid(svm) ||
+ !kvm_ghcb_rdx_is_valid(svm))
goto vmgexit_err;
}
break;
case SVM_EXIT_VMMCALL:
- if (!ghcb_rax_is_valid(ghcb) ||
- !ghcb_cpl_is_valid(ghcb))
+ if (!kvm_ghcb_rax_is_valid(svm) ||
+ !kvm_ghcb_cpl_is_valid(svm))
goto vmgexit_err;
break;
case SVM_EXIT_RDTSCP:
@@ -2521,19 +2528,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
case SVM_EXIT_WBINVD:
break;
case SVM_EXIT_MONITOR:
- if (!ghcb_rax_is_valid(ghcb) ||
- !ghcb_rcx_is_valid(ghcb) ||
- !ghcb_rdx_is_valid(ghcb))
+ if (!kvm_ghcb_rax_is_valid(svm) ||
+ !kvm_ghcb_rcx_is_valid(svm) ||
+ !kvm_ghcb_rdx_is_valid(svm))
goto vmgexit_err;
break;
case SVM_EXIT_MWAIT:
- if (!ghcb_rax_is_valid(ghcb) ||
- !ghcb_rcx_is_valid(ghcb))
+ if (!kvm_ghcb_rax_is_valid(svm) ||
+ !kvm_ghcb_rcx_is_valid(svm))
goto vmgexit_err;
break;
case SVM_VMGEXIT_MMIO_READ:
case SVM_VMGEXIT_MMIO_WRITE:
- if (!ghcb_sw_scratch_is_valid(ghcb))
+ if (!kvm_ghcb_sw_scratch_is_valid(svm))
goto vmgexit_err;
break;
case SVM_VMGEXIT_NMI_COMPLETE:
@@ -2549,11 +2556,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
return 0;
vmgexit_err:
- vcpu = &svm->vcpu;
-
if (reason == GHCB_ERR_INVALID_USAGE) {
vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
- ghcb->ghcb_usage);
+ svm->sev_es.ghcb->ghcb_usage);
} else if (reason == GHCB_ERR_INVALID_EVENT) {
vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n",
exit_code);
@@ -2563,11 +2568,8 @@ vmgexit_err:
dump_ghcb(svm);
}
- /* Clear the valid entries fields */
- memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
-
- ghcb_set_sw_exit_info_1(ghcb, 2);
- ghcb_set_sw_exit_info_2(ghcb, reason);
+ ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, reason);
/* Resume the guest to "return" the error code. */
return 1;
@@ -2586,7 +2588,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm)
*/
if (svm->sev_es.ghcb_sa_sync) {
kvm_write_guest(svm->vcpu.kvm,
- ghcb_get_sw_scratch(svm->sev_es.ghcb),
+ svm->sev_es.sw_scratch,
svm->sev_es.ghcb_sa,
svm->sev_es.ghcb_sa_len);
svm->sev_es.ghcb_sa_sync = false;
@@ -2632,12 +2634,11 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
struct vmcb_control_area *control = &svm->vmcb->control;
- struct ghcb *ghcb = svm->sev_es.ghcb;
u64 ghcb_scratch_beg, ghcb_scratch_end;
u64 scratch_gpa_beg, scratch_gpa_end;
void *scratch_va;
- scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
+ scratch_gpa_beg = svm->sev_es.sw_scratch;
if (!scratch_gpa_beg) {
pr_err("vmgexit: scratch gpa not provided\n");
goto e_scratch;
@@ -2708,8 +2709,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
return 0;
e_scratch:
- ghcb_set_sw_exit_info_1(ghcb, 2);
- ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
+ ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA);
return 1;
}
@@ -2822,7 +2823,6 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_control_area *control = &svm->vmcb->control;
u64 ghcb_gpa, exit_code;
- struct ghcb *ghcb;
int ret;
/* Validate the GHCB */
@@ -2847,20 +2847,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
}
svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
- ghcb = svm->sev_es.ghcb_map.hva;
- trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
-
- exit_code = ghcb_get_sw_exit_code(ghcb);
+ trace_kvm_vmgexit_enter(vcpu->vcpu_id, svm->sev_es.ghcb);
+ sev_es_sync_from_ghcb(svm);
ret = sev_es_validate_vmgexit(svm);
if (ret)
return ret;
- sev_es_sync_from_ghcb(svm);
- ghcb_set_sw_exit_info_1(ghcb, 0);
- ghcb_set_sw_exit_info_2(ghcb, 0);
+ ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 0);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 0);
+ exit_code = kvm_ghcb_get_sw_exit_code(control);
switch (exit_code) {
case SVM_VMGEXIT_MMIO_READ:
ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
@@ -2898,13 +2896,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
break;
case 1:
/* Get AP jump table address */
- ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, sev->ap_jump_table);
break;
default:
pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
control->exit_info_1);
- ghcb_set_sw_exit_info_1(ghcb, 2);
- ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT);
+ ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2);
+ ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT);
}
ret = 1;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d381ad424554..d4bfdc607fe7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1498,7 +1498,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (sd->current_vmcb != svm->vmcb) {
sd->current_vmcb = svm->vmcb;
- indirect_branch_prediction_barrier();
+
+ if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT))
+ indirect_branch_prediction_barrier();
}
if (kvm_vcpu_apicv_active(vcpu))
avic_vcpu_load(vcpu, cpu);
@@ -1786,6 +1788,11 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
}
+static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+ return true;
+}
+
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
@@ -3986,14 +3993,8 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
- struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
-
- /*
- * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
- * can't read guest memory (dereference memslots) to decode the WRMSR.
- */
- if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
- nrips && control->next_rip)
+ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
+ to_svm(vcpu)->vmcb->control.exit_info_1)
return handle_fastpath_set_msr_irqoff(vcpu);
return EXIT_FASTPATH_NONE;
@@ -4005,6 +4006,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in
guest_state_enter_irqoff();
+ amd_clear_divider();
+
if (sev_es_guest(vcpu->kvm))
__svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted);
else
@@ -4815,6 +4818,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_segment = svm_set_segment,
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = svm_get_cs_db_l_bits,
+ .is_valid_cr0 = svm_is_valid_cr0,
.set_cr0 = svm_set_cr0,
.post_set_cr3 = sev_post_set_cr3,
.is_valid_cr4 = svm_is_valid_cr4,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 18af7e712a5a..8239c8de45ac 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -190,10 +190,12 @@ struct vcpu_sev_es_state {
/* SEV-ES support */
struct sev_es_save_area *vmsa;
struct ghcb *ghcb;
+ u8 valid_bitmap[16];
struct kvm_host_map ghcb_map;
bool received_first_sipi;
/* SEV-ES scratch area support */
+ u64 sw_scratch;
void *ghcb_sa;
u32 ghcb_sa_len;
bool ghcb_sa_sync;
@@ -744,4 +746,28 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm);
void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted);
+#define DEFINE_KVM_GHCB_ACCESSORS(field) \
+ static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \
+ { \
+ return test_bit(GHCB_BITMAP_IDX(field), \
+ (unsigned long *)&svm->sev_es.valid_bitmap); \
+ } \
+ \
+ static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb) \
+ { \
+ return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0; \
+ } \
+
+DEFINE_KVM_GHCB_ACCESSORS(cpl)
+DEFINE_KVM_GHCB_ACCESSORS(rax)
+DEFINE_KVM_GHCB_ACCESSORS(rcx)
+DEFINE_KVM_GHCB_ACCESSORS(rdx)
+DEFINE_KVM_GHCB_ACCESSORS(rbx)
+DEFINE_KVM_GHCB_ACCESSORS(rsi)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1)
+DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2)
+DEFINE_KVM_GHCB_ACCESSORS(sw_scratch)
+DEFINE_KVM_GHCB_ACCESSORS(xcr0)
+
#endif
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 8e8295e774f0..ef2ebabb059c 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -222,7 +222,7 @@ SYM_FUNC_START(__svm_vcpu_run)
* because interrupt handlers won't sanitize 'ret' if the return is
* from the kernel.
*/
- UNTRAIN_RET
+ UNTRAIN_RET_VM
/*
* Clear all general purpose registers except RSP and RAX to prevent
@@ -359,7 +359,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run)
* because interrupt handlers won't sanitize RET if the return is
* from the kernel.
*/
- UNTRAIN_RET
+ UNTRAIN_RET_VM
/* "Pop" @spec_ctrl_intercepted. */
pop %_ASM_BX
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 07e927d4d099..be275a0410a8 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -303,10 +303,8 @@ SYM_FUNC_START(vmx_do_nmi_irqoff)
VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)
-
-.section .text, "ax"
-
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+
/**
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed
@@ -335,7 +333,7 @@ SYM_FUNC_START(vmread_error_trampoline)
mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1
- call vmread_error
+ call vmread_error_trampoline2
/* Zero out @fault, which will be popped into the result register. */
_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)
@@ -357,6 +355,8 @@ SYM_FUNC_START(vmread_error_trampoline)
SYM_FUNC_END(vmread_error_trampoline)
#endif
+.section .text, "ax"
+
SYM_FUNC_START(vmx_do_interrupt_irqoff)
VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 0ecf4be2c6af..df461f387e20 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -441,13 +441,23 @@ do { \
pr_warn_ratelimited(fmt); \
} while (0)
-void vmread_error(unsigned long field, bool fault)
+noinline void vmread_error(unsigned long field)
{
- if (fault)
+ vmx_insn_failed("vmread failed: field=%lx\n", field);
+}
+
+#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
+{
+ if (fault) {
kvm_spurious_fault();
- else
- vmx_insn_failed("vmread failed: field=%lx\n", field);
+ } else {
+ instrumentation_begin();
+ vmread_error(field);
+ instrumentation_end();
+ }
}
+#endif
noinline void vmwrite_error(unsigned long field, unsigned long value)
{
@@ -1503,6 +1513,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long old_rflags;
+ /*
+ * Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
+ * is an unrestricted guest in order to mark L2 as needing emulation
+ * if L1 runs L2 as a restricted guest.
+ */
if (is_unrestricted_guest(vcpu)) {
kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
vmx->rflags = rflags;
@@ -3037,6 +3052,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
+ /*
+ * KVM should never use VM86 to virtualize Real Mode when L2 is active,
+ * as using VM86 is unnecessary if unrestricted guest is enabled, and
+ * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
+ * should VM-Fail and KVM should reject userspace attempts to stuff
+ * CR0.PG=0 when L2 is active.
+ */
+ WARN_ON_ONCE(is_guest_mode(vcpu));
+
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
@@ -3226,6 +3250,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
CPU_BASED_CR3_STORE_EXITING)
+static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+ if (is_guest_mode(vcpu))
+ return nested_guest_cr0_valid(vcpu, cr0);
+
+ if (to_vmx(vcpu)->nested.vmxon)
+ return nested_host_cr0_valid(vcpu, cr0);
+
+ return true;
+}
+
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -3235,7 +3270,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
- if (is_unrestricted_guest(vcpu))
+ if (enable_unrestricted_guest)
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
else {
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
@@ -3263,7 +3298,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif
- if (enable_ept && !is_unrestricted_guest(vcpu)) {
+ if (enable_ept && !enable_unrestricted_guest) {
/*
* Ensure KVM has an up-to-date snapshot of the guest's CR3. If
* the below code _enables_ CR3 exiting, vmx_cache_reg() will
@@ -3394,7 +3429,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* this bit, even if host CR4.MCE == 0.
*/
hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
- if (is_unrestricted_guest(vcpu))
+ if (enable_unrestricted_guest)
hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
else if (vmx->rmode.vm86_active)
hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
@@ -3414,7 +3449,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vcpu->arch.cr4 = cr4;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
- if (!is_unrestricted_guest(vcpu)) {
+ if (!enable_unrestricted_guest) {
if (enable_ept) {
if (!is_paging(vcpu)) {
hw_cr4 &= ~X86_CR4_PAE;
@@ -4651,7 +4686,8 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
if (kvm_vmx->pid_table)
return 0;
- pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
+ pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
+ vmx_get_pid_table_order(kvm));
if (!pages)
return -ENOMEM;
@@ -5364,18 +5400,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
val = (val & ~vmcs12->cr0_guest_host_mask) |
(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
- if (!nested_guest_cr0_valid(vcpu, val))
- return 1;
-
if (kvm_set_cr0(vcpu, val))
return 1;
vmcs_writel(CR0_READ_SHADOW, orig_val);
return 0;
} else {
- if (to_vmx(vcpu)->nested.vmxon &&
- !nested_host_cr0_valid(vcpu, val))
- return 1;
-
return kvm_set_cr0(vcpu, val);
}
}
@@ -8203,6 +8232,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_segment = vmx_set_segment,
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
+ .is_valid_cr0 = vmx_is_valid_cr0,
.set_cr0 = vmx_set_cr0,
.is_valid_cr4 = vmx_is_valid_cr4,
.set_cr4 = vmx_set_cr4,
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index ce47dc265f89..33af7b4c6eb4 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -10,7 +10,7 @@
#include "vmcs.h"
#include "../x86.h"
-void vmread_error(unsigned long field, bool fault);
+void vmread_error(unsigned long field);
void vmwrite_error(unsigned long field, unsigned long value);
void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
@@ -31,6 +31,13 @@ void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
* void vmread_error_trampoline(unsigned long field, bool fault);
*/
extern unsigned long vmread_error_trampoline;
+
+/*
+ * The second VMREAD error trampoline, called from the assembly trampoline,
+ * exists primarily to enable instrumentation for the VM-Fail path.
+ */
+void vmread_error_trampoline2(unsigned long field, bool fault);
+
#endif
static __always_inline void vmcs_check16(unsigned long field)
@@ -101,8 +108,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
do_fail:
instrumentation_begin();
- WARN_ONCE(1, KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
- pr_warn_ratelimited(KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
+ vmread_error(field);
instrumentation_end();
return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index a6b9bea62fb8..c381770bcbf1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -906,6 +906,22 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
}
EXPORT_SYMBOL_GPL(load_pdptrs);
+static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+{
+#ifdef CONFIG_X86_64
+ if (cr0 & 0xffffffff00000000UL)
+ return false;
+#endif
+
+ if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
+ return false;
+
+ if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
+ return false;
+
+ return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
+}
+
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
{
/*
@@ -952,20 +968,13 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long old_cr0 = kvm_read_cr0(vcpu);
- cr0 |= X86_CR0_ET;
-
-#ifdef CONFIG_X86_64
- if (cr0 & 0xffffffff00000000UL)
+ if (!kvm_is_valid_cr0(vcpu, cr0))
return 1;
-#endif
-
- cr0 &= ~CR0_RESERVED_BITS;
- if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
- return 1;
+ cr0 |= X86_CR0_ET;
- if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
- return 1;
+ /* Write to CR0 reserved bits are ignored, even on Intel. */
+ cr0 &= ~CR0_RESERVED_BITS;
#ifdef CONFIG_X86_64
if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
@@ -1607,7 +1616,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr)
ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \
ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \
ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \
- ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO)
+ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO)
static u64 kvm_get_arch_capabilities(void)
{
@@ -1664,6 +1673,9 @@ static u64 kvm_get_arch_capabilities(void)
*/
}
+ if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated())
+ data |= ARCH_CAP_GDS_NO;
+
return data;
}
@@ -2172,6 +2184,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
u64 data;
fastpath_t ret = EXIT_FASTPATH_NONE;
+ kvm_vcpu_srcu_read_lock(vcpu);
+
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
data = kvm_read_edx_eax(vcpu);
@@ -2194,6 +2208,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
if (ret != EXIT_FASTPATH_NONE)
trace_kvm_msr_write(msr, data);
+ kvm_vcpu_srcu_read_unlock(vcpu);
+
return ret;
}
EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
@@ -10203,9 +10219,13 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
if (r < 0)
goto out;
if (r) {
- kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
- static_call(kvm_x86_inject_irq)(vcpu, false);
- WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+ int irq = kvm_cpu_get_interrupt(vcpu);
+
+ if (!WARN_ON_ONCE(irq == -1)) {
+ kvm_queue_interrupt(vcpu, irq, false);
+ static_call(kvm_x86_inject_irq)(vcpu, false);
+ WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
+ }
}
if (kvm_cpu_has_injectable_intr(vcpu))
static_call(kvm_x86_enable_irq_window)(vcpu);
@@ -11460,7 +11480,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return false;
}
- return kvm_is_valid_cr4(vcpu, sregs->cr4);
+ return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
+ kvm_is_valid_cr0(vcpu, sregs->cr0);
}
static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
@@ -13185,7 +13206,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
bool kvm_arch_has_irq_bypass(void)
{
- return true;
+ return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
}
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 3fd066d42ec0..cd86aeb5fdd3 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -11,8 +11,9 @@
#include <asm/unwind_hints.h>
#include <asm/percpu.h>
#include <asm/frame.h>
+#include <asm/nops.h>
- .section .text.__x86.indirect_thunk
+ .section .text..__x86.indirect_thunk
.macro POLINE reg
@@ -131,36 +132,107 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
*/
#ifdef CONFIG_RETHUNK
- .section .text.__x86.return_thunk
+/*
+ * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at
+ * special addresses:
+ *
+ * - srso_alias_untrain_ret() is 2M aligned
+ * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14
+ * and 20 in its virtual address are set (while those bits in the
+ * srso_alias_untrain_ret() function are cleared).
+ *
+ * This guarantees that those two addresses will alias in the branch
+ * target buffer of Zen3/4 generations, leading to any potential
+ * poisoned entries at that BTB slot to get evicted.
+ *
+ * As a result, srso_alias_safe_ret() becomes a safe return.
+ */
+#ifdef CONFIG_CPU_SRSO
+ .section .text..__x86.rethunk_untrain
+
+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ASM_NOP2
+ lfence
+ jmp srso_alias_return_thunk
+SYM_FUNC_END(srso_alias_untrain_ret)
+__EXPORT_THUNK(srso_alias_untrain_ret)
+
+ .section .text..__x86.rethunk_safe
+#else
+/* dummy definition for alternatives */
+SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_FUNC_END(srso_alias_untrain_ret)
+#endif
+
+SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ lea 8(%_ASM_SP), %_ASM_SP
+ UNWIND_HINT_FUNC
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_FUNC_END(srso_alias_safe_ret)
+
+ .section .text..__x86.return_thunk
+
+SYM_CODE_START(srso_alias_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ call srso_alias_safe_ret
+ ud2
+SYM_CODE_END(srso_alias_return_thunk)
+
+/*
+ * Some generic notes on the untraining sequences:
+ *
+ * They are interchangeable when it comes to flushing potentially wrong
+ * RET predictions from the BTB.
+ *
+ * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the
+ * Retbleed sequence because the return sequence done there
+ * (srso_safe_ret()) is longer and the return sequence must fully nest
+ * (end before) the untraining sequence. Therefore, the untraining
+ * sequence must fully overlap the return sequence.
+ *
+ * Regarding alignment - the instructions which need to be untrained,
+ * must all start at a cacheline boundary for Zen1/2 generations. That
+ * is, instruction sequences starting at srso_safe_ret() and
+ * the respective instruction sequences at retbleed_return_thunk()
+ * must start at a cacheline boundary.
+ */
/*
* Safety details here pertain to the AMD Zen{1,2} microarchitecture:
- * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for
* alignment within the BTB.
- * 2) The instruction at zen_untrain_ret must contain, and not
+ * 2) The instruction at retbleed_untrain_ret must contain, and not
* end with, the 0xc3 byte of the RET.
* 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
* from re-poisioning the BTB prediction.
*/
.align 64
- .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc
-SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc
+SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
ANNOTATE_NOENDBR
/*
- * As executed from zen_untrain_ret, this is:
+ * As executed from retbleed_untrain_ret, this is:
*
* TEST $0xcc, %bl
* LFENCE
- * JMP __x86_return_thunk
+ * JMP retbleed_return_thunk
*
* Executing the TEST instruction has a side effect of evicting any BTB
* prediction (potentially attacker controlled) attached to the RET, as
- * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ * retbleed_return_thunk + 1 isn't an instruction boundary at the moment.
*/
.byte 0xf6
/*
- * As executed from __x86_return_thunk, this is a plain RET.
+ * As executed from retbleed_return_thunk, this is a plain RET.
*
* As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
*
@@ -172,13 +244,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
* With SMT enabled and STIBP active, a sibling thread cannot poison
* RET's prediction to a type of its choice, but can evict the
* prediction due to competitive sharing. If the prediction is
- * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * evicted, retbleed_return_thunk will suffer Straight Line Speculation
* which will be contained safely by the INT3.
*/
-SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL)
ret
int3
-SYM_CODE_END(__x86_return_thunk)
+SYM_CODE_END(retbleed_return_thunk)
/*
* Ensure the TEST decoding / BTB invalidation is complete.
@@ -189,11 +261,67 @@ SYM_CODE_END(__x86_return_thunk)
* Jump back and execute the RET in the middle of the TEST instruction.
* INT3 is for SLS protection.
*/
- jmp __x86_return_thunk
+ jmp retbleed_return_thunk
int3
-SYM_FUNC_END(zen_untrain_ret)
-__EXPORT_THUNK(zen_untrain_ret)
+SYM_FUNC_END(retbleed_untrain_ret)
+__EXPORT_THUNK(retbleed_untrain_ret)
+/*
+ * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret()
+ * above. On kernel entry, srso_untrain_ret() is executed which is a
+ *
+ * movabs $0xccccc30824648d48,%rax
+ *
+ * and when the return thunk executes the inner label srso_safe_ret()
+ * later, it is a stack manipulation and a RET which is mispredicted and
+ * thus a "safe" one to use.
+ */
+ .align 64
+ .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc
+SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ ANNOTATE_NOENDBR
+ .byte 0x48, 0xb8
+
+/*
+ * This forces the function return instruction to speculate into a trap
+ * (UD2 in srso_return_thunk() below). This RET will then mispredict
+ * and execution will continue at the return site read from the top of
+ * the stack.
+ */
+SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL)
+ lea 8(%_ASM_SP), %_ASM_SP
+ ret
+ int3
+ int3
+ /* end of movabs */
+ lfence
+ call srso_safe_ret
+ ud2
+SYM_CODE_END(srso_safe_ret)
+SYM_FUNC_END(srso_untrain_ret)
+__EXPORT_THUNK(srso_untrain_ret)
+
+SYM_CODE_START(srso_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ call srso_safe_ret
+ ud2
+SYM_CODE_END(srso_return_thunk)
+
+SYM_FUNC_START(entry_untrain_ret)
+ ALTERNATIVE_2 "jmp retbleed_untrain_ret", \
+ "jmp srso_untrain_ret", X86_FEATURE_SRSO, \
+ "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS
+SYM_FUNC_END(entry_untrain_ret)
+__EXPORT_THUNK(entry_untrain_ret)
+
+SYM_CODE_START(__x86_return_thunk)
+ UNWIND_HINT_FUNC
+ ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
+SYM_CODE_END(__x86_return_thunk)
EXPORT_SYMBOL(__x86_return_thunk)
#endif /* CONFIG_RETHUNK */
diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl
index 2b69c3c035b6..fc1a4f3c81d9 100644
--- a/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@ -422,3 +422,4 @@
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
451 common cachestat sys_cachestat
+452 common fchmodat2 sys_fchmodat2
diff --git a/block/bdev.c b/block/bdev.c
index 979e28a46b98..f3b13aa1b7d4 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -206,23 +206,6 @@ int sync_blockdev_range(struct block_device *bdev, loff_t lstart, loff_t lend)
}
EXPORT_SYMBOL(sync_blockdev_range);
-/*
- * Write out and wait upon all dirty data associated with this
- * device. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
- struct super_block *sb = get_super(bdev);
- if (sb) {
- int res = sync_filesystem(sb);
- drop_super(sb);
- return res;
- }
- return sync_blockdev(bdev);
-}
-EXPORT_SYMBOL(fsync_bdev);
-
/**
* freeze_bdev - lock a filesystem and force it into a consistent state
* @bdev: blockdevice to lock
@@ -248,9 +231,9 @@ int freeze_bdev(struct block_device *bdev)
if (!sb)
goto sync;
if (sb->s_op->freeze_super)
- error = sb->s_op->freeze_super(sb);
+ error = sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
else
- error = freeze_super(sb);
+ error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
deactivate_super(sb);
if (error) {
@@ -291,9 +274,9 @@ int thaw_bdev(struct block_device *bdev)
goto out;
if (sb->s_op->thaw_super)
- error = sb->s_op->thaw_super(sb);
+ error = sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
else
- error = thaw_super(sb);
+ error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
if (error)
bdev->bd_fsfreeze_count++;
else
@@ -960,26 +943,38 @@ out_path_put:
}
EXPORT_SYMBOL(lookup_bdev);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty)
+/**
+ * bdev_mark_dead - mark a block device as dead
+ * @bdev: block device to operate on
+ * @surprise: indicate a surprise removal
+ *
+ * Tell the file system that this devices or media is dead. If @surprise is set
+ * to %true the device or media is already gone, if not we are preparing for an
+ * orderly removal.
+ *
+ * This calls into the file system, which then typicall syncs out all dirty data
+ * and writes back inodes and then invalidates any cached data in the inodes on
+ * the file system. In addition we also invalidate the block device mapping.
+ */
+void bdev_mark_dead(struct block_device *bdev, bool surprise)
{
- struct super_block *sb = get_super(bdev);
- int res = 0;
+ mutex_lock(&bdev->bd_holder_lock);
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
+ bdev->bd_holder_ops->mark_dead(bdev, surprise);
+ else
+ sync_blockdev(bdev);
+ mutex_unlock(&bdev->bd_holder_lock);
- if (sb) {
- /*
- * no need to lock the super, get_super holds the
- * read mutex so the filesystem cannot go away
- * under us (->put_super runs with the write lock
- * hold).
- */
- shrink_dcache_sb(sb);
- res = invalidate_inodes(sb, kill_dirty);
- drop_super(sb);
- }
invalidate_bdev(bdev);
- return res;
}
-EXPORT_SYMBOL(__invalidate_device);
+#ifdef CONFIG_DASD_MODULE
+/*
+ * Drivers should not use this directly, but the DASD driver has historically
+ * had a shutdown to offline mode that doesn't actually remove the gendisk
+ * that otherwise looks a lot like a safe device removal.
+ */
+EXPORT_SYMBOL_GPL(bdev_mark_dead);
+#endif
void sync_bdevs(bool wait)
{
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index fc49be622e05..9faafcd10e17 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -136,7 +136,9 @@ static void blkg_free_workfn(struct work_struct *work)
blkcg_policy[i]->pd_free_fn(blkg->pd[i]);
if (blkg->parent)
blkg_put(blkg->parent);
+ spin_lock_irq(&q->queue_lock);
list_del_init(&blkg->q_node);
+ spin_unlock_irq(&q->queue_lock);
mutex_unlock(&q->blkcg_mutex);
blk_put_queue(q);
diff --git a/block/blk-core.c b/block/blk-core.c
index 99d8b9812b18..9866468c72a2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -722,14 +722,9 @@ void submit_bio_noacct(struct bio *bio)
struct block_device *bdev = bio->bi_bdev;
struct request_queue *q = bdev_get_queue(bdev);
blk_status_t status = BLK_STS_IOERR;
- struct blk_plug *plug;
might_sleep();
- plug = blk_mq_plug(bio);
- if (plug && plug->nowait)
- bio->bi_opf |= REQ_NOWAIT;
-
/*
* For a REQ_NOWAIT based request, return -EOPNOTSUPP
* if queue does not support NOWAIT.
@@ -1059,7 +1054,6 @@ void blk_start_plug_nr_ios(struct blk_plug *plug, unsigned short nr_ios)
plug->rq_count = 0;
plug->multiple_queues = false;
plug->has_elevator = false;
- plug->nowait = false;
INIT_LIST_HEAD(&plug->cb_list);
/*
@@ -1144,8 +1138,7 @@ void __blk_flush_plug(struct blk_plug *plug, bool from_schedule)
{
if (!list_empty(&plug->cb_list))
flush_plug_callbacks(plug, from_schedule);
- if (!rq_list_empty(plug->mq_list))
- blk_mq_flush_plug_list(plug, from_schedule);
+ blk_mq_flush_plug_list(plug, from_schedule);
/*
* Unconditionally flush out cached requests, even if the unplug
* event came from schedule. Since we know hold references to the
diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c
index ad9844c5b40c..e6468eab2681 100644
--- a/block/blk-crypto-fallback.c
+++ b/block/blk-crypto-fallback.c
@@ -78,7 +78,7 @@ static struct blk_crypto_fallback_keyslot {
struct crypto_skcipher *tfms[BLK_ENCRYPTION_MODE_MAX];
} *blk_crypto_keyslots;
-static struct blk_crypto_profile blk_crypto_fallback_profile;
+static struct blk_crypto_profile *blk_crypto_fallback_profile;
static struct workqueue_struct *blk_crypto_wq;
static mempool_t *blk_crypto_bounce_page_pool;
static struct bio_set crypto_bio_split;
@@ -292,7 +292,7 @@ static bool blk_crypto_fallback_encrypt_bio(struct bio **bio_ptr)
* Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
* this bio's algorithm and key.
*/
- blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+ blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
bc->bc_key, &slot);
if (blk_st != BLK_STS_OK) {
src_bio->bi_status = blk_st;
@@ -395,7 +395,7 @@ static void blk_crypto_fallback_decrypt_bio(struct work_struct *work)
* Get a blk-crypto-fallback keyslot that contains a crypto_skcipher for
* this bio's algorithm and key.
*/
- blk_st = blk_crypto_get_keyslot(&blk_crypto_fallback_profile,
+ blk_st = blk_crypto_get_keyslot(blk_crypto_fallback_profile,
bc->bc_key, &slot);
if (blk_st != BLK_STS_OK) {
bio->bi_status = blk_st;
@@ -499,7 +499,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
return false;
}
- if (!__blk_crypto_cfg_supported(&blk_crypto_fallback_profile,
+ if (!__blk_crypto_cfg_supported(blk_crypto_fallback_profile,
&bc->bc_key->crypto_cfg)) {
bio->bi_status = BLK_STS_NOTSUPP;
return false;
@@ -526,7 +526,7 @@ bool blk_crypto_fallback_bio_prep(struct bio **bio_ptr)
int blk_crypto_fallback_evict_key(const struct blk_crypto_key *key)
{
- return __blk_crypto_evict_key(&blk_crypto_fallback_profile, key);
+ return __blk_crypto_evict_key(blk_crypto_fallback_profile, key);
}
static bool blk_crypto_fallback_inited;
@@ -534,7 +534,6 @@ static int blk_crypto_fallback_init(void)
{
int i;
int err;
- struct blk_crypto_profile *profile = &blk_crypto_fallback_profile;
if (blk_crypto_fallback_inited)
return 0;
@@ -545,18 +544,27 @@ static int blk_crypto_fallback_init(void)
if (err)
goto out;
- err = blk_crypto_profile_init(profile, blk_crypto_num_keyslots);
- if (err)
+ /* Dynamic allocation is needed because of lockdep_register_key(). */
+ blk_crypto_fallback_profile =
+ kzalloc(sizeof(*blk_crypto_fallback_profile), GFP_KERNEL);
+ if (!blk_crypto_fallback_profile) {
+ err = -ENOMEM;
goto fail_free_bioset;
+ }
+
+ err = blk_crypto_profile_init(blk_crypto_fallback_profile,
+ blk_crypto_num_keyslots);
+ if (err)
+ goto fail_free_profile;
err = -ENOMEM;
- profile->ll_ops = blk_crypto_fallback_ll_ops;
- profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
+ blk_crypto_fallback_profile->ll_ops = blk_crypto_fallback_ll_ops;
+ blk_crypto_fallback_profile->max_dun_bytes_supported = BLK_CRYPTO_MAX_IV_SIZE;
/* All blk-crypto modes have a crypto API fallback. */
for (i = 0; i < BLK_ENCRYPTION_MODE_MAX; i++)
- profile->modes_supported[i] = 0xFFFFFFFF;
- profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
+ blk_crypto_fallback_profile->modes_supported[i] = 0xFFFFFFFF;
+ blk_crypto_fallback_profile->modes_supported[BLK_ENCRYPTION_MODE_INVALID] = 0;
blk_crypto_wq = alloc_workqueue("blk_crypto_wq",
WQ_UNBOUND | WQ_HIGHPRI |
@@ -597,7 +605,9 @@ fail_free_keyslots:
fail_free_wq:
destroy_workqueue(blk_crypto_wq);
fail_destroy_profile:
- blk_crypto_profile_destroy(profile);
+ blk_crypto_profile_destroy(blk_crypto_fallback_profile);
+fail_free_profile:
+ kfree(blk_crypto_fallback_profile);
fail_free_bioset:
bioset_exit(&crypto_bio_split);
out:
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 9dfcf540f400..089fcb9cfce3 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -2516,6 +2516,10 @@ static void calc_vtime_cost_builtin(struct bio *bio, struct ioc_gq *iocg,
u64 seek_pages = 0;
u64 cost = 0;
+ /* Can't calculate cost for empty bio */
+ if (!bio->bi_iter.bi_size)
+ goto out;
+
switch (bio_op(bio)) {
case REQ_OP_READ:
coef_seqio = ioc->params.lcoefs[LCOEF_RSEQIO];
@@ -3297,11 +3301,12 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input,
if (qos[QOS_MIN] > qos[QOS_MAX])
goto einval;
- if (enable) {
+ if (enable && !ioc->enabled) {
blk_stat_enable_accounting(disk->queue);
blk_queue_flag_set(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = true;
- } else {
+ } else if (!enable && ioc->enabled) {
+ blk_stat_disable_accounting(disk->queue);
blk_queue_flag_clear(QUEUE_FLAG_RQ_ALLOC_TIME, disk->queue);
ioc->enabled = false;
}
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d50b1d62a3d9..953f08354c8c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -681,6 +681,21 @@ out_queue_exit:
}
EXPORT_SYMBOL_GPL(blk_mq_alloc_request_hctx);
+static void blk_mq_finish_request(struct request *rq)
+{
+ struct request_queue *q = rq->q;
+
+ if (rq->rq_flags & RQF_USE_SCHED) {
+ q->elevator->type->ops.finish_request(rq);
+ /*
+ * For postflush request that may need to be
+ * completed twice, we should clear this flag
+ * to avoid double finish_request() on the rq.
+ */
+ rq->rq_flags &= ~RQF_USE_SCHED;
+ }
+}
+
static void __blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -707,9 +722,7 @@ void blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
- if ((rq->rq_flags & RQF_USE_SCHED) &&
- q->elevator->type->ops.finish_request)
- q->elevator->type->ops.finish_request(rq);
+ blk_mq_finish_request(rq);
if (unlikely(laptop_mode && !blk_rq_is_passthrough(rq)))
laptop_io_completion(q->disk->bdi);
@@ -1020,6 +1033,8 @@ inline void __blk_mq_end_request(struct request *rq, blk_status_t error)
if (blk_mq_need_time_stamp(rq))
__blk_mq_end_request_acct(rq, ktime_get_ns());
+ blk_mq_finish_request(rq);
+
if (rq->end_io) {
rq_qos_done(rq->q, rq);
if (rq->end_io(rq, error) == RQ_END_IO_FREE)
@@ -1074,6 +1089,8 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob)
if (iob->need_ts)
__blk_mq_end_request_acct(rq, now);
+ blk_mq_finish_request(rq);
+
rq_qos_done(rq->q, rq);
/*
@@ -2754,7 +2771,14 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
struct request *rq;
- if (rq_list_empty(plug->mq_list))
+ /*
+ * We may have been called recursively midway through handling
+ * plug->mq_list via a schedule() in the driver's queue_rq() callback.
+ * To avoid mq_list changing under our feet, clear rq_count early and
+ * bail out specifically if rq_count is 0 rather than checking
+ * whether the mq_list is empty.
+ */
+ if (plug->rq_count == 0)
return;
plug->rq_count = 0;
diff --git a/block/disk-events.c b/block/disk-events.c
index 0cfac464e6d1..422db8292d09 100644
--- a/block/disk-events.c
+++ b/block/disk-events.c
@@ -281,9 +281,7 @@ bool disk_check_media_change(struct gendisk *disk)
if (!(events & DISK_EVENT_MEDIA_CHANGE))
return false;
- if (__invalidate_device(disk->part0, true))
- pr_warn("VFS: busy inodes on changed media %s\n",
- disk->disk_name);
+ bdev_mark_dead(disk->part0, true);
set_bit(GD_NEED_PART_SCAN, &disk->state);
return true;
}
@@ -294,25 +292,16 @@ EXPORT_SYMBOL(disk_check_media_change);
* @disk: the disk which will raise the event
* @events: the events to raise
*
- * Generate uevents for the disk. If DISK_EVENT_MEDIA_CHANGE is present,
- * attempt to free all dentries and inodes and invalidates all block
+ * Should be called when the media changes for @disk. Generates a uevent
+ * and attempts to free all dentries and inodes and invalidates all block
* device page cache entries in that case.
- *
- * Returns %true if DISK_EVENT_MEDIA_CHANGE was raised, or %false if not.
*/
-bool disk_force_media_change(struct gendisk *disk, unsigned int events)
+void disk_force_media_change(struct gendisk *disk)
{
- disk_event_uevent(disk, events);
-
- if (!(events & DISK_EVENT_MEDIA_CHANGE))
- return false;
-
+ disk_event_uevent(disk, DISK_EVENT_MEDIA_CHANGE);
inc_diskseq(disk);
- if (__invalidate_device(disk->part0, true))
- pr_warn("VFS: busy inodes on changed media %s\n",
- disk->disk_name);
+ bdev_mark_dead(disk->part0, true);
set_bit(GD_NEED_PART_SCAN, &disk->state);
- return true;
}
EXPORT_SYMBOL_GPL(disk_force_media_change);
diff --git a/block/elevator.c b/block/elevator.c
index 8400e303fbcb..5ff093cb3cf8 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -499,6 +499,9 @@ void elv_unregister_queue(struct request_queue *q)
int elv_register(struct elevator_type *e)
{
+ /* finish request is mandatory */
+ if (WARN_ON_ONCE(!e->ops.finish_request))
+ return -EINVAL;
/* insert_requests and dispatch_request are mandatory */
if (WARN_ON_ONCE(!e->ops.insert_requests || !e->ops.dispatch_request))
return -EINVAL;
diff --git a/block/fops.c b/block/fops.c
index a286bf3325c5..838ffada5341 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -358,13 +358,14 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
task_io_account_write(bio->bi_iter.bi_size);
}
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio->bi_opf |= REQ_NOWAIT;
+
if (iocb->ki_flags & IOCB_HIPRI) {
- bio->bi_opf |= REQ_POLLED | REQ_NOWAIT;
+ bio->bi_opf |= REQ_POLLED;
submit_bio(bio);
WRITE_ONCE(iocb->private, bio);
} else {
- if (iocb->ki_flags & IOCB_NOWAIT)
- bio->bi_opf |= REQ_NOWAIT;
submit_bio(bio);
}
return -EIOCBQUEUED;
diff --git a/block/genhd.c b/block/genhd.c
index 3d287b32d50d..cc32a0c704eb 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -554,7 +554,7 @@ out_exit_elevator:
}
EXPORT_SYMBOL(device_add_disk);
-static void blk_report_disk_dead(struct gendisk *disk)
+static void blk_report_disk_dead(struct gendisk *disk, bool surprise)
{
struct block_device *bdev;
unsigned long idx;
@@ -565,10 +565,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
continue;
rcu_read_unlock();
- mutex_lock(&bdev->bd_holder_lock);
- if (bdev->bd_holder_ops && bdev->bd_holder_ops->mark_dead)
- bdev->bd_holder_ops->mark_dead(bdev);
- mutex_unlock(&bdev->bd_holder_lock);
+ bdev_mark_dead(bdev, surprise);
put_device(&bdev->bd_device);
rcu_read_lock();
@@ -576,14 +573,7 @@ static void blk_report_disk_dead(struct gendisk *disk)
rcu_read_unlock();
}
-/**
- * blk_mark_disk_dead - mark a disk as dead
- * @disk: disk to mark as dead
- *
- * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
- * to this disk.
- */
-void blk_mark_disk_dead(struct gendisk *disk)
+static void __blk_mark_disk_dead(struct gendisk *disk)
{
/*
* Fail any new I/O.
@@ -603,8 +593,19 @@ void blk_mark_disk_dead(struct gendisk *disk)
* Prevent new I/O from crossing bio_queue_enter().
*/
blk_queue_start_drain(disk->queue);
+}
- blk_report_disk_dead(disk);
+/**
+ * blk_mark_disk_dead - mark a disk as dead
+ * @disk: disk to mark as dead
+ *
+ * Mark as disk as dead (e.g. surprise removed) and don't accept any new I/O
+ * to this disk.
+ */
+void blk_mark_disk_dead(struct gendisk *disk)
+{
+ __blk_mark_disk_dead(disk);
+ blk_report_disk_dead(disk, true);
}
EXPORT_SYMBOL_GPL(blk_mark_disk_dead);
@@ -641,18 +642,20 @@ void del_gendisk(struct gendisk *disk)
disk_del_events(disk);
/*
- * Prevent new openers by unlinked the bdev inode, and write out
- * dirty data before marking the disk dead and stopping all I/O.
+ * Prevent new openers by unlinked the bdev inode.
*/
mutex_lock(&disk->open_mutex);
- xa_for_each(&disk->part_tbl, idx, part) {
+ xa_for_each(&disk->part_tbl, idx, part)
remove_inode_hash(part->bd_inode);
- fsync_bdev(part);
- __invalidate_device(part, true);
- }
mutex_unlock(&disk->open_mutex);
- blk_mark_disk_dead(disk);
+ /*
+ * Tell the file system to write back all dirty data and shut down if
+ * it hasn't been notified earlier.
+ */
+ if (!test_bit(GD_DEAD, &disk->state))
+ blk_report_disk_dead(disk, false);
+ __blk_mark_disk_dead(disk);
/*
* Drop all partitions now that the disk is marked dead.
diff --git a/block/ioctl.c b/block/ioctl.c
index 3be11941fb2d..648670ddb164 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -364,7 +364,14 @@ static int blkdev_flushbuf(struct block_device *bdev, unsigned cmd,
{
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
- fsync_bdev(bdev);
+
+ mutex_lock(&bdev->bd_holder_lock);
+ if (bdev->bd_holder_ops && bdev->bd_holder_ops->sync)
+ bdev->bd_holder_ops->sync(bdev);
+ else
+ sync_blockdev(bdev);
+ mutex_unlock(&bdev->bd_holder_lock);
+
invalidate_bdev(bdev);
return 0;
}
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 13a7341299a9..e137a87f4db0 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -281,10 +281,7 @@ static void delete_partition(struct block_device *part)
* looked up any more even when openers still hold references.
*/
remove_inode_hash(part->bd_inode);
-
- fsync_bdev(part);
- __invalidate_device(part, true);
-
+ bdev_mark_dead(part, false);
drop_partition(part);
}
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 06b15b9f661c..10efb56d8b48 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -1241,6 +1241,8 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
return -ENOMEM;
}
+ rsgl->sgl.need_unpin =
+ iov_iter_extract_will_pin(&msg->msg_iter);
rsgl->sgl.sgt.sgl = rsgl->sgl.sgl;
rsgl->sgl.sgt.nents = 0;
rsgl->sgl.sgt.orig_nents = 0;
@@ -1255,8 +1257,6 @@ int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
}
sg_mark_end(rsgl->sgl.sgt.sgl + rsgl->sgl.sgt.nents - 1);
- rsgl->sgl.need_unpin =
- iov_iter_extract_will_pin(&msg->msg_iter);
/* chain the new scatterlist with previous one */
if (areq->last_rsgl)
diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h
index d92ba2e30e31..2f027d5a8206 100644
--- a/drivers/accel/habanalabs/common/habanalabs.h
+++ b/drivers/accel/habanalabs/common/habanalabs.h
@@ -3980,6 +3980,15 @@ static inline void hl_debugfs_fini(void)
{
}
+static inline int hl_debugfs_device_init(struct hl_device *hdev)
+{
+ return 0;
+}
+
+static inline void hl_debugfs_device_fini(struct hl_device *hdev)
+{
+}
+
static inline void hl_debugfs_add_device(struct hl_device *hdev)
{
}
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index 52b339aefadc..9967fcfa27ec 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -173,6 +173,9 @@ static void internal_free_pages_locked(struct ivpu_bo *bo)
{
unsigned int i, npages = bo->base.size >> PAGE_SHIFT;
+ if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
+ set_pages_array_wb(bo->pages, bo->base.size >> PAGE_SHIFT);
+
for (i = 0; i < npages; i++)
put_page(bo->pages[i]);
@@ -587,6 +590,11 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
drm_clflush_pages(bo->pages, bo->base.size >> PAGE_SHIFT);
+ if (bo->flags & DRM_IVPU_BO_WC)
+ set_pages_array_wc(bo->pages, bo->base.size >> PAGE_SHIFT);
+ else if (bo->flags & DRM_IVPU_BO_UNCACHED)
+ set_pages_array_uc(bo->pages, bo->base.size >> PAGE_SHIFT);
+
prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
bo->kvaddr = vmap(bo->pages, bo->base.size >> PAGE_SHIFT, VM_MAP, prot);
if (!bo->kvaddr) {
diff --git a/drivers/accel/qaic/qaic_control.c b/drivers/accel/qaic/qaic_control.c
index 5c57f7b4494e..388abd40024b 100644
--- a/drivers/accel/qaic/qaic_control.c
+++ b/drivers/accel/qaic/qaic_control.c
@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <linux/mutex.h>
+#include <linux/overflow.h>
#include <linux/pci.h>
#include <linux/scatterlist.h>
#include <linux/types.h>
@@ -366,7 +367,7 @@ static int encode_passthrough(struct qaic_device *qdev, void *trans, struct wrap
if (in_trans->hdr.len % 8 != 0)
return -EINVAL;
- if (msg_hdr_len + in_trans->hdr.len > QAIC_MANAGE_EXT_MSG_LENGTH)
+ if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_EXT_MSG_LENGTH)
return -ENOSPC;
trans_wrapper = add_wrapper(wrappers,
@@ -391,18 +392,31 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
struct qaic_manage_trans_dma_xfer *in_trans,
struct ioctl_resources *resources, struct dma_xfer *xfer)
{
+ u64 xfer_start_addr, remaining, end, total;
unsigned long need_pages;
struct page **page_list;
unsigned long nr_pages;
struct sg_table *sgt;
- u64 xfer_start_addr;
int ret;
int i;
- xfer_start_addr = in_trans->addr + resources->xferred_dma_size;
+ if (check_add_overflow(in_trans->addr, resources->xferred_dma_size, &xfer_start_addr))
+ return -EINVAL;
- need_pages = DIV_ROUND_UP(in_trans->size + offset_in_page(xfer_start_addr) -
- resources->xferred_dma_size, PAGE_SIZE);
+ if (in_trans->size < resources->xferred_dma_size)
+ return -EINVAL;
+ remaining = in_trans->size - resources->xferred_dma_size;
+ if (remaining == 0)
+ return 0;
+
+ if (check_add_overflow(xfer_start_addr, remaining, &end))
+ return -EINVAL;
+
+ total = remaining + offset_in_page(xfer_start_addr);
+ if (total >= SIZE_MAX)
+ return -EINVAL;
+
+ need_pages = DIV_ROUND_UP(total, PAGE_SIZE);
nr_pages = need_pages;
@@ -418,9 +432,12 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
}
ret = get_user_pages_fast(xfer_start_addr, nr_pages, 0, page_list);
- if (ret < 0 || ret != nr_pages) {
- ret = -EFAULT;
+ if (ret < 0)
goto free_page_list;
+ if (ret != nr_pages) {
+ nr_pages = ret;
+ ret = -EFAULT;
+ goto put_pages;
}
sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
@@ -431,7 +448,7 @@ static int find_and_map_user_pages(struct qaic_device *qdev,
ret = sg_alloc_table_from_pages(sgt, page_list, nr_pages,
offset_in_page(xfer_start_addr),
- in_trans->size - resources->xferred_dma_size, GFP_KERNEL);
+ remaining, GFP_KERNEL);
if (ret) {
ret = -ENOMEM;
goto free_sgt;
@@ -557,17 +574,11 @@ static int encode_dma(struct qaic_device *qdev, void *trans, struct wrapper_list
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (msg_hdr_len > (UINT_MAX - QAIC_MANAGE_EXT_MSG_LENGTH))
- return -EINVAL;
-
/* There should be enough space to hold at least one ASP entry. */
- if (msg_hdr_len + sizeof(*out_trans) + sizeof(struct wire_addr_size_pair) >
+ if (size_add(msg_hdr_len, sizeof(*out_trans) + sizeof(struct wire_addr_size_pair)) >
QAIC_MANAGE_EXT_MSG_LENGTH)
return -ENOMEM;
- if (in_trans->addr + in_trans->size < in_trans->addr || !in_trans->size)
- return -EINVAL;
-
xfer = kmalloc(sizeof(*xfer), GFP_KERNEL);
if (!xfer)
return -ENOMEM;
@@ -634,7 +645,7 @@ static int encode_activate(struct qaic_device *qdev, void *trans, struct wrapper
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (msg_hdr_len + sizeof(*out_trans) > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (size_add(msg_hdr_len, sizeof(*out_trans)) > QAIC_MANAGE_MAX_MSG_LENGTH)
return -ENOSPC;
if (!in_trans->queue_size)
@@ -718,7 +729,7 @@ static int encode_status(struct qaic_device *qdev, void *trans, struct wrapper_l
msg = &wrapper->msg;
msg_hdr_len = le32_to_cpu(msg->hdr.len);
- if (msg_hdr_len + in_trans->hdr.len > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (size_add(msg_hdr_len, in_trans->hdr.len) > QAIC_MANAGE_MAX_MSG_LENGTH)
return -ENOSPC;
trans_wrapper = add_wrapper(wrappers, sizeof(*trans_wrapper));
@@ -748,7 +759,8 @@ static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
int ret;
int i;
- if (!user_msg->count) {
+ if (!user_msg->count ||
+ user_msg->len < sizeof(*trans_hdr)) {
ret = -EINVAL;
goto out;
}
@@ -765,12 +777,13 @@ static int encode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
}
for (i = 0; i < user_msg->count; ++i) {
- if (user_len >= user_msg->len) {
+ if (user_len > user_msg->len - sizeof(*trans_hdr)) {
ret = -EINVAL;
break;
}
trans_hdr = (struct qaic_manage_trans_hdr *)(user_msg->data + user_len);
- if (user_len + trans_hdr->len > user_msg->len) {
+ if (trans_hdr->len < sizeof(trans_hdr) ||
+ size_add(user_len, trans_hdr->len) > user_msg->len) {
ret = -EINVAL;
break;
}
@@ -953,15 +966,23 @@ static int decode_message(struct qaic_device *qdev, struct manage_msg *user_msg,
int ret;
int i;
- if (msg_hdr_len > QAIC_MANAGE_MAX_MSG_LENGTH)
+ if (msg_hdr_len < sizeof(*trans_hdr) ||
+ msg_hdr_len > QAIC_MANAGE_MAX_MSG_LENGTH)
return -EINVAL;
user_msg->len = 0;
user_msg->count = le32_to_cpu(msg->hdr.count);
for (i = 0; i < user_msg->count; ++i) {
+ u32 hdr_len;
+
+ if (msg_len > msg_hdr_len - sizeof(*trans_hdr))
+ return -EINVAL;
+
trans_hdr = (struct wire_trans_hdr *)(msg->data + msg_len);
- if (msg_len + le32_to_cpu(trans_hdr->len) > msg_hdr_len)
+ hdr_len = le32_to_cpu(trans_hdr->len);
+ if (hdr_len < sizeof(*trans_hdr) ||
+ size_add(msg_len, hdr_len) > msg_hdr_len)
return -EINVAL;
switch (le32_to_cpu(trans_hdr->type)) {
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index e9a1cb779b30..6b6d981a71be 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -1021,6 +1021,7 @@ int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data, struct drm_fi
bo->dbc = dbc;
srcu_read_unlock(&dbc->ch_lock, rcu_id);
drm_gem_object_put(obj);
+ kfree(slice_ent);
srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c
index 3631230a61c8..56d887323ae5 100644
--- a/drivers/acpi/arm64/iort.c
+++ b/drivers/acpi/arm64/iort.c
@@ -1007,9 +1007,6 @@ static void iort_node_get_rmr_info(struct acpi_iort_node *node,
for (i = 0; i < node->mapping_count; i++, map++) {
struct acpi_iort_node *parent;
- if (!map->id_count)
- continue;
-
parent = ACPI_ADD_PTR(struct acpi_iort_node, iort_table,
map->output_reference);
if (parent != iommu)
diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 1dd8d5aebf67..32cfa3f4efd3 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c
@@ -470,6 +470,49 @@ static const struct dmi_system_id asus_laptop[] = {
{ }
};
+static const struct dmi_system_id tongfang_gm_rg[] = {
+ {
+ .ident = "TongFang GMxRGxx/XMG CORE 15 (M22)/TUXEDO Stellaris 15 Gen4 AMD",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "GMxRGxx"),
+ },
+ },
+ { }
+};
+
+static const struct dmi_system_id maingear_laptop[] = {
+ {
+ .ident = "MAINGEAR Vector Pro 2 15",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-15A3070T"),
+ }
+ },
+ {
+ .ident = "MAINGEAR Vector Pro 2 17",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Micro Electronics Inc"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "MG-VCP2-17A3070T"),
+ },
+ },
+ { }
+};
+
+static const struct dmi_system_id pcspecialist_laptop[] = {
+ {
+ .ident = "PCSpecialist Elimina Pro 16 M",
+ /*
+ * Some models have product-name "Elimina Pro 16 M",
+ * others "GM6BGEQ". Match on board-name to match both.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "PCSpecialist"),
+ DMI_MATCH(DMI_BOARD_NAME, "GM6BGEQ"),
+ },
+ },
+ { }
+};
+
static const struct dmi_system_id lg_laptop[] = {
{
.ident = "LG Electronics 17U70P",
@@ -493,6 +536,9 @@ struct irq_override_cmp {
static const struct irq_override_cmp override_table[] = {
{ medion_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
{ asus_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
+ { tongfang_gm_rg, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+ { maingear_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
+ { pcspecialist_laptop, 1, ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_LOW, 1, true },
{ lg_laptop, 1, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW, 0, false },
};
@@ -512,6 +558,28 @@ static bool acpi_dev_irq_override(u32 gsi, u8 triggering, u8 polarity,
return entry->override;
}
+#ifdef CONFIG_X86
+ /*
+ * Always use the MADT override info, except for the i8042 PS/2 ctrl
+ * IRQs (1 and 12). For these the DSDT IRQ settings should sometimes
+ * be used otherwise PS/2 keyboards / mice will not work.
+ */
+ if (gsi != 1 && gsi != 12)
+ return true;
+
+ /* If the override comes from an INT_SRC_OVR MADT entry, honor it. */
+ if (acpi_int_src_ovr[gsi])
+ return true;
+
+ /*
+ * IRQ override isn't needed on modern AMD Zen systems and
+ * this override breaks active low IRQs on AMD Ryzen 6000 and
+ * newer systems. Skip it.
+ */
+ if (boot_cpu_has(X86_FEATURE_ZEN))
+ return false;
+#endif
+
return true;
}
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 5b145f1aaa1b..87e385542576 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1714,6 +1714,7 @@ static bool acpi_device_enumeration_by_parent(struct acpi_device *device)
{"BSG1160", },
{"BSG2150", },
{"CSC3551", },
+ {"CSC3556", },
{"INT33FE", },
{"INT3515", },
/* Non-conforming _HID for Cirrus Logic already released */
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index 486c8271cab7..d720f93d8b19 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -6617,6 +6617,7 @@ err_init_binder_device_failed:
err_alloc_device_names_failed:
debugfs_remove_recursive(binder_debugfs_dir_entry_root);
+ binder_alloc_shrinker_exit();
return ret;
}
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index 662a2a2e2e84..e3db8297095a 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -1087,6 +1087,12 @@ int binder_alloc_shrinker_init(void)
return ret;
}
+void binder_alloc_shrinker_exit(void)
+{
+ unregister_shrinker(&binder_shrinker);
+ list_lru_destroy(&binder_alloc_lru);
+}
+
/**
* check_buffer() - verify that buffer/offset is safe to access
* @alloc: binder_alloc for this proc
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index 138d1d5af9ce..dc1e2b01dd64 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -129,6 +129,7 @@ extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
int pid);
extern void binder_alloc_init(struct binder_alloc *alloc);
extern int binder_alloc_shrinker_init(void);
+extern void binder_alloc_shrinker_exit(void);
extern void binder_alloc_vma_close(struct binder_alloc *alloc);
extern struct binder_buffer *
binder_alloc_prepare_to_free(struct binder_alloc *alloc,
diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c
index 76e7d6676657..faebe9f5412a 100644
--- a/drivers/android/binderfs.c
+++ b/drivers/android/binderfs.c
@@ -153,7 +153,7 @@ static int binderfs_binder_device_create(struct inode *ref_inode,
goto err;
inode->i_ino = minor + INODE_OFFSET;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
init_special_inode(inode, S_IFCHR | 0600,
MKDEV(MAJOR(binderfs_dev), minor));
inode->i_fop = &binder_fops;
@@ -432,7 +432,7 @@ static int binderfs_binder_ctl_create(struct super_block *sb)
}
inode->i_ino = SECOND_INODE;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
init_special_inode(inode, S_IFCHR | 0600,
MKDEV(MAJOR(binderfs_dev), minor));
inode->i_fop = &binder_ctl_fops;
@@ -474,7 +474,7 @@ static struct inode *binderfs_make_inode(struct super_block *sb, int mode)
if (ret) {
ret->i_ino = iunique(sb, BINDERFS_MAX_MINOR + INODE_OFFSET);
ret->i_mode = mode;
- ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+ ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
}
return ret;
}
@@ -703,7 +703,7 @@ static int binderfs_fill_super(struct super_block *sb, struct fs_context *fc)
inode->i_ino = FIRST_INODE;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | 0755;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_op = &binderfs_dir_inode_operations;
set_nlink(inode, 2);
diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index d37ab6087f2f..04db0f2c683a 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -4938,8 +4938,8 @@ void ata_qc_complete(struct ata_queued_cmd *qc)
if (qc->result_tf.status & ATA_SENSE &&
((ata_is_ncq(qc->tf.protocol) &&
dev->flags & ATA_DFLAG_CDL_ENABLED) ||
- (!(ata_is_ncq(qc->tf.protocol) &&
- ata_id_sense_reporting_enabled(dev->id))))) {
+ (!ata_is_ncq(qc->tf.protocol) &&
+ ata_id_sense_reporting_enabled(dev->id)))) {
/*
* Tell SCSI EH to not overwrite scmd->result even if
* this command is finished with result SAM_STAT_GOOD.
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 370d18aca71e..c6ece32de8e3 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1100,7 +1100,14 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct ata_device *dev)
}
} else {
sdev->sector_size = ata_id_logical_sector_size(dev->id);
+ /*
+ * Stop the drive on suspend but do not issue START STOP UNIT
+ * on resume as this is not necessary and may fail: the device
+ * will be woken up by ata_port_pm_resume() with a port reset
+ * and device revalidation.
+ */
sdev->manage_start_stop = 1;
+ sdev->no_start_on_resume = 1;
}
/*
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
index 6ab294322e79..314eaa167954 100644
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -529,7 +529,8 @@ static void data_xfer(struct work_struct *work)
/* dma_request_channel may sleep, so calling from process context */
acdev->dma_chan = dma_request_chan(acdev->host->dev, "data");
if (IS_ERR(acdev->dma_chan)) {
- dev_err(acdev->host->dev, "Unable to get dma_chan\n");
+ dev_err_probe(acdev->host->dev, PTR_ERR(acdev->dma_chan),
+ "Unable to get dma_chan\n");
acdev->dma_chan = NULL;
goto chan_request_fail;
}
diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c
index d60e1f69d7b0..c697219a61a2 100644
--- a/drivers/ata/pata_ns87415.c
+++ b/drivers/ata/pata_ns87415.c
@@ -260,7 +260,7 @@ static u8 ns87560_check_status(struct ata_port *ap)
* LOCKING:
* Inherited from caller.
*/
-void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
+static void ns87560_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
{
struct ata_ioports *ioaddr = &ap->ioaddr;
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index ff538b858928..2884acfc4863 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -815,8 +815,8 @@ static int octeon_cf_probe(struct platform_device *pdev)
irq_handler_t irq_handler = NULL;
void __iomem *base;
struct octeon_cf_port *cf_port;
- int rv = -ENOMEM;
u32 bus_width;
+ int rv;
node = pdev->dev.of_node;
if (node == NULL)
@@ -893,12 +893,12 @@ static int octeon_cf_probe(struct platform_device *pdev)
cs0 = devm_ioremap(&pdev->dev, res_cs0->start,
resource_size(res_cs0));
if (!cs0)
- return rv;
+ return -ENOMEM;
/* allocate host */
host = ata_host_alloc(&pdev->dev, 1);
if (!host)
- return rv;
+ return -ENOMEM;
ap = host->ports[0];
ap->private_data = cf_port;
diff --git a/drivers/ata/pata_parport/aten.c b/drivers/ata/pata_parport/aten.c
index 8328a49a95ef..620ce6c8da5c 100644
--- a/drivers/ata/pata_parport/aten.c
+++ b/drivers/ata/pata_parport/aten.c
@@ -139,4 +139,6 @@ static struct pi_protocol aten = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("ATEN EH-100 parallel port IDE adapter protocol driver");
module_pata_parport_driver(aten);
diff --git a/drivers/ata/pata_parport/bpck.c b/drivers/ata/pata_parport/bpck.c
index 9f4309f9b57f..bba1eda65f36 100644
--- a/drivers/ata/pata_parport/bpck.c
+++ b/drivers/ata/pata_parport/bpck.c
@@ -502,4 +502,6 @@ static struct pi_protocol bpck = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("MicroSolutions BACKPACK parallel port IDE adapter protocol driver");
module_pata_parport_driver(bpck);
diff --git a/drivers/ata/pata_parport/bpck6.c b/drivers/ata/pata_parport/bpck6.c
index c6dbd14120d1..62c2b53325e1 100644
--- a/drivers/ata/pata_parport/bpck6.c
+++ b/drivers/ata/pata_parport/bpck6.c
@@ -459,5 +459,6 @@ static struct pi_protocol bpck6 = {
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Micro Solutions Inc.");
-MODULE_DESCRIPTION("BACKPACK Protocol module, compatible with PARIDE");
+MODULE_DESCRIPTION("Micro Solutions BACKPACK parallel port IDE adapter "
+ "(version 6 drives) protocol driver");
module_pata_parport_driver(bpck6);
diff --git a/drivers/ata/pata_parport/comm.c b/drivers/ata/pata_parport/comm.c
index cc5485bd0a5b..4839becbbd56 100644
--- a/drivers/ata/pata_parport/comm.c
+++ b/drivers/ata/pata_parport/comm.c
@@ -201,4 +201,6 @@ static struct pi_protocol comm = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("DataStor Commuter parallel port IDE adapter protocol driver");
module_pata_parport_driver(comm);
diff --git a/drivers/ata/pata_parport/dstr.c b/drivers/ata/pata_parport/dstr.c
index 368d7c7962a9..88930bb1f07e 100644
--- a/drivers/ata/pata_parport/dstr.c
+++ b/drivers/ata/pata_parport/dstr.c
@@ -230,4 +230,6 @@ static struct pi_protocol dstr = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("DataStor EP2000 parallel port IDE adapter protocol driver");
module_pata_parport_driver(dstr);
diff --git a/drivers/ata/pata_parport/epat.c b/drivers/ata/pata_parport/epat.c
index 016bd96bce89..3cb54fcbf0d0 100644
--- a/drivers/ata/pata_parport/epat.c
+++ b/drivers/ata/pata_parport/epat.c
@@ -358,5 +358,8 @@ static void __exit epat_exit(void)
}
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Shuttle Technologies EPAT parallel port IDE adapter "
+ "protocol driver");
module_init(epat_init)
module_exit(epat_exit)
diff --git a/drivers/ata/pata_parport/epia.c b/drivers/ata/pata_parport/epia.c
index 920e9f40d401..7aaba474c671 100644
--- a/drivers/ata/pata_parport/epia.c
+++ b/drivers/ata/pata_parport/epia.c
@@ -306,4 +306,7 @@ static struct pi_protocol epia = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Shuttle Technologies EPIA parallel port IDE adapter "
+ "protocol driver");
module_pata_parport_driver(epia);
diff --git a/drivers/ata/pata_parport/fit2.c b/drivers/ata/pata_parport/fit2.c
index 6524f3033b1e..de79cf91ad5f 100644
--- a/drivers/ata/pata_parport/fit2.c
+++ b/drivers/ata/pata_parport/fit2.c
@@ -132,4 +132,7 @@ static struct pi_protocol fit2 = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Fidelity International Technology parallel port IDE adapter"
+ "(older models) protocol driver");
module_pata_parport_driver(fit2);
diff --git a/drivers/ata/pata_parport/fit3.c b/drivers/ata/pata_parport/fit3.c
index c172a38ae67d..bad7aa920cdc 100644
--- a/drivers/ata/pata_parport/fit3.c
+++ b/drivers/ata/pata_parport/fit3.c
@@ -193,4 +193,7 @@ static struct pi_protocol fit3 = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Fidelity International Technology parallel port IDE adapter"
+ "(newer models) protocol driver");
module_pata_parport_driver(fit3);
diff --git a/drivers/ata/pata_parport/friq.c b/drivers/ata/pata_parport/friq.c
index dc428f54fe0c..7abe2ff40685 100644
--- a/drivers/ata/pata_parport/friq.c
+++ b/drivers/ata/pata_parport/friq.c
@@ -259,4 +259,6 @@ static struct pi_protocol friq = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Freecom IQ parallel port IDE adapter protocol driver");
module_pata_parport_driver(friq);
diff --git a/drivers/ata/pata_parport/frpw.c b/drivers/ata/pata_parport/frpw.c
index 28d9bb2c6baf..7fa9b9857321 100644
--- a/drivers/ata/pata_parport/frpw.c
+++ b/drivers/ata/pata_parport/frpw.c
@@ -293,4 +293,6 @@ static struct pi_protocol frpw = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Freecom Power parallel port IDE adapter protocol driver");
module_pata_parport_driver(frpw);
diff --git a/drivers/ata/pata_parport/kbic.c b/drivers/ata/pata_parport/kbic.c
index 6023e071516d..fca322627b82 100644
--- a/drivers/ata/pata_parport/kbic.c
+++ b/drivers/ata/pata_parport/kbic.c
@@ -301,5 +301,8 @@ static void __exit kbic_exit(void)
}
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("KingByte Information Systems KBIC-951A and KBIC-971A "
+ "parallel port IDE adapter protocol driver");
module_init(kbic_init)
module_exit(kbic_exit)
diff --git a/drivers/ata/pata_parport/ktti.c b/drivers/ata/pata_parport/ktti.c
index bca6c20ef617..c078d1934862 100644
--- a/drivers/ata/pata_parport/ktti.c
+++ b/drivers/ata/pata_parport/ktti.c
@@ -106,4 +106,6 @@ static struct pi_protocol ktti = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("KT Technology parallel port IDE adapter protocol driver");
module_pata_parport_driver(ktti);
diff --git a/drivers/ata/pata_parport/on20.c b/drivers/ata/pata_parport/on20.c
index 34e69da2bec8..7c70e5b13a2a 100644
--- a/drivers/ata/pata_parport/on20.c
+++ b/drivers/ata/pata_parport/on20.c
@@ -142,4 +142,6 @@ static struct pi_protocol on20 = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Onspec 90c20 parallel port IDE adapter protocol driver");
module_pata_parport_driver(on20);
diff --git a/drivers/ata/pata_parport/on26.c b/drivers/ata/pata_parport/on26.c
index 5da317b394c1..c88e5d6f203e 100644
--- a/drivers/ata/pata_parport/on26.c
+++ b/drivers/ata/pata_parport/on26.c
@@ -310,4 +310,6 @@ static struct pi_protocol on26 = {
};
MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Grant R. Guenther <grant@torque.net>");
+MODULE_DESCRIPTION("Onspec 90c26 parallel port IDE adapter protocol driver");
module_pata_parport_driver(on26);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index c1815b9dae68..fe6690ecf563 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -509,73 +509,30 @@ static void __init cpu_dev_register_generic(void)
}
#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES
-
-ssize_t __weak cpu_show_meltdown(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spectre_v1(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spectre_v2(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_spec_store_bypass(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_l1tf(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_mds(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_tsx_async_abort(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_itlb_multihit(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_srbds(struct device *dev,
+static ssize_t cpu_show_not_affected(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sysfs_emit(buf, "Not affected\n");
}
-ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
-
-ssize_t __weak cpu_show_retbleed(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sysfs_emit(buf, "Not affected\n");
-}
+#define CPU_SHOW_VULN_FALLBACK(func) \
+ ssize_t cpu_show_##func(struct device *, \
+ struct device_attribute *, char *) \
+ __attribute__((weak, alias("cpu_show_not_affected")))
+
+CPU_SHOW_VULN_FALLBACK(meltdown);
+CPU_SHOW_VULN_FALLBACK(spectre_v1);
+CPU_SHOW_VULN_FALLBACK(spectre_v2);
+CPU_SHOW_VULN_FALLBACK(spec_store_bypass);
+CPU_SHOW_VULN_FALLBACK(l1tf);
+CPU_SHOW_VULN_FALLBACK(mds);
+CPU_SHOW_VULN_FALLBACK(tsx_async_abort);
+CPU_SHOW_VULN_FALLBACK(itlb_multihit);
+CPU_SHOW_VULN_FALLBACK(srbds);
+CPU_SHOW_VULN_FALLBACK(mmio_stale_data);
+CPU_SHOW_VULN_FALLBACK(retbleed);
+CPU_SHOW_VULN_FALLBACK(spec_rstack_overflow);
+CPU_SHOW_VULN_FALLBACK(gds);
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
@@ -588,6 +545,8 @@ static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
+static DEVICE_ATTR(spec_rstack_overflow, 0444, cpu_show_spec_rstack_overflow, NULL);
+static DEVICE_ATTR(gather_data_sampling, 0444, cpu_show_gds, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
@@ -601,6 +560,8 @@ static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_srbds.attr,
&dev_attr_mmio_stale_data.attr,
&dev_attr_retbleed.attr,
+ &dev_attr_spec_rstack_overflow.attr,
+ &dev_attr_gather_data_sampling.attr,
NULL
};
diff --git a/drivers/base/power/power.h b/drivers/base/power/power.h
index 0eb7f02b3ad5..922ed457db19 100644
--- a/drivers/base/power/power.h
+++ b/drivers/base/power/power.h
@@ -29,6 +29,7 @@ extern u64 pm_runtime_active_time(struct device *dev);
#define WAKE_IRQ_DEDICATED_MASK (WAKE_IRQ_DEDICATED_ALLOCATED | \
WAKE_IRQ_DEDICATED_MANAGED | \
WAKE_IRQ_DEDICATED_REVERSE)
+#define WAKE_IRQ_DEDICATED_ENABLED BIT(3)
struct wake_irq {
struct device *dev;
diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c
index d487a6bac630..42171f766dcb 100644
--- a/drivers/base/power/wakeirq.c
+++ b/drivers/base/power/wakeirq.c
@@ -194,7 +194,6 @@ err_free:
return err;
}
-
/**
* dev_pm_set_dedicated_wake_irq - Request a dedicated wake-up interrupt
* @dev: Device entry
@@ -206,11 +205,6 @@ err_free:
* Sets up a threaded interrupt handler for a device that has
* a dedicated wake-up interrupt in addition to the device IO
* interrupt.
- *
- * The interrupt starts disabled, and needs to be managed for
- * the device by the bus code or the device driver using
- * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*()
- * functions.
*/
int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
{
@@ -232,11 +226,6 @@ EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq);
* the status of WAKE_IRQ_DEDICATED_REVERSE to tell rpm_suspend()
* to enable dedicated wake-up interrupt after running the runtime suspend
* callback for @dev.
- *
- * The interrupt starts disabled, and needs to be managed for
- * the device by the bus code or the device driver using
- * dev_pm_enable_wake_irq*() and dev_pm_disable_wake_irq*()
- * functions.
*/
int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
{
@@ -245,44 +234,6 @@ int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq)
EXPORT_SYMBOL_GPL(dev_pm_set_dedicated_wake_irq_reverse);
/**
- * dev_pm_enable_wake_irq - Enable device wake-up interrupt
- * @dev: Device
- *
- * Optionally called from the bus code or the device driver for
- * runtime_resume() to override the PM runtime core managed wake-up
- * interrupt handling to enable the wake-up interrupt.
- *
- * Note that for runtime_suspend()) the wake-up interrupts
- * should be unconditionally enabled unlike for suspend()
- * that is conditional.
- */
-void dev_pm_enable_wake_irq(struct device *dev)
-{
- struct wake_irq *wirq = dev->power.wakeirq;
-
- if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED))
- enable_irq(wirq->irq);
-}
-EXPORT_SYMBOL_GPL(dev_pm_enable_wake_irq);
-
-/**
- * dev_pm_disable_wake_irq - Disable device wake-up interrupt
- * @dev: Device
- *
- * Optionally called from the bus code or the device driver for
- * runtime_suspend() to override the PM runtime core managed wake-up
- * interrupt handling to disable the wake-up interrupt.
- */
-void dev_pm_disable_wake_irq(struct device *dev)
-{
- struct wake_irq *wirq = dev->power.wakeirq;
-
- if (wirq && (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED))
- disable_irq_nosync(wirq->irq);
-}
-EXPORT_SYMBOL_GPL(dev_pm_disable_wake_irq);
-
-/**
* dev_pm_enable_wake_irq_check - Checks and enables wake-up interrupt
* @dev: Device
* @can_change_status: Can change wake-up interrupt status
@@ -314,8 +265,10 @@ void dev_pm_enable_wake_irq_check(struct device *dev,
return;
enable:
- if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE))
+ if (!can_change_status || !(wirq->status & WAKE_IRQ_DEDICATED_REVERSE)) {
enable_irq(wirq->irq);
+ wirq->status |= WAKE_IRQ_DEDICATED_ENABLED;
+ }
}
/**
@@ -336,8 +289,10 @@ void dev_pm_disable_wake_irq_check(struct device *dev, bool cond_disable)
if (cond_disable && (wirq->status & WAKE_IRQ_DEDICATED_REVERSE))
return;
- if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED)
+ if (wirq->status & WAKE_IRQ_DEDICATED_MANAGED) {
+ wirq->status &= ~WAKE_IRQ_DEDICATED_ENABLED;
disable_irq_nosync(wirq->irq);
+ }
}
/**
@@ -376,7 +331,7 @@ void dev_pm_arm_wake_irq(struct wake_irq *wirq)
if (device_may_wakeup(wirq->dev)) {
if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
- !pm_runtime_status_suspended(wirq->dev))
+ !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED))
enable_irq(wirq->irq);
enable_irq_wake(wirq->irq);
@@ -399,7 +354,7 @@ void dev_pm_disarm_wake_irq(struct wake_irq *wirq)
disable_irq_wake(wirq->irq);
if (wirq->status & WAKE_IRQ_DEDICATED_ALLOCATED &&
- !pm_runtime_status_suspended(wirq->dev))
+ !(wirq->status & WAKE_IRQ_DEDICATED_ENABLED))
disable_irq_nosync(wirq->irq);
}
}
diff --git a/drivers/base/regmap/regcache-rbtree.c b/drivers/base/regmap/regcache-rbtree.c
index fabf87058d80..584bcc55f56e 100644
--- a/drivers/base/regmap/regcache-rbtree.c
+++ b/drivers/base/regmap/regcache-rbtree.c
@@ -471,6 +471,8 @@ static int regcache_rbtree_sync(struct regmap *map, unsigned int min,
unsigned int start, end;
int ret;
+ map->async = true;
+
rbtree_ctx = map->cache;
for (node = rb_first(&rbtree_ctx->root); node; node = rb_next(node)) {
rbnode = rb_entry(node, struct regcache_rbtree_node, node);
@@ -499,6 +501,8 @@ static int regcache_rbtree_sync(struct regmap *map, unsigned int min,
return ret;
}
+ map->async = false;
+
return regmap_async_complete(map);
}
diff --git a/drivers/base/regmap/regcache.c b/drivers/base/regmap/regcache.c
index 28bc3ae9458a..7d3e47436056 100644
--- a/drivers/base/regmap/regcache.c
+++ b/drivers/base/regmap/regcache.c
@@ -368,8 +368,6 @@ int regcache_sync(struct regmap *map)
if (!map->cache_dirty)
goto out;
- map->async = true;
-
/* Apply any patch first */
map->cache_bypass = true;
for (i = 0; i < map->patch_regs; i++) {
@@ -392,7 +390,6 @@ int regcache_sync(struct regmap *map)
out:
/* Restore the bypass state */
- map->async = false;
map->cache_bypass = bypass;
map->no_sync_defaults = false;
map->unlock(map->lock_arg);
diff --git a/drivers/base/regmap/regmap-i2c.c b/drivers/base/regmap/regmap-i2c.c
index 980e5ce6a3a3..3ec611dc0c09 100644
--- a/drivers/base/regmap/regmap-i2c.c
+++ b/drivers/base/regmap/regmap-i2c.c
@@ -242,8 +242,8 @@ static int regmap_i2c_smbus_i2c_read(void *context, const void *reg,
static const struct regmap_bus regmap_i2c_smbus_i2c_block = {
.write = regmap_i2c_smbus_i2c_write,
.read = regmap_i2c_smbus_i2c_read,
- .max_raw_read = I2C_SMBUS_BLOCK_MAX,
- .max_raw_write = I2C_SMBUS_BLOCK_MAX,
+ .max_raw_read = I2C_SMBUS_BLOCK_MAX - 1,
+ .max_raw_write = I2C_SMBUS_BLOCK_MAX - 1,
};
static int regmap_i2c_smbus_i2c_write_reg16(void *context, const void *data,
@@ -299,8 +299,8 @@ static int regmap_i2c_smbus_i2c_read_reg16(void *context, const void *reg,
static const struct regmap_bus regmap_i2c_smbus_i2c_block_reg16 = {
.write = regmap_i2c_smbus_i2c_write_reg16,
.read = regmap_i2c_smbus_i2c_read_reg16,
- .max_raw_read = I2C_SMBUS_BLOCK_MAX,
- .max_raw_write = I2C_SMBUS_BLOCK_MAX,
+ .max_raw_read = I2C_SMBUS_BLOCK_MAX - 2,
+ .max_raw_write = I2C_SMBUS_BLOCK_MAX - 2,
};
static const struct regmap_bus *regmap_get_i2c_bus(struct i2c_client *i2c,
diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c
index 24257aa9004d..9ff3018a46aa 100644
--- a/drivers/base/regmap/regmap-kunit.c
+++ b/drivers/base/regmap/regmap-kunit.c
@@ -58,6 +58,9 @@ static struct regmap *gen_regmap(struct regmap_config *config,
int i;
struct reg_default *defaults;
+ config->disable_locking = config->cache_type == REGCACHE_RBTREE ||
+ config->cache_type == REGCACHE_MAPLE;
+
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
return ERR_PTR(-ENOMEM);
@@ -889,6 +892,8 @@ static struct regmap *gen_raw_regmap(struct regmap_config *config,
config->cache_type = test_type->cache_type;
config->val_format_endian = test_type->val_endian;
+ config->disable_locking = config->cache_type == REGCACHE_RBTREE ||
+ config->cache_type == REGCACHE_MAPLE;
buf = kmalloc(size, GFP_KERNEL);
if (!buf)
diff --git a/drivers/base/regmap/regmap-spi-avmm.c b/drivers/base/regmap/regmap-spi-avmm.c
index 6af692844c19..4c2b94b3e30b 100644
--- a/drivers/base/regmap/regmap-spi-avmm.c
+++ b/drivers/base/regmap/regmap-spi-avmm.c
@@ -660,7 +660,7 @@ static const struct regmap_bus regmap_spi_avmm_bus = {
.reg_format_endian_default = REGMAP_ENDIAN_NATIVE,
.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
.max_raw_read = SPI_AVMM_VAL_SIZE * MAX_READ_CNT,
- .max_raw_write = SPI_AVMM_REG_SIZE + SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
+ .max_raw_write = SPI_AVMM_VAL_SIZE * MAX_WRITE_CNT,
.free_context = spi_avmm_bridge_ctx_free,
};
diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 89a7f1c459c1..1bfd1727b4da 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -2082,8 +2082,6 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
size_t val_count = val_len / val_bytes;
size_t chunk_count, chunk_bytes;
size_t chunk_regs = val_count;
- size_t max_data = map->max_raw_write - map->format.reg_bytes -
- map->format.pad_bytes;
int ret, i;
if (!val_count)
@@ -2091,8 +2089,8 @@ int _regmap_raw_write(struct regmap *map, unsigned int reg,
if (map->use_single_write)
chunk_regs = 1;
- else if (map->max_raw_write && val_len > max_data)
- chunk_regs = max_data / val_bytes;
+ else if (map->max_raw_write && val_len > map->max_raw_write)
+ chunk_regs = map->max_raw_write / val_bytes;
chunk_count = val_count / chunk_regs;
chunk_bytes = chunk_regs * val_bytes;
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index e460c9799d9f..2b98114a9fe0 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1547,7 +1547,6 @@ static int fd_locked_ioctl(struct block_device *bdev, blk_mode_t mode,
rel_fdc();
return -EBUSY;
}
- fsync_bdev(bdev);
if (fd_motor_on(drive) == 0) {
rel_fdc();
return -ENODEV;
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index 2db9b186b977..ea4eb88a2e45 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -3255,7 +3255,7 @@ static int set_geometry(unsigned int cmd, struct floppy_struct *g,
if (!disk || ITYPE(drive_state[cnt].fd_device) != type)
continue;
- __invalidate_device(disk->part0, true);
+ disk_force_media_change(disk);
}
mutex_unlock(&open_lock);
} else {
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 37511d2b2caf..9f2d412fc560 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -603,7 +603,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
goto out_err;
/* and ... switch */
- disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+ disk_force_media_change(lo->lo_disk);
blk_mq_freeze_queue(lo->lo_queue);
mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
lo->lo_backing_file = file;
@@ -1067,7 +1067,7 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
/* suppress uevents while reconfiguring the device */
dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
- disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+ disk_force_media_change(lo->lo_disk);
set_disk_ro(lo->lo_disk, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
@@ -1171,7 +1171,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
if (!release)
blk_mq_unfreeze_queue(lo->lo_queue);
- disk_force_media_change(lo->lo_disk, DISK_EVENT_MEDIA_CHANGE);
+ disk_force_media_change(lo->lo_disk);
if (lo->lo_flags & LO_FLAGS_PARTSCAN) {
int err;
@@ -1775,14 +1775,43 @@ static const struct block_device_operations lo_fops = {
/*
* If max_loop is specified, create that many devices upfront.
* This also becomes a hard limit. If max_loop is not specified,
+ * the default isn't a hard limit (as before commit 85c50197716c
+ * changed the default value from 0 for max_loop=0 reasons), just
* create CONFIG_BLK_DEV_LOOP_MIN_COUNT loop devices at module
* init time. Loop devices can be requested on-demand with the
* /dev/loop-control interface, or be instantiated by accessing
* a 'dead' device node.
*/
static int max_loop = CONFIG_BLK_DEV_LOOP_MIN_COUNT;
-module_param(max_loop, int, 0444);
+
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
+static bool max_loop_specified;
+
+static int max_loop_param_set_int(const char *val,
+ const struct kernel_param *kp)
+{
+ int ret;
+
+ ret = param_set_int(val, kp);
+ if (ret < 0)
+ return ret;
+
+ max_loop_specified = true;
+ return 0;
+}
+
+static const struct kernel_param_ops max_loop_param_ops = {
+ .set = max_loop_param_set_int,
+ .get = param_get_int,
+};
+
+module_param_cb(max_loop, &max_loop_param_ops, &max_loop, 0444);
MODULE_PARM_DESC(max_loop, "Maximum number of loop devices");
+#else
+module_param(max_loop, int, 0444);
+MODULE_PARM_DESC(max_loop, "Initial number of loop devices");
+#endif
+
module_param(max_part, int, 0444);
MODULE_PARM_DESC(max_part, "Maximum number of partitions per loop device");
@@ -2093,14 +2122,18 @@ static void loop_remove(struct loop_device *lo)
put_disk(lo->lo_disk);
}
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
static void loop_probe(dev_t dev)
{
int idx = MINOR(dev) >> part_shift;
- if (max_loop && idx >= max_loop)
+ if (max_loop_specified && max_loop && idx >= max_loop)
return;
loop_add(idx);
}
+#else
+#define loop_probe NULL
+#endif /* !CONFIG_BLOCK_LEGACY_AUTOLOAD */
static int loop_control_remove(int idx)
{
@@ -2281,6 +2314,9 @@ module_exit(loop_exit);
static int __init max_loop_setup(char *str)
{
max_loop = simple_strtol(str, NULL, 0);
+#ifdef CONFIG_BLOCK_LEGACY_AUTOLOAD
+ max_loop_specified = true;
+#endif
return 1;
}
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 8576d696c7a2..42e0159bb258 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1434,12 +1434,10 @@ static int nbd_start_device_ioctl(struct nbd_device *nbd)
return ret;
}
-static void nbd_clear_sock_ioctl(struct nbd_device *nbd,
- struct block_device *bdev)
+static void nbd_clear_sock_ioctl(struct nbd_device *nbd)
{
+ blk_mark_disk_dead(nbd->disk);
nbd_clear_sock(nbd);
- __invalidate_device(bdev, true);
- nbd_bdev_reset(nbd);
if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
&nbd->config->runtime_flags))
nbd_config_put(nbd);
@@ -1465,7 +1463,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
case NBD_DISCONNECT:
return nbd_disconnect(nbd);
case NBD_CLEAR_SOCK:
- nbd_clear_sock_ioctl(nbd, bdev);
+ nbd_clear_sock_ioctl(nbd);
return 0;
case NBD_SET_SOCK:
return nbd_add_socket(nbd, arg, false);
diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index bd0e075a5d89..2328cc05be36 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -3675,7 +3675,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
RBD_LOCK_TAG, "", 0);
- if (ret)
+ if (ret && ret != -EEXIST)
return ret;
__rbd_lock(rbd_dev, cookie);
@@ -3849,51 +3849,82 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
}
-static int get_lock_owner_info(struct rbd_device *rbd_dev,
- struct ceph_locker **lockers, u32 *num_lockers)
+static bool locker_equal(const struct ceph_locker *lhs,
+ const struct ceph_locker *rhs)
+{
+ return lhs->id.name.type == rhs->id.name.type &&
+ lhs->id.name.num == rhs->id.name.num &&
+ !strcmp(lhs->id.cookie, rhs->id.cookie) &&
+ ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
+}
+
+static void free_locker(struct ceph_locker *locker)
+{
+ if (locker)
+ ceph_free_lockers(locker, 1);
+}
+
+static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
{
struct ceph_osd_client *osdc = &rbd_dev->rbd_client->client->osdc;
+ struct ceph_locker *lockers;
+ u32 num_lockers;
u8 lock_type;
char *lock_tag;
+ u64 handle;
int ret;
- dout("%s rbd_dev %p\n", __func__, rbd_dev);
-
ret = ceph_cls_lock_info(osdc, &rbd_dev->header_oid,
&rbd_dev->header_oloc, RBD_LOCK_NAME,
- &lock_type, &lock_tag, lockers, num_lockers);
- if (ret)
- return ret;
+ &lock_type, &lock_tag, &lockers, &num_lockers);
+ if (ret) {
+ rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
+ return ERR_PTR(ret);
+ }
- if (*num_lockers == 0) {
+ if (num_lockers == 0) {
dout("%s rbd_dev %p no lockers detected\n", __func__, rbd_dev);
+ lockers = NULL;
goto out;
}
if (strcmp(lock_tag, RBD_LOCK_TAG)) {
rbd_warn(rbd_dev, "locked by external mechanism, tag %s",
lock_tag);
- ret = -EBUSY;
- goto out;
+ goto err_busy;
}
- if (lock_type == CEPH_CLS_LOCK_SHARED) {
- rbd_warn(rbd_dev, "shared lock type detected");
- ret = -EBUSY;
- goto out;
+ if (lock_type != CEPH_CLS_LOCK_EXCLUSIVE) {
+ rbd_warn(rbd_dev, "incompatible lock type detected");
+ goto err_busy;
}
- if (strncmp((*lockers)[0].id.cookie, RBD_LOCK_COOKIE_PREFIX,
- strlen(RBD_LOCK_COOKIE_PREFIX))) {
+ WARN_ON(num_lockers != 1);
+ ret = sscanf(lockers[0].id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu",
+ &handle);
+ if (ret != 1) {
rbd_warn(rbd_dev, "locked by external mechanism, cookie %s",
- (*lockers)[0].id.cookie);
- ret = -EBUSY;
- goto out;
+ lockers[0].id.cookie);
+ goto err_busy;
+ }
+ if (ceph_addr_is_blank(&lockers[0].info.addr)) {
+ rbd_warn(rbd_dev, "locker has a blank address");
+ goto err_busy;
}
+ dout("%s rbd_dev %p got locker %s%llu@%pISpc/%u handle %llu\n",
+ __func__, rbd_dev, ENTITY_NAME(lockers[0].id.name),
+ &lockers[0].info.addr.in_addr,
+ le32_to_cpu(lockers[0].info.addr.nonce), handle);
+
out:
kfree(lock_tag);
- return ret;
+ return lockers;
+
+err_busy:
+ kfree(lock_tag);
+ ceph_free_lockers(lockers, num_lockers);
+ return ERR_PTR(-EBUSY);
}
static int find_watcher(struct rbd_device *rbd_dev,
@@ -3909,8 +3940,10 @@ static int find_watcher(struct rbd_device *rbd_dev,
ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
&rbd_dev->header_oloc, &watchers,
&num_watchers);
- if (ret)
+ if (ret) {
+ rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
return ret;
+ }
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
for (i = 0; i < num_watchers; i++) {
@@ -3947,51 +3980,72 @@ out:
static int rbd_try_lock(struct rbd_device *rbd_dev)
{
struct ceph_client *client = rbd_dev->rbd_client->client;
- struct ceph_locker *lockers;
- u32 num_lockers;
+ struct ceph_locker *locker, *refreshed_locker;
int ret;
for (;;) {
+ locker = refreshed_locker = NULL;
+
ret = rbd_lock(rbd_dev);
- if (ret != -EBUSY)
- return ret;
+ if (!ret)
+ goto out;
+ if (ret != -EBUSY) {
+ rbd_warn(rbd_dev, "failed to lock header: %d", ret);
+ goto out;
+ }
/* determine if the current lock holder is still alive */
- ret = get_lock_owner_info(rbd_dev, &lockers, &num_lockers);
- if (ret)
- return ret;
-
- if (num_lockers == 0)
+ locker = get_lock_owner_info(rbd_dev);
+ if (IS_ERR(locker)) {
+ ret = PTR_ERR(locker);
+ locker = NULL;
+ goto out;
+ }
+ if (!locker)
goto again;
- ret = find_watcher(rbd_dev, lockers);
+ ret = find_watcher(rbd_dev, locker);
if (ret)
goto out; /* request lock or error */
+ refreshed_locker = get_lock_owner_info(rbd_dev);
+ if (IS_ERR(refreshed_locker)) {
+ ret = PTR_ERR(refreshed_locker);
+ refreshed_locker = NULL;
+ goto out;
+ }
+ if (!refreshed_locker ||
+ !locker_equal(locker, refreshed_locker))
+ goto again;
+
rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
- ENTITY_NAME(lockers[0].id.name));
+ ENTITY_NAME(locker->id.name));
ret = ceph_monc_blocklist_add(&client->monc,
- &lockers[0].info.addr);
+ &locker->info.addr);
if (ret) {
- rbd_warn(rbd_dev, "blocklist of %s%llu failed: %d",
- ENTITY_NAME(lockers[0].id.name), ret);
+ rbd_warn(rbd_dev, "failed to blocklist %s%llu: %d",
+ ENTITY_NAME(locker->id.name), ret);
goto out;
}
ret = ceph_cls_break_lock(&client->osdc, &rbd_dev->header_oid,
&rbd_dev->header_oloc, RBD_LOCK_NAME,
- lockers[0].id.cookie,
- &lockers[0].id.name);
- if (ret && ret != -ENOENT)
+ locker->id.cookie, &locker->id.name);
+ if (ret && ret != -ENOENT) {
+ rbd_warn(rbd_dev, "failed to break header lock: %d",
+ ret);
goto out;
+ }
again:
- ceph_free_lockers(lockers, num_lockers);
+ free_locker(refreshed_locker);
+ free_locker(locker);
}
out:
- ceph_free_lockers(lockers, num_lockers);
+ free_locker(refreshed_locker);
+ free_locker(locker);
return ret;
}
@@ -4041,11 +4095,8 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev)
ret = rbd_try_lock(rbd_dev);
if (ret < 0) {
- rbd_warn(rbd_dev, "failed to lock header: %d", ret);
- if (ret == -EBLOCKLISTED)
- goto out;
-
- ret = 1; /* request lock anyway */
+ rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
+ goto out;
}
if (ret > 0) {
up_write(&rbd_dev->lock_rwsem);
@@ -6579,12 +6630,11 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
cancel_delayed_work_sync(&rbd_dev->lock_dwork);
if (!ret)
ret = -ETIMEDOUT;
- }
- if (ret) {
- rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
- return ret;
+ rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
}
+ if (ret)
+ return ret;
/*
* The lock may have been released by now, unless automatic lock
diff --git a/drivers/block/rnbd/rnbd-clt-sysfs.c b/drivers/block/rnbd/rnbd-clt-sysfs.c
index c36d8b1ceeed..39887556cf95 100644
--- a/drivers/block/rnbd/rnbd-clt-sysfs.c
+++ b/drivers/block/rnbd/rnbd-clt-sysfs.c
@@ -25,7 +25,7 @@
static struct device *rnbd_dev;
static const struct class rnbd_dev_class = {
- .name = "rnbd_client",
+ .name = "rnbd-client",
};
static struct kobject *rnbd_devs_kobj;
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 1c823750c95a..21d2e71c5514 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -1847,7 +1847,8 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (ublksrv_pid <= 0)
return -EINVAL;
- wait_for_completion_interruptible(&ub->completion);
+ if (wait_for_completion_interruptible(&ub->completion) != 0)
+ return -EINTR;
schedule_delayed_work(&ub->monitor_work, UBLK_DAEMON_MONITOR_PERIOD);
@@ -2125,8 +2126,8 @@ static int ublk_ctrl_del_dev(struct ublk_device **p_ub)
* - the device number is freed already, we will not find this
* device via ublk_get_device_from_id()
*/
- wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx));
-
+ if (wait_event_interruptible(ublk_idr_wq, ublk_idr_freed(idx)))
+ return -EINTR;
return 0;
}
@@ -2323,7 +2324,9 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
__func__, ub->dev_info.nr_hw_queues, header->dev_id);
/* wait until new ubq_daemon sending all FETCH_REQ */
- wait_for_completion_interruptible(&ub->completion);
+ if (wait_for_completion_interruptible(&ub->completion))
+ return -EINTR;
+
pr_devel("%s: All new ubq_daemons(nr: %d) are ready, dev id %d\n",
__func__, ub->dev_info.nr_hw_queues, header->dev_id);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 5676e6dd5b16..06673c6ca255 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1870,15 +1870,16 @@ static void zram_bio_discard(struct zram *zram, struct bio *bio)
static void zram_bio_read(struct zram *zram, struct bio *bio)
{
- struct bvec_iter iter;
- struct bio_vec bv;
- unsigned long start_time;
+ unsigned long start_time = bio_start_io_acct(bio);
+ struct bvec_iter iter = bio->bi_iter;
- start_time = bio_start_io_acct(bio);
- bio_for_each_segment(bv, bio, iter) {
+ do {
u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
SECTOR_SHIFT;
+ struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+ bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
if (zram_bvec_read(zram, &bv, index, offset, bio) < 0) {
atomic64_inc(&zram->stats.failed_reads);
@@ -1890,22 +1891,26 @@ static void zram_bio_read(struct zram *zram, struct bio *bio)
zram_slot_lock(zram, index);
zram_accessed(zram, index);
zram_slot_unlock(zram, index);
- }
+
+ bio_advance_iter_single(bio, &iter, bv.bv_len);
+ } while (iter.bi_size);
+
bio_end_io_acct(bio, start_time);
bio_endio(bio);
}
static void zram_bio_write(struct zram *zram, struct bio *bio)
{
- struct bvec_iter iter;
- struct bio_vec bv;
- unsigned long start_time;
+ unsigned long start_time = bio_start_io_acct(bio);
+ struct bvec_iter iter = bio->bi_iter;
- start_time = bio_start_io_acct(bio);
- bio_for_each_segment(bv, bio, iter) {
+ do {
u32 index = iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
u32 offset = (iter.bi_sector & (SECTORS_PER_PAGE - 1)) <<
SECTOR_SHIFT;
+ struct bio_vec bv = bio_iter_iovec(bio, iter);
+
+ bv.bv_len = min_t(u32, bv.bv_len, PAGE_SIZE - offset);
if (zram_bvec_write(zram, &bv, index, offset, bio) < 0) {
atomic64_inc(&zram->stats.failed_writes);
@@ -1916,7 +1921,10 @@ static void zram_bio_write(struct zram *zram, struct bio *bio)
zram_slot_lock(zram, index);
zram_accessed(zram, index);
zram_slot_unlock(zram, index);
- }
+
+ bio_advance_iter_single(bio, &iter, bv.bv_len);
+ } while (iter.bi_size);
+
bio_end_io_acct(bio, start_time);
bio_endio(bio);
}
diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 5ec4ad0a5c86..764d176e9735 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -4104,6 +4104,7 @@ static int btusb_probe(struct usb_interface *intf,
BT_DBG("intf %p id %p", intf, id);
if ((id->driver_info & BTUSB_IFNUM_2) &&
+ (intf->cur_altsetting->desc.bInterfaceNumber != 0) &&
(intf->cur_altsetting->desc.bInterfaceNumber != 2))
return -ENODEV;
diff --git a/drivers/bus/ti-sysc.c b/drivers/bus/ti-sysc.c
index 21fe9854703f..4cb23b9e06ea 100644
--- a/drivers/bus/ti-sysc.c
+++ b/drivers/bus/ti-sysc.c
@@ -2142,6 +2142,8 @@ static int sysc_reset(struct sysc *ddata)
sysc_val = sysc_read_sysconfig(ddata);
sysc_val |= sysc_mask;
sysc_write(ddata, sysc_offset, sysc_val);
+ /* Flush posted write */
+ sysc_val = sysc_read_sysconfig(ddata);
}
if (ddata->cfg.srst_udelay)
diff --git a/drivers/char/tpm/st33zp24/i2c.c b/drivers/char/tpm/st33zp24/i2c.c
index 2d28f55ef490..661574bb0acf 100644
--- a/drivers/char/tpm/st33zp24/i2c.c
+++ b/drivers/char/tpm/st33zp24/i2c.c
@@ -160,7 +160,7 @@ static struct i2c_driver st33zp24_i2c_driver = {
.of_match_table = of_match_ptr(of_st33zp24_i2c_match),
.acpi_match_table = ACPI_PTR(st33zp24_i2c_acpi_match),
},
- .probe_new = st33zp24_i2c_probe,
+ .probe = st33zp24_i2c_probe,
.remove = st33zp24_i2c_remove,
.id_table = st33zp24_i2c_id
};
diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
index cd48033b804a..ea6b4013bc38 100644
--- a/drivers/char/tpm/tpm-chip.c
+++ b/drivers/char/tpm/tpm-chip.c
@@ -510,63 +510,6 @@ static int tpm_add_legacy_sysfs(struct tpm_chip *chip)
return 0;
}
-/*
- * Some AMD fTPM versions may cause stutter
- * https://www.amd.com/en/support/kb/faq/pa-410
- *
- * Fixes are available in two series of fTPM firmware:
- * 6.x.y.z series: 6.0.18.6 +
- * 3.x.y.z series: 3.57.y.5 +
- */
-static bool tpm_amd_is_rng_defective(struct tpm_chip *chip)
-{
- u32 val1, val2;
- u64 version;
- int ret;
-
- if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
- return false;
-
- ret = tpm_request_locality(chip);
- if (ret)
- return false;
-
- ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val1, NULL);
- if (ret)
- goto release;
- if (val1 != 0x414D4400U /* AMD */) {
- ret = -ENODEV;
- goto release;
- }
- ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_1, &val1, NULL);
- if (ret)
- goto release;
- ret = tpm2_get_tpm_pt(chip, TPM2_PT_FIRMWARE_VERSION_2, &val2, NULL);
-
-release:
- tpm_relinquish_locality(chip);
-
- if (ret)
- return false;
-
- version = ((u64)val1 << 32) | val2;
- if ((version >> 48) == 6) {
- if (version >= 0x0006000000180006ULL)
- return false;
- } else if ((version >> 48) == 3) {
- if (version >= 0x0003005700000005ULL)
- return false;
- } else {
- return false;
- }
-
- dev_warn(&chip->dev,
- "AMD fTPM version 0x%llx causes system stutter; hwrng disabled\n",
- version);
-
- return true;
-}
-
static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
{
struct tpm_chip *chip = container_of(rng, struct tpm_chip, hwrng);
@@ -578,10 +521,20 @@ static int tpm_hwrng_read(struct hwrng *rng, void *data, size_t max, bool wait)
return tpm_get_random(chip, data, max);
}
+static bool tpm_is_hwrng_enabled(struct tpm_chip *chip)
+{
+ if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM))
+ return false;
+ if (tpm_is_firmware_upgrade(chip))
+ return false;
+ if (chip->flags & TPM_CHIP_FLAG_HWRNG_DISABLED)
+ return false;
+ return true;
+}
+
static int tpm_add_hwrng(struct tpm_chip *chip)
{
- if (!IS_ENABLED(CONFIG_HW_RANDOM_TPM) || tpm_is_firmware_upgrade(chip) ||
- tpm_amd_is_rng_defective(chip))
+ if (!tpm_is_hwrng_enabled(chip))
return 0;
snprintf(chip->hwrng_name, sizeof(chip->hwrng_name),
@@ -686,7 +639,7 @@ int tpm_chip_register(struct tpm_chip *chip)
return 0;
out_hwrng:
- if (IS_ENABLED(CONFIG_HW_RANDOM_TPM) && !tpm_is_firmware_upgrade(chip))
+ if (tpm_is_hwrng_enabled(chip))
hwrng_unregister(&chip->hwrng);
out_ppi:
tpm_bios_log_teardown(chip);
@@ -711,8 +664,7 @@ EXPORT_SYMBOL_GPL(tpm_chip_register);
void tpm_chip_unregister(struct tpm_chip *chip)
{
tpm_del_legacy_sysfs(chip);
- if (IS_ENABLED(CONFIG_HW_RANDOM_TPM) && !tpm_is_firmware_upgrade(chip) &&
- !tpm_amd_is_rng_defective(chip))
+ if (tpm_is_hwrng_enabled(chip))
hwrng_unregister(&chip->hwrng);
tpm_bios_log_teardown(chip);
if (chip->flags & TPM_CHIP_FLAG_TPM2 && !tpm_is_firmware_upgrade(chip))
diff --git a/drivers/char/tpm/tpm_crb.c b/drivers/char/tpm/tpm_crb.c
index d43a0d7b97a8..9eb1a1859012 100644
--- a/drivers/char/tpm/tpm_crb.c
+++ b/drivers/char/tpm/tpm_crb.c
@@ -463,6 +463,28 @@ static bool crb_req_canceled(struct tpm_chip *chip, u8 status)
return (cancel & CRB_CANCEL_INVOKE) == CRB_CANCEL_INVOKE;
}
+static int crb_check_flags(struct tpm_chip *chip)
+{
+ u32 val;
+ int ret;
+
+ ret = crb_request_locality(chip, 0);
+ if (ret)
+ return ret;
+
+ ret = tpm2_get_tpm_pt(chip, TPM2_PT_MANUFACTURER, &val, NULL);
+ if (ret)
+ goto release;
+
+ if (val == 0x414D4400U /* AMD */)
+ chip->flags |= TPM_CHIP_FLAG_HWRNG_DISABLED;
+
+release:
+ crb_relinquish_locality(chip, 0);
+
+ return ret;
+}
+
static const struct tpm_class_ops tpm_crb = {
.flags = TPM_OPS_AUTO_STARTUP,
.status = crb_status,
@@ -563,15 +585,18 @@ static int crb_map_io(struct acpi_device *device, struct crb_priv *priv,
u32 rsp_size;
int ret;
- INIT_LIST_HEAD(&acpi_resource_list);
- ret = acpi_dev_get_resources(device, &acpi_resource_list,
- crb_check_resource, iores_array);
- if (ret < 0)
- return ret;
- acpi_dev_free_resource_list(&acpi_resource_list);
-
- /* Pluton doesn't appear to define ACPI memory regions */
+ /*
+ * Pluton sometimes does not define ACPI memory regions.
+ * Mapping is then done in crb_map_pluton
+ */
if (priv->sm != ACPI_TPM2_COMMAND_BUFFER_WITH_PLUTON) {
+ INIT_LIST_HEAD(&acpi_resource_list);
+ ret = acpi_dev_get_resources(device, &acpi_resource_list,
+ crb_check_resource, iores_array);
+ if (ret < 0)
+ return ret;
+ acpi_dev_free_resource_list(&acpi_resource_list);
+
if (resource_type(iores_array) != IORESOURCE_MEM) {
dev_err(dev, FW_BUG "TPM2 ACPI table does not define a memory resource\n");
return -EINVAL;
@@ -797,6 +822,14 @@ static int crb_acpi_add(struct acpi_device *device)
chip->acpi_dev_handle = device->handle;
chip->flags = TPM_CHIP_FLAG_TPM2;
+ rc = tpm_chip_bootstrap(chip);
+ if (rc)
+ goto out;
+
+ rc = crb_check_flags(chip);
+ if (rc)
+ goto out;
+
rc = tpm_chip_register(chip);
out:
diff --git a/drivers/char/tpm/tpm_i2c_atmel.c b/drivers/char/tpm/tpm_i2c_atmel.c
index 8f77154e0550..301a95b3734f 100644
--- a/drivers/char/tpm/tpm_i2c_atmel.c
+++ b/drivers/char/tpm/tpm_i2c_atmel.c
@@ -203,7 +203,7 @@ static SIMPLE_DEV_PM_OPS(i2c_atmel_pm_ops, tpm_pm_suspend, tpm_pm_resume);
static struct i2c_driver i2c_atmel_driver = {
.id_table = i2c_atmel_id,
- .probe_new = i2c_atmel_probe,
+ .probe = i2c_atmel_probe,
.remove = i2c_atmel_remove,
.driver = {
.name = I2C_DRIVER_NAME,
diff --git a/drivers/char/tpm/tpm_i2c_infineon.c b/drivers/char/tpm/tpm_i2c_infineon.c
index 7cdaff52a96d..81d8a78dc655 100644
--- a/drivers/char/tpm/tpm_i2c_infineon.c
+++ b/drivers/char/tpm/tpm_i2c_infineon.c
@@ -716,7 +716,7 @@ static void tpm_tis_i2c_remove(struct i2c_client *client)
static struct i2c_driver tpm_tis_i2c_driver = {
.id_table = tpm_tis_i2c_table,
- .probe_new = tpm_tis_i2c_probe,
+ .probe = tpm_tis_i2c_probe,
.remove = tpm_tis_i2c_remove,
.driver = {
.name = "tpm_i2c_infineon",
diff --git a/drivers/char/tpm/tpm_i2c_nuvoton.c b/drivers/char/tpm/tpm_i2c_nuvoton.c
index a026e98add50..d7be03c41098 100644
--- a/drivers/char/tpm/tpm_i2c_nuvoton.c
+++ b/drivers/char/tpm/tpm_i2c_nuvoton.c
@@ -650,7 +650,7 @@ static SIMPLE_DEV_PM_OPS(i2c_nuvoton_pm_ops, tpm_pm_suspend, tpm_pm_resume);
static struct i2c_driver i2c_nuvoton_driver = {
.id_table = i2c_nuvoton_id,
- .probe_new = i2c_nuvoton_probe,
+ .probe = i2c_nuvoton_probe,
.remove = i2c_nuvoton_remove,
.driver = {
.name = "tpm_i2c_nuvoton",
diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c
index 7db3593941ea..7fa3d91042b2 100644
--- a/drivers/char/tpm/tpm_tis.c
+++ b/drivers/char/tpm/tpm_tis.c
@@ -89,7 +89,7 @@ static inline void tpm_tis_iowrite32(u32 b, void __iomem *iobase, u32 addr)
tpm_tis_flush(iobase);
}
-static int interrupts = -1;
+static int interrupts;
module_param(interrupts, int, 0444);
MODULE_PARM_DESC(interrupts, "Enable interrupts");
@@ -116,6 +116,22 @@ static int tpm_tis_disable_irq(const struct dmi_system_id *d)
static const struct dmi_system_id tpm_tis_dmi_table[] = {
{
.callback = tpm_tis_disable_irq,
+ .ident = "Framework Laptop (12th Gen Intel Core)",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Laptop (12th Gen Intel Core)"),
+ },
+ },
+ {
+ .callback = tpm_tis_disable_irq,
+ .ident = "Framework Laptop (13th Gen Intel Core)",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Framework"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Laptop (13th Gen Intel Core)"),
+ },
+ },
+ {
+ .callback = tpm_tis_disable_irq,
.ident = "ThinkPad T490s",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
@@ -140,9 +156,34 @@ static const struct dmi_system_id tpm_tis_dmi_table[] = {
},
{
.callback = tpm_tis_disable_irq,
+ .ident = "ThinkPad L590",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad L590"),
+ },
+ },
+ {
+ .callback = tpm_tis_disable_irq,
+ .ident = "ThinkStation P620",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkStation P620"),
+ },
+ },
+ {
+ .callback = tpm_tis_disable_irq,
+ .ident = "TUXEDO InfinityBook S 15/17 Gen7",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TUXEDO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TUXEDO InfinityBook S 15/17 Gen7"),
+ },
+ },
+ {
+ .callback = tpm_tis_disable_irq,
.ident = "UPX-TGL",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "UPX-TGL01"),
},
},
{}
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c
index 558144fa707a..b95963095729 100644
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -24,9 +24,12 @@
#include <linux/wait.h>
#include <linux/acpi.h>
#include <linux/freezer.h>
+#include <linux/dmi.h>
#include "tpm.h"
#include "tpm_tis_core.h"
+#define TPM_TIS_MAX_UNHANDLED_IRQS 1000
+
static void tpm_tis_clkrun_enable(struct tpm_chip *chip, bool value);
static bool wait_for_tpm_stat_cond(struct tpm_chip *chip, u8 mask,
@@ -363,8 +366,13 @@ static int tpm_tis_recv(struct tpm_chip *chip, u8 *buf, size_t count)
goto out;
}
- size += recv_data(chip, &buf[TPM_HEADER_SIZE],
- expected - TPM_HEADER_SIZE);
+ rc = recv_data(chip, &buf[TPM_HEADER_SIZE],
+ expected - TPM_HEADER_SIZE);
+ if (rc < 0) {
+ size = rc;
+ goto out;
+ }
+ size += rc;
if (size < expected) {
dev_err(&chip->dev, "Unable to read remainder of result\n");
size = -ETIME;
@@ -468,25 +476,29 @@ out_err:
return rc;
}
-static void disable_interrupts(struct tpm_chip *chip)
+static void __tpm_tis_disable_interrupts(struct tpm_chip *chip)
+{
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+ u32 int_mask = 0;
+
+ tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &int_mask);
+ int_mask &= ~TPM_GLOBAL_INT_ENABLE;
+ tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), int_mask);
+
+ chip->flags &= ~TPM_CHIP_FLAG_IRQ;
+}
+
+static void tpm_tis_disable_interrupts(struct tpm_chip *chip)
{
struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
- u32 intmask;
- int rc;
if (priv->irq == 0)
return;
- rc = tpm_tis_read32(priv, TPM_INT_ENABLE(priv->locality), &intmask);
- if (rc < 0)
- intmask = 0;
-
- intmask &= ~TPM_GLOBAL_INT_ENABLE;
- rc = tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask);
+ __tpm_tis_disable_interrupts(chip);
devm_free_irq(chip->dev.parent, priv->irq, chip);
priv->irq = 0;
- chip->flags &= ~TPM_CHIP_FLAG_IRQ;
}
/*
@@ -552,7 +564,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len)
if (!test_bit(TPM_TIS_IRQ_TESTED, &priv->flags))
tpm_msleep(1);
if (!test_bit(TPM_TIS_IRQ_TESTED, &priv->flags))
- disable_interrupts(chip);
+ tpm_tis_disable_interrupts(chip);
set_bit(TPM_TIS_IRQ_TESTED, &priv->flags);
return rc;
}
@@ -752,6 +764,57 @@ static bool tpm_tis_req_canceled(struct tpm_chip *chip, u8 status)
return status == TPM_STS_COMMAND_READY;
}
+static irqreturn_t tpm_tis_revert_interrupts(struct tpm_chip *chip)
+{
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+ const char *product;
+ const char *vendor;
+
+ dev_warn(&chip->dev, FW_BUG
+ "TPM interrupt storm detected, polling instead\n");
+
+ vendor = dmi_get_system_info(DMI_SYS_VENDOR);
+ product = dmi_get_system_info(DMI_PRODUCT_VERSION);
+
+ if (vendor && product) {
+ dev_info(&chip->dev,
+ "Consider adding the following entry to tpm_tis_dmi_table:\n");
+ dev_info(&chip->dev, "\tDMI_SYS_VENDOR: %s\n", vendor);
+ dev_info(&chip->dev, "\tDMI_PRODUCT_VERSION: %s\n", product);
+ }
+
+ if (tpm_tis_request_locality(chip, 0) != 0)
+ return IRQ_NONE;
+
+ __tpm_tis_disable_interrupts(chip);
+ tpm_tis_relinquish_locality(chip, 0);
+
+ schedule_work(&priv->free_irq_work);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t tpm_tis_update_unhandled_irqs(struct tpm_chip *chip)
+{
+ struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev);
+ irqreturn_t irqret = IRQ_HANDLED;
+
+ if (!(chip->flags & TPM_CHIP_FLAG_IRQ))
+ return IRQ_HANDLED;
+
+ if (time_after(jiffies, priv->last_unhandled_irq + HZ/10))
+ priv->unhandled_irqs = 1;
+ else
+ priv->unhandled_irqs++;
+
+ priv->last_unhandled_irq = jiffies;
+
+ if (priv->unhandled_irqs > TPM_TIS_MAX_UNHANDLED_IRQS)
+ irqret = tpm_tis_revert_interrupts(chip);
+
+ return irqret;
+}
+
static irqreturn_t tis_int_handler(int dummy, void *dev_id)
{
struct tpm_chip *chip = dev_id;
@@ -761,10 +824,10 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
rc = tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &interrupt);
if (rc < 0)
- return IRQ_NONE;
+ goto err;
if (interrupt == 0)
- return IRQ_NONE;
+ goto err;
set_bit(TPM_TIS_IRQ_TESTED, &priv->flags);
if (interrupt & TPM_INTF_DATA_AVAIL_INT)
@@ -780,10 +843,13 @@ static irqreturn_t tis_int_handler(int dummy, void *dev_id)
rc = tpm_tis_write32(priv, TPM_INT_STATUS(priv->locality), interrupt);
tpm_tis_relinquish_locality(chip, 0);
if (rc < 0)
- return IRQ_NONE;
+ goto err;
tpm_tis_read32(priv, TPM_INT_STATUS(priv->locality), &interrupt);
return IRQ_HANDLED;
+
+err:
+ return tpm_tis_update_unhandled_irqs(chip);
}
static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
@@ -804,6 +870,15 @@ static void tpm_tis_gen_interrupt(struct tpm_chip *chip)
chip->flags &= ~TPM_CHIP_FLAG_IRQ;
}
+static void tpm_tis_free_irq_func(struct work_struct *work)
+{
+ struct tpm_tis_data *priv = container_of(work, typeof(*priv), free_irq_work);
+ struct tpm_chip *chip = priv->chip;
+
+ devm_free_irq(chip->dev.parent, priv->irq, chip);
+ priv->irq = 0;
+}
+
/* Register the IRQ and issue a command that will cause an interrupt. If an
* irq is seen then leave the chip setup for IRQ operation, otherwise reverse
* everything and leave in polling mode. Returns 0 on success.
@@ -816,6 +891,7 @@ static int tpm_tis_probe_irq_single(struct tpm_chip *chip, u32 intmask,
int rc;
u32 int_status;
+ INIT_WORK(&priv->free_irq_work, tpm_tis_free_irq_func);
rc = devm_request_threaded_irq(chip->dev.parent, irq, NULL,
tis_int_handler, IRQF_ONESHOT | flags,
@@ -918,6 +994,7 @@ void tpm_tis_remove(struct tpm_chip *chip)
interrupt = 0;
tpm_tis_write32(priv, reg, ~TPM_GLOBAL_INT_ENABLE & interrupt);
+ flush_work(&priv->free_irq_work);
tpm_tis_clkrun_enable(chip, false);
@@ -1021,6 +1098,7 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
chip->timeout_b = msecs_to_jiffies(TIS_TIMEOUT_B_MAX);
chip->timeout_c = msecs_to_jiffies(TIS_TIMEOUT_C_MAX);
chip->timeout_d = msecs_to_jiffies(TIS_TIMEOUT_D_MAX);
+ priv->chip = chip;
priv->timeout_min = TPM_TIMEOUT_USECS_MIN;
priv->timeout_max = TPM_TIMEOUT_USECS_MAX;
priv->phy_ops = phy_ops;
@@ -1179,7 +1257,7 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
rc = tpm_tis_request_locality(chip, 0);
if (rc < 0)
goto out_err;
- disable_interrupts(chip);
+ tpm_tis_disable_interrupts(chip);
tpm_tis_relinquish_locality(chip, 0);
}
}
diff --git a/drivers/char/tpm/tpm_tis_core.h b/drivers/char/tpm/tpm_tis_core.h
index 610bfadb6acf..b1a169d7d1ca 100644
--- a/drivers/char/tpm/tpm_tis_core.h
+++ b/drivers/char/tpm/tpm_tis_core.h
@@ -91,11 +91,15 @@ enum tpm_tis_flags {
};
struct tpm_tis_data {
+ struct tpm_chip *chip;
u16 manufacturer_id;
struct mutex locality_count_mutex;
unsigned int locality_count;
int locality;
int irq;
+ struct work_struct free_irq_work;
+ unsigned long last_unhandled_irq;
+ unsigned int unhandled_irqs;
unsigned int int_mask;
unsigned long flags;
void __iomem *ilb_base_addr;
diff --git a/drivers/char/tpm/tpm_tis_i2c.c b/drivers/char/tpm/tpm_tis_i2c.c
index c8c34adc14c0..a897402cc36a 100644
--- a/drivers/char/tpm/tpm_tis_i2c.c
+++ b/drivers/char/tpm/tpm_tis_i2c.c
@@ -189,21 +189,28 @@ static int tpm_tis_i2c_read_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
int ret;
for (i = 0; i < TPM_RETRY; i++) {
- /* write register */
- msg.len = sizeof(reg);
- msg.buf = &reg;
- msg.flags = 0;
- ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
- if (ret < 0)
- return ret;
-
- /* read data */
- msg.buf = result;
- msg.len = len;
- msg.flags = I2C_M_RD;
- ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
- if (ret < 0)
- return ret;
+ u16 read = 0;
+
+ while (read < len) {
+ /* write register */
+ msg.len = sizeof(reg);
+ msg.buf = &reg;
+ msg.flags = 0;
+ ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
+ if (ret < 0)
+ return ret;
+
+ /* read data */
+ msg.buf = result + read;
+ msg.len = len - read;
+ msg.flags = I2C_M_RD;
+ if (msg.len > I2C_SMBUS_BLOCK_MAX)
+ msg.len = I2C_SMBUS_BLOCK_MAX;
+ ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
+ if (ret < 0)
+ return ret;
+ read += msg.len;
+ }
ret = tpm_tis_i2c_sanity_check_read(reg, len, result);
if (ret == 0)
@@ -223,19 +230,27 @@ static int tpm_tis_i2c_write_bytes(struct tpm_tis_data *data, u32 addr, u16 len,
struct i2c_msg msg = { .addr = phy->i2c_client->addr };
u8 reg = tpm_tis_i2c_address_to_register(addr);
int ret;
+ u16 wrote = 0;
if (len > TPM_BUFSIZE - 1)
return -EIO;
- /* write register and data in one go */
phy->io_buf[0] = reg;
- memcpy(phy->io_buf + sizeof(reg), value, len);
-
- msg.len = sizeof(reg) + len;
msg.buf = phy->io_buf;
- ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
- if (ret < 0)
- return ret;
+ while (wrote < len) {
+ /* write register and data in one go */
+ msg.len = sizeof(reg) + len - wrote;
+ if (msg.len > I2C_SMBUS_BLOCK_MAX)
+ msg.len = I2C_SMBUS_BLOCK_MAX;
+
+ memcpy(phy->io_buf + sizeof(reg), value + wrote,
+ msg.len - sizeof(reg));
+
+ ret = tpm_tis_i2c_retry_transfer_until_ack(data, &msg);
+ if (ret < 0)
+ return ret;
+ wrote += msg.len - sizeof(reg);
+ }
return 0;
}
@@ -379,7 +394,7 @@ static struct i2c_driver tpm_tis_i2c_driver = {
.pm = &tpm_tis_pm,
.of_match_table = of_match_ptr(of_tis_i2c_match),
},
- .probe_new = tpm_tis_i2c_probe,
+ .probe = tpm_tis_i2c_probe,
.remove = tpm_tis_i2c_remove,
.id_table = tpm_tis_i2c_id,
};
diff --git a/drivers/char/tpm/tpm_tis_i2c_cr50.c b/drivers/char/tpm/tpm_tis_i2c_cr50.c
index 376ae18a04eb..e70abd69e1ae 100644
--- a/drivers/char/tpm/tpm_tis_i2c_cr50.c
+++ b/drivers/char/tpm/tpm_tis_i2c_cr50.c
@@ -779,7 +779,7 @@ static void tpm_cr50_i2c_remove(struct i2c_client *client)
static SIMPLE_DEV_PM_OPS(cr50_i2c_pm, tpm_pm_suspend, tpm_pm_resume);
static struct i2c_driver cr50_i2c_driver = {
- .probe_new = tpm_cr50_i2c_probe,
+ .probe = tpm_cr50_i2c_probe,
.remove = tpm_cr50_i2c_remove,
.driver = {
.name = "cr50_i2c",
diff --git a/drivers/char/tpm/tpm_tis_spi_main.c b/drivers/char/tpm/tpm_tis_spi_main.c
index 1f5207974a17..9bfaba092a06 100644
--- a/drivers/char/tpm/tpm_tis_spi_main.c
+++ b/drivers/char/tpm/tpm_tis_spi_main.c
@@ -136,6 +136,14 @@ int tpm_tis_spi_transfer(struct tpm_tis_data *data, u32 addr, u16 len,
}
exit:
+ if (ret < 0) {
+ /* Deactivate chip select */
+ memset(&spi_xfer, 0, sizeof(spi_xfer));
+ spi_message_init(&m);
+ spi_message_add_tail(&spi_xfer, &m);
+ spi_sync_locked(phy->spi_device, &m);
+ }
+
spi_bus_unlock(phy->spi_device->master);
return ret;
}
diff --git a/drivers/char/tpm/tpm_vtpm_proxy.c b/drivers/char/tpm/tpm_vtpm_proxy.c
index 5c865987ba5c..30e953988cab 100644
--- a/drivers/char/tpm/tpm_vtpm_proxy.c
+++ b/drivers/char/tpm/tpm_vtpm_proxy.c
@@ -683,37 +683,21 @@ static struct miscdevice vtpmx_miscdev = {
.fops = &vtpmx_fops,
};
-static int vtpmx_init(void)
-{
- return misc_register(&vtpmx_miscdev);
-}
-
-static void vtpmx_cleanup(void)
-{
- misc_deregister(&vtpmx_miscdev);
-}
-
static int __init vtpm_module_init(void)
{
int rc;
- rc = vtpmx_init();
- if (rc) {
- pr_err("couldn't create vtpmx device\n");
- return rc;
- }
-
workqueue = create_workqueue("tpm-vtpm");
if (!workqueue) {
pr_err("couldn't create workqueue\n");
- rc = -ENOMEM;
- goto err_vtpmx_cleanup;
+ return -ENOMEM;
}
- return 0;
-
-err_vtpmx_cleanup:
- vtpmx_cleanup();
+ rc = misc_register(&vtpmx_miscdev);
+ if (rc) {
+ pr_err("couldn't create vtpmx device\n");
+ destroy_workqueue(workqueue);
+ }
return rc;
}
@@ -721,7 +705,7 @@ err_vtpmx_cleanup:
static void __exit vtpm_module_exit(void)
{
destroy_workqueue(workqueue);
- vtpmx_cleanup();
+ misc_deregister(&vtpmx_miscdev);
}
module_init(vtpm_module_init);
diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig
index 93f38a8178ba..6b3b424addab 100644
--- a/drivers/clk/Kconfig
+++ b/drivers/clk/Kconfig
@@ -444,6 +444,7 @@ config COMMON_CLK_BD718XX
config COMMON_CLK_FIXED_MMIO
bool "Clock driver for Memory Mapped Fixed values"
depends on COMMON_CLK && OF
+ depends on HAS_IOMEM
help
Support for Memory Mapped IO Fixed clocks
diff --git a/drivers/clk/clk-devres.c b/drivers/clk/clk-devres.c
index 4fb4fd4b06bd..737aa70e2cb3 100644
--- a/drivers/clk/clk-devres.c
+++ b/drivers/clk/clk-devres.c
@@ -205,18 +205,19 @@ EXPORT_SYMBOL(devm_clk_put);
struct clk *devm_get_clk_from_child(struct device *dev,
struct device_node *np, const char *con_id)
{
- struct clk **ptr, *clk;
+ struct devm_clk_state *state;
+ struct clk *clk;
- ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
- if (!ptr)
+ state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL);
+ if (!state)
return ERR_PTR(-ENOMEM);
clk = of_clk_get_by_name(np, con_id);
if (!IS_ERR(clk)) {
- *ptr = clk;
- devres_add(dev, ptr);
+ state->clk = clk;
+ devres_add(dev, state);
} else {
- devres_free(ptr);
+ devres_free(state);
}
return clk;
diff --git a/drivers/clk/imx/clk-imx93.c b/drivers/clk/imx/clk-imx93.c
index b6c7c2725906..44f435103c65 100644
--- a/drivers/clk/imx/clk-imx93.c
+++ b/drivers/clk/imx/clk-imx93.c
@@ -291,7 +291,7 @@ static int imx93_clocks_probe(struct platform_device *pdev)
anatop_base = devm_of_iomap(dev, np, 0, NULL);
of_node_put(np);
if (WARN_ON(IS_ERR(anatop_base))) {
- ret = PTR_ERR(base);
+ ret = PTR_ERR(anatop_base);
goto unregister_hws;
}
diff --git a/drivers/clk/keystone/syscon-clk.c b/drivers/clk/keystone/syscon-clk.c
index d33f74119488..935d9a2d8c2b 100644
--- a/drivers/clk/keystone/syscon-clk.c
+++ b/drivers/clk/keystone/syscon-clk.c
@@ -151,8 +151,10 @@ static int ti_syscon_gate_clk_probe(struct platform_device *pdev)
data[i].name);
}
- return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get,
- hw_data);
+ if (num_clks == 1)
+ return devm_of_clk_add_hw_provider(dev, of_clk_hw_simple_get,
+ hw_data->hws[0]);
+ return devm_of_clk_add_hw_provider(dev, of_clk_hw_onecell_get, hw_data);
}
#define TI_SYSCON_CLK_GATE(_name, _offset, _bit_idx) \
diff --git a/drivers/clk/mediatek/clk-mt8183.c b/drivers/clk/mediatek/clk-mt8183.c
index 1ba421b38ec5..e31f94387d87 100644
--- a/drivers/clk/mediatek/clk-mt8183.c
+++ b/drivers/clk/mediatek/clk-mt8183.c
@@ -328,6 +328,14 @@ static const char * const atb_parents[] = {
"syspll_d5"
};
+static const char * const sspm_parents[] = {
+ "clk26m",
+ "univpll_d2_d4",
+ "syspll_d2_d2",
+ "univpll_d2_d2",
+ "syspll_d3"
+};
+
static const char * const dpi0_parents[] = {
"clk26m",
"tvdpll_d2",
@@ -507,6 +515,9 @@ static const struct mtk_mux top_muxes[] = {
/* CLK_CFG_6 */
MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_ATB, "atb_sel",
atb_parents, 0xa0, 0xa4, 0xa8, 0, 2, 7, 0x004, 24),
+ MUX_GATE_CLR_SET_UPD_FLAGS(CLK_TOP_MUX_SSPM, "sspm_sel",
+ sspm_parents, 0xa0, 0xa4, 0xa8, 8, 3, 15, 0x004, 25,
+ CLK_IS_CRITICAL | CLK_SET_RATE_PARENT),
MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_DPI0, "dpi0_sel",
dpi0_parents, 0xa0, 0xa4, 0xa8, 16, 4, 23, 0x004, 26),
MUX_GATE_CLR_SET_UPD(CLK_TOP_MUX_SCAM, "scam_sel",
@@ -673,10 +684,18 @@ static const struct mtk_gate_regs infra3_cg_regs = {
GATE_MTK(_id, _name, _parent, &infra2_cg_regs, _shift, \
&mtk_clk_gate_ops_setclr)
+#define GATE_INFRA2_FLAGS(_id, _name, _parent, _shift, _flag) \
+ GATE_MTK_FLAGS(_id, _name, _parent, &infra2_cg_regs, \
+ _shift, &mtk_clk_gate_ops_setclr, _flag)
+
#define GATE_INFRA3(_id, _name, _parent, _shift) \
GATE_MTK(_id, _name, _parent, &infra3_cg_regs, _shift, \
&mtk_clk_gate_ops_setclr)
+#define GATE_INFRA3_FLAGS(_id, _name, _parent, _shift, _flag) \
+ GATE_MTK_FLAGS(_id, _name, _parent, &infra3_cg_regs, \
+ _shift, &mtk_clk_gate_ops_setclr, _flag)
+
static const struct mtk_gate infra_clks[] = {
/* INFRA0 */
GATE_INFRA0(CLK_INFRA_PMIC_TMR, "infra_pmic_tmr", "axi_sel", 0),
@@ -748,7 +767,11 @@ static const struct mtk_gate infra_clks[] = {
GATE_INFRA2(CLK_INFRA_UNIPRO_TICK, "infra_unipro_tick", "fufs_sel", 12),
GATE_INFRA2(CLK_INFRA_UFS_MP_SAP_BCLK, "infra_ufs_mp_sap_bck", "fufs_sel", 13),
GATE_INFRA2(CLK_INFRA_MD32_BCLK, "infra_md32_bclk", "axi_sel", 14),
+ /* infra_sspm is main clock in co-processor, should not be closed in Linux. */
+ GATE_INFRA2_FLAGS(CLK_INFRA_SSPM, "infra_sspm", "sspm_sel", 15, CLK_IS_CRITICAL),
GATE_INFRA2(CLK_INFRA_UNIPRO_MBIST, "infra_unipro_mbist", "axi_sel", 16),
+ /* infra_sspm_bus_hclk is main clock in co-processor, should not be closed in Linux. */
+ GATE_INFRA2_FLAGS(CLK_INFRA_SSPM_BUS_HCLK, "infra_sspm_bus_hclk", "axi_sel", 17, CLK_IS_CRITICAL),
GATE_INFRA2(CLK_INFRA_I2C5, "infra_i2c5", "i2c_sel", 18),
GATE_INFRA2(CLK_INFRA_I2C5_ARBITER, "infra_i2c5_arbiter", "i2c_sel", 19),
GATE_INFRA2(CLK_INFRA_I2C5_IMM, "infra_i2c5_imm", "i2c_sel", 20),
@@ -766,6 +789,10 @@ static const struct mtk_gate infra_clks[] = {
GATE_INFRA3(CLK_INFRA_MSDC0_SELF, "infra_msdc0_self", "msdc50_0_sel", 0),
GATE_INFRA3(CLK_INFRA_MSDC1_SELF, "infra_msdc1_self", "msdc50_0_sel", 1),
GATE_INFRA3(CLK_INFRA_MSDC2_SELF, "infra_msdc2_self", "msdc50_0_sel", 2),
+ /* infra_sspm_26m_self is main clock in co-processor, should not be closed in Linux. */
+ GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_26M_SELF, "infra_sspm_26m_self", "f_f26m_ck", 3, CLK_IS_CRITICAL),
+ /* infra_sspm_32k_self is main clock in co-processor, should not be closed in Linux. */
+ GATE_INFRA3_FLAGS(CLK_INFRA_SSPM_32K_SELF, "infra_sspm_32k_self", "f_f26m_ck", 4, CLK_IS_CRITICAL),
GATE_INFRA3(CLK_INFRA_UFS_AXI, "infra_ufs_axi", "axi_sel", 5),
GATE_INFRA3(CLK_INFRA_I2C6, "infra_i2c6", "i2c_sel", 6),
GATE_INFRA3(CLK_INFRA_AP_MSDC0, "infra_ap_msdc0", "msdc50_hclk_sel", 7),
diff --git a/drivers/clk/meson/clk-pll.c b/drivers/clk/meson/clk-pll.c
index 8fef90bf962f..6fa7639a3050 100644
--- a/drivers/clk/meson/clk-pll.c
+++ b/drivers/clk/meson/clk-pll.c
@@ -367,9 +367,9 @@ static int meson_clk_pll_enable(struct clk_hw *hw)
* 3. enable the lock detect module
*/
if (MESON_PARM_APPLICABLE(&pll->current_en)) {
- usleep_range(10, 20);
+ udelay(10);
meson_parm_write(clk->map, &pll->current_en, 1);
- usleep_range(40, 50);
+ udelay(40);
}
if (MESON_PARM_APPLICABLE(&pll->l_detect)) {
diff --git a/drivers/counter/Kconfig b/drivers/counter/Kconfig
index bca21df51168..62962ae84b77 100644
--- a/drivers/counter/Kconfig
+++ b/drivers/counter/Kconfig
@@ -3,13 +3,6 @@
# Counter devices
#
-menuconfig COUNTER
- tristate "Counter support"
- help
- This enables counter device support through the Generic Counter
- interface. You only need to enable this, if you also want to enable
- one or more of the counter device drivers below.
-
config I8254
tristate
select COUNTER
@@ -25,6 +18,13 @@ config I8254
If built as a module its name will be i8254.
+menuconfig COUNTER
+ tristate "Counter support"
+ help
+ This enables counter device support through the Generic Counter
+ interface. You only need to enable this, if you also want to enable
+ one or more of the counter device drivers below.
+
if COUNTER
config 104_QUAD_8
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 81fba0dcbee9..9a1e194d5cf8 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1012,8 +1012,8 @@ static int amd_pstate_update_status(const char *buf, size_t size)
return 0;
}
-static ssize_t show_status(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
+static ssize_t status_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
ssize_t ret;
@@ -1024,7 +1024,7 @@ static ssize_t show_status(struct kobject *kobj,
return ret;
}
-static ssize_t store_status(struct kobject *a, struct kobj_attribute *b,
+static ssize_t status_store(struct device *a, struct device_attribute *b,
const char *buf, size_t count)
{
char *p = memchr(buf, '\n', count);
@@ -1043,7 +1043,7 @@ cpufreq_freq_attr_ro(amd_pstate_lowest_nonlinear_freq);
cpufreq_freq_attr_ro(amd_pstate_highest_perf);
cpufreq_freq_attr_rw(energy_performance_preference);
cpufreq_freq_attr_ro(energy_performance_available_preferences);
-define_one_global_rw(status);
+static DEVICE_ATTR_RW(status);
static struct freq_attr *amd_pstate_attr[] = {
&amd_pstate_max_freq,
@@ -1062,7 +1062,7 @@ static struct freq_attr *amd_pstate_epp_attr[] = {
};
static struct attribute *pstate_global_attributes[] = {
- &status.attr,
+ &dev_attr_status.attr,
NULL
};
diff --git a/drivers/cpuidle/cpuidle-psci-domain.c b/drivers/cpuidle/cpuidle-psci-domain.c
index c2d6d9c3c930..b88af1262f1a 100644
--- a/drivers/cpuidle/cpuidle-psci-domain.c
+++ b/drivers/cpuidle/cpuidle-psci-domain.c
@@ -120,20 +120,6 @@ static void psci_pd_remove(void)
}
}
-static bool psci_pd_try_set_osi_mode(void)
-{
- int ret;
-
- if (!psci_has_osi_support())
- return false;
-
- ret = psci_set_osi_mode(true);
- if (ret)
- return false;
-
- return true;
-}
-
static void psci_cpuidle_domain_sync_state(struct device *dev)
{
/*
@@ -152,15 +138,12 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
{
struct device_node *np = pdev->dev.of_node;
struct device_node *node;
- bool use_osi;
+ bool use_osi = psci_has_osi_support();
int ret = 0, pd_count = 0;
if (!np)
return -ENODEV;
- /* If OSI mode is supported, let's try to enable it. */
- use_osi = psci_pd_try_set_osi_mode();
-
/*
* Parse child nodes for the "#power-domain-cells" property and
* initialize a genpd/genpd-of-provider pair when it's found.
@@ -170,33 +153,37 @@ static int psci_cpuidle_domain_probe(struct platform_device *pdev)
continue;
ret = psci_pd_init(node, use_osi);
- if (ret)
- goto put_node;
+ if (ret) {
+ of_node_put(node);
+ goto exit;
+ }
pd_count++;
}
/* Bail out if not using the hierarchical CPU topology. */
if (!pd_count)
- goto no_pd;
+ return 0;
/* Link genpd masters/subdomains to model the CPU topology. */
ret = dt_idle_pd_init_topology(np);
if (ret)
goto remove_pd;
+ /* let's try to enable OSI. */
+ ret = psci_set_osi_mode(use_osi);
+ if (ret)
+ goto remove_pd;
+
pr_info("Initialized CPU PM domain topology using %s mode\n",
use_osi ? "OSI" : "PC");
return 0;
-put_node:
- of_node_put(node);
remove_pd:
+ dt_idle_pd_remove_topology(np);
psci_pd_remove();
+exit:
pr_err("failed to create CPU PM domains ret=%d\n", ret);
-no_pd:
- if (use_osi)
- psci_set_osi_mode(false);
return ret;
}
diff --git a/drivers/cpuidle/dt_idle_genpd.c b/drivers/cpuidle/dt_idle_genpd.c
index b37165514d4e..1af63c189039 100644
--- a/drivers/cpuidle/dt_idle_genpd.c
+++ b/drivers/cpuidle/dt_idle_genpd.c
@@ -152,6 +152,30 @@ int dt_idle_pd_init_topology(struct device_node *np)
return 0;
}
+int dt_idle_pd_remove_topology(struct device_node *np)
+{
+ struct device_node *node;
+ struct of_phandle_args child, parent;
+ int ret;
+
+ for_each_child_of_node(np, node) {
+ if (of_parse_phandle_with_args(node, "power-domains",
+ "#power-domain-cells", 0, &parent))
+ continue;
+
+ child.np = node;
+ child.args_count = 0;
+ ret = of_genpd_remove_subdomain(&parent, &child);
+ of_node_put(parent.np);
+ if (ret) {
+ of_node_put(node);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
struct device *dt_idle_attach_cpu(int cpu, const char *name)
{
struct device *dev;
diff --git a/drivers/cpuidle/dt_idle_genpd.h b/drivers/cpuidle/dt_idle_genpd.h
index a95483d08a02..3be1f70f55b5 100644
--- a/drivers/cpuidle/dt_idle_genpd.h
+++ b/drivers/cpuidle/dt_idle_genpd.h
@@ -14,6 +14,8 @@ struct generic_pm_domain *dt_idle_pd_alloc(struct device_node *np,
int dt_idle_pd_init_topology(struct device_node *np);
+int dt_idle_pd_remove_topology(struct device_node *np);
+
struct device *dt_idle_attach_cpu(int cpu, const char *name);
void dt_idle_detach_cpu(struct device *dev);
@@ -36,6 +38,11 @@ static inline int dt_idle_pd_init_topology(struct device_node *np)
return 0;
}
+static inline int dt_idle_pd_remove_topology(struct device_node *np)
+{
+ return 0;
+}
+
static inline struct device *dt_idle_attach_cpu(int cpu, const char *name)
{
return NULL;
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index ff9ddbbca377..68e73775392d 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -382,8 +382,8 @@ static void kick_trng(struct device *dev, int ent_delay)
val = ent_delay;
/* min. freq. count, equal to 1/4 of the entropy sample length */
wr_reg32(&r4tst->rtfrqmin, val >> 2);
- /* max. freq. count, equal to 16 times the entropy sample length */
- wr_reg32(&r4tst->rtfrqmax, val << 4);
+ /* disable maximum frequency count */
+ wr_reg32(&r4tst->rtfrqmax, RTFRQMAX_DISABLE);
}
wr_reg32(&r4tst->rtsdctl, (val << RTSDCTL_ENT_DLY_SHIFT) |
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
index fcbf8295fde3..8ea1d340e438 100644
--- a/drivers/cxl/Kconfig
+++ b/drivers/cxl/Kconfig
@@ -2,6 +2,8 @@
menuconfig CXL_BUS
tristate "CXL (Compute Express Link) Devices Support"
depends on PCI
+ select FW_LOADER
+ select FW_UPLOAD
select PCI_DOE
help
CXL is a bus that is electrically compatible with PCI Express, but
@@ -82,7 +84,6 @@ config CXL_PMEM
config CXL_MEM
tristate "CXL: Memory Expansion"
depends on CXL_PCI
- select FW_UPLOAD
default CXL_BUS
help
The CXL.mem protocol allows a device to act as a provider of "System
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index 658e6b84a769..d1c559879dcc 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -296,9 +296,8 @@ err_xormap:
else
rc = cxl_decoder_autoremove(dev, cxld);
if (rc) {
- dev_err(dev, "Failed to add decode range [%#llx - %#llx]\n",
- cxld->hpa_range.start, cxld->hpa_range.end);
- return 0;
+ dev_err(dev, "Failed to add decode range: %pr", res);
+ return rc;
}
dev_dbg(dev, "add: %s node: %d range [%#llx - %#llx]\n",
dev_name(&cxld->dev),
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index d6d067fbee97..ca60bb8114f2 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -121,6 +121,45 @@ static bool cxl_is_security_command(u16 opcode)
return false;
}
+static void cxl_set_security_cmd_enabled(struct cxl_security_state *security,
+ u16 opcode)
+{
+ switch (opcode) {
+ case CXL_MBOX_OP_SANITIZE:
+ set_bit(CXL_SEC_ENABLED_SANITIZE, security->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_SECURE_ERASE:
+ set_bit(CXL_SEC_ENABLED_SECURE_ERASE,
+ security->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_GET_SECURITY_STATE:
+ set_bit(CXL_SEC_ENABLED_GET_SECURITY_STATE,
+ security->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_SET_PASSPHRASE:
+ set_bit(CXL_SEC_ENABLED_SET_PASSPHRASE,
+ security->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_DISABLE_PASSPHRASE:
+ set_bit(CXL_SEC_ENABLED_DISABLE_PASSPHRASE,
+ security->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_UNLOCK:
+ set_bit(CXL_SEC_ENABLED_UNLOCK, security->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_FREEZE_SECURITY:
+ set_bit(CXL_SEC_ENABLED_FREEZE_SECURITY,
+ security->enabled_cmds);
+ break;
+ case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
+ set_bit(CXL_SEC_ENABLED_PASSPHRASE_SECURE_ERASE,
+ security->enabled_cmds);
+ break;
+ default:
+ break;
+ }
+}
+
static bool cxl_is_poison_command(u16 opcode)
{
#define CXL_MBOX_OP_POISON_CMDS 0x43
@@ -677,7 +716,8 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
u16 opcode = le16_to_cpu(cel_entry[i].opcode);
struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
- if (!cmd && !cxl_is_poison_command(opcode)) {
+ if (!cmd && (!cxl_is_poison_command(opcode) ||
+ !cxl_is_security_command(opcode))) {
dev_dbg(dev,
"Opcode 0x%04x unsupported by driver\n", opcode);
continue;
@@ -689,6 +729,9 @@ static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
if (cxl_is_poison_command(opcode))
cxl_set_poison_cmd_enabled(&mds->poison, opcode);
+ if (cxl_is_security_command(opcode))
+ cxl_set_security_cmd_enabled(&mds->security, opcode);
+
dev_dbg(dev, "Opcode 0x%04x enabled\n", opcode);
}
}
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index f99e7ec3cc40..14b547c07f54 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -477,9 +477,28 @@ static struct attribute_group cxl_memdev_pmem_attribute_group = {
.attrs = cxl_memdev_pmem_attributes,
};
+static umode_t cxl_memdev_security_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+
+ if (a == &dev_attr_security_sanitize.attr &&
+ !test_bit(CXL_SEC_ENABLED_SANITIZE, mds->security.enabled_cmds))
+ return 0;
+
+ if (a == &dev_attr_security_erase.attr &&
+ !test_bit(CXL_SEC_ENABLED_SECURE_ERASE, mds->security.enabled_cmds))
+ return 0;
+
+ return a->mode;
+}
+
static struct attribute_group cxl_memdev_security_attribute_group = {
.name = "security",
.attrs = cxl_memdev_security_attributes,
+ .is_visible = cxl_memdev_security_visible,
};
static const struct attribute_group *cxl_memdev_attribute_groups[] = {
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 79e99c873ca2..706f8a6d1ef4 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -244,6 +244,19 @@ enum poison_cmd_enabled_bits {
CXL_POISON_ENABLED_MAX
};
+/* Device enabled security commands */
+enum security_cmd_enabled_bits {
+ CXL_SEC_ENABLED_SANITIZE,
+ CXL_SEC_ENABLED_SECURE_ERASE,
+ CXL_SEC_ENABLED_GET_SECURITY_STATE,
+ CXL_SEC_ENABLED_SET_PASSPHRASE,
+ CXL_SEC_ENABLED_DISABLE_PASSPHRASE,
+ CXL_SEC_ENABLED_UNLOCK,
+ CXL_SEC_ENABLED_FREEZE_SECURITY,
+ CXL_SEC_ENABLED_PASSPHRASE_SECURE_ERASE,
+ CXL_SEC_ENABLED_MAX
+};
+
/**
* struct cxl_poison_state - Driver poison state info
*
@@ -323,7 +336,7 @@ struct cxl_mbox_activate_fw {
/* FW state bits */
#define CXL_FW_STATE_BITS 32
-#define CXL_FW_CANCEL BIT(0)
+#define CXL_FW_CANCEL 0
/**
* struct cxl_fw_state - Firmware upload / activation state
@@ -346,6 +359,7 @@ struct cxl_fw_state {
* struct cxl_security_state - Device security state
*
* @state: state of last security operation
+ * @enabled_cmds: All security commands enabled in the CEL
* @poll: polling for sanitization is enabled, device has no mbox irq support
* @poll_tmo_secs: polling timeout
* @poll_dwork: polling work item
@@ -353,6 +367,7 @@ struct cxl_fw_state {
*/
struct cxl_security_state {
unsigned long state;
+ DECLARE_BITMAP(enabled_cmds, CXL_SEC_ENABLED_MAX);
bool poll;
int poll_tmo_secs;
struct delayed_work poll_dwork;
@@ -434,6 +449,7 @@ struct cxl_dev_state {
* @next_persistent_bytes: persistent capacity change pending device reset
* @event: event log driver state
* @poison: poison driver state info
+ * @security: security driver state info
* @fw: firmware upload / activation state
* @mbox_send: @dev specific transport for transmitting mailbox commands
*
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index b6f71eb00866..38b4110378de 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -571,6 +571,7 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
dma_resv_for_each_fence_unlocked(&cursor, fence) {
if (dma_resv_iter_is_restarted(&cursor)) {
+ struct dma_fence **new_fences;
unsigned int count;
while (*num_fences)
@@ -579,13 +580,17 @@ int dma_resv_get_fences(struct dma_resv *obj, enum dma_resv_usage usage,
count = cursor.num_fences + 1;
/* Eventually re-allocate the array */
- *fences = krealloc_array(*fences, count,
- sizeof(void *),
- GFP_KERNEL);
- if (count && !*fences) {
+ new_fences = krealloc_array(*fences, count,
+ sizeof(void *),
+ GFP_KERNEL);
+ if (count && !new_fences) {
+ kfree(*fences);
+ *fences = NULL;
+ *num_fences = 0;
dma_resv_iter_end(&cursor);
return -ENOMEM;
}
+ *fences = new_fences;
}
(*fences)[(*num_fences)++] = dma_fence_get(fence);
diff --git a/drivers/dma-buf/sw_sync.c b/drivers/dma-buf/sw_sync.c
index 63f0aeb66db6..f0a35277fd84 100644
--- a/drivers/dma-buf/sw_sync.c
+++ b/drivers/dma-buf/sw_sync.c
@@ -191,6 +191,7 @@ static const struct dma_fence_ops timeline_fence_ops = {
*/
static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
{
+ LIST_HEAD(signalled);
struct sync_pt *pt, *next;
trace_sync_timeline(obj);
@@ -203,21 +204,20 @@ static void sync_timeline_signal(struct sync_timeline *obj, unsigned int inc)
if (!timeline_fence_signaled(&pt->base))
break;
- list_del_init(&pt->link);
+ dma_fence_get(&pt->base);
+
+ list_move_tail(&pt->link, &signalled);
rb_erase(&pt->node, &obj->pt_tree);
- /*
- * A signal callback may release the last reference to this
- * fence, causing it to be freed. That operation has to be
- * last to avoid a use after free inside this loop, and must
- * be after we remove the fence from the timeline in order to
- * prevent deadlocking on timeline->lock inside
- * timeline_fence_release().
- */
dma_fence_signal_locked(&pt->base);
}
spin_unlock_irq(&obj->lock);
+
+ list_for_each_entry_safe(pt, next, &signalled, link) {
+ list_del_init(&pt->link);
+ dma_fence_put(&pt->base);
+ }
}
/**
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 644c188d6a11..08fdd0e2ed1b 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -211,6 +211,7 @@ config FSL_DMA
config FSL_EDMA
tristate "Freescale eDMA engine support"
depends on OF
+ depends on HAS_IOMEM
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
help
@@ -280,6 +281,7 @@ config IMX_SDMA
config INTEL_IDMA64
tristate "Intel integrated DMA 64-bit support"
+ depends on HAS_IOMEM
select DMA_ENGINE
select DMA_VIRTUAL_CHANNELS
help
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 5abbcc61c528..9a15f0d12c79 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -384,9 +384,7 @@ static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
wq->threshold = 0;
wq->priority = 0;
wq->enqcmds_retries = IDXD_ENQCMDS_RETRIES;
- clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
- clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
- clear_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+ wq->flags = 0;
memset(wq->name, 0, WQ_NAME_SIZE);
wq->max_xfer_bytes = WQ_DEFAULT_MAX_XFER;
idxd_wq_set_max_batch_size(idxd->data->type, wq, WQ_DEFAULT_MAX_BATCH);
diff --git a/drivers/dma/mcf-edma.c b/drivers/dma/mcf-edma.c
index ebd8733f72ad..9413fad08a60 100644
--- a/drivers/dma/mcf-edma.c
+++ b/drivers/dma/mcf-edma.c
@@ -190,7 +190,13 @@ static int mcf_edma_probe(struct platform_device *pdev)
return -EINVAL;
}
- chans = pdata->dma_channels;
+ if (!pdata->dma_channels) {
+ dev_info(&pdev->dev, "setting default channel number to 64");
+ chans = 64;
+ } else {
+ chans = pdata->dma_channels;
+ }
+
len = sizeof(*mcf_edma) + sizeof(*mcf_chan) * chans;
mcf_edma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
if (!mcf_edma)
@@ -202,11 +208,6 @@ static int mcf_edma_probe(struct platform_device *pdev)
mcf_edma->drvdata = &mcf_data;
mcf_edma->big_endian = 1;
- if (!mcf_edma->n_chans) {
- dev_info(&pdev->dev, "setting default channel number to 64");
- mcf_edma->n_chans = 64;
- }
-
mutex_init(&mcf_edma->fsl_edma_mutex);
mcf_edma->membase = devm_platform_ioremap_resource(pdev, 0);
diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c
index 95a462a1f511..b6e0ac8314e5 100644
--- a/drivers/dma/owl-dma.c
+++ b/drivers/dma/owl-dma.c
@@ -192,7 +192,7 @@ struct owl_dma_pchan {
};
/**
- * struct owl_dma_pchan - Wrapper for DMA ENGINE channel
+ * struct owl_dma_vchan - Wrapper for DMA ENGINE channel
* @vc: wrapped virtual channel
* @pchan: the physical channel utilized by this channel
* @txd: active transaction on this channel
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index b4731fe6bbc1..3cf0b38387ae 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -404,6 +404,12 @@ enum desc_status {
*/
BUSY,
/*
+ * Pause was called while descriptor was BUSY. Due to hardware
+ * limitations, only termination is possible for descriptors
+ * that have been paused.
+ */
+ PAUSED,
+ /*
* Sitting on the channel work_list but xfer done
* by PL330 core
*/
@@ -2041,7 +2047,7 @@ static inline void fill_queue(struct dma_pl330_chan *pch)
list_for_each_entry(desc, &pch->work_list, node) {
/* If already submitted */
- if (desc->status == BUSY)
+ if (desc->status == BUSY || desc->status == PAUSED)
continue;
ret = pl330_submit_req(pch->thread, desc);
@@ -2326,6 +2332,7 @@ static int pl330_pause(struct dma_chan *chan)
{
struct dma_pl330_chan *pch = to_pchan(chan);
struct pl330_dmac *pl330 = pch->dmac;
+ struct dma_pl330_desc *desc;
unsigned long flags;
pm_runtime_get_sync(pl330->ddma.dev);
@@ -2335,6 +2342,10 @@ static int pl330_pause(struct dma_chan *chan)
_stop(pch->thread);
spin_unlock(&pl330->lock);
+ list_for_each_entry(desc, &pch->work_list, node) {
+ if (desc->status == BUSY)
+ desc->status = PAUSED;
+ }
spin_unlock_irqrestore(&pch->lock, flags);
pm_runtime_mark_last_busy(pl330->ddma.dev);
pm_runtime_put_autosuspend(pl330->ddma.dev);
@@ -2425,7 +2436,7 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
else if (running && desc == running)
transferred =
pl330_get_current_xferred_count(pch, desc);
- else if (desc->status == BUSY)
+ else if (desc->status == BUSY || desc->status == PAUSED)
/*
* Busy but not running means either just enqueued,
* or finished and not yet marked done
@@ -2442,6 +2453,9 @@ pl330_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
case DONE:
ret = DMA_COMPLETE;
break;
+ case PAUSED:
+ ret = DMA_PAUSED;
+ break;
case PREP:
case BUSY:
ret = DMA_IN_PROGRESS;
diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c
index 93ee298d52b8..e0bfd129d563 100644
--- a/drivers/dma/xilinx/xdma.c
+++ b/drivers/dma/xilinx/xdma.c
@@ -668,6 +668,8 @@ static int xdma_set_vector_reg(struct xdma_device *xdev, u32 vec_tbl_start,
val |= irq_start << shift;
irq_start++;
irq_num--;
+ if (!irq_num)
+ break;
}
/* write IRQ register */
@@ -715,7 +717,7 @@ static int xdma_irq_init(struct xdma_device *xdev)
ret = request_irq(irq, xdma_channel_isr, 0,
"xdma-c2h-channel", &xdev->c2h_chans[j]);
if (ret) {
- xdma_err(xdev, "H2C channel%d request irq%d failed: %d",
+ xdma_err(xdev, "C2H channel%d request irq%d failed: %d",
j, irq, ret);
goto failed_init_c2h;
}
@@ -892,7 +894,7 @@ static int xdma_probe(struct platform_device *pdev)
}
reg_base = devm_ioremap_resource(&pdev->dev, res);
- if (!reg_base) {
+ if (IS_ERR(reg_base)) {
xdma_err(xdev, "ioremap failed");
goto failed;
}
diff --git a/drivers/firmware/arm_scmi/mailbox.c b/drivers/firmware/arm_scmi/mailbox.c
index 1efa5e9392c4..19246ed1f01f 100644
--- a/drivers/firmware/arm_scmi/mailbox.c
+++ b/drivers/firmware/arm_scmi/mailbox.c
@@ -166,8 +166,10 @@ static int mailbox_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
return -ENOMEM;
shmem = of_parse_phandle(cdev->of_node, "shmem", idx);
- if (!of_device_is_compatible(shmem, "arm,scmi-shmem"))
+ if (!of_device_is_compatible(shmem, "arm,scmi-shmem")) {
+ of_node_put(shmem);
return -ENXIO;
+ }
ret = of_address_to_resource(shmem, 0, &res);
of_node_put(shmem);
diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
index 6971dcf72fb9..0493aa3c12bf 100644
--- a/drivers/firmware/arm_scmi/raw_mode.c
+++ b/drivers/firmware/arm_scmi/raw_mode.c
@@ -818,10 +818,13 @@ static ssize_t scmi_dbg_raw_mode_common_write(struct file *filp,
* before sending it with a single RAW xfer.
*/
if (rd->tx_size < rd->tx_req_size) {
- size_t cnt;
+ ssize_t cnt;
cnt = simple_write_to_buffer(rd->tx.buf, rd->tx.len, ppos,
buf, count);
+ if (cnt < 0)
+ return cnt;
+
rd->tx_size += cnt;
if (cnt < count)
return cnt;
diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c
index 621c37efe3ec..c193516a254d 100644
--- a/drivers/firmware/arm_scmi/smc.c
+++ b/drivers/firmware/arm_scmi/smc.c
@@ -40,6 +40,7 @@
/**
* struct scmi_smc - Structure representing a SCMI smc transport
*
+ * @irq: An optional IRQ for completion
* @cinfo: SCMI channel info
* @shmem: Transmit/Receive shared memory area
* @shmem_lock: Lock to protect access to Tx/Rx shared memory area.
@@ -52,6 +53,7 @@
*/
struct scmi_smc {
+ int irq;
struct scmi_chan_info *cinfo;
struct scmi_shared_mem __iomem *shmem;
/* Protect access to shmem area */
@@ -127,7 +129,7 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
struct resource res;
struct device_node *np;
u32 func_id;
- int ret, irq;
+ int ret;
if (!tx)
return -ENODEV;
@@ -137,8 +139,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
return -ENOMEM;
np = of_parse_phandle(cdev->of_node, "shmem", 0);
- if (!of_device_is_compatible(np, "arm,scmi-shmem"))
+ if (!of_device_is_compatible(np, "arm,scmi-shmem")) {
+ of_node_put(np);
return -ENXIO;
+ }
ret = of_address_to_resource(np, 0, &res);
of_node_put(np);
@@ -167,11 +171,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
* completion of a message is signaled by an interrupt rather than by
* the return of the SMC call.
*/
- irq = of_irq_get_byname(cdev->of_node, "a2p");
- if (irq > 0) {
- ret = devm_request_irq(dev, irq, smc_msg_done_isr,
- IRQF_NO_SUSPEND,
- dev_name(dev), scmi_info);
+ scmi_info->irq = of_irq_get_byname(cdev->of_node, "a2p");
+ if (scmi_info->irq > 0) {
+ ret = request_irq(scmi_info->irq, smc_msg_done_isr,
+ IRQF_NO_SUSPEND, dev_name(dev), scmi_info);
if (ret) {
dev_err(dev, "failed to setup SCMI smc irq\n");
return ret;
@@ -193,6 +196,10 @@ static int smc_chan_free(int id, void *p, void *data)
struct scmi_chan_info *cinfo = p;
struct scmi_smc *scmi_info = cinfo->transport_info;
+ /* Ignore any possible further reception on the IRQ path */
+ if (scmi_info->irq > 0)
+ free_irq(scmi_info->irq, scmi_info);
+
cinfo->transport_info = NULL;
scmi_info->cinfo = NULL;
diff --git a/drivers/firmware/smccc/soc_id.c b/drivers/firmware/smccc/soc_id.c
index 890eb454599a..1990263fbba0 100644
--- a/drivers/firmware/smccc/soc_id.c
+++ b/drivers/firmware/smccc/soc_id.c
@@ -34,7 +34,6 @@ static struct soc_device_attribute *soc_dev_attr;
static int __init smccc_soc_init(void)
{
- struct arm_smccc_res res;
int soc_id_rev, soc_id_version;
static char soc_id_str[20], soc_id_rev_str[12];
static char soc_id_jep106_id_str[12];
@@ -49,13 +48,13 @@ static int __init smccc_soc_init(void)
}
if (soc_id_version < 0) {
- pr_err("ARCH_SOC_ID(0) returned error: %lx\n", res.a0);
+ pr_err("Invalid SoC Version: %x\n", soc_id_version);
return -EINVAL;
}
soc_id_rev = arm_smccc_get_soc_id_revision();
if (soc_id_rev < 0) {
- pr_err("ARCH_SOC_ID(1) returned error: %lx\n", res.a0);
+ pr_err("Invalid SoC Revision: %x\n", soc_id_rev);
return -EINVAL;
}
diff --git a/drivers/gpio/gpio-mvebu.c b/drivers/gpio/gpio-mvebu.c
index a68f682aec01..67497116ce27 100644
--- a/drivers/gpio/gpio-mvebu.c
+++ b/drivers/gpio/gpio-mvebu.c
@@ -874,7 +874,7 @@ static int mvebu_pwm_probe(struct platform_device *pdev,
spin_lock_init(&mvpwm->lock);
- return pwmchip_add(&mvpwm->chip);
+ return devm_pwmchip_add(dev, &mvpwm->chip);
}
#ifdef CONFIG_DEBUG_FS
@@ -1112,6 +1112,13 @@ static int mvebu_gpio_probe_syscon(struct platform_device *pdev,
return 0;
}
+static void mvebu_gpio_remove_irq_domain(void *data)
+{
+ struct irq_domain *domain = data;
+
+ irq_domain_remove(domain);
+}
+
static int mvebu_gpio_probe(struct platform_device *pdev)
{
struct mvebu_gpio_chip *mvchip;
@@ -1243,17 +1250,21 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
if (!mvchip->domain) {
dev_err(&pdev->dev, "couldn't allocate irq domain %s (DT).\n",
mvchip->chip.label);
- err = -ENODEV;
- goto err_pwm;
+ return -ENODEV;
}
+ err = devm_add_action_or_reset(&pdev->dev, mvebu_gpio_remove_irq_domain,
+ mvchip->domain);
+ if (err)
+ return err;
+
err = irq_alloc_domain_generic_chips(
mvchip->domain, ngpios, 2, np->name, handle_level_irq,
IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_LEVEL, 0, 0);
if (err) {
dev_err(&pdev->dev, "couldn't allocate irq chips %s (DT).\n",
mvchip->chip.label);
- goto err_domain;
+ return err;
}
/*
@@ -1293,13 +1304,6 @@ static int mvebu_gpio_probe(struct platform_device *pdev)
}
return 0;
-
-err_domain:
- irq_domain_remove(mvchip->domain);
-err_pwm:
- pwmchip_remove(&mvchip->mvpwm->chip);
-
- return err;
}
static struct platform_driver mvebu_gpio_driver = {
diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 8b49b0abacd5..533d81572579 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -291,6 +291,15 @@ static void gpio_sim_mutex_destroy(void *data)
mutex_destroy(lock);
}
+static void gpio_sim_dispose_mappings(void *data)
+{
+ struct gpio_sim_chip *chip = data;
+ unsigned int i;
+
+ for (i = 0; i < chip->gc.ngpio; i++)
+ irq_dispose_mapping(irq_find_mapping(chip->irq_sim, i));
+}
+
static void gpio_sim_sysfs_remove(void *data)
{
struct gpio_sim_chip *chip = data;
@@ -402,10 +411,14 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
if (!chip->pull_map)
return -ENOMEM;
- chip->irq_sim = devm_irq_domain_create_sim(dev, NULL, num_lines);
+ chip->irq_sim = devm_irq_domain_create_sim(dev, swnode, num_lines);
if (IS_ERR(chip->irq_sim))
return PTR_ERR(chip->irq_sim);
+ ret = devm_add_action_or_reset(dev, gpio_sim_dispose_mappings, chip);
+ if (ret)
+ return ret;
+
mutex_init(&chip->lock);
ret = devm_add_action_or_reset(dev, gpio_sim_mutex_destroy,
&chip->lock);
@@ -429,6 +442,7 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
gc->set_config = gpio_sim_set_config;
gc->to_irq = gpio_sim_to_irq;
gc->free = gpio_sim_free;
+ gc->can_sleep = true;
ret = devm_gpiochip_add_data(dev, gc, chip);
if (ret)
diff --git a/drivers/gpio/gpio-tps68470.c b/drivers/gpio/gpio-tps68470.c
index aaddcabe9b35..532deaddfd4e 100644
--- a/drivers/gpio/gpio-tps68470.c
+++ b/drivers/gpio/gpio-tps68470.c
@@ -91,13 +91,13 @@ static int tps68470_gpio_output(struct gpio_chip *gc, unsigned int offset,
struct tps68470_gpio_data *tps68470_gpio = gpiochip_get_data(gc);
struct regmap *regmap = tps68470_gpio->tps68470_regmap;
+ /* Set the initial value */
+ tps68470_gpio_set(gc, offset, value);
+
/* rest are always outputs */
if (offset >= TPS68470_N_REGULAR_GPIO)
return 0;
- /* Set the initial value */
- tps68470_gpio_set(gc, offset, value);
-
return regmap_update_bits(regmap, TPS68470_GPIO_CTL_REG_A(offset),
TPS68470_GPIO_MODE_MASK,
TPS68470_GPIO_MODE_OUT_CMOS);
diff --git a/drivers/gpio/gpio-ws16c48.c b/drivers/gpio/gpio-ws16c48.c
index e73885a4dc32..afb42a8e916f 100644
--- a/drivers/gpio/gpio-ws16c48.c
+++ b/drivers/gpio/gpio-ws16c48.c
@@ -18,7 +18,7 @@
#include <linux/spinlock.h>
#include <linux/types.h>
-#define WS16C48_EXTENT 10
+#define WS16C48_EXTENT 11
#define MAX_NUM_WS16C48 max_num_isa_dev(WS16C48_EXTENT)
static unsigned int base[MAX_NUM_WS16C48];
diff --git a/drivers/gpio/gpiolib-sysfs.c b/drivers/gpio/gpiolib-sysfs.c
index 530dfd19d7b5..50503a4525eb 100644
--- a/drivers/gpio/gpiolib-sysfs.c
+++ b/drivers/gpio/gpiolib-sysfs.c
@@ -515,8 +515,9 @@ static ssize_t unexport_store(const struct class *class,
* they may be undone on its behalf too.
*/
if (test_and_clear_bit(FLAG_SYSFS, &desc->flags)) {
- status = 0;
+ gpiod_unexport(desc);
gpiod_free(desc);
+ status = 0;
}
done:
if (status)
@@ -781,8 +782,10 @@ void gpiochip_sysfs_unregister(struct gpio_device *gdev)
mutex_unlock(&sysfs_lock);
/* unregister gpiod class devices owned by sysfs */
- for_each_gpio_desc_with_flag(chip, desc, FLAG_SYSFS)
+ for_each_gpio_desc_with_flag(chip, desc, FLAG_SYSFS) {
+ gpiod_unexport(desc);
gpiod_free(desc);
+ }
}
static int __init gpiolib_sysfs_init(void)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 251c875b5c34..76e0c38026c3 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -2167,12 +2167,18 @@ static bool gpiod_free_commit(struct gpio_desc *desc)
void gpiod_free(struct gpio_desc *desc)
{
- if (desc && desc->gdev && gpiod_free_commit(desc)) {
- module_put(desc->gdev->owner);
- gpio_device_put(desc->gdev);
- } else {
+ /*
+ * We must not use VALIDATE_DESC_VOID() as the underlying gdev->chip
+ * may already be NULL but we still want to put the references.
+ */
+ if (!desc)
+ return;
+
+ if (!gpiod_free_commit(desc))
WARN_ON(extra_checks);
- }
+
+ module_put(desc->gdev->owner);
+ gpio_device_put(desc->gdev);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index a3b86b86dc47..6dc950c1b689 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1296,6 +1296,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
+bool amdgpu_sg_display_supported(struct amdgpu_device *adev);
bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
bool amdgpu_device_aspm_support_quirk(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index a7f314ddd173..d34c3ef8f3ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1709,7 +1709,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0;
}
- xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id;
+ xcp_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ?
+ 0 : fpriv->xcp_id;
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 040f4cb6ab2d..fb78a8f47587 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -295,7 +295,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
if (!p->gang_size) {
ret = -EINVAL;
- goto free_partial_kdata;
+ goto free_all_kdata;
}
for (i = 0; i < p->gang_size; ++i) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a2cdde0ca0a7..6238701cde23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1459,6 +1459,32 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
}
/*
+ * On APUs with >= 64GB white flickering has been observed w/ SG enabled.
+ * Disable S/G on such systems until we have a proper fix.
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2354
+ * https://gitlab.freedesktop.org/drm/amd/-/issues/2735
+ */
+bool amdgpu_sg_display_supported(struct amdgpu_device *adev)
+{
+ switch (amdgpu_sg_display) {
+ case -1:
+ break;
+ case 0:
+ return false;
+ case 1:
+ return true;
+ default:
+ return false;
+ }
+ if ((totalram_pages() << (PAGE_SHIFT - 10)) +
+ (adev->gmc.real_vram_size / 1024) >= 64000000) {
+ DRM_WARN("Disabling S/G due to >=64GB RAM\n");
+ return false;
+ }
+ return true;
+}
+
+/*
* Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
* speed switching. Until we have confirmation from Intel that a specific host
* supports it, it's safer that we keep it disabled for all.
@@ -3696,10 +3722,11 @@ static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
{
if (amdgpu_mcbp == 1)
adev->gfx.mcbp = true;
-
- if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
- (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
- adev->gfx.num_gfx_rings)
+ else if (amdgpu_mcbp == 0)
+ adev->gfx.mcbp = false;
+ else if ((adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 0, 0)) &&
+ (adev->ip_versions[GC_HWIP][0] < IP_VERSION(10, 0, 0)) &&
+ adev->gfx.num_gfx_rings)
adev->gfx.mcbp = true;
if (amdgpu_sriov_vf(adev))
@@ -4367,6 +4394,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
cancel_delayed_work_sync(&adev->delayed_init_work);
+ flush_delayed_work(&adev->gfx.gfx_off_delay_work);
amdgpu_ras_suspend(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index c694b41f6461..7537f5aa76f0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -552,6 +552,41 @@ int amdgpu_fence_driver_sw_init(struct amdgpu_device *adev)
}
/**
+ * amdgpu_fence_need_ring_interrupt_restore - helper function to check whether
+ * fence driver interrupts need to be restored.
+ *
+ * @ring: ring that to be checked
+ *
+ * Interrupts for rings that belong to GFX IP don't need to be restored
+ * when the target power state is s0ix.
+ *
+ * Return true if need to restore interrupts, false otherwise.
+ */
+static bool amdgpu_fence_need_ring_interrupt_restore(struct amdgpu_ring *ring)
+{
+ struct amdgpu_device *adev = ring->adev;
+ bool is_gfx_power_domain = false;
+
+ switch (ring->funcs->type) {
+ case AMDGPU_RING_TYPE_SDMA:
+ /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */
+ if (adev->ip_versions[SDMA0_HWIP][0] >= IP_VERSION(5, 0, 0))
+ is_gfx_power_domain = true;
+ break;
+ case AMDGPU_RING_TYPE_GFX:
+ case AMDGPU_RING_TYPE_COMPUTE:
+ case AMDGPU_RING_TYPE_KIQ:
+ case AMDGPU_RING_TYPE_MES:
+ is_gfx_power_domain = true;
+ break;
+ default:
+ break;
+ }
+
+ return !(adev->in_s0ix && is_gfx_power_domain);
+}
+
+/**
* amdgpu_fence_driver_hw_fini - tear down the fence driver
* for all possible rings.
*
@@ -579,7 +614,8 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
amdgpu_fence_driver_force_completion(ring);
if (!drm_dev_is_unplugged(adev_to_drm(adev)) &&
- ring->fence_drv.irq_src)
+ ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
@@ -655,7 +691,8 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
continue;
/* enable the interrupt */
- if (ring->fence_drv.irq_src)
+ if (ring->fence_drv.irq_src &&
+ amdgpu_fence_need_ring_interrupt_restore(ring))
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index a33d4bc34cee..fd81b04559d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -692,15 +692,8 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (adev->gfx.gfx_off_req_count == 0 &&
!adev->gfx.gfx_off_state) {
- /* If going to s2idle, no need to wait */
- if (adev->in_s0ix) {
- if (!amdgpu_dpm_set_powergating_by_smu(adev,
- AMD_IP_BLOCK_TYPE_GFX, true))
- adev->gfx.gfx_off_state = true;
- } else {
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
delay);
- }
}
} else {
if (adev->gfx.gfx_off_req_count == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index cca5a495611f..12414a713256 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1229,13 +1229,13 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
pasid = 0;
}
- r = amdgpu_vm_init(adev, &fpriv->vm);
+ r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
if (r)
goto error_pasid;
- r = amdgpu_xcp_open_device(adev, fpriv, file_priv);
+ r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
if (r)
- goto error_vm;
+ goto error_pasid;
r = amdgpu_vm_set_pasid(adev, &fpriv->vm, pasid);
if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index e9091ebfe230..f808841310fd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1382,7 +1382,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
goto error_pasid;
}
- r = amdgpu_vm_init(adev, vm);
+ r = amdgpu_vm_init(adev, vm, -1);
if (r) {
DRM_ERROR("failed to initialize vm\n");
goto error_pasid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 6d676bdd1505..78d1ee71f3f4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -498,11 +498,11 @@ static int psp_sw_init(void *handle)
return 0;
failed2:
- amdgpu_bo_free_kernel(&psp->fw_pri_bo,
- &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
-failed1:
amdgpu_bo_free_kernel(&psp->fence_buf_bo,
&psp->fence_buf_mc_addr, &psp->fence_buf);
+failed1:
+ amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+ &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
index b779ee4bbaa7..e1ee1c7117fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -397,7 +397,7 @@ void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
WARN_ON(!ring->is_sw_ring);
- if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+ if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
if (amdgpu_mcbp_scan(mux) > 0)
amdgpu_mcbp_trigger_preempt(mux);
return;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 53ff91fc6cf6..d0748bcfad16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -55,8 +55,9 @@ static enum hrtimer_restart amdgpu_vkms_vblank_simulate(struct hrtimer *timer)
DRM_WARN("%s: vblank timer overrun\n", __func__);
ret = drm_crtc_handle_vblank(crtc);
+ /* Don't queue timer again when vblank is disabled. */
if (!ret)
- DRM_ERROR("amdgpu_vkms failure on handling vblank");
+ return HRTIMER_NORESTART;
return HRTIMER_RESTART;
}
@@ -81,7 +82,7 @@ static void amdgpu_vkms_disable_vblank(struct drm_crtc *crtc)
{
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
- hrtimer_cancel(&amdgpu_crtc->vblank_timer);
+ hrtimer_try_to_cancel(&amdgpu_crtc->vblank_timer);
}
static bool amdgpu_vkms_get_vblank_timestamp(struct drm_crtc *crtc,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 291977b93b1d..ec1ec08d4058 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2121,13 +2121,14 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @xcp_id: GPU partition selection id
*
* Init @vm fields.
*
* Returns:
* 0 for success, error for failure.
*/
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id)
{
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
@@ -2177,7 +2178,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->evicting = false;
r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
- false, &root);
+ false, &root, xcp_id);
if (r)
goto error_free_delayed;
root_bo = &root->bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9c85d494f2a2..ffac7413c657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -392,7 +392,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
u32 pasid);
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -475,7 +475,8 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm,
int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_bo_vm *vmbo, bool immediate);
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int level, bool immediate, struct amdgpu_bo_vm **vmbo);
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id);
void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct amdgpu_vm *vm);
bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index dea1a64be44d..5431332bbdb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -498,11 +498,12 @@ exit:
* @level: the page table level
* @immediate: use a immediate update
* @vmbo: pointer to the buffer object pointer
+ * @xcp_id: GPU partition id
*/
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int level, bool immediate, struct amdgpu_bo_vm **vmbo)
+ int level, bool immediate, struct amdgpu_bo_vm **vmbo,
+ int32_t xcp_id)
{
- struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
struct amdgpu_bo_param bp;
struct amdgpu_bo *bo;
struct dma_resv *resv;
@@ -535,7 +536,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel;
bp.no_wait_gpu = immediate;
- bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+ bp.xcp_id_plus1 = xcp_id + 1;
if (vm->root.bo)
bp.resv = vm->root.bo->tbo.base.resv;
@@ -561,7 +562,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel;
bp.resv = bo->tbo.base.resv;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
- bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+ bp.xcp_id_plus1 = xcp_id + 1;
r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
@@ -606,7 +607,8 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
return 0;
amdgpu_vm_eviction_unlock(vm);
- r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
+ r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
+ vm->root.bo->xcp_id);
amdgpu_vm_eviction_lock(vm);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
index d175e862f222..565a1fa436d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c
@@ -239,8 +239,13 @@ static int amdgpu_xcp_dev_alloc(struct amdgpu_device *adev)
for (i = 1; i < MAX_XCP; i++) {
ret = amdgpu_xcp_drm_dev_alloc(&p_ddev);
- if (ret)
+ if (ret == -ENOSPC) {
+ dev_warn(adev->dev,
+ "Skip xcp node #%d when out of drm node resource.", i);
+ return 0;
+ } else if (ret) {
return ret;
+ }
/* Redirect all IOCTLs to the primary device */
adev->xcp_mgr->xcp[i].rdev = p_ddev->render->dev;
@@ -328,6 +333,9 @@ int amdgpu_xcp_dev_register(struct amdgpu_device *adev,
return 0;
for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
ret = drm_dev_register(adev->xcp_mgr->xcp[i].ddev, ent->driver_data);
if (ret)
return ret;
@@ -345,6 +353,9 @@ void amdgpu_xcp_dev_unplug(struct amdgpu_device *adev)
return;
for (i = 1; i < MAX_XCP; i++) {
+ if (!adev->xcp_mgr->xcp[i].ddev)
+ break;
+
p_ddev = adev->xcp_mgr->xcp[i].ddev;
drm_dev_unplug(p_ddev);
p_ddev->render->dev = adev->xcp_mgr->xcp[i].rdev;
@@ -363,7 +374,7 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev,
if (!adev->xcp_mgr)
return 0;
- fpriv->xcp_id = ~0;
+ fpriv->xcp_id = AMDGPU_XCP_NO_PARTITION;
for (i = 0; i < MAX_XCP; ++i) {
if (!adev->xcp_mgr->xcp[i].ddev)
break;
@@ -381,7 +392,7 @@ int amdgpu_xcp_open_device(struct amdgpu_device *adev,
}
}
- fpriv->vm.mem_id = fpriv->xcp_id == ~0 ? -1 :
+ fpriv->vm.mem_id = fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION ? -1 :
adev->xcp_mgr->xcp[fpriv->xcp_id].mem_id;
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
index 0f8026d64ea5..9a1036aeec2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.h
@@ -37,6 +37,8 @@
#define AMDGPU_XCP_FL_NONE 0
#define AMDGPU_XCP_FL_LOCKED (1 << 0)
+#define AMDGPU_XCP_NO_PARTITION (~0)
+
struct amdgpu_fpriv;
enum AMDGPU_XCP_IP_BLOCK {
diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
index 16471b81a1f5..72b629a78c62 100644
--- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram_reg_init.c
@@ -68,7 +68,7 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev,
enum AMDGPU_XCP_IP_BLOCK ip_blk;
uint32_t inst_mask;
- ring->xcp_id = ~0;
+ ring->xcp_id = AMDGPU_XCP_NO_PARTITION;
if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE)
return;
@@ -177,7 +177,7 @@ static int aqua_vanjaram_select_scheds(
u32 sel_xcp_id;
int i;
- if (fpriv->xcp_id == ~0) {
+ if (fpriv->xcp_id == AMDGPU_XCP_NO_PARTITION) {
u32 least_ref_cnt = ~0;
fpriv->xcp_id = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 3a7af59e83ca..0451533ddde4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -471,8 +471,12 @@ static void gfx_v11_0_check_fw_cp_gfx_shadow(struct amdgpu_device *adev)
case IP_VERSION(11, 0, 3):
if ((adev->gfx.me_fw_version >= 1505) &&
(adev->gfx.pfp_fw_version >= 1600) &&
- (adev->gfx.mec_fw_version >= 512))
- adev->gfx.cp_gfx_shadow = true;
+ (adev->gfx.mec_fw_version >= 512)) {
+ if (amdgpu_sriov_vf(adev))
+ adev->gfx.cp_gfx_shadow = true;
+ else
+ adev->gfx.cp_gfx_shadow = false;
+ }
break;
default:
adev->gfx.cp_gfx_shadow = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 9e3b835bdbb2..4f883b94f98e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -46,6 +46,7 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin");
#define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
#define GOLDEN_GB_ADDR_CONFIG 0x2a114042
+#define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301
struct amdgpu_gfx_ras gfx_v9_4_3_ras;
@@ -1736,7 +1737,7 @@ static int gfx_v9_4_3_xcc_q_fini_register(struct amdgpu_ring *ring,
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IQ_TIMER, 0);
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_IB_CONTROL, 0);
- WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, 0);
+ WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PERSISTENT_STATE, CP_HQD_PERSISTENT_STATE_DEFAULT);
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0x40000000);
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_DOORBELL_CONTROL, 0);
WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regCP_HQD_PQ_RPTR, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
index 4dabf910334b..d9f14dc55998 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c
@@ -402,18 +402,15 @@ static void gfxhub_v1_2_xcc_program_invalidation(struct amdgpu_device *adev,
static int gfxhub_v1_2_xcc_gart_enable(struct amdgpu_device *adev,
uint32_t xcc_mask)
{
- uint32_t tmp_mask;
int i;
- tmp_mask = xcc_mask;
/*
* MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, because they are
* VF copy registers so vbios post doesn't program them, for
* SRIOV driver need to program them
*/
if (amdgpu_sriov_vf(adev)) {
- for_each_inst(i, tmp_mask) {
- i = ffs(tmp_mask) - 1;
+ for_each_inst(i, xcc_mask) {
WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_BASE,
adev->gmc.vram_start >> 24);
WREG32_SOC15_RLC(GC, GET_INST(GC, i), regMC_VM_FB_LOCATION_TOP,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index f9cb0d2c89d1..af5685f4cb34 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -49,6 +49,7 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin");
/* For large FW files the time to complete can be very long */
#define USBC_PD_POLLING_LIMIT_S 240
@@ -136,14 +137,15 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp)
int ret;
int retry_loop;
+ /* Wait for bootloader to signify that it is ready having bit 31 of
+ * C2PMSG_35 set to 1. All other bits are expected to be cleared.
+ * If there is an error in processing command, bits[7:0] will be set.
+ * This is applicable for PSP v13.0.6 and newer.
+ */
for (retry_loop = 0; retry_loop < 10; retry_loop++) {
- /* Wait for bootloader to signify that is
- ready having bit 31 of C2PMSG_35 set to 1 */
- ret = psp_wait_for(psp,
- SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
- 0x80000000,
- 0x80000000,
- false);
+ ret = psp_wait_for(
+ psp, SOC15_REG_OFFSET(MP0, 0, regMP0_SMN_C2PMSG_35),
+ 0x80000000, 0xffffffff, false);
if (ret == 0)
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index 49f40d9f16e8..f5a6f562e2a8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -1543,11 +1543,7 @@ static bool kfd_ignore_crat(void)
if (ignore_crat)
return true;
-#ifndef KFD_SUPPORT_IOMMU_V2
ret = true;
-#else
- ret = false;
-#endif
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index fff3ccc04fa9..9766076e9ec4 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -302,8 +302,7 @@ static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
if (!q)
return 0;
- if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
- KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
+ if (!kfd_dbg_has_cwsr_workaround(q->device))
return 0;
if (enable && q->properties.is_user_cu_masked)
@@ -349,7 +348,7 @@ int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
{
uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd->spi_dbg_launch_mode;
uint32_t flags = pdd->process->dbg_flags;
- bool sq_trap_en = !!spi_dbg_cntl;
+ bool sq_trap_en = !!spi_dbg_cntl || !kfd_dbg_has_cwsr_workaround(pdd->dev);
if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
return 0;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
index a289e59ceb79..662a13a0d582 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
@@ -100,6 +100,12 @@ static inline bool kfd_dbg_is_rlc_restore_supported(struct kfd_node *dev)
KFD_GC_VERSION(dev) == IP_VERSION(10, 1, 1));
}
+static inline bool kfd_dbg_has_cwsr_workaround(struct kfd_node *dev)
+{
+ return KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0) &&
+ KFD_GC_VERSION(dev) <= IP_VERSION(11, 0, 3);
+}
+
static inline bool kfd_dbg_has_gws_support(struct kfd_node *dev)
{
if ((KFD_GC_VERSION(dev) == IP_VERSION(9, 0, 1)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 0b3dc754e06b..a53e0757fe64 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -194,11 +194,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
kfd_device_info_set_event_interrupt_class(kfd);
- /* Raven */
- if (gc_version == IP_VERSION(9, 1, 0) ||
- gc_version == IP_VERSION(9, 2, 2))
- kfd->device_info.needs_iommu_device = true;
-
if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
if (gc_version == IP_VERSION(10, 3, 6))
@@ -233,10 +228,6 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
asic_type != CHIP_TONGA)
kfd->device_info.supports_cwsr = true;
- if (asic_type == CHIP_KAVERI ||
- asic_type == CHIP_CARRIZO)
- kfd->device_info.needs_iommu_device = true;
-
if (asic_type != CHIP_HAWAII && !vf)
kfd->device_info.needs_pci_atomics = true;
}
@@ -249,7 +240,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
uint32_t gfx_target_version = 0;
switch (adev->asic_type) {
-#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
gfx_target_version = 70000;
@@ -262,7 +252,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v8_kfd2kgd;
break;
-#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_HAWAII:
gfx_target_version = 70001;
@@ -298,7 +287,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
gfx_target_version = 90000;
f2g = &gfx_v9_kfd2kgd;
break;
-#ifdef KFD_SUPPORT_IOMMU_V2
/* Raven */
case IP_VERSION(9, 1, 0):
case IP_VERSION(9, 2, 2):
@@ -306,7 +294,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
if (!vf)
f2g = &gfx_v9_kfd2kgd;
break;
-#endif
/* Vega12 */
case IP_VERSION(9, 2, 1):
gfx_target_version = 90004;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f515cb8f30ca..01192f5abe46 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -226,8 +226,7 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
queue_input.paging = false;
queue_input.tba_addr = qpd->tba_addr;
queue_input.tma_addr = qpd->tma_addr;
- queue_input.trap_en = KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
- KFD_GC_VERSION(q->device) > IP_VERSION(11, 0, 3);
+ queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
queue_input.skip_process_ctx_clear = qpd->pqm->process->debug_trap_enabled;
queue_type = convert_to_mes_queue_type(q->properties.type);
@@ -1806,8 +1805,7 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
*/
q->properties.is_evicted = !!qpd->evicted;
q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled &&
- KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0) &&
- KFD_GC_VERSION(q->device) <= IP_VERSION(11, 0, 3);
+ kfd_dbg_has_cwsr_workaround(q->device);
if (qd)
mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
@@ -2540,18 +2538,12 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
}
switch (dev->adev->asic_type) {
- case CHIP_CARRIZO:
- device_queue_manager_init_vi(&dqm->asic_ops);
- break;
-
case CHIP_KAVERI:
- device_queue_manager_init_cik(&dqm->asic_ops);
- break;
-
case CHIP_HAWAII:
device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
break;
+ case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
case CHIP_POLARIS10:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 61fc62f3e003..4a17bb7c7b27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1965,7 +1965,14 @@ int kfd_topology_add_device(struct kfd_node *gpu)
const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
gpu_id = kfd_generate_gpu_id(gpu);
- pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ if (gpu->xcp && !gpu->xcp->ddev) {
+ dev_warn(gpu->adev->dev,
+ "Won't add GPU (ID: 0x%x) to topology since it has no drm node assigned.",
+ gpu_id);
+ return 0;
+ } else {
+ pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
+ }
/* Check to see if this gpu device exists in the topology_device_list.
* If so, assign the gpu to that device,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index ff0a217b9d56..e5554a36e8c8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -424,12 +424,12 @@ static void dm_pflip_high_irq(void *interrupt_params)
spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
- if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED){
- DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d !=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p] \n",
- amdgpu_crtc->pflip_status,
- AMDGPU_FLIP_SUBMITTED,
- amdgpu_crtc->crtc_id,
- amdgpu_crtc);
+ if (amdgpu_crtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ DC_LOG_PFLIP("amdgpu_crtc->pflip_status = %d !=AMDGPU_FLIP_SUBMITTED(%d) on crtc:%d[%p]\n",
+ amdgpu_crtc->pflip_status,
+ AMDGPU_FLIP_SUBMITTED,
+ amdgpu_crtc->crtc_id,
+ amdgpu_crtc);
spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
return;
}
@@ -883,7 +883,7 @@ static int dm_set_powergating_state(void *handle,
}
/* Prototypes of private functions */
-static int dm_early_init(void* handle);
+static int dm_early_init(void *handle);
/* Allocate memory for FBC compressed data */
static void amdgpu_dm_fbc_init(struct drm_connector *connector)
@@ -1282,7 +1282,7 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
pa_config->system_aperture.start_addr = (uint64_t)logical_addr_low << 18;
pa_config->system_aperture.end_addr = (uint64_t)logical_addr_high << 18;
- pa_config->system_aperture.agp_base = (uint64_t)agp_base << 24 ;
+ pa_config->system_aperture.agp_base = (uint64_t)agp_base << 24;
pa_config->system_aperture.agp_bot = (uint64_t)agp_bot << 24;
pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24;
@@ -1347,6 +1347,15 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
if (amdgpu_in_reset(adev))
goto skip;
+ if (offload_work->data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY ||
+ offload_work->data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
+ dm_handle_mst_sideband_msg_ready_event(&aconnector->mst_mgr, DOWN_OR_UP_MSG_RDY_EVENT);
+ spin_lock_irqsave(&offload_work->offload_wq->offload_lock, flags);
+ offload_work->offload_wq->is_handling_mst_msg_rdy_event = false;
+ spin_unlock_irqrestore(&offload_work->offload_wq->offload_lock, flags);
+ goto skip;
+ }
+
mutex_lock(&adev->dm.dc_lock);
if (offload_work->data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
dc_link_dp_handle_automated_test(dc_link);
@@ -1365,8 +1374,7 @@ static void dm_handle_hpd_rx_offload_work(struct work_struct *work)
DP_TEST_RESPONSE,
&test_response.raw,
sizeof(test_response));
- }
- else if ((dc_link->connector_signal != SIGNAL_TYPE_EDP) &&
+ } else if ((dc_link->connector_signal != SIGNAL_TYPE_EDP) &&
dc_link_check_link_loss_status(dc_link, &offload_work->data) &&
dc_link_dp_allow_hpd_rx_irq(dc_link)) {
/* offload_work->data is from handle_hpd_rx_irq->
@@ -1554,7 +1562,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
mutex_init(&adev->dm.dc_lock);
mutex_init(&adev->dm.audio_lock);
- if(amdgpu_dm_irq_init(adev)) {
+ if (amdgpu_dm_irq_init(adev)) {
DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
goto error;
}
@@ -1630,9 +1638,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
}
break;
}
- if (init_data.flags.gpu_vm_support &&
- (amdgpu_sg_display == 0))
- init_data.flags.gpu_vm_support = false;
+ if (init_data.flags.gpu_vm_support)
+ init_data.flags.gpu_vm_support = amdgpu_sg_display_supported(adev);
if (init_data.flags.gpu_vm_support)
adev->mode_info.gpu_vm_support = true;
@@ -1696,9 +1703,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
if (amdgpu_dc_debug_mask & DC_DISABLE_STUTTER)
adev->dm.dc->debug.disable_stutter = true;
- if (amdgpu_dc_debug_mask & DC_DISABLE_DSC) {
+ if (amdgpu_dc_debug_mask & DC_DISABLE_DSC)
adev->dm.dc->debug.disable_dsc = true;
- }
if (amdgpu_dc_debug_mask & DC_DISABLE_CLOCK_GATING)
adev->dm.dc->debug.disable_clock_gate = true;
@@ -1942,8 +1948,6 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
mutex_destroy(&adev->dm.audio_lock);
mutex_destroy(&adev->dm.dc_lock);
mutex_destroy(&adev->dm.dpia_aux_lock);
-
- return;
}
static int load_dmcu_fw(struct amdgpu_device *adev)
@@ -1952,7 +1956,7 @@ static int load_dmcu_fw(struct amdgpu_device *adev)
int r;
const struct dmcu_firmware_header_v1_0 *hdr;
- switch(adev->asic_type) {
+ switch (adev->asic_type) {
#if defined(CONFIG_DRM_AMD_DC_SI)
case CHIP_TAHITI:
case CHIP_PITCAIRN:
@@ -2709,7 +2713,7 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
struct dc_scaling_info scaling_infos[MAX_SURFACES];
struct dc_flip_addrs flip_addrs[MAX_SURFACES];
struct dc_stream_update stream_update;
- } * bundle;
+ } *bundle;
int k, m;
bundle = kzalloc(sizeof(*bundle), GFP_KERNEL);
@@ -2739,8 +2743,6 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
cleanup:
kfree(bundle);
-
- return;
}
static int dm_resume(void *handle)
@@ -2954,8 +2956,7 @@ static const struct amd_ip_funcs amdgpu_dm_funcs = {
.set_powergating_state = dm_set_powergating_state,
};
-const struct amdgpu_ip_block_version dm_ip_block =
-{
+const struct amdgpu_ip_block_version dm_ip_block = {
.type = AMD_IP_BLOCK_TYPE_DCE,
.major = 1,
.minor = 0,
@@ -3000,9 +3001,12 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
caps->aux_support = false;
- if (caps->ext_caps->bits.oled == 1 /*||
- caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
- caps->ext_caps->bits.hdr_aux_backlight_control == 1*/)
+ if (caps->ext_caps->bits.oled == 1
+ /*
+ * ||
+ * caps->ext_caps->bits.sdr_aux_backlight_control == 1 ||
+ * caps->ext_caps->bits.hdr_aux_backlight_control == 1
+ */)
caps->aux_support = true;
if (amdgpu_backlight == 0)
@@ -3236,86 +3240,6 @@ static void handle_hpd_irq(void *param)
}
-static void dm_handle_mst_sideband_msg(struct amdgpu_dm_connector *aconnector)
-{
- u8 esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
- u8 dret;
- bool new_irq_handled = false;
- int dpcd_addr;
- int dpcd_bytes_to_read;
-
- const int max_process_count = 30;
- int process_count = 0;
-
- const struct dc_link_status *link_status = dc_link_get_status(aconnector->dc_link);
-
- if (link_status->dpcd_caps->dpcd_rev.raw < 0x12) {
- dpcd_bytes_to_read = DP_LANE0_1_STATUS - DP_SINK_COUNT;
- /* DPCD 0x200 - 0x201 for downstream IRQ */
- dpcd_addr = DP_SINK_COUNT;
- } else {
- dpcd_bytes_to_read = DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI;
- /* DPCD 0x2002 - 0x2005 for downstream IRQ */
- dpcd_addr = DP_SINK_COUNT_ESI;
- }
-
- dret = drm_dp_dpcd_read(
- &aconnector->dm_dp_aux.aux,
- dpcd_addr,
- esi,
- dpcd_bytes_to_read);
-
- while (dret == dpcd_bytes_to_read &&
- process_count < max_process_count) {
- u8 ack[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = {};
- u8 retry;
- dret = 0;
-
- process_count++;
-
- DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], esi[2]);
- /* handle HPD short pulse irq */
- if (aconnector->mst_mgr.mst_state)
- drm_dp_mst_hpd_irq_handle_event(&aconnector->mst_mgr,
- esi,
- ack,
- &new_irq_handled);
-
- if (new_irq_handled) {
- /* ACK at DPCD to notify down stream */
- for (retry = 0; retry < 3; retry++) {
- ssize_t wret;
-
- wret = drm_dp_dpcd_writeb(&aconnector->dm_dp_aux.aux,
- dpcd_addr + 1,
- ack[1]);
- if (wret == 1)
- break;
- }
-
- if (retry == 3) {
- DRM_ERROR("Failed to ack MST event.\n");
- return;
- }
-
- drm_dp_mst_hpd_irq_send_new_request(&aconnector->mst_mgr);
- /* check if there is new irq to be handled */
- dret = drm_dp_dpcd_read(
- &aconnector->dm_dp_aux.aux,
- dpcd_addr,
- esi,
- dpcd_bytes_to_read);
-
- new_irq_handled = false;
- } else {
- break;
- }
- }
-
- if (process_count == max_process_count)
- DRM_DEBUG_DRIVER("Loop exceeded max iterations\n");
-}
-
static void schedule_hpd_rx_offload_work(struct hpd_rx_irq_offload_work_queue *offload_wq,
union hpd_irq_data hpd_irq_data)
{
@@ -3377,7 +3301,23 @@ static void handle_hpd_rx_irq(void *param)
if (dc_link_dp_allow_hpd_rx_irq(dc_link)) {
if (hpd_irq_data.bytes.device_service_irq.bits.UP_REQ_MSG_RDY ||
hpd_irq_data.bytes.device_service_irq.bits.DOWN_REP_MSG_RDY) {
- dm_handle_mst_sideband_msg(aconnector);
+ bool skip = false;
+
+ /*
+ * DOWN_REP_MSG_RDY is also handled by polling method
+ * mgr->cbs->poll_hpd_irq()
+ */
+ spin_lock(&offload_wq->offload_lock);
+ skip = offload_wq->is_handling_mst_msg_rdy_event;
+
+ if (!skip)
+ offload_wq->is_handling_mst_msg_rdy_event = true;
+
+ spin_unlock(&offload_wq->offload_lock);
+
+ if (!skip)
+ schedule_hpd_rx_offload_work(offload_wq, hpd_irq_data);
+
goto out;
}
@@ -3468,7 +3408,7 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
aconnector = to_amdgpu_dm_connector(connector);
dc_link = aconnector->dc_link;
- if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd) {
+ if (dc_link->irq_source_hpd != DC_IRQ_SOURCE_INVALID) {
int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
int_params.irq_source = dc_link->irq_source_hpd;
@@ -3477,7 +3417,7 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
(void *) aconnector);
}
- if (DC_IRQ_SOURCE_INVALID != dc_link->irq_source_hpd_rx) {
+ if (dc_link->irq_source_hpd_rx != DC_IRQ_SOURCE_INVALID) {
/* Also register for DP short pulse (hpd_rx). */
int_params.int_context = INTERRUPT_LOW_IRQ_CONTEXT;
@@ -3486,11 +3426,11 @@ static void register_hpd_handlers(struct amdgpu_device *adev)
amdgpu_dm_irq_register_interrupt(adev, &int_params,
handle_hpd_rx_irq,
(void *) aconnector);
-
- if (adev->dm.hpd_rx_offload_wq)
- adev->dm.hpd_rx_offload_wq[dc_link->link_index].aconnector =
- aconnector;
}
+
+ if (adev->dm.hpd_rx_offload_wq)
+ adev->dm.hpd_rx_offload_wq[connector->index].aconnector =
+ aconnector;
}
}
@@ -3503,7 +3443,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
struct dc_interrupt_params int_params = {0};
int r;
int i;
- unsigned client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ unsigned int client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
int_params.requested_polarity = INTERRUPT_POLARITY_DEFAULT;
int_params.current_polarity = INTERRUPT_POLARITY_DEFAULT;
@@ -3517,11 +3457,12 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
* Base driver will call amdgpu_dm_irq_handler() for ALL interrupts
* coming from DC hardware.
* amdgpu_dm_irq_handler() will re-direct the interrupt to DC
- * for acknowledging and handling. */
+ * for acknowledging and handling.
+ */
/* Use VBLANK interrupt */
for (i = 0; i < adev->mode_info.num_crtc; i++) {
- r = amdgpu_irq_add_id(adev, client_id, i+1 , &adev->crtc_irq);
+ r = amdgpu_irq_add_id(adev, client_id, i + 1, &adev->crtc_irq);
if (r) {
DRM_ERROR("Failed to add crtc irq id!\n");
return r;
@@ -3529,7 +3470,7 @@ static int dce60_register_irq_handlers(struct amdgpu_device *adev)
int_params.int_context = INTERRUPT_HIGH_IRQ_CONTEXT;
int_params.irq_source =
- dc_interrupt_to_irq_source(dc, i+1 , 0);
+ dc_interrupt_to_irq_source(dc, i + 1, 0);
c_irq_params = &adev->dm.vblank_params[int_params.irq_source - DC_IRQ_SOURCE_VBLANK1];
@@ -3585,7 +3526,7 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
struct dc_interrupt_params int_params = {0};
int r;
int i;
- unsigned client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
+ unsigned int client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
if (adev->family >= AMDGPU_FAMILY_AI)
client_id = SOC15_IH_CLIENTID_DCE;
@@ -3602,7 +3543,8 @@ static int dce110_register_irq_handlers(struct amdgpu_device *adev)
* Base driver will call amdgpu_dm_irq_handler() for ALL interrupts
* coming from DC hardware.
* amdgpu_dm_irq_handler() will re-direct the interrupt to DC
- * for acknowledging and handling. */
+ * for acknowledging and handling.
+ */
/* Use VBLANK interrupt */
for (i = VISLANDS30_IV_SRCID_D1_VERTICAL_INTERRUPT0; i <= VISLANDS30_IV_SRCID_D6_VERTICAL_INTERRUPT0; i++) {
@@ -4049,7 +3991,7 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm,
}
static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
- unsigned *min, unsigned *max)
+ unsigned int *min, unsigned int *max)
{
if (!caps)
return 0;
@@ -4069,7 +4011,7 @@ static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *caps,
uint32_t brightness)
{
- unsigned min, max;
+ unsigned int min, max;
if (!get_brightness_range(caps, &min, &max))
return brightness;
@@ -4082,7 +4024,7 @@ static u32 convert_brightness_from_user(const struct amdgpu_dm_backlight_caps *c
static u32 convert_brightness_to_user(const struct amdgpu_dm_backlight_caps *caps,
uint32_t brightness)
{
- unsigned min, max;
+ unsigned int min, max;
if (!get_brightness_range(caps, &min, &max))
return brightness;
@@ -4562,7 +4504,6 @@ fail:
static void amdgpu_dm_destroy_drm_device(struct amdgpu_display_manager *dm)
{
drm_atomic_private_obj_fini(&dm->atomic_obj);
- return;
}
/******************************************************************************
@@ -5394,6 +5335,7 @@ static bool adjust_colour_depth_from_display_info(
{
enum dc_color_depth depth = timing_out->display_color_depth;
int normalized_clk;
+
do {
normalized_clk = timing_out->pix_clk_100hz / 10;
/* YCbCr 4:2:0 requires additional adjustment of 1/2 */
@@ -5609,6 +5551,7 @@ create_fake_sink(struct amdgpu_dm_connector *aconnector)
{
struct dc_sink_init_data sink_init_data = { 0 };
struct dc_sink *sink = NULL;
+
sink_init_data.link = aconnector->dc_link;
sink_init_data.sink_signal = aconnector->dc_link->connector_signal;
@@ -5732,7 +5675,7 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
return &aconnector->freesync_vid_base;
/* Find the preferred mode */
- list_for_each_entry (m, list_head, head) {
+ list_for_each_entry(m, list_head, head) {
if (m->type & DRM_MODE_TYPE_PREFERRED) {
m_pref = m;
break;
@@ -5756,7 +5699,7 @@ get_highest_refresh_rate_mode(struct amdgpu_dm_connector *aconnector,
* For some monitors, preferred mode is not the mode with highest
* supported refresh rate.
*/
- list_for_each_entry (m, list_head, head) {
+ list_for_each_entry(m, list_head, head) {
current_refresh = drm_mode_vrefresh(m);
if (m->hdisplay == m_pref->hdisplay &&
@@ -6028,7 +5971,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
* This may not be an error, the use case is when we have no
* usermode calls to reset and set mode upon hotplug. In this
* case, we call set mode ourselves to restore the previous mode
- * and the modelist may not be filled in in time.
+ * and the modelist may not be filled in time.
*/
DRM_DEBUG_DRIVER("No preferred mode found\n");
} else {
@@ -6051,9 +5994,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
drm_mode_set_crtcinfo(&mode, 0);
/*
- * If scaling is enabled and refresh rate didn't change
- * we copy the vic and polarities of the old timings
- */
+ * If scaling is enabled and refresh rate didn't change
+ * we copy the vic and polarities of the old timings
+ */
if (!scale || mode_refresh != preferred_refresh)
fill_stream_properties_from_drm_display_mode(
stream, &mode, &aconnector->base, con_state, NULL,
@@ -6817,6 +6760,7 @@ static int dm_encoder_helper_atomic_check(struct drm_encoder *encoder,
if (!state->duplicated) {
int max_bpc = conn_state->max_requested_bpc;
+
is_y420 = drm_mode_is_420_also(&connector->display_info, adjusted_mode) &&
aconnector->force_yuv420_output;
color_depth = convert_color_depth_from_display_info(connector,
@@ -7135,7 +7079,7 @@ static bool is_duplicate_mode(struct amdgpu_dm_connector *aconnector,
{
struct drm_display_mode *m;
- list_for_each_entry (m, &aconnector->base.probed_modes, head) {
+ list_for_each_entry(m, &aconnector->base.probed_modes, head) {
if (drm_mode_equal(m, mode))
return true;
}
@@ -7295,6 +7239,7 @@ void amdgpu_dm_connector_init_helper(struct amdgpu_display_manager *dm,
aconnector->as_type = ADAPTIVE_SYNC_TYPE_NONE;
memset(&aconnector->vsdb_info, 0, sizeof(aconnector->vsdb_info));
mutex_init(&aconnector->hpd_lock);
+ mutex_init(&aconnector->handle_mst_msg_ready);
/*
* configure support HPD hot plug connector_>polled default value is 0
@@ -7454,7 +7399,6 @@ static int amdgpu_dm_connector_init(struct amdgpu_display_manager *dm,
link->priv = aconnector;
- DRM_DEBUG_DRIVER("%s()\n", __func__);
i2c = create_i2c(link->ddc, link->link_index, &res);
if (!i2c) {
@@ -8125,7 +8069,15 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* Only allow immediate flips for fast updates that don't
* change memory domain, FB pitch, DCC state, rotation or
* mirroring.
+ *
+ * dm_crtc_helper_atomic_check() only accepts async flips with
+ * fast updates.
*/
+ if (crtc->state->async_flip &&
+ acrtc_state->update_type != UPDATE_TYPE_FAST)
+ drm_warn_once(state->dev,
+ "[PLANE:%d:%s] async flip with non-fast update\n",
+ plane->base.id, plane->name);
bundle->flip_addrs[planes_count].flip_immediate =
crtc->state->async_flip &&
acrtc_state->update_type == UPDATE_TYPE_FAST &&
@@ -8168,8 +8120,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
* DRI3/Present extension with defined target_msc.
*/
last_flip_vblank = amdgpu_get_vblank_counter_kms(pcrtc);
- }
- else {
+ } else {
/* For variable refresh rate mode only:
* Get vblank of last completed flip to avoid > 1 vrr
* flips per video frame by use of throttling, but allow
@@ -8502,8 +8453,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
dc_resource_state_copy_construct_current(dm->dc, dc_state);
}
- for_each_oldnew_crtc_in_state (state, crtc, old_crtc_state,
- new_crtc_state, i) {
+ for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state,
+ new_crtc_state, i) {
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
@@ -8526,9 +8477,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
dm_old_crtc_state = to_dm_crtc_state(old_crtc_state);
drm_dbg_state(state->dev,
- "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, "
- "planes_changed:%d, mode_changed:%d,active_changed:%d,"
- "connectors_changed:%d\n",
+ "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, planes_changed:%d, mode_changed:%d,active_changed:%d,connectors_changed:%d\n",
acrtc->crtc_id,
new_crtc_state->enable,
new_crtc_state->active,
@@ -9104,8 +9053,8 @@ static int do_aquire_global_lock(struct drm_device *dev,
&commit->flip_done, 10*HZ);
if (ret == 0)
- DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done "
- "timed out\n", crtc->base.id, crtc->name);
+ DRM_ERROR("[CRTC:%d:%s] hw_done or flip_done timed out\n",
+ crtc->base.id, crtc->name);
drm_crtc_commit_put(commit);
}
@@ -9190,7 +9139,8 @@ is_timing_unchanged_for_freesync(struct drm_crtc_state *old_crtc_state,
return false;
}
-static void set_freesync_fixed_config(struct dm_crtc_state *dm_new_crtc_state) {
+static void set_freesync_fixed_config(struct dm_crtc_state *dm_new_crtc_state)
+{
u64 num, den, res;
struct drm_crtc_state *new_crtc_state = &dm_new_crtc_state->base;
@@ -9312,9 +9262,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
goto skip_modeset;
drm_dbg_state(state->dev,
- "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, "
- "planes_changed:%d, mode_changed:%d,active_changed:%d,"
- "connectors_changed:%d\n",
+ "amdgpu_crtc id:%d crtc_state_flags: enable:%d, active:%d, planes_changed:%d, mode_changed:%d,active_changed:%d,connectors_changed:%d\n",
acrtc->crtc_id,
new_crtc_state->enable,
new_crtc_state->active,
@@ -9343,8 +9291,7 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
old_crtc_state)) {
new_crtc_state->mode_changed = false;
DRM_DEBUG_DRIVER(
- "Mode change not required for front porch change, "
- "setting mode_changed to %d",
+ "Mode change not required for front porch change, setting mode_changed to %d",
new_crtc_state->mode_changed);
set_freesync_fixed_config(dm_new_crtc_state);
@@ -9356,9 +9303,8 @@ static int dm_update_crtc_state(struct amdgpu_display_manager *dm,
struct drm_display_mode *high_mode;
high_mode = get_highest_refresh_rate_mode(aconnector, false);
- if (!drm_mode_equal(&new_crtc_state->mode, high_mode)) {
+ if (!drm_mode_equal(&new_crtc_state->mode, high_mode))
set_freesync_fixed_config(dm_new_crtc_state);
- }
}
ret = dm_atomic_get_state(state, &dm_state);
@@ -9526,6 +9472,7 @@ static bool should_reset_plane(struct drm_atomic_state *state,
*/
for_each_oldnew_plane_in_state(state, other, old_other_state, new_other_state, i) {
struct amdgpu_framebuffer *old_afb, *new_afb;
+
if (other->type == DRM_PLANE_TYPE_CURSOR)
continue;
@@ -9624,11 +9571,12 @@ static int dm_check_cursor_fb(struct amdgpu_crtc *new_acrtc,
}
/* Core DRM takes care of checking FB modifiers, so we only need to
- * check tiling flags when the FB doesn't have a modifier. */
+ * check tiling flags when the FB doesn't have a modifier.
+ */
if (!(fb->flags & DRM_MODE_FB_MODIFIERS)) {
if (adev->family < AMDGPU_FAMILY_AI) {
linear = AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_2D_TILED_THIN1 &&
- AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_1D_TILED_THIN1 &&
+ AMDGPU_TILING_GET(afb->tiling_flags, ARRAY_MODE) != DC_ARRAY_1D_TILED_THIN1 &&
AMDGPU_TILING_GET(afb->tiling_flags, MICRO_TILE_MODE) == 0;
} else {
linear = AMDGPU_TILING_GET(afb->tiling_flags, SWIZZLE_MODE) == 0;
@@ -9850,12 +9798,12 @@ static int dm_check_crtc_cursor(struct drm_atomic_state *state,
/* On DCE and DCN there is no dedicated hardware cursor plane. We get a
* cursor per pipe but it's going to inherit the scaling and
* positioning from the underlying pipe. Check the cursor plane's
- * blending properties match the underlying planes'. */
+ * blending properties match the underlying planes'.
+ */
new_cursor_state = drm_atomic_get_new_plane_state(state, cursor);
- if (!new_cursor_state || !new_cursor_state->fb) {
+ if (!new_cursor_state || !new_cursor_state->fb)
return 0;
- }
dm_get_oriented_plane_size(new_cursor_state, &cursor_src_w, &cursor_src_h);
cursor_scale_w = new_cursor_state->crtc_w * 1000 / cursor_src_w;
@@ -9900,6 +9848,7 @@ static int add_affected_mst_dsc_crtcs(struct drm_atomic_state *state, struct drm
struct drm_connector_state *conn_state, *old_conn_state;
struct amdgpu_dm_connector *aconnector = NULL;
int i;
+
for_each_oldnew_connector_in_state(state, connector, old_conn_state, conn_state, i) {
if (!conn_state->crtc)
conn_state = old_conn_state;
@@ -10334,7 +10283,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
}
/* Store the overall update type for use later in atomic check. */
- for_each_new_crtc_in_state (state, crtc, new_crtc_state, i) {
+ for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
struct dm_crtc_state *dm_new_crtc_state =
to_dm_crtc_state(new_crtc_state);
@@ -10356,7 +10305,7 @@ fail:
else if (ret == -EINTR || ret == -EAGAIN || ret == -ERESTARTSYS)
DRM_DEBUG_DRIVER("Atomic check stopped due to signal.\n");
else
- DRM_DEBUG_DRIVER("Atomic check failed with err: %d \n", ret);
+ DRM_DEBUG_DRIVER("Atomic check failed with err: %d\n", ret);
trace_amdgpu_dm_atomic_check_finish(state, ret);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 4561f55afa99..9fb5bb3a75a7 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -195,6 +195,11 @@ struct hpd_rx_irq_offload_work_queue {
*/
bool is_handling_link_loss;
/**
+ * @is_handling_mst_msg_rdy_event: Used to prevent inserting mst message
+ * ready event when we're already handling mst message ready event
+ */
+ bool is_handling_mst_msg_rdy_event;
+ /**
* @aconnector: The aconnector that this work queue is attached to
*/
struct amdgpu_dm_connector *aconnector;
@@ -638,6 +643,8 @@ struct amdgpu_dm_connector {
struct drm_dp_mst_port *mst_output_port;
struct amdgpu_dm_connector *mst_root;
struct drm_dp_aux *dsc_aux;
+ struct mutex handle_mst_msg_ready;
+
/* TODO see if we can merge with ddc_bus or make a dm_connector */
struct amdgpu_i2c_adapter *i2c;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 440fc0869a34..30d4c6fd95f5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -398,6 +398,18 @@ static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
return -EINVAL;
}
+ /*
+ * Only allow async flips for fast updates that don't change the FB
+ * pitch, the DCC state, rotation, etc.
+ */
+ if (crtc_state->async_flip &&
+ dm_crtc_state->update_type != UPDATE_TYPE_FAST) {
+ drm_dbg_atomic(crtc->dev,
+ "[CRTC:%d:%s] async flips are only supported for fast updates\n",
+ crtc->base.id, crtc->name);
+ return -EINVAL;
+ }
+
/* In some use cases, like reset, no stream is attached */
if (!dm_crtc_state->stream)
return 0;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 46d0a8f57e55..b885c39bd16b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -619,8 +619,118 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
return connector;
}
+void dm_handle_mst_sideband_msg_ready_event(
+ struct drm_dp_mst_topology_mgr *mgr,
+ enum mst_msg_ready_type msg_rdy_type)
+{
+ uint8_t esi[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = { 0 };
+ uint8_t dret;
+ bool new_irq_handled = false;
+ int dpcd_addr;
+ uint8_t dpcd_bytes_to_read;
+ const uint8_t max_process_count = 30;
+ uint8_t process_count = 0;
+ u8 retry;
+ struct amdgpu_dm_connector *aconnector =
+ container_of(mgr, struct amdgpu_dm_connector, mst_mgr);
+
+
+ const struct dc_link_status *link_status = dc_link_get_status(aconnector->dc_link);
+
+ if (link_status->dpcd_caps->dpcd_rev.raw < 0x12) {
+ dpcd_bytes_to_read = DP_LANE0_1_STATUS - DP_SINK_COUNT;
+ /* DPCD 0x200 - 0x201 for downstream IRQ */
+ dpcd_addr = DP_SINK_COUNT;
+ } else {
+ dpcd_bytes_to_read = DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI;
+ /* DPCD 0x2002 - 0x2005 for downstream IRQ */
+ dpcd_addr = DP_SINK_COUNT_ESI;
+ }
+
+ mutex_lock(&aconnector->handle_mst_msg_ready);
+
+ while (process_count < max_process_count) {
+ u8 ack[DP_PSR_ERROR_STATUS - DP_SINK_COUNT_ESI] = {};
+
+ process_count++;
+
+ dret = drm_dp_dpcd_read(
+ &aconnector->dm_dp_aux.aux,
+ dpcd_addr,
+ esi,
+ dpcd_bytes_to_read);
+
+ if (dret != dpcd_bytes_to_read) {
+ DRM_DEBUG_KMS("DPCD read and acked number is not as expected!");
+ break;
+ }
+
+ DRM_DEBUG_DRIVER("ESI %02x %02x %02x\n", esi[0], esi[1], esi[2]);
+
+ switch (msg_rdy_type) {
+ case DOWN_REP_MSG_RDY_EVENT:
+ /* Only handle DOWN_REP_MSG_RDY case*/
+ esi[1] &= DP_DOWN_REP_MSG_RDY;
+ break;
+ case UP_REQ_MSG_RDY_EVENT:
+ /* Only handle UP_REQ_MSG_RDY case*/
+ esi[1] &= DP_UP_REQ_MSG_RDY;
+ break;
+ default:
+ /* Handle both cases*/
+ esi[1] &= (DP_DOWN_REP_MSG_RDY | DP_UP_REQ_MSG_RDY);
+ break;
+ }
+
+ if (!esi[1])
+ break;
+
+ /* handle MST irq */
+ if (aconnector->mst_mgr.mst_state)
+ drm_dp_mst_hpd_irq_handle_event(&aconnector->mst_mgr,
+ esi,
+ ack,
+ &new_irq_handled);
+
+ if (new_irq_handled) {
+ /* ACK at DPCD to notify down stream */
+ for (retry = 0; retry < 3; retry++) {
+ ssize_t wret;
+
+ wret = drm_dp_dpcd_writeb(&aconnector->dm_dp_aux.aux,
+ dpcd_addr + 1,
+ ack[1]);
+ if (wret == 1)
+ break;
+ }
+
+ if (retry == 3) {
+ DRM_ERROR("Failed to ack MST event.\n");
+ break;
+ }
+
+ drm_dp_mst_hpd_irq_send_new_request(&aconnector->mst_mgr);
+
+ new_irq_handled = false;
+ } else {
+ break;
+ }
+ }
+
+ mutex_unlock(&aconnector->handle_mst_msg_ready);
+
+ if (process_count == max_process_count)
+ DRM_DEBUG_DRIVER("Loop exceeded max iterations\n");
+}
+
+static void dm_handle_mst_down_rep_msg_ready(struct drm_dp_mst_topology_mgr *mgr)
+{
+ dm_handle_mst_sideband_msg_ready_event(mgr, DOWN_REP_MSG_RDY_EVENT);
+}
+
static const struct drm_dp_mst_topology_cbs dm_mst_cbs = {
.add_connector = dm_dp_add_mst_connector,
+ .poll_hpd_irq = dm_handle_mst_down_rep_msg_ready,
};
void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
@@ -1210,7 +1320,7 @@ int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
if (computed_streams[i])
continue;
- if (!res_pool->funcs->remove_stream_from_ctx ||
+ if (res_pool->funcs->remove_stream_from_ctx &&
res_pool->funcs->remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index 1e4ede1e57ab..37c820ab0fdb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -49,6 +49,13 @@
#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031
#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000
+enum mst_msg_ready_type {
+ NONE_MSG_RDY_EVENT = 0,
+ DOWN_REP_MSG_RDY_EVENT = 1,
+ UP_REQ_MSG_RDY_EVENT = 2,
+ DOWN_OR_UP_MSG_RDY_EVENT = 3
+};
+
struct amdgpu_display_manager;
struct amdgpu_dm_connector;
@@ -61,6 +68,10 @@ void amdgpu_dm_initialize_dp_connector(struct amdgpu_display_manager *dm,
void
dm_dp_create_fake_mst_encoders(struct amdgpu_device *adev);
+void dm_handle_mst_sideband_msg_ready_event(
+ struct drm_dp_mst_topology_mgr *mgr,
+ enum mst_msg_ready_type msg_rdy_type);
+
struct dsc_mst_fairness_vars {
int pbn;
bool dsc_enabled;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index 7ccd96959256..3db4ef564b99 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -87,6 +87,11 @@ static int dcn31_get_active_display_cnt_wa(
stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
tmds_present = true;
+
+ /* Checking stream / link detection ensuring that PHY is active*/
+ if (dc_is_dp_signal(stream->signal) && !stream->dpms_off)
+ display_count++;
+
}
for (i = 0; i < dc->link_count; i++) {
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 6c9ca43d1040..6966420dfbac 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -777,7 +777,8 @@ void dce110_edp_wait_for_hpd_ready(
dal_gpio_destroy_irq(&hpd);
/* ensure that the panel is detected */
- ASSERT(edp_hpd_high);
+ if (!edp_hpd_high)
+ DC_LOG_DC("%s: wait timed out!\n", __func__);
}
void dce110_edp_power_control(
@@ -1792,10 +1793,13 @@ void dce110_enable_accelerated_mode(struct dc *dc, struct dc_state *context)
hws->funcs.edp_backlight_control(edp_link_with_sink, false);
}
/*resume from S3, no vbios posting, no need to power down again*/
+ clk_mgr_exit_optimized_pwr_state(dc, dc->clk_mgr);
+
power_down_all_hw_blocks(dc);
disable_vga_and_power_gate_all_controllers(dc);
if (edp_link_with_sink && !keep_edp_vdd_on)
dc->hwss.edp_power_control(edp_link_with_sink, false);
+ clk_mgr_optimize_pwr_state(dc, dc->clk_mgr);
}
bios_set_scratch_acc_mode_change(dc->ctx->dc_bios, 1);
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index a50309039d08..9834b75f1837 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -3278,7 +3278,8 @@ void dcn10_wait_for_mpcc_disconnect(
if (pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst]) {
struct hubp *hubp = get_hubp_by_inst(res_pool, mpcc_inst);
- if (pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg))
+ if (pipe_ctx->stream_res.tg &&
+ pipe_ctx->stream_res.tg->funcs->is_tg_enabled(pipe_ctx->stream_res.tg))
res_pool->mpc->funcs->wait_for_idle(res_pool->mpc, mpcc_inst);
pipe_ctx->stream_res.opp->mpcc_disconnect_pending[mpcc_inst] = false;
hubp->funcs->set_blank(hubp, true);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 4cc8de2627ce..9f2e24398cd7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -712,7 +712,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+ .pipe_split_policy = MPC_SPLIT_AVOID_MULT_DISP,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
index e5b7ef7422b8..50dc83404644 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dpp.c
@@ -357,8 +357,11 @@ void dpp3_set_cursor_attributes(
int cur_rom_en = 0;
if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
- color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA)
- cur_rom_en = 1;
+ color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) {
+ if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) {
+ cur_rom_en = 1;
+ }
+ }
REG_UPDATE_3(CURSOR0_CONTROL,
CUR0_MODE, color_format,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
index dfb8f62765f2..5bf4d0aa6230 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
@@ -215,7 +215,7 @@ void optc3_set_odm_bypass(struct timing_generator *optc,
optc1->opp_count = 1;
}
-static void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
+void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
struct dc_crtc_timing *timing)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
@@ -293,7 +293,7 @@ static void optc3_set_timing_double_buffer(struct timing_generator *optc, bool e
OTG_DRR_TIMING_DBUF_UPDATE_MODE, mode);
}
-static void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc)
+void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc)
{
struct optc *optc1 = DCN10TG_FROM_TG(optc);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
index fb06dc9a4893..d3a056c12b0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.h
@@ -351,6 +351,9 @@ void optc3_set_timing_db_mode(struct timing_generator *optc, bool enable);
void optc3_set_odm_bypass(struct timing_generator *optc,
const struct dc_crtc_timing *dc_crtc_timing);
+void optc3_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
+ struct dc_crtc_timing *timing);
+void optc3_wait_drr_doublebuffer_pending_clear(struct timing_generator *optc);
void optc3_tg_init(struct timing_generator *optc);
void optc3_set_vtotal_min_max(struct timing_generator *optc, int vtotal_min, int vtotal_max);
#endif /* __DC_OPTC_DCN30_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
index 7aa628c21973..9002cb10a6ae 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile
@@ -11,7 +11,8 @@
# Makefile for dcn30.
DCN301 = dcn301_init.o dcn301_resource.o dcn301_dccg.o \
- dcn301_dio_link_encoder.o dcn301_hwseq.o dcn301_panel_cntl.o dcn301_hubbub.o
+ dcn301_dio_link_encoder.o dcn301_hwseq.o dcn301_panel_cntl.o dcn301_hubbub.o \
+ dcn301_optc.o
AMD_DAL_DCN301 = $(addprefix $(AMDDALPATH)/dc/dcn301/,$(DCN301))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c
new file mode 100644
index 000000000000..b3cfcb887905
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include "reg_helper.h"
+#include "dcn301_optc.h"
+#include "dc.h"
+#include "dcn_calc_math.h"
+#include "dc_dmub_srv.h"
+
+#include "dml/dcn30/dcn30_fpu.h"
+#include "dc_trace.h"
+
+#define REG(reg)\
+ optc1->tg_regs->reg
+
+#define CTX \
+ optc1->base.ctx
+
+#undef FN
+#define FN(reg_name, field_name) \
+ optc1->tg_shift->field_name, optc1->tg_mask->field_name
+
+
+/**
+ * optc301_set_drr() - Program dynamic refresh rate registers m_OTGx_OTG_V_TOTAL_*.
+ *
+ * @optc: timing_generator instance.
+ * @params: parameters used for Dynamic Refresh Rate.
+ */
+void optc301_set_drr(
+ struct timing_generator *optc,
+ const struct drr_params *params)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ if (params != NULL &&
+ params->vertical_total_max > 0 &&
+ params->vertical_total_min > 0) {
+
+ if (params->vertical_total_mid != 0) {
+
+ REG_SET(OTG_V_TOTAL_MID, 0,
+ OTG_V_TOTAL_MID, params->vertical_total_mid - 1);
+
+ REG_UPDATE_2(OTG_V_TOTAL_CONTROL,
+ OTG_VTOTAL_MID_REPLACING_MAX_EN, 1,
+ OTG_VTOTAL_MID_FRAME_NUM,
+ (uint8_t)params->vertical_total_mid_frame_num);
+
+ }
+
+ optc->funcs->set_vtotal_min_max(optc, params->vertical_total_min - 1, params->vertical_total_max - 1);
+
+ REG_UPDATE_5(OTG_V_TOTAL_CONTROL,
+ OTG_V_TOTAL_MIN_SEL, 1,
+ OTG_V_TOTAL_MAX_SEL, 1,
+ OTG_FORCE_LOCK_ON_EVENT, 0,
+ OTG_SET_V_TOTAL_MIN_MASK_EN, 0,
+ OTG_SET_V_TOTAL_MIN_MASK, 0);
+ // Setup manual flow control for EOF via TRIG_A
+ optc->funcs->setup_manual_trigger(optc);
+
+ } else {
+ REG_UPDATE_4(OTG_V_TOTAL_CONTROL,
+ OTG_SET_V_TOTAL_MIN_MASK, 0,
+ OTG_V_TOTAL_MIN_SEL, 0,
+ OTG_V_TOTAL_MAX_SEL, 0,
+ OTG_FORCE_LOCK_ON_EVENT, 0);
+
+ optc->funcs->set_vtotal_min_max(optc, 0, 0);
+ }
+}
+
+
+void optc301_setup_manual_trigger(struct timing_generator *optc)
+{
+ struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+ REG_SET_8(OTG_TRIGA_CNTL, 0,
+ OTG_TRIGA_SOURCE_SELECT, 21,
+ OTG_TRIGA_SOURCE_PIPE_SELECT, optc->inst,
+ OTG_TRIGA_RISING_EDGE_DETECT_CNTL, 1,
+ OTG_TRIGA_FALLING_EDGE_DETECT_CNTL, 0,
+ OTG_TRIGA_POLARITY_SELECT, 0,
+ OTG_TRIGA_FREQUENCY_SELECT, 0,
+ OTG_TRIGA_DELAY, 0,
+ OTG_TRIGA_CLEAR, 1);
+}
+
+static struct timing_generator_funcs dcn30_tg_funcs = {
+ .validate_timing = optc1_validate_timing,
+ .program_timing = optc1_program_timing,
+ .setup_vertical_interrupt0 = optc1_setup_vertical_interrupt0,
+ .setup_vertical_interrupt1 = optc1_setup_vertical_interrupt1,
+ .setup_vertical_interrupt2 = optc1_setup_vertical_interrupt2,
+ .program_global_sync = optc1_program_global_sync,
+ .enable_crtc = optc2_enable_crtc,
+ .disable_crtc = optc1_disable_crtc,
+ /* used by enable_timing_synchronization. Not need for FPGA */
+ .is_counter_moving = optc1_is_counter_moving,
+ .get_position = optc1_get_position,
+ .get_frame_count = optc1_get_vblank_counter,
+ .get_scanoutpos = optc1_get_crtc_scanoutpos,
+ .get_otg_active_size = optc1_get_otg_active_size,
+ .set_early_control = optc1_set_early_control,
+ /* used by enable_timing_synchronization. Not need for FPGA */
+ .wait_for_state = optc1_wait_for_state,
+ .set_blank_color = optc3_program_blank_color,
+ .did_triggered_reset_occur = optc1_did_triggered_reset_occur,
+ .triplebuffer_lock = optc3_triplebuffer_lock,
+ .triplebuffer_unlock = optc2_triplebuffer_unlock,
+ .enable_reset_trigger = optc1_enable_reset_trigger,
+ .enable_crtc_reset = optc1_enable_crtc_reset,
+ .disable_reset_trigger = optc1_disable_reset_trigger,
+ .lock = optc3_lock,
+ .unlock = optc1_unlock,
+ .lock_doublebuffer_enable = optc3_lock_doublebuffer_enable,
+ .lock_doublebuffer_disable = optc3_lock_doublebuffer_disable,
+ .enable_optc_clock = optc1_enable_optc_clock,
+ .set_drr = optc301_set_drr,
+ .get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
+ .set_vtotal_min_max = optc3_set_vtotal_min_max,
+ .set_static_screen_control = optc1_set_static_screen_control,
+ .program_stereo = optc1_program_stereo,
+ .is_stereo_left_eye = optc1_is_stereo_left_eye,
+ .tg_init = optc3_tg_init,
+ .is_tg_enabled = optc1_is_tg_enabled,
+ .is_optc_underflow_occurred = optc1_is_optc_underflow_occurred,
+ .clear_optc_underflow = optc1_clear_optc_underflow,
+ .setup_global_swap_lock = NULL,
+ .get_crc = optc1_get_crc,
+ .configure_crc = optc2_configure_crc,
+ .set_dsc_config = optc3_set_dsc_config,
+ .get_dsc_status = optc2_get_dsc_status,
+ .set_dwb_source = NULL,
+ .set_odm_bypass = optc3_set_odm_bypass,
+ .set_odm_combine = optc3_set_odm_combine,
+ .get_optc_source = optc2_get_optc_source,
+ .set_out_mux = optc3_set_out_mux,
+ .set_drr_trigger_window = optc3_set_drr_trigger_window,
+ .set_vtotal_change_limit = optc3_set_vtotal_change_limit,
+ .set_gsl = optc2_set_gsl,
+ .set_gsl_source_select = optc2_set_gsl_source_select,
+ .set_vtg_params = optc1_set_vtg_params,
+ .program_manual_trigger = optc2_program_manual_trigger,
+ .setup_manual_trigger = optc301_setup_manual_trigger,
+ .get_hw_timing = optc1_get_hw_timing,
+ .wait_drr_doublebuffer_pending_clear = optc3_wait_drr_doublebuffer_pending_clear,
+};
+
+void dcn301_timing_generator_init(struct optc *optc1)
+{
+ optc1->base.funcs = &dcn30_tg_funcs;
+
+ optc1->max_h_total = optc1->tg_mask->OTG_H_TOTAL + 1;
+ optc1->max_v_total = optc1->tg_mask->OTG_V_TOTAL + 1;
+
+ optc1->min_h_blank = 32;
+ optc1->min_v_blank = 3;
+ optc1->min_v_blank_interlace = 5;
+ optc1->min_h_sync_width = 4;
+ optc1->min_v_sync_width = 1;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h
new file mode 100644
index 000000000000..b49585682a15
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_optc.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __DC_OPTC_DCN301_H__
+#define __DC_OPTC_DCN301_H__
+
+#include "dcn20/dcn20_optc.h"
+#include "dcn30/dcn30_optc.h"
+
+void dcn301_timing_generator_init(struct optc *optc1);
+void optc301_setup_manual_trigger(struct timing_generator *optc);
+void optc301_set_drr(struct timing_generator *optc, const struct drr_params *params);
+
+#endif /* __DC_OPTC_DCN301_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index 3485fbb1093e..1bee9a4636e6 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -42,7 +42,7 @@
#include "dcn30/dcn30_hubp.h"
#include "irq/dcn30/irq_service_dcn30.h"
#include "dcn30/dcn30_dpp.h"
-#include "dcn30/dcn30_optc.h"
+#include "dcn301/dcn301_optc.h"
#include "dcn20/dcn20_hwseq.h"
#include "dcn30/dcn30_hwseq.h"
#include "dce110/dce110_hw_sequencer.h"
@@ -855,7 +855,7 @@ static struct timing_generator *dcn301_timing_generator_create(
tgn10->tg_shift = &optc_shift;
tgn10->tg_mask = &optc_mask;
- dcn30_timing_generator_init(tgn10);
+ dcn301_timing_generator_init(tgn10);
return &tgn10->base;
}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 45956ef6f3f9..131b8b82afc0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -65,7 +65,7 @@ static const struct dc_debug_options debug_defaults_drv = {
.timing_trace = false,
.clock_trace = true,
.disable_pplib_clock_request = true,
- .pipe_split_policy = MPC_SPLIT_DYNAMIC,
+ .pipe_split_policy = MPC_SPLIT_AVOID,
.force_single_disp_pipe_split = false,
.disable_dcc = DCC_ENABLE,
.vsr_support = true,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c
index 65c1d754e2d6..01cc679ae418 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_dccg.c
@@ -84,7 +84,8 @@ static enum phyd32clk_clock_source get_phy_mux_symclk(
struct dcn_dccg *dccg_dcn,
enum phyd32clk_clock_source src)
{
- if (dccg_dcn->base.ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
+ if (dccg_dcn->base.ctx->asic_id.chip_family == FAMILY_YELLOW_CARP &&
+ dccg_dcn->base.ctx->asic_id.hw_internal_rev == YELLOW_CARP_B0) {
if (src == PHYD32CLKC)
src = PHYD32CLKF;
if (src == PHYD32CLKD)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
index 11e28e056cf7..61ceff6bc0b1 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
@@ -49,7 +49,10 @@ static void dccg32_trigger_dio_fifo_resync(
uint32_t dispclk_rdivider_value = 0;
REG_GET(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_RDIVIDER, &dispclk_rdivider_value);
- REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value);
+
+ /* Not valid for the WDIVIDER to be set to 0 */
+ if (dispclk_rdivider_value != 0)
+ REG_UPDATE(DENTIST_DISPCLK_CNTL, DENTIST_DISPCLK_WDIVIDER, dispclk_rdivider_value);
}
static void dccg32_get_pixel_rate_div(
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
index d9e049e7ff0a..ed8ddb75b333 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -295,7 +295,11 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
pipe = &res_ctx->pipe_ctx[i];
timing = &pipe->stream->timing;
- pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+ if (pipe->stream->adjust.v_total_min != 0)
+ pipes[pipe_cnt].pipe.dest.vtotal = pipe->stream->adjust.v_total_min;
+ else
+ pipes[pipe_cnt].pipe.dest.vtotal = timing->v_total;
+
pipes[pipe_cnt].pipe.dest.vblank_nom = timing->v_total - pipes[pipe_cnt].pipe.dest.vactive;
pipes[pipe_cnt].pipe.dest.vblank_nom = min(pipes[pipe_cnt].pipe.dest.vblank_nom, dcn3_14_ip.VBlankNomDefaultUS);
pipes[pipe_cnt].pipe.dest.vblank_nom = max(pipes[pipe_cnt].pipe.dest.vblank_nom, timing->v_sync_width);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
index 6841a4bce186..1cb402264497 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
@@ -1798,17 +1798,6 @@ static int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
return result;
}
-static bool intel_core_rkl_chk(void)
-{
-#if IS_ENABLED(CONFIG_X86_64)
- struct cpuinfo_x86 *c = &cpu_data(0);
-
- return (c->x86 == 6 && c->x86_model == INTEL_FAM6_ROCKETLAKE);
-#else
- return false;
-#endif
-}
-
static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
{
struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -1835,7 +1824,8 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
data->mclk_dpm_key_disabled = hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true;
data->sclk_dpm_key_disabled = hwmgr->feature_mask & PP_SCLK_DPM_MASK ? false : true;
data->pcie_dpm_key_disabled =
- intel_core_rkl_chk() || !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
+ !amdgpu_device_pcie_dynamic_switching_supported() ||
+ !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
/* need to set voltage control types before EVV patching */
data->voltage_control = SMU7_VOLTAGE_CONTROL_NONE;
data->vddci_control = SMU7_VOLTAGE_CONTROL_NONE;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index ce41a8309582..222af2fae745 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -1581,9 +1581,9 @@ static int smu_disable_dpms(struct smu_context *smu)
/*
* For SMU 13.0.4/11, PMFW will handle the features disablement properly
- * for gpu reset case. Driver involvement is unnecessary.
+ * for gpu reset and S0i3 cases. Driver involvement is unnecessary.
*/
- if (amdgpu_in_reset(adev)) {
+ if (amdgpu_in_reset(adev) || adev->in_s0ix) {
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 4):
case IP_VERSION(13, 0, 11):
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index f6599c00a6fd..f0800c0c5168 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -588,7 +588,9 @@ err0_out:
return -ENOMEM;
}
-static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu)
+static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *smu,
+ bool use_metrics_v3,
+ bool use_metrics_v2)
{
struct smu_table_context *smu_table= &smu->smu_table;
SmuMetricsExternal_t *metrics_ext =
@@ -596,13 +598,11 @@ static uint32_t sienna_cichlid_get_throttler_status_locked(struct smu_context *s
uint32_t throttler_status = 0;
int i;
- if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
- (smu->smc_fw_version >= 0x3A4900)) {
+ if (use_metrics_v3) {
for (i = 0; i < THROTTLER_COUNT; i++)
throttler_status |=
(metrics_ext->SmuMetrics_V3.ThrottlingPercentage[i] ? 1U << i : 0);
- } else if ((smu->adev->ip_versions[MP1_HWIP][0] == IP_VERSION(11, 0, 7)) &&
- (smu->smc_fw_version >= 0x3A4300)) {
+ } else if (use_metrics_v2) {
for (i = 0; i < THROTTLER_COUNT; i++)
throttler_status |=
(metrics_ext->SmuMetrics_V2.ThrottlingPercentage[i] ? 1U << i : 0);
@@ -864,7 +864,7 @@ static int sienna_cichlid_get_smu_metrics_data(struct smu_context *smu,
metrics->TemperatureVrSoc) * SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_THROTTLER_STATUS:
- *value = sienna_cichlid_get_throttler_status_locked(smu);
+ *value = sienna_cichlid_get_throttler_status_locked(smu, use_metrics_v3, use_metrics_v2);
break;
case METRICS_CURR_FANSPEED:
*value = use_metrics_v3 ? metrics_v3->CurrFanSpeed :
@@ -1927,12 +1927,16 @@ static int sienna_cichlid_read_sensor(struct smu_context *smu,
*size = 4;
break;
case AMDGPU_PP_SENSOR_GFX_MCLK:
- ret = sienna_cichlid_get_current_clk_freq_by_table(smu, SMU_UCLK, (uint32_t *)data);
+ ret = sienna_cichlid_get_smu_metrics_data(smu,
+ METRICS_CURR_UCLK,
+ (uint32_t *)data);
*(uint32_t *)data *= 100;
*size = 4;
break;
case AMDGPU_PP_SENSOR_GFX_SCLK:
- ret = sienna_cichlid_get_current_clk_freq_by_table(smu, SMU_GFXCLK, (uint32_t *)data);
+ ret = sienna_cichlid_get_smu_metrics_data(smu,
+ METRICS_AVERAGE_GFXCLK,
+ (uint32_t *)data);
*(uint32_t *)data *= 100;
*size = 4;
break;
@@ -4013,7 +4017,7 @@ static ssize_t sienna_cichlid_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->current_dclk1 = use_metrics_v3 ? metrics_v3->CurrClock[PPCLK_DCLK_1] :
use_metrics_v2 ? metrics_v2->CurrClock[PPCLK_DCLK_1] : metrics->CurrClock[PPCLK_DCLK_1];
- gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu);
+ gpu_metrics->throttle_status = sienna_cichlid_get_throttler_status_locked(smu, use_metrics_v3, use_metrics_v2);
gpu_metrics->indep_throttle_status =
smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status,
sienna_cichlid_throttler_map);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index b9bde5fa8f8f..0fb6be11a0cc 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -332,10 +332,13 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
table_context->power_play_table;
struct smu_baco_context *smu_baco = &smu->smu_baco;
PPTable_t *pptable = smu->smu_table.driver_pptable;
+#if 0
+ PPTable_t *pptable = smu->smu_table.driver_pptable;
const OverDriveLimits_t * const overdrive_upperlimits =
&pptable->SkuTable.OverDriveLimitsBasicMax;
const OverDriveLimits_t * const overdrive_lowerlimits =
&pptable->SkuTable.OverDriveLimitsMin;
+#endif
if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_HARDWAREDC)
smu->dc_controlled_by_gpio = true;
@@ -347,18 +350,30 @@ static int smu_v13_0_0_check_powerplay_table(struct smu_context *smu)
if (powerplay_table->platform_caps & SMU_13_0_0_PP_PLATFORM_CAP_MACO)
smu_baco->maco_support = true;
+ /*
+ * We are in the transition to a new OD mechanism.
+ * Disable the OD feature support for SMU13 temporarily.
+ * TODO: get this reverted when new OD mechanism online
+ */
+#if 0
if (!overdrive_lowerlimits->FeatureCtrlMask ||
!overdrive_upperlimits->FeatureCtrlMask)
smu->od_enabled = false;
- table_context->thermal_controller_type =
- powerplay_table->thermal_controller_type;
-
/*
* Instead of having its own buffer space and get overdrive_table copied,
* smu->od_settings just points to the actual overdrive_table
*/
smu->od_settings = &powerplay_table->overdrive_table;
+#else
+ smu->od_enabled = false;
+#endif
+
+ table_context->thermal_controller_type =
+ powerplay_table->thermal_controller_type;
+
+ smu->adev->pm.no_fan =
+ !(pptable->SkuTable.FeaturesToRun[0] & (1 << FEATURE_FAN_CONTROL_BIT));
return 0;
}
@@ -1140,7 +1155,6 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
(OverDriveTableExternal_t *)smu->smu_table.overdrive_table;
struct smu_13_0_dpm_table *single_dpm_table;
struct smu_13_0_pcie_table *pcie_table;
- const int link_width[] = {0, 1, 2, 4, 8, 12, 16};
uint32_t gen_speed, lane_width;
int i, curr_freq, size = 0;
int32_t min_value, max_value;
@@ -1256,7 +1270,7 @@ static int smu_v13_0_0_print_clk_levels(struct smu_context *smu,
(pcie_table->pcie_lane[i] == 6) ? "x16" : "",
pcie_table->clk_freq[i],
(gen_speed == DECODE_GEN_SPEED(pcie_table->pcie_gen[i])) &&
- (lane_width == DECODE_LANE_WIDTH(link_width[pcie_table->pcie_lane[i]])) ?
+ (lane_width == DECODE_LANE_WIDTH(pcie_table->pcie_lane[i])) ?
"*" : "");
break;
@@ -1734,7 +1748,7 @@ static ssize_t smu_v13_0_0_get_gpu_metrics(struct smu_context *smu,
gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency;
gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency;
- gpu_metrics->current_gfxclk = metrics->CurrClock[PPCLK_GFXCLK];
+ gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency;
gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK];
gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK];
gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0];
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 1ac552142763..dc6104a04dce 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -81,9 +81,10 @@
#define EPSILON 1
#define smnPCIE_ESM_CTRL 0x193D0
-#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1ab40288
+#define smnPCIE_LC_LINK_WIDTH_CNTL 0x1a340288
#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK 0x00000070L
#define PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT 0x4
+#define MAX_LINK_WIDTH 6
static const struct cmn2asic_msg_mapping smu_v13_0_6_message_map[SMU_MSG_MAX_COUNT] = {
MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 0),
@@ -708,16 +709,19 @@ static int smu_v13_0_6_get_smu_metrics_data(struct smu_context *smu,
*value = SMUQ10_TO_UINT(metrics->SocketPower) << 8;
break;
case METRICS_TEMPERATURE_HOTSPOT:
- *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature);
+ *value = SMUQ10_TO_UINT(metrics->MaxSocketTemperature) *
+ SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
case METRICS_TEMPERATURE_MEM:
- *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature);
+ *value = SMUQ10_TO_UINT(metrics->MaxHbmTemperature) *
+ SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
/* This is the max of all VRs and not just SOC VR.
* No need to define another data type for the same.
*/
case METRICS_TEMPERATURE_VRSOC:
- *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature);
+ *value = SMUQ10_TO_UINT(metrics->MaxVrTemperature) *
+ SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
break;
default:
*value = UINT_MAX;
@@ -1966,6 +1970,7 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
struct amdgpu_device *adev = smu->adev;
int ret = 0, inst0, xcc0;
MetricsTable_t *metrics;
+ u16 link_width_level;
inst0 = adev->sdma.instance[0].aid_id;
xcc0 = GET_INST(GC, 0);
@@ -1993,9 +1998,8 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
gpu_metrics->average_socket_power =
SMUQ10_TO_UINT(metrics->SocketPower);
- /* Energy is reported in 15.625mJ units */
- gpu_metrics->energy_accumulator =
- SMUQ10_TO_UINT(metrics->SocketEnergyAcc);
+ /* Energy counter reported in 15.259uJ (2^-16) units */
+ gpu_metrics->energy_accumulator = metrics->SocketEnergyAcc;
gpu_metrics->current_gfxclk =
SMUQ10_TO_UINT(metrics->GfxclkFrequency[xcc0]);
@@ -2017,8 +2021,12 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
gpu_metrics->throttle_status = 0;
if (!(adev->flags & AMD_IS_APU)) {
+ link_width_level = smu_v13_0_6_get_current_pcie_link_width_level(smu);
+ if (link_width_level > MAX_LINK_WIDTH)
+ link_width_level = 0;
+
gpu_metrics->pcie_link_width =
- smu_v13_0_6_get_current_pcie_link_width_level(smu);
+ DECODE_LANE_WIDTH(link_width_level);
gpu_metrics->pcie_link_speed =
smu_v13_0_6_get_current_pcie_link_speed(smu);
}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index 3ba02131e682..62f2886ab4df 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -323,10 +323,12 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
struct smu_baco_context *smu_baco = &smu->smu_baco;
PPTable_t *smc_pptable = table_context->driver_pptable;
BoardTable_t *BoardTable = &smc_pptable->BoardTable;
+#if 0
const OverDriveLimits_t * const overdrive_upperlimits =
&smc_pptable->SkuTable.OverDriveLimitsBasicMax;
const OverDriveLimits_t * const overdrive_lowerlimits =
&smc_pptable->SkuTable.OverDriveLimitsMin;
+#endif
if (powerplay_table->platform_caps & SMU_13_0_7_PP_PLATFORM_CAP_HARDWAREDC)
smu->dc_controlled_by_gpio = true;
@@ -338,18 +340,22 @@ static int smu_v13_0_7_check_powerplay_table(struct smu_context *smu)
if (smu_baco->platform_support && (BoardTable->HsrEnabled || BoardTable->VddqOffEnabled))
smu_baco->maco_support = true;
+#if 0
if (!overdrive_lowerlimits->FeatureCtrlMask ||
!overdrive_upperlimits->FeatureCtrlMask)
smu->od_enabled = false;
- table_context->thermal_controller_type =
- powerplay_table->thermal_controller_type;
-
/*
* Instead of having its own buffer space and get overdrive_table copied,
* smu->od_settings just points to the actual overdrive_table
*/
smu->od_settings = &powerplay_table->overdrive_table;
+#else
+ smu->od_enabled = false;
+#endif
+
+ table_context->thermal_controller_type =
+ powerplay_table->thermal_controller_type;
return 0;
}
@@ -949,7 +955,7 @@ static int smu_v13_0_7_read_sensor(struct smu_context *smu,
break;
case AMDGPU_PP_SENSOR_GFX_MCLK:
ret = smu_v13_0_7_get_smu_metrics_data(smu,
- METRICS_AVERAGE_UCLK,
+ METRICS_CURR_UCLK,
(uint32_t *)data);
*(uint32_t *)data *= 100;
*size = 4;
diff --git a/drivers/gpu/drm/bridge/ite-it6505.c b/drivers/gpu/drm/bridge/ite-it6505.c
index 504d51c42f79..aadb396508c5 100644
--- a/drivers/gpu/drm/bridge/ite-it6505.c
+++ b/drivers/gpu/drm/bridge/ite-it6505.c
@@ -2517,9 +2517,11 @@ static irqreturn_t it6505_int_threaded_handler(int unused, void *data)
};
int int_status[3], i;
- if (it6505->enable_drv_hold || pm_runtime_get_if_in_use(dev) <= 0)
+ if (it6505->enable_drv_hold || !it6505->powered)
return IRQ_HANDLED;
+ pm_runtime_get_sync(dev);
+
int_status[0] = it6505_read(it6505, INT_STATUS_01);
int_status[1] = it6505_read(it6505, INT_STATUS_02);
int_status[2] = it6505_read(it6505, INT_STATUS_03);
diff --git a/drivers/gpu/drm/bridge/lontium-lt9611.c b/drivers/gpu/drm/bridge/lontium-lt9611.c
index 5163e5224aad..9663601ce098 100644
--- a/drivers/gpu/drm/bridge/lontium-lt9611.c
+++ b/drivers/gpu/drm/bridge/lontium-lt9611.c
@@ -774,9 +774,7 @@ static struct mipi_dsi_device *lt9611_attach_dsi(struct lt9611 *lt9611,
dsi->lanes = 4;
dsi->format = MIPI_DSI_FMT_RGB888;
dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_SYNC_PULSE |
- MIPI_DSI_MODE_VIDEO_HSE | MIPI_DSI_MODE_VIDEO_NO_HSA |
- MIPI_DSI_MODE_VIDEO_NO_HFP | MIPI_DSI_MODE_VIDEO_NO_HBP |
- MIPI_DSI_MODE_NO_EOT_PACKET;
+ MIPI_DSI_MODE_VIDEO_HSE;
ret = devm_mipi_dsi_attach(dev, dsi);
if (ret < 0) {
diff --git a/drivers/gpu/drm/bridge/samsung-dsim.c b/drivers/gpu/drm/bridge/samsung-dsim.c
index 043b8109e64a..73ec60757dbc 100644
--- a/drivers/gpu/drm/bridge/samsung-dsim.c
+++ b/drivers/gpu/drm/bridge/samsung-dsim.c
@@ -1386,6 +1386,18 @@ static void samsung_dsim_disable_irq(struct samsung_dsim *dsi)
disable_irq(dsi->irq);
}
+static void samsung_dsim_set_stop_state(struct samsung_dsim *dsi, bool enable)
+{
+ u32 reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
+
+ if (enable)
+ reg |= DSIM_FORCE_STOP_STATE;
+ else
+ reg &= ~DSIM_FORCE_STOP_STATE;
+
+ samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
+}
+
static int samsung_dsim_init(struct samsung_dsim *dsi)
{
const struct samsung_dsim_driver_data *driver_data = dsi->driver_data;
@@ -1445,15 +1457,12 @@ static void samsung_dsim_atomic_enable(struct drm_bridge *bridge,
struct drm_bridge_state *old_bridge_state)
{
struct samsung_dsim *dsi = bridge_to_dsi(bridge);
- u32 reg;
if (samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
samsung_dsim_set_display_mode(dsi);
samsung_dsim_set_display_enable(dsi, true);
} else {
- reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
- reg &= ~DSIM_FORCE_STOP_STATE;
- samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
+ samsung_dsim_set_stop_state(dsi, false);
}
dsi->state |= DSIM_STATE_VIDOUT_AVAILABLE;
@@ -1463,16 +1472,12 @@ static void samsung_dsim_atomic_disable(struct drm_bridge *bridge,
struct drm_bridge_state *old_bridge_state)
{
struct samsung_dsim *dsi = bridge_to_dsi(bridge);
- u32 reg;
if (!(dsi->state & DSIM_STATE_ENABLED))
return;
- if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type)) {
- reg = samsung_dsim_read(dsi, DSIM_ESCMODE_REG);
- reg |= DSIM_FORCE_STOP_STATE;
- samsung_dsim_write(dsi, DSIM_ESCMODE_REG, reg);
- }
+ if (!samsung_dsim_hw_is_exynos(dsi->plat_data->hw_type))
+ samsung_dsim_set_stop_state(dsi, true);
dsi->state &= ~DSIM_STATE_VIDOUT_AVAILABLE;
}
@@ -1775,6 +1780,8 @@ static ssize_t samsung_dsim_host_transfer(struct mipi_dsi_host *host,
if (ret)
return ret;
+ samsung_dsim_set_stop_state(dsi, false);
+
ret = mipi_dsi_create_packet(&xfer.packet, msg);
if (ret < 0)
return ret;
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 2c454568a607..c277b198fa3f 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -140,6 +140,12 @@ drm_atomic_state_init(struct drm_device *dev, struct drm_atomic_state *state)
if (!state->planes)
goto fail;
+ /*
+ * Because drm_atomic_state can be committed asynchronously we need our
+ * own reference and cannot rely on the on implied by drm_file in the
+ * ioctl call.
+ */
+ drm_dev_get(dev);
state->dev = dev;
drm_dbg_atomic(dev, "Allocated atomic state %p\n", state);
@@ -299,7 +305,8 @@ EXPORT_SYMBOL(drm_atomic_state_clear);
void __drm_atomic_state_free(struct kref *ref)
{
struct drm_atomic_state *state = container_of(ref, typeof(*state), ref);
- struct drm_mode_config *config = &state->dev->mode_config;
+ struct drm_device *dev = state->dev;
+ struct drm_mode_config *config = &dev->mode_config;
drm_atomic_state_clear(state);
@@ -311,6 +318,8 @@ void __drm_atomic_state_free(struct kref *ref)
drm_atomic_state_default_release(state);
kfree(state);
}
+
+ drm_dev_put(dev);
}
EXPORT_SYMBOL(__drm_atomic_state_free);
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index 1b12a3c201a3..871e4e2129d6 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -311,6 +311,9 @@ static bool drm_client_target_cloned(struct drm_device *dev,
can_clone = true;
dmt_mode = drm_mode_find_dmt(dev, 1024, 768, 60, false);
+ if (!dmt_mode)
+ goto fail;
+
for (i = 0; i < connector_count; i++) {
if (!enabled[i])
continue;
@@ -326,11 +329,13 @@ static bool drm_client_target_cloned(struct drm_device *dev,
if (!modes[i])
can_clone = false;
}
+ kfree(dmt_mode);
if (can_clone) {
DRM_DEBUG_KMS("can clone using 1024x768\n");
return true;
}
+fail:
DRM_INFO("kms: can't enable cloning when we probably wanted to.\n");
return false;
}
@@ -862,6 +867,7 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width,
break;
}
+ kfree(modeset->mode);
modeset->mode = drm_mode_duplicate(dev, mode);
drm_connector_get(connector);
modeset->connectors[modeset->num_connectors++] = connector;
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index e0dbd9140726..1f470968ed14 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -3456,6 +3456,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto
connector->base.id, connector->name);
return NULL;
}
+ if (!(pt->misc & DRM_EDID_PT_SEPARATE_SYNC)) {
+ drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Composite sync not supported\n",
+ connector->base.id, connector->name);
+ }
/* it is incorrect if hsync/vsync width is zero */
if (!hsync_pulse_width || !vsync_pulse_width) {
@@ -3502,27 +3506,10 @@ static struct drm_display_mode *drm_mode_detailed(struct drm_connector *connecto
if (info->quirks & EDID_QUIRK_DETAILED_SYNC_PP) {
mode->flags |= DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC;
} else {
- switch (pt->misc & DRM_EDID_PT_SYNC_MASK) {
- case DRM_EDID_PT_ANALOG_CSYNC:
- case DRM_EDID_PT_BIPOLAR_ANALOG_CSYNC:
- drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Analog composite sync!\n",
- connector->base.id, connector->name);
- mode->flags |= DRM_MODE_FLAG_CSYNC | DRM_MODE_FLAG_NCSYNC;
- break;
- case DRM_EDID_PT_DIGITAL_CSYNC:
- drm_dbg_kms(dev, "[CONNECTOR:%d:%s] Digital composite sync!\n",
- connector->base.id, connector->name);
- mode->flags |= DRM_MODE_FLAG_CSYNC;
- mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
- DRM_MODE_FLAG_PCSYNC : DRM_MODE_FLAG_NCSYNC;
- break;
- case DRM_EDID_PT_DIGITAL_SEPARATE_SYNC:
- mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
- DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
- mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ?
- DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
- break;
- }
+ mode->flags |= (pt->misc & DRM_EDID_PT_HSYNC_POSITIVE) ?
+ DRM_MODE_FLAG_PHSYNC : DRM_MODE_FLAG_NHSYNC;
+ mode->flags |= (pt->misc & DRM_EDID_PT_VSYNC_POSITIVE) ?
+ DRM_MODE_FLAG_PVSYNC : DRM_MODE_FLAG_NVSYNC;
}
set_size:
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 4ea6507a77e5..baaf0e0feb06 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -623,7 +623,13 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
int ret;
if (obj->import_attach) {
+ /* Reset both vm_ops and vm_private_data, so we don't end up with
+ * vm_ops pointing to our implementation if the dma-buf backend
+ * doesn't set those fields.
+ */
vma->vm_private_data = NULL;
+ vma->vm_ops = NULL;
+
ret = dma_buf_mmap(obj->dma_buf, vma, 0);
/* Drop the reference drm_gem_mmap_obj() acquired.*/
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 2fb9bf901a2c..3f479483d7d8 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -262,6 +262,26 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
}
#define DRM_OUTPUT_POLL_PERIOD (10*HZ)
+static void reschedule_output_poll_work(struct drm_device *dev)
+{
+ unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
+
+ if (dev->mode_config.delayed_event)
+ /*
+ * FIXME:
+ *
+ * Use short (1s) delay to handle the initial delayed event.
+ * This delay should not be needed, but Optimus/nouveau will
+ * fail in a mysterious way if the delayed event is handled as
+ * soon as possible like it is done in
+ * drm_helper_probe_single_connector_modes() in case the poll
+ * was enabled before.
+ */
+ delay = HZ;
+
+ schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+}
+
/**
* drm_kms_helper_poll_enable - re-enable output polling.
* @dev: drm_device
@@ -279,37 +299,41 @@ static bool drm_kms_helper_enable_hpd(struct drm_device *dev)
*/
void drm_kms_helper_poll_enable(struct drm_device *dev)
{
- bool poll = false;
- unsigned long delay = DRM_OUTPUT_POLL_PERIOD;
-
if (!dev->mode_config.poll_enabled || !drm_kms_helper_poll ||
dev->mode_config.poll_running)
return;
- poll = drm_kms_helper_enable_hpd(dev);
-
- if (dev->mode_config.delayed_event) {
- /*
- * FIXME:
- *
- * Use short (1s) delay to handle the initial delayed event.
- * This delay should not be needed, but Optimus/nouveau will
- * fail in a mysterious way if the delayed event is handled as
- * soon as possible like it is done in
- * drm_helper_probe_single_connector_modes() in case the poll
- * was enabled before.
- */
- poll = true;
- delay = HZ;
- }
-
- if (poll)
- schedule_delayed_work(&dev->mode_config.output_poll_work, delay);
+ if (drm_kms_helper_enable_hpd(dev) ||
+ dev->mode_config.delayed_event)
+ reschedule_output_poll_work(dev);
dev->mode_config.poll_running = true;
}
EXPORT_SYMBOL(drm_kms_helper_poll_enable);
+/**
+ * drm_kms_helper_poll_reschedule - reschedule the output polling work
+ * @dev: drm_device
+ *
+ * This function reschedules the output polling work, after polling for a
+ * connector has been enabled.
+ *
+ * Drivers must call this helper after enabling polling for a connector by
+ * setting %DRM_CONNECTOR_POLL_CONNECT / %DRM_CONNECTOR_POLL_DISCONNECT flags
+ * in drm_connector::polled. Note that after disabling polling by clearing these
+ * flags for a connector will stop the output polling work automatically if
+ * the polling is disabled for all other connectors as well.
+ *
+ * The function can be called only after polling has been enabled by calling
+ * drm_kms_helper_poll_init() / drm_kms_helper_poll_enable().
+ */
+void drm_kms_helper_poll_reschedule(struct drm_device *dev)
+{
+ if (dev->mode_config.poll_running)
+ reschedule_output_poll_work(dev);
+}
+EXPORT_SYMBOL(drm_kms_helper_poll_reschedule);
+
static enum drm_connector_status
drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force)
{
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 789dce9e2608..dcbda9ba32dd 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -23,6 +23,11 @@ subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
subdir-ccflags-y += $(call cc-disable-warning, frame-address)
subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
+# Fine grained warnings disable
+CFLAGS_i915_pci.o = $(call cc-disable-warning, override-init)
+CFLAGS_display/intel_display_device.o = $(call cc-disable-warning, override-init)
+CFLAGS_display/intel_fbdev.o = $(call cc-disable-warning, override-init)
+
subdir-ccflags-y += -I$(srctree)/$(src)
# Please keep these build lists sorted!
diff --git a/drivers/gpu/drm/i915/display/intel_display_device.c b/drivers/gpu/drm/i915/display/intel_display_device.c
index 3fd30e7f0062..b0c6a2a86f2f 100644
--- a/drivers/gpu/drm/i915/display/intel_display_device.c
+++ b/drivers/gpu/drm/i915/display/intel_display_device.c
@@ -16,9 +16,6 @@
#include "intel_display_reg_defs.h"
#include "intel_fbc.h"
-__diag_push();
-__diag_ignore_all("-Woverride-init", "Allow overriding inherited members");
-
static const struct intel_display_device_info no_display = {};
#define PIPE_A_OFFSET 0x70000
@@ -665,12 +662,24 @@ static const struct intel_display_device_info xe_lpdp_display = {
BIT(TRANSCODER_C) | BIT(TRANSCODER_D),
};
-__diag_pop();
+/*
+ * Separate detection for no display cases to keep the display id array simple.
+ *
+ * IVB Q requires subvendor and subdevice matching to differentiate from IVB D
+ * GT2 server.
+ */
+static bool has_no_display(struct pci_dev *pdev)
+{
+ static const struct pci_device_id ids[] = {
+ INTEL_IVB_Q_IDS(0),
+ {}
+ };
+
+ return pci_match_id(ids, pdev);
+}
#undef INTEL_VGA_DEVICE
-#undef INTEL_QUANTA_VGA_DEVICE
#define INTEL_VGA_DEVICE(id, info) { id, info }
-#define INTEL_QUANTA_VGA_DEVICE(info) { 0x16a, info }
static const struct {
u32 devid;
@@ -695,7 +704,6 @@ static const struct {
INTEL_IRONLAKE_M_IDS(&ilk_m_display),
INTEL_SNB_D_IDS(&snb_display),
INTEL_SNB_M_IDS(&snb_display),
- INTEL_IVB_Q_IDS(NULL), /* must be first IVB in list */
INTEL_IVB_M_IDS(&ivb_display),
INTEL_IVB_D_IDS(&ivb_display),
INTEL_HSW_IDS(&hsw_display),
@@ -780,6 +788,11 @@ intel_display_device_probe(struct drm_i915_private *i915, bool has_gmdid,
if (has_gmdid)
return probe_gmdid_display(i915, gmdid_ver, gmdid_rel, gmdid_step);
+ if (has_no_display(pdev)) {
+ drm_dbg_kms(&i915->drm, "Device doesn't have display\n");
+ return &no_display;
+ }
+
for (i = 0; i < ARRAY_SIZE(intel_display_ids); i++) {
if (intel_display_ids[i].devid == pdev->device)
return intel_display_ids[i].info;
diff --git a/drivers/gpu/drm/i915/display/intel_dpt.c b/drivers/gpu/drm/i915/display/intel_dpt.c
index 7c5fddb203ba..fbfd8f959f17 100644
--- a/drivers/gpu/drm/i915/display/intel_dpt.c
+++ b/drivers/gpu/drm/i915/display/intel_dpt.c
@@ -166,6 +166,8 @@ struct i915_vma *intel_dpt_pin(struct i915_address_space *vm)
i915_vma_get(vma);
}
+ dpt->obj->mm.dirty = true;
+
atomic_dec(&i915->gpu_error.pending_fb_pin);
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
@@ -261,7 +263,7 @@ intel_dpt_create(struct intel_framebuffer *fb)
dpt_obj = i915_gem_object_create_stolen(i915, size);
if (IS_ERR(dpt_obj) && !HAS_LMEM(i915)) {
drm_dbg_kms(&i915->drm, "Allocating dpt from smem\n");
- dpt_obj = i915_gem_object_create_internal(i915, size);
+ dpt_obj = i915_gem_object_create_shmem(i915, size);
}
if (IS_ERR(dpt_obj))
return ERR_CAST(dpt_obj);
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 1cc0ddc6a310..80c3f88310db 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -135,9 +135,6 @@ static int intel_fbdev_mmap(struct fb_info *info, struct vm_area_struct *vma)
return i915_gem_fb_mmap(obj, vma);
}
-__diag_push();
-__diag_ignore_all("-Woverride-init", "Allow overriding the default ops");
-
static const struct fb_ops intelfb_ops = {
.owner = THIS_MODULE,
__FB_DEFAULT_DEFERRED_OPS_RDWR(intel_fbdev),
@@ -149,8 +146,6 @@ static const struct fb_ops intelfb_ops = {
.fb_mmap = intel_fbdev_mmap,
};
-__diag_pop();
-
static int intelfb_alloc(struct drm_fb_helper *helper,
struct drm_fb_helper_surface_size *sizes)
{
diff --git a/drivers/gpu/drm/i915/display/intel_hotplug.c b/drivers/gpu/drm/i915/display/intel_hotplug.c
index 1160fa20433b..5eac7032bb5a 100644
--- a/drivers/gpu/drm/i915/display/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/display/intel_hotplug.c
@@ -211,7 +211,7 @@ intel_hpd_irq_storm_switch_to_polling(struct drm_i915_private *dev_priv)
/* Enable polling and queue hotplug re-enabling. */
if (hpd_disabled) {
- drm_kms_helper_poll_enable(&dev_priv->drm);
+ drm_kms_helper_poll_reschedule(&dev_priv->drm);
mod_delayed_work(dev_priv->unordered_wq,
&dev_priv->display.hotplug.reenable_work,
msecs_to_jiffies(HPD_STORM_REENABLE_DELAY));
@@ -649,7 +649,7 @@ static void i915_hpd_poll_init_work(struct work_struct *work)
drm_connector_list_iter_end(&conn_iter);
if (enabled)
- drm_kms_helper_poll_enable(&dev_priv->drm);
+ drm_kms_helper_poll_reschedule(&dev_priv->drm);
mutex_unlock(&dev_priv->drm.mode_config.mutex);
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 21f92123c844..67e3aaf9b432 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -2752,7 +2752,7 @@ static struct intel_sdvo_connector *intel_sdvo_connector_alloc(void)
__drm_atomic_helper_connector_reset(&sdvo_connector->base.base,
&conn_state->base.base);
- INIT_LIST_HEAD(&sdvo_connector->base.panel.fixed_modes);
+ intel_panel_init_alloc(&sdvo_connector->base);
return sdvo_connector;
}
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index df6c9a84252c..6b9f6cf50bf6 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1246,8 +1246,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
* times in succession a possibility by enlarging the permutation array.
*/
order = i915_random_order(count * count, &prng);
- if (!order)
- return -ENOMEM;
+ if (!order) {
+ err = -ENOMEM;
+ goto out;
+ }
max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
max = div_u64(max - size, max_page_size);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
index 23857cc08eca..2702ad4c26c8 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
@@ -165,14 +165,60 @@ static u32 preparser_disable(bool state)
return MI_ARB_CHECK | 1 << 8 | state;
}
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg)
+static i915_reg_t gen12_get_aux_inv_reg(struct intel_engine_cs *engine)
{
- u32 gsi_offset = gt->uncore->gsi_offset;
+ switch (engine->id) {
+ case RCS0:
+ return GEN12_CCS_AUX_INV;
+ case BCS0:
+ return GEN12_BCS0_AUX_INV;
+ case VCS0:
+ return GEN12_VD0_AUX_INV;
+ case VCS2:
+ return GEN12_VD2_AUX_INV;
+ case VECS0:
+ return GEN12_VE0_AUX_INV;
+ case CCS0:
+ return GEN12_CCS0_AUX_INV;
+ default:
+ return INVALID_MMIO_REG;
+ }
+}
+
+static bool gen12_needs_ccs_aux_inv(struct intel_engine_cs *engine)
+{
+ i915_reg_t reg = gen12_get_aux_inv_reg(engine);
+
+ if (IS_PONTEVECCHIO(engine->i915))
+ return false;
+
+ /*
+ * So far platforms supported by i915 having flat ccs do not require
+ * AUX invalidation. Check also whether the engine requires it.
+ */
+ return i915_mmio_reg_valid(reg) && !HAS_FLAT_CCS(engine->i915);
+}
+
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs)
+{
+ i915_reg_t inv_reg = gen12_get_aux_inv_reg(engine);
+ u32 gsi_offset = engine->gt->uncore->gsi_offset;
+
+ if (!gen12_needs_ccs_aux_inv(engine))
+ return cs;
*cs++ = MI_LOAD_REGISTER_IMM(1) | MI_LRI_MMIO_REMAP_EN;
*cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
*cs++ = AUX_INV;
- *cs++ = MI_NOOP;
+
+ *cs++ = MI_SEMAPHORE_WAIT_TOKEN |
+ MI_SEMAPHORE_REGISTER_POLL |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_EQ_SDD;
+ *cs++ = 0;
+ *cs++ = i915_mmio_reg_offset(inv_reg) + gsi_offset;
+ *cs++ = 0;
+ *cs++ = 0;
return cs;
}
@@ -202,8 +248,13 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
struct intel_engine_cs *engine = rq->engine;
- if (mode & EMIT_FLUSH) {
- u32 flags = 0;
+ /*
+ * On Aux CCS platforms the invalidation of the Aux
+ * table requires quiescing memory traffic beforehand
+ */
+ if (mode & EMIT_FLUSH || gen12_needs_ccs_aux_inv(engine)) {
+ u32 bit_group_0 = 0;
+ u32 bit_group_1 = 0;
int err;
u32 *cs;
@@ -211,32 +262,40 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
if (err)
return err;
- flags |= PIPE_CONTROL_TILE_CACHE_FLUSH;
- flags |= PIPE_CONTROL_FLUSH_L3;
- flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
- flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
+ bit_group_0 |= PIPE_CONTROL0_HDC_PIPELINE_FLUSH;
+
+ /*
+ * When required, in MTL and beyond platforms we
+ * need to set the CCS_FLUSH bit in the pipe control
+ */
+ if (GRAPHICS_VER_FULL(rq->i915) >= IP_VER(12, 70))
+ bit_group_0 |= PIPE_CONTROL_CCS_FLUSH;
+
+ bit_group_1 |= PIPE_CONTROL_TILE_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_FLUSH_L3;
+ bit_group_1 |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
/* Wa_1409600907:tgl,adl-p */
- flags |= PIPE_CONTROL_DEPTH_STALL;
- flags |= PIPE_CONTROL_DC_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_DEPTH_STALL;
+ bit_group_1 |= PIPE_CONTROL_DC_FLUSH_ENABLE;
+ bit_group_1 |= PIPE_CONTROL_FLUSH_ENABLE;
- flags |= PIPE_CONTROL_STORE_DATA_INDEX;
- flags |= PIPE_CONTROL_QW_WRITE;
+ bit_group_1 |= PIPE_CONTROL_STORE_DATA_INDEX;
+ bit_group_1 |= PIPE_CONTROL_QW_WRITE;
- flags |= PIPE_CONTROL_CS_STALL;
+ bit_group_1 |= PIPE_CONTROL_CS_STALL;
if (!HAS_3D_PIPELINE(engine->i915))
- flags &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ARCH_FLAGS;
else if (engine->class == COMPUTE_CLASS)
- flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
+ bit_group_1 &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
cs = intel_ring_begin(rq, 6);
if (IS_ERR(cs))
return PTR_ERR(cs);
- cs = gen12_emit_pipe_control(cs,
- PIPE_CONTROL0_HDC_PIPELINE_FLUSH,
- flags, LRC_PPHWSP_SCRATCH_ADDR);
+ cs = gen12_emit_pipe_control(cs, bit_group_0, bit_group_1,
+ LRC_PPHWSP_SCRATCH_ADDR);
intel_ring_advance(rq, cs);
}
@@ -267,10 +326,9 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
else if (engine->class == COMPUTE_CLASS)
flags &= ~PIPE_CONTROL_3D_ENGINE_FLAGS;
- if (!HAS_FLAT_CCS(rq->engine->i915))
- count = 8 + 4;
- else
- count = 8;
+ count = 8;
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ count += 8;
cs = intel_ring_begin(rq, count);
if (IS_ERR(cs))
@@ -285,11 +343,7 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR);
- if (!HAS_FLAT_CCS(rq->engine->i915)) {
- /* hsdes: 1809175790 */
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(engine, cs);
*cs++ = preparser_disable(false);
intel_ring_advance(rq, cs);
@@ -300,21 +354,14 @@ int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode)
int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
{
- intel_engine_mask_t aux_inv = 0;
- u32 cmd, *cs;
+ u32 cmd = 4;
+ u32 *cs;
- cmd = 4;
if (mode & EMIT_INVALIDATE) {
cmd += 2;
- if (!HAS_FLAT_CCS(rq->engine->i915) &&
- (rq->engine->class == VIDEO_DECODE_CLASS ||
- rq->engine->class == VIDEO_ENHANCEMENT_CLASS)) {
- aux_inv = rq->engine->mask &
- ~GENMASK(_BCS(I915_MAX_BCS - 1), BCS0);
- if (aux_inv)
- cmd += 4;
- }
+ if (gen12_needs_ccs_aux_inv(rq->engine))
+ cmd += 8;
}
cs = intel_ring_begin(rq, cmd);
@@ -338,6 +385,10 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
cmd |= MI_INVALIDATE_TLB;
if (rq->engine->class == VIDEO_DECODE_CLASS)
cmd |= MI_INVALIDATE_BSD;
+
+ if (gen12_needs_ccs_aux_inv(rq->engine) &&
+ rq->engine->class == COPY_ENGINE_CLASS)
+ cmd |= MI_FLUSH_DW_CCS;
}
*cs++ = cmd;
@@ -345,14 +396,7 @@ int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode)
*cs++ = 0; /* upper addr */
*cs++ = 0; /* value */
- if (aux_inv) { /* hsdes: 1809175790 */
- if (rq->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else
- cs = gen12_emit_aux_table_inv(rq->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
+ cs = gen12_emit_aux_table_inv(rq->engine, cs);
if (mode & EMIT_INVALIDATE)
*cs++ = preparser_disable(false);
diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
index 655e5c00ddc2..867ba697aceb 100644
--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
+++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h
@@ -13,6 +13,7 @@
#include "intel_gt_regs.h"
#include "intel_gpu_commands.h"
+struct intel_engine_cs;
struct intel_gt;
struct i915_request;
@@ -46,28 +47,32 @@ u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs);
-u32 *gen12_emit_aux_table_inv(struct intel_gt *gt, u32 *cs, const i915_reg_t inv_reg);
+u32 *gen12_emit_aux_table_inv(struct intel_engine_cs *engine, u32 *cs);
static inline u32 *
-__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+__gen8_emit_pipe_control(u32 *batch, u32 bit_group_0,
+ u32 bit_group_1, u32 offset)
{
memset(batch, 0, 6 * sizeof(u32));
- batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0;
- batch[1] = flags1;
+ batch[0] = GFX_OP_PIPE_CONTROL(6) | bit_group_0;
+ batch[1] = bit_group_1;
batch[2] = offset;
return batch + 6;
}
-static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
+static inline u32 *gen8_emit_pipe_control(u32 *batch,
+ u32 bit_group_1, u32 offset)
{
- return __gen8_emit_pipe_control(batch, 0, flags, offset);
+ return __gen8_emit_pipe_control(batch, 0, bit_group_1, offset);
}
-static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset)
+static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 bit_group_0,
+ u32 bit_group_1, u32 offset)
{
- return __gen8_emit_pipe_control(batch, flags0, flags1, offset);
+ return __gen8_emit_pipe_control(batch, bit_group_0,
+ bit_group_1, offset);
}
static inline u32 *
diff --git a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
index 5d143e2a8db0..2bd8d98d2110 100644
--- a/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
+++ b/drivers/gpu/drm/i915/gt/intel_gpu_commands.h
@@ -121,6 +121,7 @@
#define MI_SEMAPHORE_TARGET(engine) ((engine)<<15)
#define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
#define MI_SEMAPHORE_WAIT_TOKEN MI_INSTR(0x1c, 3) /* GEN12+ */
+#define MI_SEMAPHORE_REGISTER_POLL (1 << 16)
#define MI_SEMAPHORE_POLL (1 << 15)
#define MI_SEMAPHORE_SAD_GT_SDD (0 << 12)
#define MI_SEMAPHORE_SAD_GTE_SDD (1 << 12)
@@ -299,6 +300,7 @@
#define PIPE_CONTROL_QW_WRITE (1<<14)
#define PIPE_CONTROL_POST_SYNC_OP_MASK (3<<14)
#define PIPE_CONTROL_DEPTH_STALL (1<<13)
+#define PIPE_CONTROL_CCS_FLUSH (1<<13) /* MTL+ */
#define PIPE_CONTROL_WRITE_FLUSH (1<<12)
#define PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH (1<<12) /* gen6+ */
#define PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE (1<<11) /* MBZ on ILK */
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index 718cb2c80f79..2cdfb2f713d0 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -332,9 +332,11 @@
#define GEN8_PRIVATE_PAT_HI _MMIO(0x40e0 + 4)
#define GEN10_PAT_INDEX(index) _MMIO(0x40e0 + (index) * 4)
#define BSD_HWS_PGA_GEN7 _MMIO(0x4180)
-#define GEN12_GFX_CCS_AUX_NV _MMIO(0x4208)
-#define GEN12_VD0_AUX_NV _MMIO(0x4218)
-#define GEN12_VD1_AUX_NV _MMIO(0x4228)
+
+#define GEN12_CCS_AUX_INV _MMIO(0x4208)
+#define GEN12_VD0_AUX_INV _MMIO(0x4218)
+#define GEN12_VE0_AUX_INV _MMIO(0x4238)
+#define GEN12_BCS0_AUX_INV _MMIO(0x4248)
#define GEN8_RTCR _MMIO(0x4260)
#define GEN8_M1TCR _MMIO(0x4264)
@@ -342,14 +344,12 @@
#define GEN8_BTCR _MMIO(0x426c)
#define GEN8_VTCR _MMIO(0x4270)
-#define GEN12_VD2_AUX_NV _MMIO(0x4298)
-#define GEN12_VD3_AUX_NV _MMIO(0x42a8)
-#define GEN12_VE0_AUX_NV _MMIO(0x4238)
-
#define BLT_HWS_PGA_GEN7 _MMIO(0x4280)
-#define GEN12_VE1_AUX_NV _MMIO(0x42b8)
+#define GEN12_VD2_AUX_INV _MMIO(0x4298)
+#define GEN12_CCS0_AUX_INV _MMIO(0x42c8)
#define AUX_INV REG_BIT(0)
+
#define VEBOX_HWS_PGA_GEN7 _MMIO(0x4380)
#define GEN12_AUX_ERR_DBG _MMIO(0x43f4)
diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c
index a4ec20aaafe2..9477c2422321 100644
--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
+++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
@@ -1364,10 +1364,7 @@ gen12_emit_indirect_ctx_rcs(const struct intel_context *ce, u32 *cs)
IS_DG2_G11(ce->engine->i915))
cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE, 0);
- /* hsdes: 1809175790 */
- if (!HAS_FLAT_CCS(ce->engine->i915))
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_GFX_CCS_AUX_NV);
+ cs = gen12_emit_aux_table_inv(ce->engine, cs);
/* Wa_16014892111 */
if (IS_MTL_GRAPHICS_STEP(ce->engine->i915, M, STEP_A0, STEP_B0) ||
@@ -1392,17 +1389,7 @@ gen12_emit_indirect_ctx_xcs(const struct intel_context *ce, u32 *cs)
PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE,
0);
- /* hsdes: 1809175790 */
- if (!HAS_FLAT_CCS(ce->engine->i915)) {
- if (ce->engine->class == VIDEO_DECODE_CLASS)
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VD0_AUX_NV);
- else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
- cs = gen12_emit_aux_table_inv(ce->engine->gt,
- cs, GEN12_VE0_AUX_NV);
- }
-
- return cs;
+ return gen12_emit_aux_table_inv(ce->engine, cs);
}
static void
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
index ee9f83af7cf6..477df260ae3a 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_slpc.c
@@ -470,12 +470,19 @@ int intel_guc_slpc_set_ignore_eff_freq(struct intel_guc_slpc *slpc, bool val)
ret = slpc_set_param(slpc,
SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
val);
- if (ret)
+ if (ret) {
guc_probe_error(slpc_to_guc(slpc), "Failed to set efficient freq(%d): %pe\n",
val, ERR_PTR(ret));
- else
+ } else {
slpc->ignore_eff_freq = val;
+ /* Set min to RPn when we disable efficient freq */
+ if (val)
+ ret = slpc_set_param(slpc,
+ SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
+ slpc->min_freq);
+ }
+
intel_runtime_pm_put(&i915->runtime_pm, wakeref);
mutex_unlock(&slpc->lock);
return ret;
@@ -602,9 +609,8 @@ static int slpc_set_softlimits(struct intel_guc_slpc *slpc)
return ret;
if (!slpc->min_freq_softlimit) {
- ret = intel_guc_slpc_get_min_freq(slpc, &slpc->min_freq_softlimit);
- if (unlikely(ret))
- return ret;
+ /* Min softlimit is initialized to RPn */
+ slpc->min_freq_softlimit = slpc->min_freq;
slpc_to_gt(slpc)->defaults.min_freq = slpc->min_freq_softlimit;
} else {
return intel_guc_slpc_set_min_freq(slpc,
@@ -755,6 +761,9 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
return ret;
}
+ /* Set cached value of ignore efficient freq */
+ intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
+
/* Revert SLPC min/max to softlimits if necessary */
ret = slpc_set_softlimits(slpc);
if (unlikely(ret)) {
@@ -765,9 +774,6 @@ int intel_guc_slpc_enable(struct intel_guc_slpc *slpc)
/* Set cached media freq ratio mode */
intel_guc_slpc_set_media_ratio_mode(slpc, slpc->media_ratio_mode);
- /* Set cached value of ignore efficient freq */
- intel_guc_slpc_set_ignore_eff_freq(slpc, slpc->ignore_eff_freq);
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
index ddd146265beb..fa70defcb5b2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c
@@ -26,6 +26,7 @@
* The kernel driver is only responsible for loading the HuC firmware and
* triggering its security authentication. This is done differently depending
* on the platform:
+ *
* - older platforms (from Gen9 to most Gen12s): the load is performed via DMA
* and the authentication via GuC
* - DG2: load and authentication are both performed via GSC.
@@ -33,6 +34,7 @@
* not-DG2 older platforms), while the authentication is done in 2-steps,
* a first auth for clear-media workloads via GuC and a second one for all
* workloads via GSC.
+ *
* On platforms where the GuC does the authentication, to correctly do so the
* HuC binary must be loaded before the GuC one.
* Loading the HuC is optional; however, not using the HuC might negatively
diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c
index 2a0438f12a14..af9afdb53c7f 100644
--- a/drivers/gpu/drm/i915/gvt/edid.c
+++ b/drivers/gpu/drm/i915/gvt/edid.c
@@ -491,7 +491,7 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
return;
}
- msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, reg);
+ msg_length = REG_FIELD_GET(DP_AUX_CH_CTL_MESSAGE_SIZE_MASK, value);
// check the msg in DATA register.
msg = vgpu_vreg(vgpu, offset + 4);
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index 8ef93889061a..5ec293011d99 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -449,8 +449,11 @@ int i915_active_add_request(struct i915_active *ref, struct i915_request *rq)
}
} while (unlikely(is_barrier(active)));
- if (!__i915_active_fence_set(active, fence))
+ fence = __i915_active_fence_set(active, fence);
+ if (!fence)
__i915_active_acquire(ref);
+ else
+ dma_fence_put(fence);
out:
i915_active_release(ref);
@@ -469,13 +472,9 @@ __i915_active_set_fence(struct i915_active *ref,
return NULL;
}
- rcu_read_lock();
prev = __i915_active_fence_set(active, fence);
- if (prev)
- prev = dma_fence_get_rcu(prev);
- else
+ if (!prev)
__i915_active_acquire(ref);
- rcu_read_unlock();
return prev;
}
@@ -1019,10 +1018,11 @@ void i915_request_add_active_barriers(struct i915_request *rq)
*
* Records the new @fence as the last active fence along its timeline in
* this active tracker, moving the tracking callbacks from the previous
- * fence onto this one. Returns the previous fence (if not already completed),
- * which the caller must ensure is executed before the new fence. To ensure
- * that the order of fences within the timeline of the i915_active_fence is
- * understood, it should be locked by the caller.
+ * fence onto this one. Gets and returns a reference to the previous fence
+ * (if not already completed), which the caller must put after making sure
+ * that it is executed before the new fence. To ensure that the order of
+ * fences within the timeline of the i915_active_fence is understood, it
+ * should be locked by the caller.
*/
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
@@ -1031,7 +1031,23 @@ __i915_active_fence_set(struct i915_active_fence *active,
struct dma_fence *prev;
unsigned long flags;
- if (fence == rcu_access_pointer(active->fence))
+ /*
+ * In case of fences embedded in i915_requests, their memory is
+ * SLAB_FAILSAFE_BY_RCU, then it can be reused right after release
+ * by new requests. Then, there is a risk of passing back a pointer
+ * to a new, completely unrelated fence that reuses the same memory
+ * while tracked under a different active tracker. Combined with i915
+ * perf open/close operations that build await dependencies between
+ * engine kernel context requests and user requests from different
+ * timelines, this can lead to dependency loops and infinite waits.
+ *
+ * As a countermeasure, we try to get a reference to the active->fence
+ * first, so if we succeed and pass it back to our user then it is not
+ * released and potentially reused by an unrelated request before the
+ * user has a chance to set up an await dependency on it.
+ */
+ prev = i915_active_fence_get(active);
+ if (fence == prev)
return fence;
GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
@@ -1040,27 +1056,56 @@ __i915_active_fence_set(struct i915_active_fence *active,
* Consider that we have two threads arriving (A and B), with
* C already resident as the active->fence.
*
- * A does the xchg first, and so it sees C or NULL depending
- * on the timing of the interrupt handler. If it is NULL, the
- * previous fence must have been signaled and we know that
- * we are first on the timeline. If it is still present,
- * we acquire the lock on that fence and serialise with the interrupt
- * handler, in the process removing it from any future interrupt
- * callback. A will then wait on C before executing (if present).
- *
- * As B is second, it sees A as the previous fence and so waits for
- * it to complete its transition and takes over the occupancy for
- * itself -- remembering that it needs to wait on A before executing.
+ * Both A and B have got a reference to C or NULL, depending on the
+ * timing of the interrupt handler. Let's assume that if A has got C
+ * then it has locked C first (before B).
*
* Note the strong ordering of the timeline also provides consistent
* nesting rules for the fence->lock; the inner lock is always the
* older lock.
*/
spin_lock_irqsave(fence->lock, flags);
- prev = xchg(__active_fence_slot(active), fence);
- if (prev) {
- GEM_BUG_ON(prev == fence);
+ if (prev)
spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+
+ /*
+ * A does the cmpxchg first, and so it sees C or NULL, as before, or
+ * something else, depending on the timing of other threads and/or
+ * interrupt handler. If not the same as before then A unlocks C if
+ * applicable and retries, starting from an attempt to get a new
+ * active->fence. Meanwhile, B follows the same path as A.
+ * Once A succeeds with cmpxch, B fails again, retires, gets A from
+ * active->fence, locks it as soon as A completes, and possibly
+ * succeeds with cmpxchg.
+ */
+ while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
+ if (prev) {
+ spin_unlock(prev->lock);
+ dma_fence_put(prev);
+ }
+ spin_unlock_irqrestore(fence->lock, flags);
+
+ prev = i915_active_fence_get(active);
+ GEM_BUG_ON(prev == fence);
+
+ spin_lock_irqsave(fence->lock, flags);
+ if (prev)
+ spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
+ }
+
+ /*
+ * If prev is NULL then the previous fence must have been signaled
+ * and we know that we are first on the timeline. If it is still
+ * present then, having the lock on that fence already acquired, we
+ * serialise with the interrupt handler, in the process of removing it
+ * from any future interrupt callback. A will then wait on C before
+ * executing (if present).
+ *
+ * As B is second, it sees A as the previous fence and so waits for
+ * it to complete its transition and takes over the occupancy for
+ * itself -- remembering that it needs to wait on A before executing.
+ */
+ if (prev) {
__list_del_entry(&active->cb.node);
spin_unlock(prev->lock); /* serialise with prev->cb_list */
}
@@ -1077,11 +1122,7 @@ int i915_active_fence_set(struct i915_active_fence *active,
int err = 0;
/* Must maintain timeline ordering wrt previous active requests */
- rcu_read_lock();
fence = __i915_active_fence_set(active, &rq->fence);
- if (fence) /* but the previous fence may not belong to that timeline! */
- fence = dma_fence_get_rcu(fence);
- rcu_read_unlock();
if (fence) {
err = i915_request_await_dma_fence(rq, fence);
dma_fence_put(fence);
diff --git a/drivers/gpu/drm/i915/i915_driver.c b/drivers/gpu/drm/i915/i915_driver.c
index 0ad0c5885ec2..7d8671fdf447 100644
--- a/drivers/gpu/drm/i915/i915_driver.c
+++ b/drivers/gpu/drm/i915/i915_driver.c
@@ -443,7 +443,6 @@ static int i915_pcode_init(struct drm_i915_private *i915)
static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
{
struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
- struct pci_dev *root_pdev;
int ret;
if (i915_inject_probe_failure(dev_priv))
@@ -557,15 +556,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
intel_bw_init_hw(dev_priv);
- /*
- * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
- * This should be totally removed when we handle the pci states properly
- * on runtime PM and on s2idle cases.
- */
- root_pdev = pcie_find_root_port(pdev);
- if (root_pdev)
- pci_d3cold_disable(root_pdev);
-
return 0;
err_opregion:
@@ -591,7 +581,6 @@ err_perf:
static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
{
struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
- struct pci_dev *root_pdev;
i915_perf_fini(dev_priv);
@@ -599,10 +588,6 @@ static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
if (pdev->msi_enabled)
pci_disable_msi(pdev);
-
- root_pdev = pcie_find_root_port(pdev);
- if (root_pdev)
- pci_d3cold_enable(root_pdev);
}
/**
@@ -1517,6 +1502,8 @@ static int intel_runtime_suspend(struct device *kdev)
{
struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+ struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+ struct pci_dev *root_pdev;
struct intel_gt *gt;
int ret, i;
@@ -1568,6 +1555,15 @@ static int intel_runtime_suspend(struct device *kdev)
drm_err(&dev_priv->drm,
"Unclaimed access detected prior to suspending\n");
+ /*
+ * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
+ * This should be totally removed when we handle the pci states properly
+ * on runtime PM.
+ */
+ root_pdev = pcie_find_root_port(pdev);
+ if (root_pdev)
+ pci_d3cold_disable(root_pdev);
+
rpm->suspended = true;
/*
@@ -1606,6 +1602,8 @@ static int intel_runtime_resume(struct device *kdev)
{
struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
+ struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+ struct pci_dev *root_pdev;
struct intel_gt *gt;
int ret, i;
@@ -1619,6 +1617,11 @@ static int intel_runtime_resume(struct device *kdev)
intel_opregion_notify_adapter(dev_priv, PCI_D0);
rpm->suspended = false;
+
+ root_pdev = pcie_find_root_port(pdev);
+ if (root_pdev)
+ pci_d3cold_enable(root_pdev);
+
if (intel_uncore_unclaimed_mmio(&dev_priv->uncore))
drm_dbg(&dev_priv->drm,
"Unclaimed access during suspend, bios?\n");
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 3d7a5db9833b..928975d5fe2f 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -38,9 +38,6 @@
#include "i915_reg.h"
#include "intel_pci_config.h"
-__diag_push();
-__diag_ignore_all("-Woverride-init", "Allow overriding inherited members");
-
#define PLATFORM(x) .platform = (x)
#define GEN(x) \
.__runtime.graphics.ip.ver = (x), \
@@ -846,8 +843,6 @@ static const struct intel_device_info mtl_info = {
#undef PLATFORM
-__diag_pop();
-
/*
* Make sure any device matches here are from most specific to most
* general. For example, since the Quanta match is based on the subsystem
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 7413c11fb562..49c6f1ff1128 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -4431,6 +4431,7 @@ static const struct i915_range mtl_oam_b_counters[] = {
static const struct i915_range xehp_oa_b_counters[] = {
{ .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */
{ .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */
+ {}
};
static const struct i915_range gen7_oa_mux_regs[] = {
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 894068bb37b6..833b73edefdb 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -1661,6 +1661,11 @@ __i915_request_ensure_parallel_ordering(struct i915_request *rq,
request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
+ /*
+ * Users have to put a reference potentially got by
+ * __i915_active_fence_set() to the returned request
+ * when no longer needed
+ */
return to_request(__i915_active_fence_set(&timeline->last_request,
&rq->fence));
}
@@ -1707,6 +1712,10 @@ __i915_request_ensure_ordering(struct i915_request *rq,
0);
}
+ /*
+ * Users have to put the reference to prev potentially got
+ * by __i915_active_fence_set() when no longer needed
+ */
return prev;
}
@@ -1760,6 +1769,8 @@ __i915_request_add_to_timeline(struct i915_request *rq)
prev = __i915_request_ensure_ordering(rq, timeline);
else
prev = __i915_request_ensure_parallel_ordering(rq, timeline);
+ if (prev)
+ i915_request_put(prev);
/*
* Make sure that no request gazumped us - if it was allocated after
diff --git a/drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c
index 5f26090b0c98..89585b31b985 100644
--- a/drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3/ipuv3-crtc.c
@@ -310,7 +310,7 @@ static void ipu_crtc_mode_set_nofb(struct drm_crtc *crtc)
dev_warn(ipu_crtc->dev, "8-pixel align hactive %d -> %d\n",
sig_cfg.mode.hactive, new_hactive);
- sig_cfg.mode.hfront_porch = new_hactive - sig_cfg.mode.hactive;
+ sig_cfg.mode.hfront_porch -= new_hactive - sig_cfg.mode.hactive;
sig_cfg.mode.hactive = new_hactive;
}
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index a99310b68793..bbb1bf33f98e 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -89,7 +89,7 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit
* since we've already mapped it once in
* submit_reloc()
*/
- if (WARN_ON(!ptr))
+ if (WARN_ON(IS_ERR_OR_NULL(ptr)))
return;
for (i = 0; i < dwords; i++) {
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
index 790f55e24533..e788ed72eb0d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.h
@@ -206,7 +206,7 @@ static const struct a6xx_shader_block {
SHADER(A6XX_SP_LB_3_DATA, 0x800),
SHADER(A6XX_SP_LB_4_DATA, 0x800),
SHADER(A6XX_SP_LB_5_DATA, 0x200),
- SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x2000),
+ SHADER(A6XX_SP_CB_BINDLESS_DATA, 0x800),
SHADER(A6XX_SP_CB_LEGACY_DATA, 0x280),
SHADER(A6XX_SP_UAV_DATA, 0x80),
SHADER(A6XX_SP_INST_TAG, 0x80),
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index cb94cfd137a8..ce8d0b2475bf 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -369,8 +369,6 @@ static const struct adreno_info gpulist[] = {
.hwcg = a640_hwcg,
}, {
.rev = ADRENO_REV(6, 9, 0, ANY_ID),
- .revn = 690,
- .name = "A690",
.fw = {
[ADRENO_FW_SQE] = "a660_sqe.fw",
[ADRENO_FW_GMU] = "a690_gmu.bin",
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index d8c9e8cc3753..845019891ad1 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -149,7 +149,8 @@ bool adreno_cmp_rev(struct adreno_rev rev1, struct adreno_rev rev2);
static inline bool adreno_is_revn(const struct adreno_gpu *gpu, uint32_t revn)
{
- WARN_ON_ONCE(!gpu->revn);
+ /* revn can be zero, but if not is set at same time as info */
+ WARN_ON_ONCE(!gpu->info);
return gpu->revn == revn;
}
@@ -161,14 +162,16 @@ static inline bool adreno_has_gmu_wrapper(const struct adreno_gpu *gpu)
static inline bool adreno_is_a2xx(const struct adreno_gpu *gpu)
{
- WARN_ON_ONCE(!gpu->revn);
+ /* revn can be zero, but if not is set at same time as info */
+ WARN_ON_ONCE(!gpu->info);
return (gpu->revn < 300);
}
static inline bool adreno_is_a20x(const struct adreno_gpu *gpu)
{
- WARN_ON_ONCE(!gpu->revn);
+ /* revn can be zero, but if not is set at same time as info */
+ WARN_ON_ONCE(!gpu->info);
return (gpu->revn < 210);
}
@@ -307,7 +310,8 @@ static inline int adreno_is_a680(const struct adreno_gpu *gpu)
static inline int adreno_is_a690(const struct adreno_gpu *gpu)
{
- return adreno_is_revn(gpu, 690);
+ /* The order of args is important here to handle ANY_ID correctly */
+ return adreno_cmp_rev(ADRENO_REV(6, 9, 0, ANY_ID), gpu->rev);
};
/* check for a615, a616, a618, a619 or any derivatives */
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
index e3795995e145..29bb8ee2bc26 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_core_perf.h
@@ -15,19 +15,6 @@
#define DPU_PERF_DEFAULT_MAX_CORE_CLK_RATE 412500000
/**
- * enum dpu_core_perf_data_bus_id - data bus identifier
- * @DPU_CORE_PERF_DATA_BUS_ID_MNOC: DPU/MNOC data bus
- * @DPU_CORE_PERF_DATA_BUS_ID_LLCC: MNOC/LLCC data bus
- * @DPU_CORE_PERF_DATA_BUS_ID_EBI: LLCC/EBI data bus
- */
-enum dpu_core_perf_data_bus_id {
- DPU_CORE_PERF_DATA_BUS_ID_MNOC,
- DPU_CORE_PERF_DATA_BUS_ID_LLCC,
- DPU_CORE_PERF_DATA_BUS_ID_EBI,
- DPU_CORE_PERF_DATA_BUS_ID_MAX,
-};
-
-/**
* struct dpu_core_perf_params - definition of performance parameters
* @max_per_pipe_ib: maximum instantaneous bandwidth request
* @bw_ctl: arbitrated bandwidth request
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
index c278fb9d2b5b..86182c734606 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
@@ -51,7 +51,7 @@
static const u32 fetch_tbl[SSPP_MAX] = {CTL_INVALID_BIT, 16, 17, 18, 19,
CTL_INVALID_BIT, CTL_INVALID_BIT, CTL_INVALID_BIT, CTL_INVALID_BIT, 0,
- 1, 2, 3, CTL_INVALID_BIT, CTL_INVALID_BIT};
+ 1, 2, 3, 4, 5};
static int _mixer_stages(const struct dpu_lm_cfg *mixer, int count,
enum dpu_lm lm)
@@ -198,6 +198,12 @@ static void dpu_hw_ctl_update_pending_flush_sspp(struct dpu_hw_ctl *ctx,
case SSPP_DMA3:
ctx->pending_flush_mask |= BIT(25);
break;
+ case SSPP_DMA4:
+ ctx->pending_flush_mask |= BIT(13);
+ break;
+ case SSPP_DMA5:
+ ctx->pending_flush_mask |= BIT(14);
+ break;
case SSPP_CURSOR0:
ctx->pending_flush_mask |= BIT(22);
break;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
index 3ce45b023e63..31deda1c664a 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
@@ -1087,8 +1087,6 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs = {
const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs = {
.has_phy_lane = true,
- .regulator_data = dsi_phy_14nm_17mA_regulators,
- .num_regulators = ARRAY_SIZE(dsi_phy_14nm_17mA_regulators),
.ops = {
.enable = dsi_14nm_phy_enable,
.disable = dsi_14nm_phy_disable,
diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c
index 96599ec3eb78..1a5d4f1c8b42 100644
--- a/drivers/gpu/drm/msm/msm_fence.c
+++ b/drivers/gpu/drm/msm/msm_fence.c
@@ -191,6 +191,12 @@ msm_fence_init(struct dma_fence *fence, struct msm_fence_context *fctx)
f->fctx = fctx;
+ /*
+ * Until this point, the fence was just some pre-allocated memory,
+ * no-one should have taken a reference to it yet.
+ */
+ WARN_ON(kref_read(&fence->refcount));
+
dma_fence_init(&f->base, &msm_fence_ops, &fctx->spinlock,
fctx->context, ++fctx->last_fence);
}
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 3f1aa4de3b87..63c96416e183 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -86,7 +86,19 @@ void __msm_gem_submit_destroy(struct kref *kref)
}
dma_fence_put(submit->user_fence);
- dma_fence_put(submit->hw_fence);
+
+ /*
+ * If the submit is freed before msm_job_run(), then hw_fence is
+ * just some pre-allocated memory, not a reference counted fence.
+ * Once the job runs and the hw_fence is initialized, it will
+ * have a refcount of at least one, since the submit holds a ref
+ * to the hw_fence.
+ */
+ if (kref_read(&submit->hw_fence->refcount) == 0) {
+ kfree(submit->hw_fence);
+ } else {
+ dma_fence_put(submit->hw_fence);
+ }
put_pid(submit->pid);
msm_submitqueue_put(submit->queue);
@@ -889,7 +901,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
* after the job is armed
*/
if ((args->flags & MSM_SUBMIT_FENCE_SN_IN) &&
- idr_find(&queue->fence_idr, args->fence)) {
+ (!args->fence || idr_find(&queue->fence_idr, args->fence))) {
spin_unlock(&queue->idr_lock);
idr_preload_end();
ret = -EINVAL;
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index 05648c910c68..798bd4f3b662 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -189,6 +189,7 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss)
#define UBWC_2_0 0x20000000
#define UBWC_3_0 0x30000000
#define UBWC_4_0 0x40000000
+#define UBWC_4_3 0x40030000
static void msm_mdss_setup_ubwc_dec_20(struct msm_mdss *msm_mdss)
{
@@ -227,7 +228,10 @@ static void msm_mdss_setup_ubwc_dec_40(struct msm_mdss *msm_mdss)
writel_relaxed(1, msm_mdss->mmio + UBWC_CTRL_2);
writel_relaxed(0, msm_mdss->mmio + UBWC_PREDICTION_MODE);
} else {
- writel_relaxed(2, msm_mdss->mmio + UBWC_CTRL_2);
+ if (data->ubwc_dec_version == UBWC_4_3)
+ writel_relaxed(3, msm_mdss->mmio + UBWC_CTRL_2);
+ else
+ writel_relaxed(2, msm_mdss->mmio + UBWC_CTRL_2);
writel_relaxed(1, msm_mdss->mmio + UBWC_PREDICTION_MODE);
}
}
@@ -271,6 +275,7 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
msm_mdss_setup_ubwc_dec_30(msm_mdss);
break;
case UBWC_4_0:
+ case UBWC_4_3:
msm_mdss_setup_ubwc_dec_40(msm_mdss);
break;
default:
@@ -569,6 +574,16 @@ static const struct msm_mdss_data sm8250_data = {
.macrotile_mode = 1,
};
+static const struct msm_mdss_data sm8550_data = {
+ .ubwc_version = UBWC_4_0,
+ .ubwc_dec_version = UBWC_4_3,
+ .ubwc_swizzle = 6,
+ .ubwc_static = 1,
+ /* TODO: highest_bank_bit = 2 for LP_DDR4 */
+ .highest_bank_bit = 3,
+ .macrotile_mode = 1,
+};
+
static const struct of_device_id mdss_dt_match[] = {
{ .compatible = "qcom,mdss" },
{ .compatible = "qcom,msm8998-mdss" },
@@ -585,7 +600,7 @@ static const struct of_device_id mdss_dt_match[] = {
{ .compatible = "qcom,sm8250-mdss", .data = &sm8250_data },
{ .compatible = "qcom,sm8350-mdss", .data = &sm8250_data },
{ .compatible = "qcom,sm8450-mdss", .data = &sm8250_data },
- { .compatible = "qcom,sm8550-mdss", .data = &sm8250_data },
+ { .compatible = "qcom,sm8550-mdss", .data = &sm8550_data },
{}
};
MODULE_DEVICE_TABLE(of, mdss_dt_match);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 38d3fad0d97a..1bec819da876 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -1877,6 +1877,8 @@ nv50_pior_destroy(struct drm_encoder *encoder)
nvif_outp_dtor(&nv_encoder->outp);
drm_encoder_cleanup(encoder);
+
+ mutex_destroy(&nv_encoder->dp.hpd_irq_lock);
kfree(encoder);
}
@@ -1921,6 +1923,8 @@ nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
nv_encoder->i2c = ddc;
nv_encoder->aux = aux;
+ mutex_init(&nv_encoder->dp.hpd_irq_lock);
+
encoder = to_drm_encoder(nv_encoder);
encoder->possible_crtcs = dcbe->heads;
encoder->possible_clones = 0;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
index 40a1065ae626..ef441dfdea09 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
@@ -16,7 +16,7 @@ struct nvkm_i2c_bus {
const struct nvkm_i2c_bus_func *func;
struct nvkm_i2c_pad *pad;
#define NVKM_I2C_BUS_CCB(n) /* 'n' is ccb index */ (n)
-#define NVKM_I2C_BUS_EXT(n) /* 'n' is dcb external encoder type */ ((n) + 0x100)
+#define NVKM_I2C_BUS_EXT(n) /* 'n' is dcb external encoder type */ ((n) + 0x10)
#define NVKM_I2C_BUS_PRI /* ccb primary comm. port */ -1
#define NVKM_I2C_BUS_SEC /* ccb secondary comm. port */ -2
int id;
@@ -38,7 +38,7 @@ struct nvkm_i2c_aux {
const struct nvkm_i2c_aux_func *func;
struct nvkm_i2c_pad *pad;
#define NVKM_I2C_AUX_CCB(n) /* 'n' is ccb index */ (n)
-#define NVKM_I2C_AUX_EXT(n) /* 'n' is dcb external encoder type */ ((n) + 0x100)
+#define NVKM_I2C_AUX_EXT(n) /* 'n' is dcb external encoder type */ ((n) + 0x10)
int id;
struct mutex mutex;
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index f75c6f09dd2a..622f6eb9a8bf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -967,7 +967,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
/* Determine display colour depth for everything except LVDS now,
* DP requires this before mode_valid() is called.
*/
- if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS && nv_connector->native_mode)
+ if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS)
nouveau_connector_detect_depth(connector);
/* Find the native mode if this is a digital panel, if we didn't
@@ -1408,8 +1408,7 @@ nouveau_connector_create(struct drm_device *dev,
ret = nvif_conn_ctor(&disp->disp, nv_connector->base.name, nv_connector->index,
&nv_connector->conn);
if (ret) {
- kfree(nv_connector);
- return ERR_PTR(ret);
+ goto drm_conn_err;
}
ret = nvif_conn_event_ctor(&nv_connector->conn, "kmsHotplug",
@@ -1426,8 +1425,7 @@ nouveau_connector_create(struct drm_device *dev,
if (ret) {
nvif_event_dtor(&nv_connector->hpd);
nvif_conn_dtor(&nv_connector->conn);
- kfree(nv_connector);
- return ERR_PTR(ret);
+ goto drm_conn_err;
}
}
}
@@ -1475,4 +1473,9 @@ nouveau_connector_create(struct drm_device *dev,
drm_connector_register(connector);
return connector;
+
+drm_conn_err:
+ drm_connector_cleanup(connector);
+ kfree(nv_connector);
+ return ERR_PTR(ret);
}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
index 40c8ea43c42f..b8ac66b4a2c4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
@@ -26,6 +26,8 @@
#include "head.h"
#include "ior.h"
+#include <drm/display/drm_dp.h>
+
#include <subdev/bios.h>
#include <subdev/bios/init.h>
#include <subdev/gpio.h>
@@ -634,6 +636,50 @@ nvkm_dp_enable_supported_link_rates(struct nvkm_outp *outp)
return outp->dp.rates != 0;
}
+/* XXX: This is a big fat hack, and this is just drm_dp_read_dpcd_caps()
+ * converted to work inside nvkm. This is a temporary holdover until we start
+ * passing the drm_dp_aux device through NVKM
+ */
+static int
+nvkm_dp_read_dpcd_caps(struct nvkm_outp *outp)
+{
+ struct nvkm_i2c_aux *aux = outp->dp.aux;
+ u8 dpcd_ext[DP_RECEIVER_CAP_SIZE];
+ int ret;
+
+ ret = nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, DP_RECEIVER_CAP_SIZE);
+ if (ret < 0)
+ return ret;
+
+ /*
+ * Prior to DP1.3 the bit represented by
+ * DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT was reserved.
+ * If it is set DP_DPCD_REV at 0000h could be at a value less than
+ * the true capability of the panel. The only way to check is to
+ * then compare 0000h and 2200h.
+ */
+ if (!(outp->dp.dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+ DP_EXTENDED_RECEIVER_CAP_FIELD_PRESENT))
+ return 0;
+
+ ret = nvkm_rdaux(aux, DP_DP13_DPCD_REV, dpcd_ext, sizeof(dpcd_ext));
+ if (ret < 0)
+ return ret;
+
+ if (outp->dp.dpcd[DP_DPCD_REV] > dpcd_ext[DP_DPCD_REV]) {
+ OUTP_DBG(outp, "Extended DPCD rev less than base DPCD rev (%d > %d)\n",
+ outp->dp.dpcd[DP_DPCD_REV], dpcd_ext[DP_DPCD_REV]);
+ return 0;
+ }
+
+ if (!memcmp(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext)))
+ return 0;
+
+ memcpy(outp->dp.dpcd, dpcd_ext, sizeof(dpcd_ext));
+
+ return 0;
+}
+
void
nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
{
@@ -689,7 +735,7 @@ nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
memset(outp->dp.lttpr, 0x00, sizeof(outp->dp.lttpr));
}
- if (!nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dp.dpcd, sizeof(outp->dp.dpcd))) {
+ if (!nvkm_dp_read_dpcd_caps(outp)) {
const u8 rates[] = { 0x1e, 0x14, 0x0a, 0x06, 0 };
const u8 *rate;
int rate_max;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
index dad942be6679..46b057fe1412 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
@@ -81,20 +81,29 @@ nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_
return -ENOSYS;
list_for_each_entry(outp, &conn->disp->outps, head) {
- if (outp->info.connector == conn->index && outp->dp.aux) {
- if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG ) bits |= NVKM_I2C_PLUG;
- if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_I2C_UNPLUG;
- if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ ) bits |= NVKM_I2C_IRQ;
+ if (outp->info.connector == conn->index)
+ break;
+ }
- return nvkm_uevent_add(uevent, &device->i2c->event, outp->dp.aux->id, bits,
- nvkm_uconn_uevent_aux);
- }
+ if (&outp->head == &conn->disp->outps)
+ return -EINVAL;
+
+ if (outp->dp.aux && !outp->info.location) {
+ if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG ) bits |= NVKM_I2C_PLUG;
+ if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_I2C_UNPLUG;
+ if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ ) bits |= NVKM_I2C_IRQ;
+
+ return nvkm_uevent_add(uevent, &device->i2c->event, outp->dp.aux->id, bits,
+ nvkm_uconn_uevent_aux);
}
if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG ) bits |= NVKM_GPIO_HI;
if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_GPIO_LO;
- if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ)
- return -EINVAL;
+ if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ) {
+ /* TODO: support DP IRQ on ANX9805 and remove this hack. */
+ if (!outp->info.location)
+ return -EINVAL;
+ }
return nvkm_uevent_add(uevent, &device->gpio->event, conn->info.hpd, bits,
nvkm_uconn_uevent_gpio);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index 00dbeda7e346..de161e7a04aa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -117,6 +117,7 @@ void gk104_grctx_generate_r418800(struct gf100_gr *);
extern const struct gf100_grctx_func gk110_grctx;
void gk110_grctx_generate_r419eb0(struct gf100_gr *);
+void gk110_grctx_generate_r419f78(struct gf100_gr *);
extern const struct gf100_grctx_func gk110b_grctx;
extern const struct gf100_grctx_func gk208_grctx;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 94233d0119df..52a234b1ef01 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -906,7 +906,9 @@ static void
gk104_grctx_generate_r419f78(struct gf100_gr *gr)
{
struct nvkm_device *device = gr->base.engine.subdev.device;
- nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000);
+
+ /* bit 3 set disables loads in fp helper invocations, we need it enabled */
+ nvkm_mask(device, 0x419f78, 0x00000009, 0x00000000);
}
void
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
index 4391458e1fb2..3acdd9eeb74a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
@@ -820,6 +820,15 @@ gk110_grctx_generate_r419eb0(struct gf100_gr *gr)
nvkm_mask(device, 0x419eb0, 0x00001000, 0x00001000);
}
+void
+gk110_grctx_generate_r419f78(struct gf100_gr *gr)
+{
+ struct nvkm_device *device = gr->base.engine.subdev.device;
+
+ /* bit 3 set disables loads in fp helper invocations, we need it enabled */
+ nvkm_mask(device, 0x419f78, 0x00000008, 0x00000000);
+}
+
const struct gf100_grctx_func
gk110_grctx = {
.main = gf100_grctx_generate_main,
@@ -854,4 +863,5 @@ gk110_grctx = {
.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
.r418800 = gk104_grctx_generate_r418800,
.r419eb0 = gk110_grctx_generate_r419eb0,
+ .r419f78 = gk110_grctx_generate_r419f78,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
index 7b9a34f9ec3c..5597e87624ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
@@ -103,4 +103,5 @@ gk110b_grctx = {
.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
.r418800 = gk104_grctx_generate_r418800,
.r419eb0 = gk110_grctx_generate_r419eb0,
+ .r419f78 = gk110_grctx_generate_r419f78,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
index c78d07a8bb7d..612656496541 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
@@ -568,4 +568,5 @@ gk208_grctx = {
.dist_skip_table = gf117_grctx_generate_dist_skip_table,
.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
.r418800 = gk104_grctx_generate_r418800,
+ .r419f78 = gk110_grctx_generate_r419f78,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index beac66eb2a80..9906974ac3f0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -988,4 +988,5 @@ gm107_grctx = {
.r406500 = gm107_grctx_generate_r406500,
.gpc_tpc_nr = gk104_grctx_generate_gpc_tpc_nr,
.r419e00 = gm107_grctx_generate_r419e00,
+ .r419f78 = gk110_grctx_generate_r419f78,
};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
index 3b6c8100a242..a7775aa18541 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
@@ -206,19 +206,6 @@ tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack);
}
-int
-tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
-{
- int ret;
-
- ret = gm200_gr_load(gr, ver, fwif);
- if (ret)
- return ret;
-
- return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid,
- &gr->bundle_veid);
-}
-
static const struct gf100_gr_fwif
tu102_gr_fwif[] = {
{ 0, gm200_gr_load, &tu102_gr, &gp108_gr_fecs_acr, &gp108_gr_gpccs_acr },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
index 976539de4220..731b2f68d3db 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
@@ -260,10 +260,11 @@ nvkm_i2c_new_(const struct nvkm_i2c_func *func, struct nvkm_device *device,
{
struct nvkm_bios *bios = device->bios;
struct nvkm_i2c *i2c;
+ struct nvkm_i2c_aux *aux;
struct dcb_i2c_entry ccbE;
struct dcb_output dcbE;
u8 ver, hdr;
- int ret, i;
+ int ret, i, ids;
if (!(i2c = *pi2c = kzalloc(sizeof(*i2c), GFP_KERNEL)))
return -ENOMEM;
@@ -406,5 +407,11 @@ nvkm_i2c_new_(const struct nvkm_i2c_func *func, struct nvkm_device *device,
}
}
- return nvkm_event_init(&nvkm_i2c_intr_func, &i2c->subdev, 4, i, &i2c->event);
+ ids = 0;
+ list_for_each_entry(aux, &i2c->aux, head)
+ ids = max(ids, aux->id + 1);
+ if (!ids)
+ return 0;
+
+ return nvkm_event_init(&nvkm_i2c_intr_func, &i2c->subdev, 4, ids, &i2c->event);
}
diff --git a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
index 8f4f137a2af6..213008499caa 100644
--- a/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
+++ b/drivers/gpu/drm/panel/panel-jdi-lt070me05000.c
@@ -404,38 +404,30 @@ static int jdi_panel_add(struct jdi_panel *jdi)
ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(jdi->supplies),
jdi->supplies);
- if (ret < 0) {
- dev_err(dev, "failed to init regulator, ret=%d\n", ret);
- return ret;
- }
+ if (ret < 0)
+ return dev_err_probe(dev, ret,
+ "failed to init regulator, ret=%d\n", ret);
jdi->enable_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_LOW);
if (IS_ERR(jdi->enable_gpio)) {
- ret = PTR_ERR(jdi->enable_gpio);
- dev_err(dev, "cannot get enable-gpio %d\n", ret);
- return ret;
+ return dev_err_probe(dev, PTR_ERR(jdi->enable_gpio),
+ "cannot get enable-gpio %d\n", ret);
}
jdi->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_HIGH);
- if (IS_ERR(jdi->reset_gpio)) {
- ret = PTR_ERR(jdi->reset_gpio);
- dev_err(dev, "cannot get reset-gpios %d\n", ret);
- return ret;
- }
+ if (IS_ERR(jdi->reset_gpio))
+ return dev_err_probe(dev, PTR_ERR(jdi->reset_gpio),
+ "cannot get reset-gpios %d\n", ret);
jdi->dcdc_en_gpio = devm_gpiod_get(dev, "dcdc-en", GPIOD_OUT_LOW);
- if (IS_ERR(jdi->dcdc_en_gpio)) {
- ret = PTR_ERR(jdi->dcdc_en_gpio);
- dev_err(dev, "cannot get dcdc-en-gpio %d\n", ret);
- return ret;
- }
+ if (IS_ERR(jdi->dcdc_en_gpio))
+ return dev_err_probe(dev, PTR_ERR(jdi->dcdc_en_gpio),
+ "cannot get dcdc-en-gpio %d\n", ret);
jdi->backlight = drm_panel_create_dsi_backlight(jdi->dsi);
- if (IS_ERR(jdi->backlight)) {
- ret = PTR_ERR(jdi->backlight);
- dev_err(dev, "failed to register backlight %d\n", ret);
- return ret;
- }
+ if (IS_ERR(jdi->backlight))
+ return dev_err_probe(dev, PTR_ERR(jdi->backlight),
+ "failed to register backlight %d\n", ret);
drm_panel_init(&jdi->base, &jdi->dsi->dev, &jdi_panel_funcs,
DRM_MODE_CONNECTOR_DSI);
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
index 102e1fc7ee38..be4ec5bb5223 100644
--- a/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
+++ b/drivers/gpu/drm/panel/panel-samsung-s6d7aa0.c
@@ -569,6 +569,7 @@ static const struct of_device_id s6d7aa0_of_match[] = {
},
{ /* sentinel */ }
};
+MODULE_DEVICE_TABLE(of, s6d7aa0_of_match);
static struct mipi_dsi_driver s6d7aa0_driver = {
.probe = s6d7aa0_probe,
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index aaba36b3a674..b38d0e95cd54 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -999,21 +999,21 @@ static const struct panel_desc auo_g104sn02 = {
.connector_type = DRM_MODE_CONNECTOR_LVDS,
};
-static const struct drm_display_mode auo_g121ean01_mode = {
- .clock = 66700,
- .hdisplay = 1280,
- .hsync_start = 1280 + 58,
- .hsync_end = 1280 + 58 + 8,
- .htotal = 1280 + 58 + 8 + 70,
- .vdisplay = 800,
- .vsync_start = 800 + 6,
- .vsync_end = 800 + 6 + 4,
- .vtotal = 800 + 6 + 4 + 10,
+static const struct display_timing auo_g121ean01_timing = {
+ .pixelclock = { 60000000, 74400000, 90000000 },
+ .hactive = { 1280, 1280, 1280 },
+ .hfront_porch = { 20, 50, 100 },
+ .hback_porch = { 20, 50, 100 },
+ .hsync_len = { 30, 100, 200 },
+ .vactive = { 800, 800, 800 },
+ .vfront_porch = { 2, 10, 25 },
+ .vback_porch = { 2, 10, 25 },
+ .vsync_len = { 4, 18, 50 },
};
static const struct panel_desc auo_g121ean01 = {
- .modes = &auo_g121ean01_mode,
- .num_modes = 1,
+ .timings = &auo_g121ean01_timing,
+ .num_timings = 1,
.bpc = 8,
.size = {
.width = 261,
diff --git a/drivers/gpu/drm/panfrost/panfrost_devfreq.c b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
index 58dfb15a8757..e78de99e9933 100644
--- a/drivers/gpu/drm/panfrost/panfrost_devfreq.c
+++ b/drivers/gpu/drm/panfrost/panfrost_devfreq.c
@@ -96,7 +96,7 @@ static int panfrost_read_speedbin(struct device *dev)
* keep going without it; any other error means that we are
* supposed to read the bin value, but we failed doing so.
*/
- if (ret != -ENOENT) {
+ if (ret != -ENOENT && ret != -EOPNOTSUPP) {
DRM_DEV_ERROR(dev, "Cannot read speed-bin (%d).", ret);
return ret;
}
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index ea993d7162e8..307a890fde13 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -310,7 +310,7 @@ int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
u32 domain,
size_t size,
struct qxl_surface *surf,
- struct qxl_bo **qobj,
+ struct drm_gem_object **gobj,
uint32_t *handle);
void qxl_gem_object_free(struct drm_gem_object *gobj);
int qxl_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_priv);
diff --git a/drivers/gpu/drm/qxl/qxl_dumb.c b/drivers/gpu/drm/qxl/qxl_dumb.c
index d636ba685451..17df5c7ccf69 100644
--- a/drivers/gpu/drm/qxl/qxl_dumb.c
+++ b/drivers/gpu/drm/qxl/qxl_dumb.c
@@ -34,6 +34,7 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
{
struct qxl_device *qdev = to_qxl(dev);
struct qxl_bo *qobj;
+ struct drm_gem_object *gobj;
uint32_t handle;
int r;
struct qxl_surface surf;
@@ -62,11 +63,13 @@ int qxl_mode_dumb_create(struct drm_file *file_priv,
r = qxl_gem_object_create_with_handle(qdev, file_priv,
QXL_GEM_DOMAIN_CPU,
- args->size, &surf, &qobj,
+ args->size, &surf, &gobj,
&handle);
if (r)
return r;
+ qobj = gem_to_qxl_bo(gobj);
qobj->is_dumb = true;
+ drm_gem_object_put(gobj);
args->pitch = pitch;
args->handle = handle;
return 0;
diff --git a/drivers/gpu/drm/qxl/qxl_gem.c b/drivers/gpu/drm/qxl/qxl_gem.c
index a08da0bd9098..fc5e3763c359 100644
--- a/drivers/gpu/drm/qxl/qxl_gem.c
+++ b/drivers/gpu/drm/qxl/qxl_gem.c
@@ -72,32 +72,41 @@ int qxl_gem_object_create(struct qxl_device *qdev, int size,
return 0;
}
+/*
+ * If the caller passed a valid gobj pointer, it is responsible to call
+ * drm_gem_object_put() when it no longer needs to acess the object.
+ *
+ * If gobj is NULL, it is handled internally.
+ */
int qxl_gem_object_create_with_handle(struct qxl_device *qdev,
struct drm_file *file_priv,
u32 domain,
size_t size,
struct qxl_surface *surf,
- struct qxl_bo **qobj,
+ struct drm_gem_object **gobj,
uint32_t *handle)
{
- struct drm_gem_object *gobj;
int r;
+ struct drm_gem_object *local_gobj;
- BUG_ON(!qobj);
BUG_ON(!handle);
r = qxl_gem_object_create(qdev, size, 0,
domain,
false, false, surf,
- &gobj);
+ &local_gobj);
if (r)
return -ENOMEM;
- r = drm_gem_handle_create(file_priv, gobj, handle);
+ r = drm_gem_handle_create(file_priv, local_gobj, handle);
if (r)
return r;
- /* drop reference from allocate - handle holds it now */
- *qobj = gem_to_qxl_bo(gobj);
- drm_gem_object_put(gobj);
+
+ if (gobj)
+ *gobj = local_gobj;
+ else
+ /* drop reference from allocate - handle holds it now */
+ drm_gem_object_put(local_gobj);
+
return 0;
}
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 30f58b21372a..dd0f834d881c 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -38,7 +38,6 @@ int qxl_alloc_ioctl(struct drm_device *dev, void *data, struct drm_file *file_pr
struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_alloc *qxl_alloc = data;
int ret;
- struct qxl_bo *qobj;
uint32_t handle;
u32 domain = QXL_GEM_DOMAIN_VRAM;
@@ -50,7 +49,7 @@ int qxl_alloc_ioctl(struct drm_device *dev, void *data, struct drm_file *file_pr
domain,
qxl_alloc->size,
NULL,
- &qobj, &handle);
+ NULL, &handle);
if (ret) {
DRM_ERROR("%s: failed to create gem ret=%d\n",
__func__, ret);
@@ -386,7 +385,6 @@ int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
{
struct qxl_device *qdev = to_qxl(dev);
struct drm_qxl_alloc_surf *param = data;
- struct qxl_bo *qobj;
int handle;
int ret;
int size, actual_stride;
@@ -406,7 +404,7 @@ int qxl_alloc_surf_ioctl(struct drm_device *dev, void *data, struct drm_file *fi
QXL_GEM_DOMAIN_SURFACE,
size,
&surf,
- &qobj, &handle);
+ NULL, &handle);
if (ret) {
DRM_ERROR("%s: failed to create gem ret=%d\n",
__func__, ret);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index a530ecc4d207..bf34498c1b6d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -833,12 +833,12 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
* need align with 2 pixel.
*/
if (fb->format->is_yuv && ((new_plane_state->src.x1 >> 16) % 2)) {
- DRM_ERROR("Invalid Source: Yuv format not support odd xpos\n");
+ DRM_DEBUG_KMS("Invalid Source: Yuv format not support odd xpos\n");
return -EINVAL;
}
if (fb->format->is_yuv && new_plane_state->rotation & DRM_MODE_REFLECT_Y) {
- DRM_ERROR("Invalid Source: Yuv format does not support this rotation\n");
+ DRM_DEBUG_KMS("Invalid Source: Yuv format does not support this rotation\n");
return -EINVAL;
}
@@ -846,7 +846,7 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
struct vop *vop = to_vop(crtc);
if (!vop->data->afbc) {
- DRM_ERROR("vop does not support AFBC\n");
+ DRM_DEBUG_KMS("vop does not support AFBC\n");
return -EINVAL;
}
@@ -855,15 +855,16 @@ static int vop_plane_atomic_check(struct drm_plane *plane,
return ret;
if (new_plane_state->src.x1 || new_plane_state->src.y1) {
- DRM_ERROR("AFBC does not support offset display, xpos=%d, ypos=%d, offset=%d\n",
- new_plane_state->src.x1,
- new_plane_state->src.y1, fb->offsets[0]);
+ DRM_DEBUG_KMS("AFBC does not support offset display, " \
+ "xpos=%d, ypos=%d, offset=%d\n",
+ new_plane_state->src.x1, new_plane_state->src.y1,
+ fb->offsets[0]);
return -EINVAL;
}
if (new_plane_state->rotation && new_plane_state->rotation != DRM_MODE_ROTATE_0) {
- DRM_ERROR("No rotation support in AFBC, rotation=%d\n",
- new_plane_state->rotation);
+ DRM_DEBUG_KMS("No rotation support in AFBC, rotation=%d\n",
+ new_plane_state->rotation);
return -EINVAL;
}
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 7139a522b2f3..54e3083076b7 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -519,7 +519,8 @@ static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo,
if (bo->pin_count) {
*locked = false;
- *busy = false;
+ if (busy)
+ *busy = false;
return false;
}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 82094c137855..c43853597776 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -497,10 +497,9 @@ static int vmw_user_bo_synccpu_release(struct drm_file *filp,
if (!(flags & drm_vmw_synccpu_allow_cs)) {
atomic_dec(&vmw_bo->cpu_writers);
}
- ttm_bo_put(&vmw_bo->tbo);
+ vmw_user_bo_unref(vmw_bo);
}
- drm_gem_object_put(&vmw_bo->tbo.base);
return ret;
}
@@ -540,8 +539,7 @@ int vmw_user_bo_synccpu_ioctl(struct drm_device *dev, void *data,
return ret;
ret = vmw_user_bo_synccpu_grab(vbo, arg->flags);
- vmw_bo_unreference(&vbo);
- drm_gem_object_put(&vbo->tbo.base);
+ vmw_user_bo_unref(vbo);
if (unlikely(ret != 0)) {
if (ret == -ERESTARTSYS || ret == -EBUSY)
return -EBUSY;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
index 50a836e70994..1d433fceed3d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h
@@ -195,6 +195,14 @@ static inline struct vmw_bo *vmw_bo_reference(struct vmw_bo *buf)
return buf;
}
+static inline void vmw_user_bo_unref(struct vmw_bo *vbo)
+{
+ if (vbo) {
+ ttm_bo_put(&vbo->tbo);
+ drm_gem_object_put(&vbo->tbo.base);
+ }
+}
+
static inline struct vmw_bo *to_vmw_bo(struct drm_gem_object *gobj)
{
return container_of((gobj), struct vmw_bo, tbo.base);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 3810a9984a7f..58bfdf203eca 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -1513,4 +1513,16 @@ static inline bool vmw_has_fences(struct vmw_private *vmw)
return (vmw_fifo_caps(vmw) & SVGA_FIFO_CAP_FENCE) != 0;
}
+static inline bool vmw_shadertype_is_valid(enum vmw_sm_type shader_model,
+ u32 shader_type)
+{
+ SVGA3dShaderType max_allowed = SVGA3D_SHADERTYPE_PREDX_MAX;
+
+ if (shader_model >= VMW_SM_5)
+ max_allowed = SVGA3D_SHADERTYPE_MAX;
+ else if (shader_model >= VMW_SM_4)
+ max_allowed = SVGA3D_SHADERTYPE_DX10_MAX;
+ return shader_type >= SVGA3D_SHADERTYPE_MIN && shader_type < max_allowed;
+}
+
#endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 6b9aa2b4ef54..98e0723ca6f5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -1164,8 +1164,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
}
vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_MOB, VMW_BO_DOMAIN_MOB);
ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
- ttm_bo_put(&vmw_bo->tbo);
- drm_gem_object_put(&vmw_bo->tbo.base);
+ vmw_user_bo_unref(vmw_bo);
if (unlikely(ret != 0))
return ret;
@@ -1221,8 +1220,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
vmw_bo_placement_set(vmw_bo, VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM,
VMW_BO_DOMAIN_GMR | VMW_BO_DOMAIN_VRAM);
ret = vmw_validation_add_bo(sw_context->ctx, vmw_bo);
- ttm_bo_put(&vmw_bo->tbo);
- drm_gem_object_put(&vmw_bo->tbo.base);
+ vmw_user_bo_unref(vmw_bo);
if (unlikely(ret != 0))
return ret;
@@ -1992,7 +1990,7 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
cmd = container_of(header, typeof(*cmd), header);
- if (cmd->body.type >= SVGA3D_SHADERTYPE_PREDX_MAX) {
+ if (!vmw_shadertype_is_valid(VMW_SM_LEGACY, cmd->body.type)) {
VMW_DEBUG_USER("Illegal shader type %u.\n",
(unsigned int) cmd->body.type);
return -EINVAL;
@@ -2115,8 +2113,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
SVGA3dCmdHeader *header)
{
VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetSingleConstantBuffer);
- SVGA3dShaderType max_shader_num = has_sm5_context(dev_priv) ?
- SVGA3D_NUM_SHADERTYPE : SVGA3D_NUM_SHADERTYPE_DX10;
struct vmw_resource *res = NULL;
struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
@@ -2133,6 +2129,14 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
if (unlikely(ret != 0))
return ret;
+ if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type) ||
+ cmd->body.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
+ VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
+ (unsigned int) cmd->body.type,
+ (unsigned int) cmd->body.slot);
+ return -EINVAL;
+ }
+
binding.bi.ctx = ctx_node->ctx;
binding.bi.res = res;
binding.bi.bt = vmw_ctx_binding_cb;
@@ -2141,14 +2145,6 @@ vmw_cmd_dx_set_single_constant_buffer(struct vmw_private *dev_priv,
binding.size = cmd->body.sizeInBytes;
binding.slot = cmd->body.slot;
- if (binding.shader_slot >= max_shader_num ||
- binding.slot >= SVGA3D_DX_MAX_CONSTBUFFERS) {
- VMW_DEBUG_USER("Illegal const buffer shader %u slot %u.\n",
- (unsigned int) cmd->body.type,
- (unsigned int) binding.slot);
- return -EINVAL;
- }
-
vmw_binding_add(ctx_node->staged, &binding.bi, binding.shader_slot,
binding.slot);
@@ -2207,15 +2203,13 @@ static int vmw_cmd_dx_set_shader_res(struct vmw_private *dev_priv,
{
VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShaderResources) =
container_of(header, typeof(*cmd), header);
- SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
- SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
u32 num_sr_view = (cmd->header.size - sizeof(cmd->body)) /
sizeof(SVGA3dShaderResourceViewId);
if ((u64) cmd->body.startView + (u64) num_sr_view >
(u64) SVGA3D_DX_MAX_SRVIEWS ||
- cmd->body.type >= max_allowed) {
+ !vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
VMW_DEBUG_USER("Invalid shader binding.\n");
return -EINVAL;
}
@@ -2239,8 +2233,6 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
SVGA3dCmdHeader *header)
{
VMW_DECLARE_CMD_VAR(*cmd, SVGA3dCmdDXSetShader);
- SVGA3dShaderType max_allowed = has_sm5_context(dev_priv) ?
- SVGA3D_SHADERTYPE_MAX : SVGA3D_SHADERTYPE_DX10_MAX;
struct vmw_resource *res = NULL;
struct vmw_ctx_validation_info *ctx_node = VMW_GET_CTX_NODE(sw_context);
struct vmw_ctx_bindinfo_shader binding;
@@ -2251,8 +2243,7 @@ static int vmw_cmd_dx_set_shader(struct vmw_private *dev_priv,
cmd = container_of(header, typeof(*cmd), header);
- if (cmd->body.type >= max_allowed ||
- cmd->body.type < SVGA3D_SHADERTYPE_MIN) {
+ if (!vmw_shadertype_is_valid(dev_priv->sm_type, cmd->body.type)) {
VMW_DEBUG_USER("Illegal shader type %u.\n",
(unsigned int) cmd->body.type);
return -EINVAL;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index b62207be3363..1489ad73c103 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -1665,10 +1665,8 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev,
err_out:
/* vmw_user_lookup_handle takes one ref so does new_fb */
- if (bo) {
- vmw_bo_unreference(&bo);
- drm_gem_object_put(&bo->tbo.base);
- }
+ if (bo)
+ vmw_user_bo_unref(bo);
if (surface)
vmw_surface_unreference(&surface);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
index 7e112319a23c..fb85f244c3d0 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c
@@ -451,8 +451,7 @@ int vmw_overlay_ioctl(struct drm_device *dev, void *data,
ret = vmw_overlay_update_stream(dev_priv, buf, arg, true);
- vmw_bo_unreference(&buf);
- drm_gem_object_put(&buf->tbo.base);
+ vmw_user_bo_unref(buf);
out_unlock:
mutex_unlock(&overlay->mutex);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
index e7226db8b242..1e81ff2422cf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
@@ -809,8 +809,7 @@ static int vmw_shader_define(struct drm_device *dev, struct drm_file *file_priv,
shader_type, num_input_sig,
num_output_sig, tfile, shader_handle);
out_bad_arg:
- vmw_bo_unreference(&buffer);
- drm_gem_object_put(&buffer->tbo.base);
+ vmw_user_bo_unref(buffer);
return ret;
}
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 5978e9dbc286..ebf15f31d97e 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -209,8 +209,7 @@ int vmbus_connect(void)
* Setup the vmbus event connection for channel interrupt
* abstraction stuff
*/
- vmbus_connection.int_page =
- (void *)hv_alloc_hyperv_zeroed_page();
+ vmbus_connection.int_page = hv_alloc_hyperv_zeroed_page();
if (vmbus_connection.int_page == NULL) {
ret = -ENOMEM;
goto cleanup;
@@ -225,8 +224,8 @@ int vmbus_connect(void)
* Setup the monitor notification facility. The 1st page for
* parent->child and the 2nd page for child->parent
*/
- vmbus_connection.monitor_pages[0] = (void *)hv_alloc_hyperv_page();
- vmbus_connection.monitor_pages[1] = (void *)hv_alloc_hyperv_page();
+ vmbus_connection.monitor_pages[0] = hv_alloc_hyperv_page();
+ vmbus_connection.monitor_pages[1] = hv_alloc_hyperv_page();
if ((vmbus_connection.monitor_pages[0] == NULL) ||
(vmbus_connection.monitor_pages[1] == NULL)) {
ret = -ENOMEM;
@@ -333,15 +332,15 @@ void vmbus_disconnect(void)
destroy_workqueue(vmbus_connection.work_queue);
if (vmbus_connection.int_page) {
- hv_free_hyperv_page((unsigned long)vmbus_connection.int_page);
+ hv_free_hyperv_page(vmbus_connection.int_page);
vmbus_connection.int_page = NULL;
}
set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[0], 1);
set_memory_encrypted((unsigned long)vmbus_connection.monitor_pages[1], 1);
- hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[0]);
- hv_free_hyperv_page((unsigned long)vmbus_connection.monitor_pages[1]);
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+ hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
vmbus_connection.monitor_pages[0] = NULL;
vmbus_connection.monitor_pages[1] = NULL;
}
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index dffcc894f117..0d7a3ba66396 100644
--- a/drivers/hv/hv_balloon.c
+++ b/drivers/hv/hv_balloon.c
@@ -1628,7 +1628,7 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
WARN_ON_ONCE(sgl->length < (HV_HYP_PAGE_SIZE << page_reporting_order));
local_irq_save(flags);
- hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg);
+ hint = *this_cpu_ptr(hyperv_pcpu_input_arg);
if (!hint) {
local_irq_restore(flags);
return -ENOSPC;
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 542a1d53b303..6a2258fef1fe 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -115,12 +115,12 @@ void *hv_alloc_hyperv_zeroed_page(void)
}
EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
-void hv_free_hyperv_page(unsigned long addr)
+void hv_free_hyperv_page(void *addr)
{
if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- free_page(addr);
+ free_page((unsigned long)addr);
else
- kfree((void *)addr);
+ kfree(addr);
}
EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
@@ -253,7 +253,7 @@ static void hv_kmsg_dump_unregister(void)
atomic_notifier_chain_unregister(&panic_notifier_list,
&hyperv_panic_report_block);
- hv_free_hyperv_page((unsigned long)hv_panic_page);
+ hv_free_hyperv_page(hv_panic_page);
hv_panic_page = NULL;
}
@@ -270,7 +270,7 @@ static void hv_kmsg_dump_register(void)
ret = kmsg_dump_register(&hv_kmsg_dumper);
if (ret) {
pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
- hv_free_hyperv_page((unsigned long)hv_panic_page);
+ hv_free_hyperv_page(hv_panic_page);
hv_panic_page = NULL;
}
}
diff --git a/drivers/hwmon/aquacomputer_d5next.c b/drivers/hwmon/aquacomputer_d5next.c
index a981f7086114..023807859be7 100644
--- a/drivers/hwmon/aquacomputer_d5next.c
+++ b/drivers/hwmon/aquacomputer_d5next.c
@@ -13,9 +13,11 @@
#include <linux/crc16.h>
#include <linux/debugfs.h>
+#include <linux/delay.h>
#include <linux/hid.h>
#include <linux/hwmon.h>
#include <linux/jiffies.h>
+#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>
@@ -63,6 +65,8 @@ static const char *const aqc_device_names[] = {
#define CTRL_REPORT_ID 0x03
#define AQUAERO_CTRL_REPORT_ID 0x0b
+#define CTRL_REPORT_DELAY 200 /* ms */
+
/* The HID report that the official software always sends
* after writing values, currently same for all devices
*/
@@ -527,6 +531,9 @@ struct aqc_data {
int secondary_ctrl_report_size;
u8 *secondary_ctrl_report;
+ ktime_t last_ctrl_report_op;
+ int ctrl_report_delay; /* Delay between two ctrl report operations, in ms */
+
int buffer_size;
u8 *buffer;
int checksum_start;
@@ -611,17 +618,35 @@ static int aqc_aquastreamxt_convert_fan_rpm(u16 val)
return 0;
}
+static void aqc_delay_ctrl_report(struct aqc_data *priv)
+{
+ /*
+ * If previous read or write is too close to this one, delay the current operation
+ * to give the device enough time to process the previous one.
+ */
+ if (priv->ctrl_report_delay) {
+ s64 delta = ktime_ms_delta(ktime_get(), priv->last_ctrl_report_op);
+
+ if (delta < priv->ctrl_report_delay)
+ msleep(priv->ctrl_report_delay - delta);
+ }
+}
+
/* Expects the mutex to be locked */
static int aqc_get_ctrl_data(struct aqc_data *priv)
{
int ret;
+ aqc_delay_ctrl_report(priv);
+
memset(priv->buffer, 0x00, priv->buffer_size);
ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size,
HID_FEATURE_REPORT, HID_REQ_GET_REPORT);
if (ret < 0)
ret = -ENODATA;
+ priv->last_ctrl_report_op = ktime_get();
+
return ret;
}
@@ -631,6 +656,8 @@ static int aqc_send_ctrl_data(struct aqc_data *priv)
int ret;
u16 checksum;
+ aqc_delay_ctrl_report(priv);
+
/* Checksum is not needed for Aquaero */
if (priv->kind != aquaero) {
/* Init and xorout value for CRC-16/USB is 0xffff */
@@ -646,12 +673,16 @@ static int aqc_send_ctrl_data(struct aqc_data *priv)
ret = hid_hw_raw_request(priv->hdev, priv->ctrl_report_id, priv->buffer, priv->buffer_size,
HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
if (ret < 0)
- return ret;
+ goto record_access_and_ret;
/* The official software sends this report after every change, so do it here as well */
ret = hid_hw_raw_request(priv->hdev, priv->secondary_ctrl_report_id,
priv->secondary_ctrl_report, priv->secondary_ctrl_report_size,
HID_FEATURE_REPORT, HID_REQ_SET_REPORT);
+
+record_access_and_ret:
+ priv->last_ctrl_report_op = ktime_get();
+
return ret;
}
@@ -1027,7 +1058,7 @@ static int aqc_read(struct device *dev, enum hwmon_sensor_types type, u32 attr,
if (ret < 0)
return ret;
- *val = aqc_percent_to_pwm(ret);
+ *val = aqc_percent_to_pwm(*val);
break;
}
break;
@@ -1524,6 +1555,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
priv->buffer_size = AQUAERO_CTRL_REPORT_SIZE;
priv->temp_ctrl_offset = AQUAERO_TEMP_CTRL_OFFSET;
+ priv->ctrl_report_delay = CTRL_REPORT_DELAY;
priv->temp_label = label_temp_sensors;
priv->virtual_temp_label = label_virtual_temp_sensors;
@@ -1547,6 +1579,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
priv->temp_ctrl_offset = D5NEXT_TEMP_CTRL_OFFSET;
priv->buffer_size = D5NEXT_CTRL_REPORT_SIZE;
+ priv->ctrl_report_delay = CTRL_REPORT_DELAY;
priv->power_cycle_count_offset = D5NEXT_POWER_CYCLES;
@@ -1597,6 +1630,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
priv->temp_ctrl_offset = OCTO_TEMP_CTRL_OFFSET;
priv->buffer_size = OCTO_CTRL_REPORT_SIZE;
+ priv->ctrl_report_delay = CTRL_REPORT_DELAY;
priv->power_cycle_count_offset = OCTO_POWER_CYCLES;
@@ -1624,6 +1658,7 @@ static int aqc_probe(struct hid_device *hdev, const struct hid_device_id *id)
priv->temp_ctrl_offset = QUADRO_TEMP_CTRL_OFFSET;
priv->buffer_size = QUADRO_CTRL_REPORT_SIZE;
+ priv->ctrl_report_delay = CTRL_REPORT_DELAY;
priv->flow_pulses_ctrl_offset = QUADRO_FLOW_PULSES_CTRL_OFFSET;
priv->power_cycle_count_offset = QUADRO_POWER_CYCLES;
diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c
index 7b177b9fbb09..a267b11731a8 100644
--- a/drivers/hwmon/k10temp.c
+++ b/drivers/hwmon/k10temp.c
@@ -77,6 +77,13 @@ static DEFINE_MUTEX(nb_smu_ind_mutex);
#define ZEN_CUR_TEMP_RANGE_SEL_MASK BIT(19)
#define ZEN_CUR_TEMP_TJ_SEL_MASK GENMASK(17, 16)
+/*
+ * AMD's Industrial processor 3255 supports temperature from -40 deg to 105 deg Celsius.
+ * Use the model name to identify 3255 CPUs and set a flag to display negative temperature.
+ * Do not round off to zero for negative Tctl or Tdie values if the flag is set
+ */
+#define AMD_I3255_STR "3255"
+
struct k10temp_data {
struct pci_dev *pdev;
void (*read_htcreg)(struct pci_dev *pdev, u32 *regval);
@@ -86,6 +93,7 @@ struct k10temp_data {
u32 show_temp;
bool is_zen;
u32 ccd_offset;
+ bool disp_negative;
};
#define TCTL_BIT 0
@@ -204,12 +212,12 @@ static int k10temp_read_temp(struct device *dev, u32 attr, int channel,
switch (channel) {
case 0: /* Tctl */
*val = get_raw_temp(data);
- if (*val < 0)
+ if (*val < 0 && !data->disp_negative)
*val = 0;
break;
case 1: /* Tdie */
*val = get_raw_temp(data) - data->temp_offset;
- if (*val < 0)
+ if (*val < 0 && !data->disp_negative)
*val = 0;
break;
case 2 ... 13: /* Tccd{1-12} */
@@ -405,6 +413,11 @@ static int k10temp_probe(struct pci_dev *pdev, const struct pci_device_id *id)
data->pdev = pdev;
data->show_temp |= BIT(TCTL_BIT); /* Always show Tctl */
+ if (boot_cpu_data.x86 == 0x17 &&
+ strstr(boot_cpu_data.x86_model_id, AMD_I3255_STR)) {
+ data->disp_negative = true;
+ }
+
if (boot_cpu_data.x86 == 0x15 &&
((boot_cpu_data.x86_model & 0xf0) == 0x60 ||
(boot_cpu_data.x86_model & 0xf0) == 0x70)) {
diff --git a/drivers/hwmon/nct6775-core.c b/drivers/hwmon/nct6775-core.c
index 236dc97f4d22..08ce4984151d 100644
--- a/drivers/hwmon/nct6775-core.c
+++ b/drivers/hwmon/nct6775-core.c
@@ -955,14 +955,25 @@ static const u16 scale_in[15] = {
800, 800
};
-static inline long in_from_reg(u8 reg, u8 nr)
+/*
+ * NCT6798 scaling:
+ * CPUVC, IN1, AVSB, 3VCC, IN0, IN8, IN4, 3VSB, VBAT, VTT, IN5, IN6, IN2,
+ * IN3, IN7
+ * Additional scales to be added later: IN9 (800), VHIF (1600)
+ */
+static const u16 scale_in_6798[15] = {
+ 800, 800, 1600, 1600, 800, 800, 800, 1600, 1600, 1600, 1600, 1600, 800,
+ 800, 800
+};
+
+static inline long in_from_reg(u8 reg, u8 nr, const u16 *scales)
{
- return DIV_ROUND_CLOSEST(reg * scale_in[nr], 100);
+ return DIV_ROUND_CLOSEST(reg * scales[nr], 100);
}
-static inline u8 in_to_reg(u32 val, u8 nr)
+static inline u8 in_to_reg(u32 val, u8 nr, const u16 *scales)
{
- return clamp_val(DIV_ROUND_CLOSEST(val * 100, scale_in[nr]), 0, 255);
+ return clamp_val(DIV_ROUND_CLOSEST(val * 100, scales[nr]), 0, 255);
}
/* TSI temperatures are in 8.3 format */
@@ -1673,7 +1684,8 @@ show_in_reg(struct device *dev, struct device_attribute *attr, char *buf)
if (IS_ERR(data))
return PTR_ERR(data);
- return sprintf(buf, "%ld\n", in_from_reg(data->in[nr][index], nr));
+ return sprintf(buf, "%ld\n",
+ in_from_reg(data->in[nr][index], nr, data->scale_in));
}
static ssize_t
@@ -1691,7 +1703,7 @@ store_in_reg(struct device *dev, struct device_attribute *attr, const char *buf,
if (err < 0)
return err;
mutex_lock(&data->update_lock);
- data->in[nr][index] = in_to_reg(val, nr);
+ data->in[nr][index] = in_to_reg(val, nr, data->scale_in);
err = nct6775_write_value(data, data->REG_IN_MINMAX[index - 1][nr], data->in[nr][index]);
mutex_unlock(&data->update_lock);
return err ? : count;
@@ -3462,6 +3474,7 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
mutex_init(&data->update_lock);
data->name = nct6775_device_names[data->kind];
data->bank = 0xff; /* Force initial bank selection */
+ data->scale_in = scale_in;
switch (data->kind) {
case nct6106:
@@ -3977,6 +3990,9 @@ int nct6775_probe(struct device *dev, struct nct6775_data *data,
break;
}
+ if (data->kind == nct6798 || data->kind == nct6799)
+ data->scale_in = scale_in_6798;
+
reg_temp = NCT6779_REG_TEMP;
num_reg_temp = ARRAY_SIZE(NCT6779_REG_TEMP);
if (data->kind == nct6791) {
diff --git a/drivers/hwmon/nct6775-platform.c b/drivers/hwmon/nct6775-platform.c
index ada867d6b98a..a409d7a0b813 100644
--- a/drivers/hwmon/nct6775-platform.c
+++ b/drivers/hwmon/nct6775-platform.c
@@ -586,7 +586,7 @@ nct6775_check_fan_inputs(struct nct6775_data *data, struct nct6775_sio_data *sio
int creb;
int cred;
- cre6 = sio_data->sio_inb(sio_data, 0xe0);
+ cre6 = sio_data->sio_inb(sio_data, 0xe6);
sio_data->sio_select(sio_data, NCT6775_LD_12);
cre0 = sio_data->sio_inb(sio_data, 0xe0);
diff --git a/drivers/hwmon/nct6775.h b/drivers/hwmon/nct6775.h
index 44f79c5726a9..a84c6ce7275d 100644
--- a/drivers/hwmon/nct6775.h
+++ b/drivers/hwmon/nct6775.h
@@ -98,6 +98,7 @@ struct nct6775_data {
u8 bank; /* current register bank */
u8 in_num; /* number of in inputs we have */
u8 in[15][3]; /* [0]=in, [1]=in_max, [2]=in_min */
+ const u16 *scale_in; /* internal scaling factors */
unsigned int rpm[NUM_FAN];
u16 fan_min[NUM_FAN];
u8 fan_pulses[NUM_FAN];
diff --git a/drivers/hwmon/nct7802.c b/drivers/hwmon/nct7802.c
index 9339bfc02a3e..024cff151c36 100644
--- a/drivers/hwmon/nct7802.c
+++ b/drivers/hwmon/nct7802.c
@@ -725,7 +725,7 @@ static umode_t nct7802_temp_is_visible(struct kobject *kobj,
if (index >= 38 && index < 46 && !(reg & 0x01)) /* PECI 0 */
return 0;
- if (index >= 0x46 && (!(reg & 0x02))) /* PECI 1 */
+ if (index >= 46 && !(reg & 0x02)) /* PECI 1 */
return 0;
return attr->mode;
diff --git a/drivers/hwmon/oxp-sensors.c b/drivers/hwmon/oxp-sensors.c
index e1a907cae820..1e1cc67bcdea 100644
--- a/drivers/hwmon/oxp-sensors.c
+++ b/drivers/hwmon/oxp-sensors.c
@@ -220,6 +220,20 @@ static int tt_toggle_disable(void)
}
/* Callbacks for turbo toggle attribute */
+static umode_t tt_toggle_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ switch (board) {
+ case aok_zoe_a1:
+ case oxp_mini_amd_a07:
+ case oxp_mini_amd_pro:
+ return attr->mode;
+ default:
+ break;
+ }
+ return 0;
+}
+
static ssize_t tt_toggle_store(struct device *dev,
struct device_attribute *attr, const char *buf,
size_t count)
@@ -396,7 +410,15 @@ static struct attribute *oxp_ec_attrs[] = {
NULL
};
-ATTRIBUTE_GROUPS(oxp_ec);
+static struct attribute_group oxp_ec_attribute_group = {
+ .is_visible = tt_toggle_is_visible,
+ .attrs = oxp_ec_attrs,
+};
+
+static const struct attribute_group *oxp_ec_groups[] = {
+ &oxp_ec_attribute_group,
+ NULL
+};
static const struct hwmon_ops oxp_ec_hwmon_ops = {
.is_visible = oxp_ec_hwmon_is_visible,
@@ -415,7 +437,6 @@ static int oxp_platform_probe(struct platform_device *pdev)
const struct dmi_system_id *dmi_entry;
struct device *dev = &pdev->dev;
struct device *hwdev;
- int ret;
/*
* Have to check for AMD processor here because DMI strings are the
@@ -430,18 +451,6 @@ static int oxp_platform_probe(struct platform_device *pdev)
board = (enum oxp_board)(unsigned long)dmi_entry->driver_data;
- switch (board) {
- case aok_zoe_a1:
- case oxp_mini_amd_a07:
- case oxp_mini_amd_pro:
- ret = devm_device_add_groups(dev, oxp_ec_groups);
- if (ret)
- return ret;
- break;
- default:
- break;
- }
-
hwdev = devm_hwmon_device_register_with_info(dev, "oxpec", NULL,
&oxp_ec_chip_info, NULL);
@@ -451,6 +460,7 @@ static int oxp_platform_probe(struct platform_device *pdev)
static struct platform_driver oxp_platform_driver = {
.driver = {
.name = "oxp-platform",
+ .dev_groups = oxp_ec_groups,
},
.probe = oxp_platform_probe,
};
diff --git a/drivers/hwmon/pmbus/bel-pfe.c b/drivers/hwmon/pmbus/bel-pfe.c
index fa5070ae26bc..7c5f4b10a7c1 100644
--- a/drivers/hwmon/pmbus/bel-pfe.c
+++ b/drivers/hwmon/pmbus/bel-pfe.c
@@ -17,12 +17,13 @@
enum chips {pfe1100, pfe3000};
/*
- * Disable status check for pfe3000 devices, because some devices report
- * communication error (invalid command) for VOUT_MODE command (0x20)
- * although correct VOUT_MODE (0x16) is returned: it leads to incorrect
- * exponent in linear mode.
+ * Disable status check because some devices report communication error
+ * (invalid command) for VOUT_MODE command (0x20) although the correct
+ * VOUT_MODE (0x16) is returned: it leads to incorrect exponent in linear
+ * mode.
+ * This affects both pfe3000 and pfe1100.
*/
-static struct pmbus_platform_data pfe3000_plat_data = {
+static struct pmbus_platform_data pfe_plat_data = {
.flags = PMBUS_SKIP_STATUS_CHECK,
};
@@ -94,16 +95,15 @@ static int pfe_pmbus_probe(struct i2c_client *client)
int model;
model = (int)i2c_match_id(pfe_device_id, client)->driver_data;
+ client->dev.platform_data = &pfe_plat_data;
/*
* PFE3000-12-069RA devices may not stay in page 0 during device
* probe which leads to probe failure (read status word failed).
* So let's set the device to page 0 at the beginning.
*/
- if (model == pfe3000) {
- client->dev.platform_data = &pfe3000_plat_data;
+ if (model == pfe3000)
i2c_smbus_write_byte_data(client, PMBUS_PAGE, 0);
- }
return pmbus_do_probe(client, &pfe_driver_info[model]);
}
diff --git a/drivers/hwmon/pmbus/pmbus_core.c b/drivers/hwmon/pmbus/pmbus_core.c
index fa06325f5a7c..69a4e62b6c8d 100644
--- a/drivers/hwmon/pmbus/pmbus_core.c
+++ b/drivers/hwmon/pmbus/pmbus_core.c
@@ -2745,9 +2745,8 @@ static const struct pmbus_status_category __maybe_unused pmbus_status_flag_map[]
},
};
-static int _pmbus_is_enabled(struct device *dev, u8 page)
+static int _pmbus_is_enabled(struct i2c_client *client, u8 page)
{
- struct i2c_client *client = to_i2c_client(dev->parent);
int ret;
ret = _pmbus_read_byte_data(client, page, PMBUS_OPERATION);
@@ -2758,17 +2757,16 @@ static int _pmbus_is_enabled(struct device *dev, u8 page)
return !!(ret & PB_OPERATION_CONTROL_ON);
}
-static int __maybe_unused pmbus_is_enabled(struct device *dev, u8 page)
+static int __maybe_unused pmbus_is_enabled(struct i2c_client *client, u8 page)
{
- struct i2c_client *client = to_i2c_client(dev->parent);
struct pmbus_data *data = i2c_get_clientdata(client);
int ret;
mutex_lock(&data->update_lock);
- ret = _pmbus_is_enabled(dev, page);
+ ret = _pmbus_is_enabled(client, page);
mutex_unlock(&data->update_lock);
- return !!(ret & PB_OPERATION_CONTROL_ON);
+ return ret;
}
#define to_dev_attr(_dev_attr) \
@@ -2844,7 +2842,7 @@ static int _pmbus_get_flags(struct pmbus_data *data, u8 page, unsigned int *flag
if (status < 0)
return status;
- if (_pmbus_is_enabled(dev, page)) {
+ if (_pmbus_is_enabled(client, page)) {
if (status & PB_STATUS_OFF) {
*flags |= REGULATOR_ERROR_FAIL;
*event |= REGULATOR_EVENT_FAIL;
@@ -2898,7 +2896,10 @@ static int __maybe_unused pmbus_get_flags(struct pmbus_data *data, u8 page, unsi
#if IS_ENABLED(CONFIG_REGULATOR)
static int pmbus_regulator_is_enabled(struct regulator_dev *rdev)
{
- return pmbus_is_enabled(rdev_get_dev(rdev), rdev_get_id(rdev));
+ struct device *dev = rdev_get_dev(rdev);
+ struct i2c_client *client = to_i2c_client(dev->parent);
+
+ return pmbus_is_enabled(client, rdev_get_id(rdev));
}
static int _pmbus_regulator_on_off(struct regulator_dev *rdev, bool enable)
@@ -2945,6 +2946,7 @@ static int pmbus_regulator_get_status(struct regulator_dev *rdev)
struct pmbus_data *data = i2c_get_clientdata(client);
u8 page = rdev_get_id(rdev);
int status, ret;
+ int event;
mutex_lock(&data->update_lock);
status = pmbus_get_status(client, page, PMBUS_STATUS_WORD);
@@ -2964,7 +2966,7 @@ static int pmbus_regulator_get_status(struct regulator_dev *rdev)
goto unlock;
}
- ret = pmbus_regulator_get_error_flags(rdev, &status);
+ ret = _pmbus_get_flags(data, rdev_get_id(rdev), &status, &event, false);
if (ret)
goto unlock;
diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c
index 2d8342fdc25d..05c80680dff4 100644
--- a/drivers/i2c/busses/i2c-bcm-iproc.c
+++ b/drivers/i2c/busses/i2c-bcm-iproc.c
@@ -233,13 +233,14 @@ static inline u32 iproc_i2c_rd_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
u32 offset)
{
u32 val;
+ unsigned long flags;
if (iproc_i2c->idm_base) {
- spin_lock(&iproc_i2c->idm_lock);
+ spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
writel(iproc_i2c->ape_addr_mask,
iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
val = readl(iproc_i2c->base + offset);
- spin_unlock(&iproc_i2c->idm_lock);
+ spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
} else {
val = readl(iproc_i2c->base + offset);
}
@@ -250,12 +251,14 @@ static inline u32 iproc_i2c_rd_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
static inline void iproc_i2c_wr_reg(struct bcm_iproc_i2c_dev *iproc_i2c,
u32 offset, u32 val)
{
+ unsigned long flags;
+
if (iproc_i2c->idm_base) {
- spin_lock(&iproc_i2c->idm_lock);
+ spin_lock_irqsave(&iproc_i2c->idm_lock, flags);
writel(iproc_i2c->ape_addr_mask,
iproc_i2c->idm_base + IDM_CTRL_DIRECT_OFFSET);
writel(val, iproc_i2c->base + offset);
- spin_unlock(&iproc_i2c->idm_lock);
+ spin_unlock_irqrestore(&iproc_i2c->idm_lock, flags);
} else {
writel(val, iproc_i2c->base + offset);
}
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index 3bfd7a2232db..24bef0025c98 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -588,9 +588,21 @@ i2c_dw_read(struct dw_i2c_dev *dev)
u32 flags = msgs[dev->msg_read_idx].flags;
regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
+ tmp &= DW_IC_DATA_CMD_DAT;
/* Ensure length byte is a valid value */
- if (flags & I2C_M_RECV_LEN &&
- (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
+ if (flags & I2C_M_RECV_LEN) {
+ /*
+ * if IC_EMPTYFIFO_HOLD_MASTER_EN is set, which cannot be
+ * detected from the registers, the controller can be
+ * disabled if the STOP bit is set. But it is only set
+ * after receiving block data response length in
+ * I2C_FUNC_SMBUS_BLOCK_DATA case. That needs to read
+ * another byte with STOP bit set when the block data
+ * response length is invalid to complete the transaction.
+ */
+ if (!tmp || tmp > I2C_SMBUS_BLOCK_MAX)
+ tmp = 1;
+
len = i2c_dw_recv_len(dev, tmp);
}
*buf++ = tmp;
diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c
index e067671b3ce2..0980c773cb5b 100644
--- a/drivers/i2c/busses/i2c-hisi.c
+++ b/drivers/i2c/busses/i2c-hisi.c
@@ -330,6 +330,14 @@ static irqreturn_t hisi_i2c_irq(int irq, void *context)
struct hisi_i2c_controller *ctlr = context;
u32 int_stat;
+ /*
+ * Don't handle the interrupt if cltr->completion is NULL. We may
+ * reach here because the interrupt is spurious or the transfer is
+ * started by another port (e.g. firmware) rather than us.
+ */
+ if (!ctlr->completion)
+ return IRQ_NONE;
+
int_stat = readl(ctlr->iobase + HISI_I2C_INT_MSTAT);
hisi_i2c_clear_int(ctlr, int_stat);
if (!(int_stat & HISI_I2C_INT_ALL))
diff --git a/drivers/i2c/busses/i2c-imx-lpi2c.c b/drivers/i2c/busses/i2c-imx-lpi2c.c
index c3287c887c6f..150d923ca7f1 100644
--- a/drivers/i2c/busses/i2c-imx-lpi2c.c
+++ b/drivers/i2c/busses/i2c-imx-lpi2c.c
@@ -209,6 +209,9 @@ static int lpi2c_imx_config(struct lpi2c_imx_struct *lpi2c_imx)
lpi2c_imx_set_mode(lpi2c_imx);
clk_rate = clk_get_rate(lpi2c_imx->clks[0].clk);
+ if (!clk_rate)
+ return -EINVAL;
+
if (lpi2c_imx->mode == HS || lpi2c_imx->mode == ULTRA_FAST)
filt = 0;
else
diff --git a/drivers/i2c/busses/i2c-sun6i-p2wi.c b/drivers/i2c/busses/i2c-sun6i-p2wi.c
index ad8270cdbd3e..fa6020dced59 100644
--- a/drivers/i2c/busses/i2c-sun6i-p2wi.c
+++ b/drivers/i2c/busses/i2c-sun6i-p2wi.c
@@ -250,7 +250,8 @@ static int p2wi_probe(struct platform_device *pdev)
p2wi->rstc = devm_reset_control_get_exclusive(dev, NULL);
if (IS_ERR(p2wi->rstc)) {
- dev_err(dev, "failed to retrieve reset controller: %d\n", ret);
+ dev_err(dev, "failed to retrieve reset controller: %pe\n",
+ p2wi->rstc);
return PTR_ERR(p2wi->rstc);
}
diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index bcbbf23aa530..03fc10b45bd6 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -442,7 +442,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev)
if (IS_VI(i2c_dev))
return 0;
- if (!i2c_dev->hw->has_apb_dma) {
+ if (i2c_dev->hw->has_apb_dma) {
if (!IS_ENABLED(CONFIG_TEGRA20_APB_DMA)) {
dev_dbg(i2c_dev->dev, "APB DMA support not enabled\n");
return 0;
@@ -460,6 +460,7 @@ static int tegra_i2c_init_dma(struct tegra_i2c_dev *i2c_dev)
i2c_dev->dma_chan = dma_request_chan(i2c_dev->dev, "tx");
if (IS_ERR(i2c_dev->dma_chan)) {
err = PTR_ERR(i2c_dev->dma_chan);
+ i2c_dev->dma_chan = NULL;
goto err_out;
}
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b930036edbbe..256c2d42e350 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -199,43 +199,6 @@ static __cpuidle int intel_idle_xstate(struct cpuidle_device *dev,
return __intel_idle(dev, drv, index);
}
-static __always_inline int __intel_idle_hlt(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
-{
- raw_safe_halt();
- raw_local_irq_disable();
- return index;
-}
-
-/**
- * intel_idle_hlt - Ask the processor to enter the given idle state using hlt.
- * @dev: cpuidle device of the target CPU.
- * @drv: cpuidle driver (assumed to point to intel_idle_driver).
- * @index: Target idle state index.
- *
- * Use the HLT instruction to notify the processor that the CPU represented by
- * @dev is idle and it can try to enter the idle state corresponding to @index.
- *
- * Must be called under local_irq_disable().
- */
-static __cpuidle int intel_idle_hlt(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
-{
- return __intel_idle_hlt(dev, drv, index);
-}
-
-static __cpuidle int intel_idle_hlt_irq_on(struct cpuidle_device *dev,
- struct cpuidle_driver *drv, int index)
-{
- int ret;
-
- raw_local_irq_enable();
- ret = __intel_idle_hlt(dev, drv, index);
- raw_local_irq_disable();
-
- return ret;
-}
-
/**
* intel_idle_s2idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
@@ -1279,25 +1242,6 @@ static struct cpuidle_state snr_cstates[] __initdata = {
.enter = NULL }
};
-static struct cpuidle_state vmguest_cstates[] __initdata = {
- {
- .name = "C1",
- .desc = "HLT",
- .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_IRQ_ENABLE,
- .exit_latency = 5,
- .target_residency = 10,
- .enter = &intel_idle_hlt, },
- {
- .name = "C1L",
- .desc = "Long HLT",
- .flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TLB_FLUSHED,
- .exit_latency = 5,
- .target_residency = 200,
- .enter = &intel_idle_hlt, },
- {
- .enter = NULL }
-};
-
static const struct idle_cpu idle_cpu_nehalem __initconst = {
.state_table = nehalem_cstates,
.auto_demotion_disable_flags = NHM_C1_AUTO_DEMOTE | NHM_C3_AUTO_DEMOTE,
@@ -1897,16 +1841,6 @@ static bool __init intel_idle_verify_cstate(unsigned int mwait_hint)
static void state_update_enter_method(struct cpuidle_state *state, int cstate)
{
- if (state->enter == intel_idle_hlt) {
- if (force_irq_on) {
- pr_info("forced intel_idle_irq for state %d\n", cstate);
- state->enter = intel_idle_hlt_irq_on;
- }
- return;
- }
- if (state->enter == intel_idle_hlt_irq_on)
- return; /* no update scenarios */
-
if (state->flags & CPUIDLE_FLAG_INIT_XSTATE) {
/*
* Combining with XSTATE with IBRS or IRQ_ENABLE flags
@@ -1940,21 +1874,6 @@ static void state_update_enter_method(struct cpuidle_state *state, int cstate)
}
}
-/*
- * For mwait based states, we want to verify the cpuid data to see if the state
- * is actually supported by this specific CPU.
- * For non-mwait based states, this check should be skipped.
- */
-static bool should_verify_mwait(struct cpuidle_state *state)
-{
- if (state->enter == intel_idle_hlt)
- return false;
- if (state->enter == intel_idle_hlt_irq_on)
- return false;
-
- return true;
-}
-
static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
{
int cstate;
@@ -2003,7 +1922,7 @@ static void __init intel_idle_init_cstates_icpu(struct cpuidle_driver *drv)
}
mwait_hint = flg2MWAIT(cpuidle_state_table[cstate].flags);
- if (should_verify_mwait(&cpuidle_state_table[cstate]) && !intel_idle_verify_cstate(mwait_hint))
+ if (!intel_idle_verify_cstate(mwait_hint))
continue;
/* Structure copy. */
@@ -2137,93 +2056,6 @@ static void __init intel_idle_cpuidle_devices_uninit(void)
cpuidle_unregister_device(per_cpu_ptr(intel_idle_cpuidle_devices, i));
}
-/*
- * Match up the latency and break even point of the bare metal (cpu based)
- * states with the deepest VM available state.
- *
- * We only want to do this for the deepest state, the ones that has
- * the TLB_FLUSHED flag set on the .
- *
- * All our short idle states are dominated by vmexit/vmenter latencies,
- * not the underlying hardware latencies so we keep our values for these.
- */
-static void __init matchup_vm_state_with_baremetal(void)
-{
- int cstate;
-
- for (cstate = 0; cstate < CPUIDLE_STATE_MAX; ++cstate) {
- int matching_cstate;
-
- if (intel_idle_max_cstate_reached(cstate))
- break;
-
- if (!cpuidle_state_table[cstate].enter)
- break;
-
- if (!(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_TLB_FLUSHED))
- continue;
-
- for (matching_cstate = 0; matching_cstate < CPUIDLE_STATE_MAX; ++matching_cstate) {
- if (!icpu->state_table[matching_cstate].enter)
- break;
- if (icpu->state_table[matching_cstate].exit_latency > cpuidle_state_table[cstate].exit_latency) {
- cpuidle_state_table[cstate].exit_latency = icpu->state_table[matching_cstate].exit_latency;
- cpuidle_state_table[cstate].target_residency = icpu->state_table[matching_cstate].target_residency;
- }
- }
-
- }
-}
-
-
-static int __init intel_idle_vminit(const struct x86_cpu_id *id)
-{
- int retval;
-
- cpuidle_state_table = vmguest_cstates;
-
- icpu = (const struct idle_cpu *)id->driver_data;
-
- pr_debug("v" INTEL_IDLE_VERSION " model 0x%X\n",
- boot_cpu_data.x86_model);
-
- intel_idle_cpuidle_devices = alloc_percpu(struct cpuidle_device);
- if (!intel_idle_cpuidle_devices)
- return -ENOMEM;
-
- /*
- * We don't know exactly what the host will do when we go idle, but as a worst estimate
- * we can assume that the exit latency of the deepest host state will be hit for our
- * deep (long duration) guest idle state.
- * The same logic applies to the break even point for the long duration guest idle state.
- * So lets copy these two properties from the table we found for the host CPU type.
- */
- matchup_vm_state_with_baremetal();
-
- intel_idle_cpuidle_driver_init(&intel_idle_driver);
-
- retval = cpuidle_register_driver(&intel_idle_driver);
- if (retval) {
- struct cpuidle_driver *drv = cpuidle_get_driver();
- printk(KERN_DEBUG pr_fmt("intel_idle yielding to %s\n"),
- drv ? drv->name : "none");
- goto init_driver_fail;
- }
-
- retval = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "idle/intel:online",
- intel_idle_cpu_online, NULL);
- if (retval < 0)
- goto hp_setup_fail;
-
- return 0;
-hp_setup_fail:
- intel_idle_cpuidle_devices_uninit();
- cpuidle_unregister_driver(&intel_idle_driver);
-init_driver_fail:
- free_percpu(intel_idle_cpuidle_devices);
- return retval;
-}
-
static int __init intel_idle_init(void)
{
const struct x86_cpu_id *id;
@@ -2242,8 +2074,6 @@ static int __init intel_idle_init(void)
id = x86_match_cpu(intel_idle_ids);
if (id) {
if (!boot_cpu_has(X86_FEATURE_MWAIT)) {
- if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
- return intel_idle_vminit(id);
pr_debug("Please enable MWAIT in BIOS SETUP\n");
return -ENODEV;
}
diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
index 8685e0b58a83..7bc3ebfe8081 100644
--- a/drivers/iio/adc/ad7192.c
+++ b/drivers/iio/adc/ad7192.c
@@ -62,7 +62,6 @@
#define AD7192_MODE_STA_MASK BIT(20) /* Status Register transmission Mask */
#define AD7192_MODE_CLKSRC(x) (((x) & 0x3) << 18) /* Clock Source Select */
#define AD7192_MODE_SINC3 BIT(15) /* SINC3 Filter Select */
-#define AD7192_MODE_ACX BIT(14) /* AC excitation enable(AD7195 only)*/
#define AD7192_MODE_ENPAR BIT(13) /* Parity Enable */
#define AD7192_MODE_CLKDIV BIT(12) /* Clock divide by 2 (AD7190/2 only)*/
#define AD7192_MODE_SCYCLE BIT(11) /* Single cycle conversion */
@@ -91,6 +90,7 @@
/* Configuration Register Bit Designations (AD7192_REG_CONF) */
#define AD7192_CONF_CHOP BIT(23) /* CHOP enable */
+#define AD7192_CONF_ACX BIT(22) /* AC excitation enable(AD7195 only) */
#define AD7192_CONF_REFSEL BIT(20) /* REFIN1/REFIN2 Reference Select */
#define AD7192_CONF_CHAN(x) ((x) << 8) /* Channel select */
#define AD7192_CONF_CHAN_MASK (0x7FF << 8) /* Channel select mask */
@@ -472,7 +472,7 @@ static ssize_t ad7192_show_ac_excitation(struct device *dev,
struct iio_dev *indio_dev = dev_to_iio_dev(dev);
struct ad7192_state *st = iio_priv(indio_dev);
- return sysfs_emit(buf, "%d\n", !!(st->mode & AD7192_MODE_ACX));
+ return sysfs_emit(buf, "%d\n", !!(st->conf & AD7192_CONF_ACX));
}
static ssize_t ad7192_show_bridge_switch(struct device *dev,
@@ -513,13 +513,13 @@ static ssize_t ad7192_set(struct device *dev,
ad_sd_write_reg(&st->sd, AD7192_REG_GPOCON, 1, st->gpocon);
break;
- case AD7192_REG_MODE:
+ case AD7192_REG_CONF:
if (val)
- st->mode |= AD7192_MODE_ACX;
+ st->conf |= AD7192_CONF_ACX;
else
- st->mode &= ~AD7192_MODE_ACX;
+ st->conf &= ~AD7192_CONF_ACX;
- ad_sd_write_reg(&st->sd, AD7192_REG_MODE, 3, st->mode);
+ ad_sd_write_reg(&st->sd, AD7192_REG_CONF, 3, st->conf);
break;
default:
ret = -EINVAL;
@@ -579,12 +579,11 @@ static IIO_DEVICE_ATTR(bridge_switch_en, 0644,
static IIO_DEVICE_ATTR(ac_excitation_en, 0644,
ad7192_show_ac_excitation, ad7192_set,
- AD7192_REG_MODE);
+ AD7192_REG_CONF);
static struct attribute *ad7192_attributes[] = {
&iio_dev_attr_filter_low_pass_3db_frequency_available.dev_attr.attr,
&iio_dev_attr_bridge_switch_en.dev_attr.attr,
- &iio_dev_attr_ac_excitation_en.dev_attr.attr,
NULL
};
@@ -595,6 +594,7 @@ static const struct attribute_group ad7192_attribute_group = {
static struct attribute *ad7195_attributes[] = {
&iio_dev_attr_filter_low_pass_3db_frequency_available.dev_attr.attr,
&iio_dev_attr_bridge_switch_en.dev_attr.attr,
+ &iio_dev_attr_ac_excitation_en.dev_attr.attr,
NULL
};
diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c
index 213526c1592f..aea83f369437 100644
--- a/drivers/iio/adc/ina2xx-adc.c
+++ b/drivers/iio/adc/ina2xx-adc.c
@@ -124,6 +124,7 @@ static const struct regmap_config ina2xx_regmap_config = {
enum ina2xx_ids { ina219, ina226 };
struct ina2xx_config {
+ const char *name;
u16 config_default;
int calibration_value;
int shunt_voltage_lsb; /* nV */
@@ -155,6 +156,7 @@ struct ina2xx_chip_info {
static const struct ina2xx_config ina2xx_config[] = {
[ina219] = {
+ .name = "ina219",
.config_default = INA219_CONFIG_DEFAULT,
.calibration_value = 4096,
.shunt_voltage_lsb = 10000,
@@ -164,6 +166,7 @@ static const struct ina2xx_config ina2xx_config[] = {
.chip_id = ina219,
},
[ina226] = {
+ .name = "ina226",
.config_default = INA226_CONFIG_DEFAULT,
.calibration_value = 2048,
.shunt_voltage_lsb = 2500,
@@ -996,7 +999,7 @@ static int ina2xx_probe(struct i2c_client *client)
/* Patch the current config register with default. */
val = chip->config->config_default;
- if (id->driver_data == ina226) {
+ if (type == ina226) {
ina226_set_average(chip, INA226_DEFAULT_AVG, &val);
ina226_set_int_time_vbus(chip, INA226_DEFAULT_IT, &val);
ina226_set_int_time_vshunt(chip, INA226_DEFAULT_IT, &val);
@@ -1015,7 +1018,7 @@ static int ina2xx_probe(struct i2c_client *client)
}
indio_dev->modes = INDIO_DIRECT_MODE;
- if (id->driver_data == ina226) {
+ if (type == ina226) {
indio_dev->channels = ina226_channels;
indio_dev->num_channels = ARRAY_SIZE(ina226_channels);
indio_dev->info = &ina226_info;
@@ -1024,7 +1027,7 @@ static int ina2xx_probe(struct i2c_client *client)
indio_dev->num_channels = ARRAY_SIZE(ina219_channels);
indio_dev->info = &ina219_info;
}
- indio_dev->name = id->name;
+ indio_dev->name = id ? id->name : chip->config->name;
ret = devm_iio_kfifo_buffer_setup(&client->dev, indio_dev,
&ina2xx_setup_ops);
diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c
index af6bfcc19075..eb78a6f17fd0 100644
--- a/drivers/iio/adc/meson_saradc.c
+++ b/drivers/iio/adc/meson_saradc.c
@@ -916,12 +916,6 @@ static int meson_sar_adc_hw_enable(struct iio_dev *indio_dev)
goto err_vref;
}
- ret = clk_prepare_enable(priv->core_clk);
- if (ret) {
- dev_err(dev, "failed to enable core clk\n");
- goto err_core_clk;
- }
-
regval = FIELD_PREP(MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, 1);
regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG0,
MESON_SAR_ADC_REG0_FIFO_CNT_IRQ_MASK, regval);
@@ -948,8 +942,6 @@ err_adc_clk:
regmap_update_bits(priv->regmap, MESON_SAR_ADC_REG3,
MESON_SAR_ADC_REG3_ADC_EN, 0);
meson_sar_adc_set_bandgap(indio_dev, false);
- clk_disable_unprepare(priv->core_clk);
-err_core_clk:
regulator_disable(priv->vref);
err_vref:
meson_sar_adc_unlock(indio_dev);
@@ -977,8 +969,6 @@ static void meson_sar_adc_hw_disable(struct iio_dev *indio_dev)
meson_sar_adc_set_bandgap(indio_dev, false);
- clk_disable_unprepare(priv->core_clk);
-
regulator_disable(priv->vref);
if (!ret)
@@ -1211,7 +1201,7 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
if (IS_ERR(priv->clkin))
return dev_err_probe(dev, PTR_ERR(priv->clkin), "failed to get clkin\n");
- priv->core_clk = devm_clk_get(dev, "core");
+ priv->core_clk = devm_clk_get_enabled(dev, "core");
if (IS_ERR(priv->core_clk))
return dev_err_probe(dev, PTR_ERR(priv->core_clk), "failed to get core clk\n");
@@ -1294,15 +1284,26 @@ static int meson_sar_adc_remove(struct platform_device *pdev)
static int meson_sar_adc_suspend(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
meson_sar_adc_hw_disable(indio_dev);
+ clk_disable_unprepare(priv->core_clk);
+
return 0;
}
static int meson_sar_adc_resume(struct device *dev)
{
struct iio_dev *indio_dev = dev_get_drvdata(dev);
+ struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
+ int ret;
+
+ ret = clk_prepare_enable(priv->core_clk);
+ if (ret) {
+ dev_err(dev, "failed to enable core clk\n");
+ return ret;
+ }
return meson_sar_adc_hw_enable(indio_dev);
}
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
index 943e9e14d1e9..b72d39fc2434 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
@@ -253,7 +253,7 @@ int cros_ec_sensors_core_init(struct platform_device *pdev,
platform_set_drvdata(pdev, indio_dev);
state->ec = ec->ec_dev;
- state->msg = devm_kzalloc(&pdev->dev,
+ state->msg = devm_kzalloc(&pdev->dev, sizeof(*state->msg) +
max((u16)sizeof(struct ec_params_motion_sense),
state->ec->max_response), GFP_KERNEL);
if (!state->msg)
diff --git a/drivers/iio/frequency/admv1013.c b/drivers/iio/frequency/admv1013.c
index 9bf8337806fc..8c8e0bbfc99f 100644
--- a/drivers/iio/frequency/admv1013.c
+++ b/drivers/iio/frequency/admv1013.c
@@ -344,9 +344,12 @@ static int admv1013_update_quad_filters(struct admv1013_state *st)
static int admv1013_update_mixer_vgate(struct admv1013_state *st)
{
- unsigned int vcm, mixer_vgate;
+ unsigned int mixer_vgate;
+ int vcm;
vcm = regulator_get_voltage(st->reg);
+ if (vcm < 0)
+ return vcm;
if (vcm < 1800000)
mixer_vgate = (2389 * vcm / 1000000 + 8100) / 100;
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index 6a18b363cf73..b6e6b1df8a61 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -2687,7 +2687,7 @@ unknown_format:
static int lsm6dsx_get_acpi_mount_matrix(struct device *dev,
struct iio_mount_matrix *orientation)
{
- return false;
+ return -EOPNOTSUPP;
}
#endif
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index c117f50d0cf3..adcba832e6fa 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -1888,7 +1888,7 @@ static const struct iio_buffer_setup_ops noop_ring_setup_ops;
int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod)
{
struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
- struct fwnode_handle *fwnode;
+ struct fwnode_handle *fwnode = NULL;
int ret;
if (!indio_dev->info)
@@ -1899,7 +1899,8 @@ int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod)
/* If the calling driver did not initialize firmware node, do it here */
if (dev_fwnode(&indio_dev->dev))
fwnode = dev_fwnode(&indio_dev->dev);
- else
+ /* The default dummy IIO device has no parent */
+ else if (indio_dev->dev.parent)
fwnode = dev_fwnode(indio_dev->dev.parent);
device_set_node(&indio_dev->dev, fwnode);
diff --git a/drivers/iio/light/rohm-bu27008.c b/drivers/iio/light/rohm-bu27008.c
index 489902bed7f0..b50bf8973d9a 100644
--- a/drivers/iio/light/rohm-bu27008.c
+++ b/drivers/iio/light/rohm-bu27008.c
@@ -190,7 +190,7 @@ static const struct iio_itime_sel_mul bu27008_itimes[] = {
.address = BU27008_REG_##data##_LO, \
.scan_index = BU27008_##color, \
.scan_type = { \
- .sign = 's', \
+ .sign = 'u', \
.realbits = 16, \
.storagebits = 16, \
.endianness = IIO_LE, \
@@ -633,7 +633,7 @@ static int bu27008_try_find_new_time_gain(struct bu27008_data *data, int val,
for (i = 0; i < data->gts.num_itime; i++) {
new_time_sel = data->gts.itime_table[i].sel;
ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts,
- new_time_sel, val, val2 * 1000, gain_sel);
+ new_time_sel, val, val2, gain_sel);
if (!ret)
break;
}
@@ -662,7 +662,7 @@ static int bu27008_set_scale(struct bu27008_data *data,
goto unlock_out;
ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, time_sel,
- val, val2 * 1000, &gain_sel);
+ val, val2, &gain_sel);
if (ret) {
ret = bu27008_try_find_new_time_gain(data, val, val2, &gain_sel);
if (ret)
@@ -677,6 +677,21 @@ unlock_out:
return ret;
}
+static int bu27008_write_raw_get_fmt(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ long mask)
+{
+
+ switch (mask) {
+ case IIO_CHAN_INFO_SCALE:
+ return IIO_VAL_INT_PLUS_NANO;
+ case IIO_CHAN_INFO_INT_TIME:
+ return IIO_VAL_INT_PLUS_MICRO;
+ default:
+ return -EINVAL;
+ }
+}
+
static int bu27008_write_raw(struct iio_dev *idev,
struct iio_chan_spec const *chan,
int val, int val2, long mask)
@@ -756,6 +771,7 @@ static int bu27008_update_scan_mode(struct iio_dev *idev,
static const struct iio_info bu27008_info = {
.read_raw = &bu27008_read_raw,
.write_raw = &bu27008_write_raw,
+ .write_raw_get_fmt = &bu27008_write_raw_get_fmt,
.read_avail = &bu27008_read_avail,
.update_scan_mode = bu27008_update_scan_mode,
.validate_trigger = iio_validate_own_trigger,
diff --git a/drivers/iio/light/rohm-bu27034.c b/drivers/iio/light/rohm-bu27034.c
index e63ef5789cde..bf3de853a811 100644
--- a/drivers/iio/light/rohm-bu27034.c
+++ b/drivers/iio/light/rohm-bu27034.c
@@ -575,7 +575,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
return -EINVAL;
if (chan == BU27034_CHAN_ALS) {
- if (val == 0 && val2 == 1000)
+ if (val == 0 && val2 == 1000000)
return 0;
return -EINVAL;
@@ -587,7 +587,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
goto unlock_out;
ret = iio_gts_find_gain_sel_for_scale_using_time(&data->gts, time_sel,
- val, val2 * 1000, &gain_sel);
+ val, val2, &gain_sel);
if (ret) {
/*
* Could not support scale with given time. Need to change time.
@@ -624,7 +624,7 @@ static int bu27034_set_scale(struct bu27034_data *data, int chan,
/* Can we provide requested scale with this time? */
ret = iio_gts_find_gain_sel_for_scale_using_time(
- &data->gts, new_time_sel, val, val2 * 1000,
+ &data->gts, new_time_sel, val, val2,
&gain_sel);
if (ret)
continue;
@@ -1217,6 +1217,21 @@ static int bu27034_read_raw(struct iio_dev *idev,
}
}
+static int bu27034_write_raw_get_fmt(struct iio_dev *indio_dev,
+ struct iio_chan_spec const *chan,
+ long mask)
+{
+
+ switch (mask) {
+ case IIO_CHAN_INFO_SCALE:
+ return IIO_VAL_INT_PLUS_NANO;
+ case IIO_CHAN_INFO_INT_TIME:
+ return IIO_VAL_INT_PLUS_MICRO;
+ default:
+ return -EINVAL;
+ }
+}
+
static int bu27034_write_raw(struct iio_dev *idev,
struct iio_chan_spec const *chan,
int val, int val2, long mask)
@@ -1267,6 +1282,7 @@ static int bu27034_read_avail(struct iio_dev *idev,
static const struct iio_info bu27034_info = {
.read_raw = &bu27034_read_raw,
.write_raw = &bu27034_write_raw,
+ .write_raw_get_fmt = &bu27034_write_raw_get_fmt,
.read_avail = &bu27034_read_avail,
};
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 1ee87c3aaeab..9891c7dc2af5 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -4062,6 +4062,8 @@ static int resolve_prepare_src(struct rdma_id_private *id_priv,
RDMA_CM_ADDR_QUERY)))
return -EINVAL;
+ } else {
+ memcpy(cma_dst_addr(id_priv), dst_addr, rdma_addr_size(dst_addr));
}
if (cma_family(id_priv) != dst_addr->sa_family) {
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 755a9c57db6f..f9ab671c8eda 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -85,6 +85,8 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
dma_addr_t mask;
int i;
+ umem->iova = va = virt;
+
if (umem->is_odp) {
unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
@@ -100,7 +102,6 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
*/
pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
- umem->iova = va = virt;
/* The best result is the smallest page size that results in the minimum
* number of required pages. Compute the largest page size that could
* work based on VA address bits that don't change.
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index abef0b8baa7c..03cc45a5458d 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -869,7 +869,10 @@ fail:
int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
{
struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+ struct bnxt_qplib_qp *qplib_qp = &qp->qplib_qp;
struct bnxt_re_dev *rdev = qp->rdev;
+ struct bnxt_qplib_nq *scq_nq = NULL;
+ struct bnxt_qplib_nq *rcq_nq = NULL;
unsigned int flags;
int rc;
@@ -903,6 +906,15 @@ int bnxt_re_destroy_qp(struct ib_qp *ib_qp, struct ib_udata *udata)
ib_umem_release(qp->rumem);
ib_umem_release(qp->sumem);
+ /* Flush all the entries of notification queue associated with
+ * given qp.
+ */
+ scq_nq = qplib_qp->scq->nq;
+ rcq_nq = qplib_qp->rcq->nq;
+ bnxt_re_synchronize_nq(scq_nq);
+ if (scq_nq != rcq_nq)
+ bnxt_re_synchronize_nq(rcq_nq);
+
return 0;
}
diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c
index b42166fe7454..63e98e2d3596 100644
--- a/drivers/infiniband/hw/bnxt_re/main.c
+++ b/drivers/infiniband/hw/bnxt_re/main.c
@@ -1253,6 +1253,8 @@ static int bnxt_re_dev_init(struct bnxt_re_dev *rdev, u8 wqe_mode)
rc = bnxt_re_setup_chip_ctx(rdev, wqe_mode);
if (rc) {
+ bnxt_unregister_dev(rdev->en_dev);
+ clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
ibdev_err(&rdev->ibdev, "Failed to get chip context\n");
return -EINVAL;
}
@@ -1526,8 +1528,8 @@ static void bnxt_re_remove(struct auxiliary_device *adev)
}
bnxt_re_setup_cc(rdev, false);
ib_unregister_device(&rdev->ibdev);
- ib_dealloc_device(&rdev->ibdev);
bnxt_re_dev_uninit(rdev);
+ ib_dealloc_device(&rdev->ibdev);
skip_remove:
mutex_unlock(&bnxt_re_mutex);
}
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.c b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
index 91aed77ce40d..a42555623aed 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.c
@@ -381,6 +381,24 @@ static void bnxt_qplib_service_nq(struct tasklet_struct *t)
spin_unlock_bh(&hwq->lock);
}
+/* bnxt_re_synchronize_nq - self polling notification queue.
+ * @nq - notification queue pointer
+ *
+ * This function will start polling entries of a given notification queue
+ * for all pending entries.
+ * This function is useful to synchronize notification entries while resources
+ * are going away.
+ */
+
+void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq)
+{
+ int budget = nq->budget;
+
+ nq->budget = nq->hwq.max_elements;
+ bnxt_qplib_service_nq(&nq->nq_tasklet);
+ nq->budget = budget;
+}
+
static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
{
struct bnxt_qplib_nq *nq = dev_instance;
@@ -402,19 +420,19 @@ void bnxt_qplib_nq_stop_irq(struct bnxt_qplib_nq *nq, bool kill)
if (!nq->requested)
return;
- tasklet_disable(&nq->nq_tasklet);
+ nq->requested = false;
/* Mask h/w interrupt */
bnxt_qplib_ring_nq_db(&nq->nq_db.dbinfo, nq->res->cctx, false);
/* Sync with last running IRQ handler */
synchronize_irq(nq->msix_vec);
- if (kill)
- tasklet_kill(&nq->nq_tasklet);
-
irq_set_affinity_hint(nq->msix_vec, NULL);
free_irq(nq->msix_vec, nq);
kfree(nq->name);
nq->name = NULL;
- nq->requested = false;
+
+ if (kill)
+ tasklet_kill(&nq->nq_tasklet);
+ tasklet_disable(&nq->nq_tasklet);
}
void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_fp.h b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
index a42820821c47..404b851091ca 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_fp.h
+++ b/drivers/infiniband/hw/bnxt_re/qplib_fp.h
@@ -553,6 +553,7 @@ int bnxt_qplib_process_flush_list(struct bnxt_qplib_cq *cq,
struct bnxt_qplib_cqe *cqe,
int num_cqes);
void bnxt_qplib_flush_cqn_wq(struct bnxt_qplib_qp *qp);
+void bnxt_re_synchronize_nq(struct bnxt_qplib_nq *nq);
static inline void *bnxt_qplib_get_swqe(struct bnxt_qplib_q *que, u32 *swq_idx)
{
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
index b30e66b64827..bc3aea4592b9 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_rcfw.c
@@ -989,19 +989,18 @@ void bnxt_qplib_rcfw_stop_irq(struct bnxt_qplib_rcfw *rcfw, bool kill)
if (!creq->requested)
return;
- tasklet_disable(&creq->creq_tasklet);
+ creq->requested = false;
/* Mask h/w interrupts */
bnxt_qplib_ring_nq_db(&creq->creq_db.dbinfo, rcfw->res->cctx, false);
/* Sync with last running IRQ-handler */
synchronize_irq(creq->msix_vec);
- if (kill)
- tasklet_kill(&creq->creq_tasklet);
-
free_irq(creq->msix_vec, rcfw);
kfree(creq->irq_name);
creq->irq_name = NULL;
- creq->requested = false;
atomic_set(&rcfw->rcfw_intr_enabled, 0);
+ if (kill)
+ tasklet_kill(&creq->creq_tasklet);
+ tasklet_disable(&creq->creq_tasklet);
}
void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c
index 5fd8f7c90bb0..739d942761d1 100644
--- a/drivers/infiniband/hw/bnxt_re/qplib_res.c
+++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c
@@ -819,6 +819,7 @@ static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res *res,
}
memset((u8 *)dpit->tbl, 0xFF, bytes);
+ mutex_init(&res->dpi_tbl_lock);
dpit->priv_db = dpit->ucreg.bar_reg + dpit->ucreg.offset;
return 0;
diff --git a/drivers/infiniband/hw/hfi1/chip.c b/drivers/infiniband/hw/hfi1/chip.c
index 9dbb89e9f4af..baaa4406d5e6 100644
--- a/drivers/infiniband/hw/hfi1/chip.c
+++ b/drivers/infiniband/hw/hfi1/chip.c
@@ -12307,6 +12307,7 @@ static void free_cntrs(struct hfi1_devdata *dd)
if (dd->synth_stats_timer.function)
del_timer_sync(&dd->synth_stats_timer);
+ cancel_work_sync(&dd->update_cntr_work);
ppd = (struct hfi1_pportdata *)(dd + 1);
for (i = 0; i < dd->num_pports; i++, ppd++) {
kfree(ppd->cntrs);
diff --git a/drivers/infiniband/hw/irdma/ctrl.c b/drivers/infiniband/hw/irdma/ctrl.c
index d88c9184007e..45e3344daa04 100644
--- a/drivers/infiniband/hw/irdma/ctrl.c
+++ b/drivers/infiniband/hw/irdma/ctrl.c
@@ -2712,13 +2712,13 @@ static int irdma_sc_cq_modify(struct irdma_sc_cq *cq,
*/
void irdma_check_cqp_progress(struct irdma_cqp_timeout *timeout, struct irdma_sc_dev *dev)
{
- if (timeout->compl_cqp_cmds != dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]) {
- timeout->compl_cqp_cmds = dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS];
+ u64 completed_ops = atomic64_read(&dev->cqp->completed_ops);
+
+ if (timeout->compl_cqp_cmds != completed_ops) {
+ timeout->compl_cqp_cmds = completed_ops;
timeout->count = 0;
- } else {
- if (dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS] !=
- timeout->compl_cqp_cmds)
- timeout->count++;
+ } else if (timeout->compl_cqp_cmds != dev->cqp->requested_ops) {
+ timeout->count++;
}
}
@@ -2761,7 +2761,7 @@ static int irdma_cqp_poll_registers(struct irdma_sc_cqp *cqp, u32 tail,
if (newtail != tail) {
/* SUCCESS */
IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
- cqp->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]++;
+ atomic64_inc(&cqp->completed_ops);
return 0;
}
udelay(cqp->dev->hw_attrs.max_sleep_count);
@@ -3121,8 +3121,8 @@ int irdma_sc_cqp_init(struct irdma_sc_cqp *cqp,
info->dev->cqp = cqp;
IRDMA_RING_INIT(cqp->sq_ring, cqp->sq_size);
- cqp->dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS] = 0;
- cqp->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS] = 0;
+ cqp->requested_ops = 0;
+ atomic64_set(&cqp->completed_ops, 0);
/* for the cqp commands backlog. */
INIT_LIST_HEAD(&cqp->dev->cqp_cmd_head);
@@ -3274,7 +3274,7 @@ __le64 *irdma_sc_cqp_get_next_send_wqe_idx(struct irdma_sc_cqp *cqp, u64 scratch
if (ret_code)
return NULL;
- cqp->dev->cqp_cmd_stats[IRDMA_OP_REQ_CMDS]++;
+ cqp->requested_ops++;
if (!*wqe_idx)
cqp->polarity = !cqp->polarity;
wqe = cqp->sq_base[*wqe_idx].elem;
@@ -3363,6 +3363,9 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
if (polarity != ccq->cq_uk.polarity)
return -ENOENT;
+ /* Ensure CEQE contents are read after valid bit is checked */
+ dma_rmb();
+
get_64bit_val(cqe, 8, &qp_ctx);
cqp = (struct irdma_sc_cqp *)(unsigned long)qp_ctx;
info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, temp);
@@ -3397,7 +3400,7 @@ int irdma_sc_ccq_get_cqe_info(struct irdma_sc_cq *ccq,
dma_wmb(); /* make sure shadow area is updated before moving tail */
IRDMA_RING_MOVE_TAIL(cqp->sq_ring);
- ccq->dev->cqp_cmd_stats[IRDMA_OP_CMPL_CMDS]++;
+ atomic64_inc(&cqp->completed_ops);
return ret_code;
}
@@ -4009,13 +4012,17 @@ int irdma_sc_get_next_aeqe(struct irdma_sc_aeq *aeq,
u8 polarity;
aeqe = IRDMA_GET_CURRENT_AEQ_ELEM(aeq);
- get_64bit_val(aeqe, 0, &compl_ctx);
get_64bit_val(aeqe, 8, &temp);
polarity = (u8)FIELD_GET(IRDMA_AEQE_VALID, temp);
if (aeq->polarity != polarity)
return -ENOENT;
+ /* Ensure AEQE contents are read after valid bit is checked */
+ dma_rmb();
+
+ get_64bit_val(aeqe, 0, &compl_ctx);
+
print_hex_dump_debug("WQE: AEQ_ENTRY WQE", DUMP_PREFIX_OFFSET, 16, 8,
aeqe, 16, false);
diff --git a/drivers/infiniband/hw/irdma/defs.h b/drivers/infiniband/hw/irdma/defs.h
index 6014b9d06a9b..d06e45d2c23f 100644
--- a/drivers/infiniband/hw/irdma/defs.h
+++ b/drivers/infiniband/hw/irdma/defs.h
@@ -191,32 +191,30 @@ enum irdma_cqp_op_type {
IRDMA_OP_MANAGE_VF_PBLE_BP = 25,
IRDMA_OP_QUERY_FPM_VAL = 26,
IRDMA_OP_COMMIT_FPM_VAL = 27,
- IRDMA_OP_REQ_CMDS = 28,
- IRDMA_OP_CMPL_CMDS = 29,
- IRDMA_OP_AH_CREATE = 30,
- IRDMA_OP_AH_MODIFY = 31,
- IRDMA_OP_AH_DESTROY = 32,
- IRDMA_OP_MC_CREATE = 33,
- IRDMA_OP_MC_DESTROY = 34,
- IRDMA_OP_MC_MODIFY = 35,
- IRDMA_OP_STATS_ALLOCATE = 36,
- IRDMA_OP_STATS_FREE = 37,
- IRDMA_OP_STATS_GATHER = 38,
- IRDMA_OP_WS_ADD_NODE = 39,
- IRDMA_OP_WS_MODIFY_NODE = 40,
- IRDMA_OP_WS_DELETE_NODE = 41,
- IRDMA_OP_WS_FAILOVER_START = 42,
- IRDMA_OP_WS_FAILOVER_COMPLETE = 43,
- IRDMA_OP_SET_UP_MAP = 44,
- IRDMA_OP_GEN_AE = 45,
- IRDMA_OP_QUERY_RDMA_FEATURES = 46,
- IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY = 47,
- IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 48,
- IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 49,
- IRDMA_OP_CQ_MODIFY = 50,
+ IRDMA_OP_AH_CREATE = 28,
+ IRDMA_OP_AH_MODIFY = 29,
+ IRDMA_OP_AH_DESTROY = 30,
+ IRDMA_OP_MC_CREATE = 31,
+ IRDMA_OP_MC_DESTROY = 32,
+ IRDMA_OP_MC_MODIFY = 33,
+ IRDMA_OP_STATS_ALLOCATE = 34,
+ IRDMA_OP_STATS_FREE = 35,
+ IRDMA_OP_STATS_GATHER = 36,
+ IRDMA_OP_WS_ADD_NODE = 37,
+ IRDMA_OP_WS_MODIFY_NODE = 38,
+ IRDMA_OP_WS_DELETE_NODE = 39,
+ IRDMA_OP_WS_FAILOVER_START = 40,
+ IRDMA_OP_WS_FAILOVER_COMPLETE = 41,
+ IRDMA_OP_SET_UP_MAP = 42,
+ IRDMA_OP_GEN_AE = 43,
+ IRDMA_OP_QUERY_RDMA_FEATURES = 44,
+ IRDMA_OP_ALLOC_LOCAL_MAC_ENTRY = 45,
+ IRDMA_OP_ADD_LOCAL_MAC_ENTRY = 46,
+ IRDMA_OP_DELETE_LOCAL_MAC_ENTRY = 47,
+ IRDMA_OP_CQ_MODIFY = 48,
/* Must be last entry*/
- IRDMA_MAX_CQP_OPS = 51,
+ IRDMA_MAX_CQP_OPS = 49,
};
/* CQP SQ WQES */
diff --git a/drivers/infiniband/hw/irdma/hw.c b/drivers/infiniband/hw/irdma/hw.c
index 795f7fd4f257..457368e324e1 100644
--- a/drivers/infiniband/hw/irdma/hw.c
+++ b/drivers/infiniband/hw/irdma/hw.c
@@ -191,6 +191,7 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
case IRDMA_AE_AMP_MWBIND_INVALID_RIGHTS:
case IRDMA_AE_AMP_MWBIND_BIND_DISABLED:
case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
+ case IRDMA_AE_AMP_MWBIND_VALID_STAG:
qp->flush_code = FLUSH_MW_BIND_ERR;
qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
break;
@@ -2075,7 +2076,7 @@ void irdma_cqp_ce_handler(struct irdma_pci_f *rf, struct irdma_sc_cq *cq)
cqp_request->compl_info.error = info.error;
if (cqp_request->waiting) {
- cqp_request->request_done = true;
+ WRITE_ONCE(cqp_request->request_done, true);
wake_up(&cqp_request->waitq);
irdma_put_cqp_request(&rf->cqp, cqp_request);
} else {
diff --git a/drivers/infiniband/hw/irdma/main.h b/drivers/infiniband/hw/irdma/main.h
index def6dd58dcd4..2323962cdeac 100644
--- a/drivers/infiniband/hw/irdma/main.h
+++ b/drivers/infiniband/hw/irdma/main.h
@@ -161,8 +161,8 @@ struct irdma_cqp_request {
void (*callback_fcn)(struct irdma_cqp_request *cqp_request);
void *param;
struct irdma_cqp_compl_info compl_info;
+ bool request_done; /* READ/WRITE_ONCE macros operate on it */
bool waiting:1;
- bool request_done:1;
bool dynamic:1;
};
diff --git a/drivers/infiniband/hw/irdma/puda.c b/drivers/infiniband/hw/irdma/puda.c
index 4ec9639f1bdb..562531712ea4 100644
--- a/drivers/infiniband/hw/irdma/puda.c
+++ b/drivers/infiniband/hw/irdma/puda.c
@@ -230,6 +230,9 @@ static int irdma_puda_poll_info(struct irdma_sc_cq *cq,
if (valid_bit != cq_uk->polarity)
return -ENOENT;
+ /* Ensure CQE contents are read after valid bit is checked */
+ dma_rmb();
+
if (cq->dev->hw_attrs.uk_attrs.hw_rev >= IRDMA_GEN_2)
ext_valid = (bool)FIELD_GET(IRDMA_CQ_EXTCQE, qword3);
@@ -243,6 +246,9 @@ static int irdma_puda_poll_info(struct irdma_sc_cq *cq,
if (polarity != cq_uk->polarity)
return -ENOENT;
+ /* Ensure ext CQE contents are read after ext valid bit is checked */
+ dma_rmb();
+
IRDMA_RING_MOVE_HEAD_NOCHECK(cq_uk->cq_ring);
if (!IRDMA_RING_CURRENT_HEAD(cq_uk->cq_ring))
cq_uk->polarity = !cq_uk->polarity;
diff --git a/drivers/infiniband/hw/irdma/type.h b/drivers/infiniband/hw/irdma/type.h
index 5ee68604e59f..a20709577ab0 100644
--- a/drivers/infiniband/hw/irdma/type.h
+++ b/drivers/infiniband/hw/irdma/type.h
@@ -365,6 +365,8 @@ struct irdma_sc_cqp {
struct irdma_dcqcn_cc_params dcqcn_params;
__le64 *host_ctx;
u64 *scratch_array;
+ u64 requested_ops;
+ atomic64_t completed_ops;
u32 cqp_id;
u32 sq_size;
u32 hw_sq_size;
diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index dd428d915c17..280d633d4ec4 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -1161,7 +1161,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
}
wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
- info->op_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+ info->op_type = (u8)FIELD_GET(IRDMACQ_OP, qword3);
if (info->q_type == IRDMA_CQE_QTYPE_RQ) {
u32 array_idx;
@@ -1527,6 +1527,9 @@ void irdma_uk_clean_cq(void *q, struct irdma_cq_uk *cq)
if (polarity != temp)
break;
+ /* Ensure CQE contents are read after valid bit is checked */
+ dma_rmb();
+
get_64bit_val(cqe, 8, &comp_ctx);
if ((void *)(unsigned long)comp_ctx == q)
set_64bit_val(cqe, 8, 0);
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 71e1c5d34709..eb083f70b09f 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -481,7 +481,7 @@ void irdma_free_cqp_request(struct irdma_cqp *cqp,
if (cqp_request->dynamic) {
kfree(cqp_request);
} else {
- cqp_request->request_done = false;
+ WRITE_ONCE(cqp_request->request_done, false);
cqp_request->callback_fcn = NULL;
cqp_request->waiting = false;
@@ -515,7 +515,7 @@ irdma_free_pending_cqp_request(struct irdma_cqp *cqp,
{
if (cqp_request->waiting) {
cqp_request->compl_info.error = true;
- cqp_request->request_done = true;
+ WRITE_ONCE(cqp_request->request_done, true);
wake_up(&cqp_request->waitq);
}
wait_event_timeout(cqp->remove_wq,
@@ -567,11 +567,11 @@ static int irdma_wait_event(struct irdma_pci_f *rf,
bool cqp_error = false;
int err_code = 0;
- cqp_timeout.compl_cqp_cmds = rf->sc_dev.cqp_cmd_stats[IRDMA_OP_CMPL_CMDS];
+ cqp_timeout.compl_cqp_cmds = atomic64_read(&rf->sc_dev.cqp->completed_ops);
do {
irdma_cqp_ce_handler(rf, &rf->ccq.sc_cq);
if (wait_event_timeout(cqp_request->waitq,
- cqp_request->request_done,
+ READ_ONCE(cqp_request->request_done),
msecs_to_jiffies(CQP_COMPL_WAIT_TIME_MS)))
break;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 456656617c33..9d08aa99f3cb 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -565,15 +565,15 @@ static int set_qp_rss(struct mlx4_ib_dev *dev, struct mlx4_ib_rss *rss_ctx,
return (-EOPNOTSUPP);
}
- if (ucmd->rx_hash_fields_mask & ~(MLX4_IB_RX_HASH_SRC_IPV4 |
- MLX4_IB_RX_HASH_DST_IPV4 |
- MLX4_IB_RX_HASH_SRC_IPV6 |
- MLX4_IB_RX_HASH_DST_IPV6 |
- MLX4_IB_RX_HASH_SRC_PORT_TCP |
- MLX4_IB_RX_HASH_DST_PORT_TCP |
- MLX4_IB_RX_HASH_SRC_PORT_UDP |
- MLX4_IB_RX_HASH_DST_PORT_UDP |
- MLX4_IB_RX_HASH_INNER)) {
+ if (ucmd->rx_hash_fields_mask & ~(u64)(MLX4_IB_RX_HASH_SRC_IPV4 |
+ MLX4_IB_RX_HASH_DST_IPV4 |
+ MLX4_IB_RX_HASH_SRC_IPV6 |
+ MLX4_IB_RX_HASH_DST_IPV6 |
+ MLX4_IB_RX_HASH_SRC_PORT_TCP |
+ MLX4_IB_RX_HASH_DST_PORT_TCP |
+ MLX4_IB_RX_HASH_SRC_PORT_UDP |
+ MLX4_IB_RX_HASH_DST_PORT_UDP |
+ MLX4_IB_RX_HASH_INNER)) {
pr_debug("RX Hash fields_mask has unsupported mask (0x%llx)\n",
ucmd->rx_hash_fields_mask);
return (-EOPNOTSUPP);
diff --git a/drivers/infiniband/hw/mthca/mthca_qp.c b/drivers/infiniband/hw/mthca/mthca_qp.c
index 69bba0ef4a5d..53f43649f7d0 100644
--- a/drivers/infiniband/hw/mthca/mthca_qp.c
+++ b/drivers/infiniband/hw/mthca/mthca_qp.c
@@ -1393,7 +1393,7 @@ int mthca_alloc_sqp(struct mthca_dev *dev,
if (mthca_array_get(&dev->qp_table.qp, mqpn))
err = -EBUSY;
else
- mthca_array_set(&dev->qp_table.qp, mqpn, qp->sqp);
+ mthca_array_set(&dev->qp_table.qp, mqpn, qp);
spin_unlock_irq(&dev->qp_table.lock);
if (err)
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
index a973905afd13..ed7d4b02f45a 100644
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -64,9 +64,8 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
inode->i_blocks = 0;
- inode->i_atime = current_time(inode);
+ inode->i_atime = inode_set_ctime_current(inode);
inode->i_mtime = inode->i_atime;
- inode->i_ctime = inode->i_atime;
inode->i_private = data;
if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index d8a43d87de93..d9312b5c9d20 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -199,7 +199,8 @@ int rxe_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
if (access & ~RXE_ACCESS_SUPPORTED_MW) {
rxe_err_mw(mw, "access %#x not supported", access);
- return -EOPNOTSUPP;
+ ret = -EOPNOTSUPP;
+ goto err_drop_mr;
}
spin_lock_bh(&mw->lock);
diff --git a/drivers/interconnect/qcom/bcm-voter.c b/drivers/interconnect/qcom/bcm-voter.c
index 8f385f9c2dd3..d5f2a6b5376b 100644
--- a/drivers/interconnect/qcom/bcm-voter.c
+++ b/drivers/interconnect/qcom/bcm-voter.c
@@ -83,6 +83,11 @@ static void bcm_aggregate(struct qcom_icc_bcm *bcm)
temp = agg_peak[bucket] * bcm->vote_scale;
bcm->vote_y[bucket] = bcm_div(temp, bcm->aux_data.unit);
+
+ if (bcm->enable_mask && (bcm->vote_x[bucket] || bcm->vote_y[bucket])) {
+ bcm->vote_x[bucket] = 0;
+ bcm->vote_y[bucket] = bcm->enable_mask;
+ }
}
if (bcm->keepalive && bcm->vote_x[QCOM_ICC_BUCKET_AMC] == 0 &&
diff --git a/drivers/interconnect/qcom/icc-rpmh.h b/drivers/interconnect/qcom/icc-rpmh.h
index 04391c1ba465..7843d8864d6b 100644
--- a/drivers/interconnect/qcom/icc-rpmh.h
+++ b/drivers/interconnect/qcom/icc-rpmh.h
@@ -81,6 +81,7 @@ struct qcom_icc_node {
* @vote_x: aggregated threshold values, represents sum_bw when @type is bw bcm
* @vote_y: aggregated threshold values, represents peak_bw when @type is bw bcm
* @vote_scale: scaling factor for vote_x and vote_y
+ * @enable_mask: optional mask to send as vote instead of vote_x/vote_y
* @dirty: flag used to indicate whether the bcm needs to be committed
* @keepalive: flag used to indicate whether a keepalive is required
* @aux_data: auxiliary data used when calculating threshold values and
@@ -97,6 +98,7 @@ struct qcom_icc_bcm {
u64 vote_x[QCOM_ICC_NUM_BUCKETS];
u64 vote_y[QCOM_ICC_NUM_BUCKETS];
u64 vote_scale;
+ u32 enable_mask;
bool dirty;
bool keepalive;
struct bcm_db aux_data;
diff --git a/drivers/interconnect/qcom/sa8775p.c b/drivers/interconnect/qcom/sa8775p.c
index da21cc31a580..f56538669de0 100644
--- a/drivers/interconnect/qcom/sa8775p.c
+++ b/drivers/interconnect/qcom/sa8775p.c
@@ -1873,6 +1873,7 @@ static struct qcom_icc_node srvc_snoc = {
static struct qcom_icc_bcm bcm_acv = {
.name = "ACV",
+ .enable_mask = 0x8,
.num_nodes = 1,
.nodes = { &ebi },
};
diff --git a/drivers/interconnect/qcom/sm8450.c b/drivers/interconnect/qcom/sm8450.c
index 2d7a8e7b85ec..e64c214b4020 100644
--- a/drivers/interconnect/qcom/sm8450.c
+++ b/drivers/interconnect/qcom/sm8450.c
@@ -1337,6 +1337,7 @@ static struct qcom_icc_node qns_mem_noc_sf_disp = {
static struct qcom_icc_bcm bcm_acv = {
.name = "ACV",
+ .enable_mask = 0x8,
.num_nodes = 1,
.nodes = { &ebi },
};
@@ -1349,6 +1350,7 @@ static struct qcom_icc_bcm bcm_ce0 = {
static struct qcom_icc_bcm bcm_cn0 = {
.name = "CN0",
+ .enable_mask = 0x1,
.keepalive = true,
.num_nodes = 55,
.nodes = { &qnm_gemnoc_cnoc, &qnm_gemnoc_pcie,
@@ -1383,6 +1385,7 @@ static struct qcom_icc_bcm bcm_cn0 = {
static struct qcom_icc_bcm bcm_co0 = {
.name = "CO0",
+ .enable_mask = 0x1,
.num_nodes = 2,
.nodes = { &qxm_nsp, &qns_nsp_gemnoc },
};
@@ -1403,6 +1406,7 @@ static struct qcom_icc_bcm bcm_mm0 = {
static struct qcom_icc_bcm bcm_mm1 = {
.name = "MM1",
+ .enable_mask = 0x1,
.num_nodes = 12,
.nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp,
&qnm_camnoc_sf, &qnm_mdp,
@@ -1445,6 +1449,7 @@ static struct qcom_icc_bcm bcm_sh0 = {
static struct qcom_icc_bcm bcm_sh1 = {
.name = "SH1",
+ .enable_mask = 0x1,
.num_nodes = 7,
.nodes = { &alm_gpu_tcu, &alm_sys_tcu,
&qnm_nsp_gemnoc, &qnm_pcie,
@@ -1461,6 +1466,7 @@ static struct qcom_icc_bcm bcm_sn0 = {
static struct qcom_icc_bcm bcm_sn1 = {
.name = "SN1",
+ .enable_mask = 0x1,
.num_nodes = 4,
.nodes = { &qhm_gic, &qxm_pimem,
&xm_gic, &qns_gemnoc_gc },
@@ -1492,6 +1498,7 @@ static struct qcom_icc_bcm bcm_sn7 = {
static struct qcom_icc_bcm bcm_acv_disp = {
.name = "ACV",
+ .enable_mask = 0x1,
.num_nodes = 1,
.nodes = { &ebi_disp },
};
@@ -1510,6 +1517,7 @@ static struct qcom_icc_bcm bcm_mm0_disp = {
static struct qcom_icc_bcm bcm_mm1_disp = {
.name = "MM1",
+ .enable_mask = 0x1,
.num_nodes = 3,
.nodes = { &qnm_mdp_disp, &qnm_rot_disp,
&qns_mem_noc_sf_disp },
@@ -1523,6 +1531,7 @@ static struct qcom_icc_bcm bcm_sh0_disp = {
static struct qcom_icc_bcm bcm_sh1_disp = {
.name = "SH1",
+ .enable_mask = 0x1,
.num_nodes = 1,
.nodes = { &qnm_pcie_disp },
};
diff --git a/drivers/interconnect/qcom/sm8550.c b/drivers/interconnect/qcom/sm8550.c
index d823ba988ef6..0864ed285375 100644
--- a/drivers/interconnect/qcom/sm8550.c
+++ b/drivers/interconnect/qcom/sm8550.c
@@ -1473,6 +1473,7 @@ static struct qcom_icc_node qns_mem_noc_sf_cam_ife_2 = {
static struct qcom_icc_bcm bcm_acv = {
.name = "ACV",
+ .enable_mask = 0x8,
.num_nodes = 1,
.nodes = { &ebi },
};
@@ -1485,6 +1486,7 @@ static struct qcom_icc_bcm bcm_ce0 = {
static struct qcom_icc_bcm bcm_cn0 = {
.name = "CN0",
+ .enable_mask = 0x1,
.keepalive = true,
.num_nodes = 54,
.nodes = { &qsm_cfg, &qhs_ahb2phy0,
@@ -1524,6 +1526,7 @@ static struct qcom_icc_bcm bcm_cn1 = {
static struct qcom_icc_bcm bcm_co0 = {
.name = "CO0",
+ .enable_mask = 0x1,
.num_nodes = 2,
.nodes = { &qxm_nsp, &qns_nsp_gemnoc },
};
@@ -1549,6 +1552,7 @@ static struct qcom_icc_bcm bcm_mm0 = {
static struct qcom_icc_bcm bcm_mm1 = {
.name = "MM1",
+ .enable_mask = 0x1,
.num_nodes = 8,
.nodes = { &qnm_camnoc_hf, &qnm_camnoc_icp,
&qnm_camnoc_sf, &qnm_vapss_hcp,
@@ -1589,6 +1593,7 @@ static struct qcom_icc_bcm bcm_sh0 = {
static struct qcom_icc_bcm bcm_sh1 = {
.name = "SH1",
+ .enable_mask = 0x1,
.num_nodes = 13,
.nodes = { &alm_gpu_tcu, &alm_sys_tcu,
&chm_apps, &qnm_gpu,
@@ -1608,6 +1613,7 @@ static struct qcom_icc_bcm bcm_sn0 = {
static struct qcom_icc_bcm bcm_sn1 = {
.name = "SN1",
+ .enable_mask = 0x1,
.num_nodes = 3,
.nodes = { &qhm_gic, &xm_gic,
&qns_gemnoc_gc },
@@ -1633,6 +1639,7 @@ static struct qcom_icc_bcm bcm_sn7 = {
static struct qcom_icc_bcm bcm_acv_disp = {
.name = "ACV",
+ .enable_mask = 0x1,
.num_nodes = 1,
.nodes = { &ebi_disp },
};
@@ -1657,12 +1664,14 @@ static struct qcom_icc_bcm bcm_sh0_disp = {
static struct qcom_icc_bcm bcm_sh1_disp = {
.name = "SH1",
+ .enable_mask = 0x1,
.num_nodes = 2,
.nodes = { &qnm_mnoc_hf_disp, &qnm_pcie_disp },
};
static struct qcom_icc_bcm bcm_acv_cam_ife_0 = {
.name = "ACV",
+ .enable_mask = 0x0,
.num_nodes = 1,
.nodes = { &ebi_cam_ife_0 },
};
@@ -1681,6 +1690,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_0 = {
static struct qcom_icc_bcm bcm_mm1_cam_ife_0 = {
.name = "MM1",
+ .enable_mask = 0x1,
.num_nodes = 4,
.nodes = { &qnm_camnoc_hf_cam_ife_0, &qnm_camnoc_icp_cam_ife_0,
&qnm_camnoc_sf_cam_ife_0, &qns_mem_noc_sf_cam_ife_0 },
@@ -1694,6 +1704,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_0 = {
static struct qcom_icc_bcm bcm_sh1_cam_ife_0 = {
.name = "SH1",
+ .enable_mask = 0x1,
.num_nodes = 3,
.nodes = { &qnm_mnoc_hf_cam_ife_0, &qnm_mnoc_sf_cam_ife_0,
&qnm_pcie_cam_ife_0 },
@@ -1701,6 +1712,7 @@ static struct qcom_icc_bcm bcm_sh1_cam_ife_0 = {
static struct qcom_icc_bcm bcm_acv_cam_ife_1 = {
.name = "ACV",
+ .enable_mask = 0x0,
.num_nodes = 1,
.nodes = { &ebi_cam_ife_1 },
};
@@ -1719,6 +1731,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_1 = {
static struct qcom_icc_bcm bcm_mm1_cam_ife_1 = {
.name = "MM1",
+ .enable_mask = 0x1,
.num_nodes = 4,
.nodes = { &qnm_camnoc_hf_cam_ife_1, &qnm_camnoc_icp_cam_ife_1,
&qnm_camnoc_sf_cam_ife_1, &qns_mem_noc_sf_cam_ife_1 },
@@ -1732,6 +1745,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_1 = {
static struct qcom_icc_bcm bcm_sh1_cam_ife_1 = {
.name = "SH1",
+ .enable_mask = 0x1,
.num_nodes = 3,
.nodes = { &qnm_mnoc_hf_cam_ife_1, &qnm_mnoc_sf_cam_ife_1,
&qnm_pcie_cam_ife_1 },
@@ -1739,6 +1753,7 @@ static struct qcom_icc_bcm bcm_sh1_cam_ife_1 = {
static struct qcom_icc_bcm bcm_acv_cam_ife_2 = {
.name = "ACV",
+ .enable_mask = 0x0,
.num_nodes = 1,
.nodes = { &ebi_cam_ife_2 },
};
@@ -1757,6 +1772,7 @@ static struct qcom_icc_bcm bcm_mm0_cam_ife_2 = {
static struct qcom_icc_bcm bcm_mm1_cam_ife_2 = {
.name = "MM1",
+ .enable_mask = 0x1,
.num_nodes = 4,
.nodes = { &qnm_camnoc_hf_cam_ife_2, &qnm_camnoc_icp_cam_ife_2,
&qnm_camnoc_sf_cam_ife_2, &qns_mem_noc_sf_cam_ife_2 },
@@ -1770,6 +1786,7 @@ static struct qcom_icc_bcm bcm_sh0_cam_ife_2 = {
static struct qcom_icc_bcm bcm_sh1_cam_ife_2 = {
.name = "SH1",
+ .enable_mask = 0x1,
.num_nodes = 3,
.nodes = { &qnm_mnoc_hf_cam_ife_2, &qnm_mnoc_sf_cam_ife_2,
&qnm_pcie_cam_ife_2 },
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 29d05663d4d1..ed2937a4e196 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -109,10 +109,7 @@ EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
*/
void iommufd_device_unbind(struct iommufd_device *idev)
{
- bool was_destroyed;
-
- was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj);
- WARN_ON(!was_destroyed);
+ iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
@@ -382,7 +379,7 @@ void iommufd_device_detach(struct iommufd_device *idev)
mutex_unlock(&hwpt->devices_lock);
if (hwpt->auto_domain)
- iommufd_object_destroy_user(idev->ictx, &hwpt->obj);
+ iommufd_object_deref_user(idev->ictx, &hwpt->obj);
else
refcount_dec(&hwpt->obj.users);
@@ -456,10 +453,7 @@ EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
*/
void iommufd_access_destroy(struct iommufd_access *access)
{
- bool was_destroyed;
-
- was_destroyed = iommufd_object_destroy_user(access->ictx, &access->obj);
- WARN_ON(!was_destroyed);
+ iommufd_object_destroy_user(access->ictx, &access->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index b38e67d1988b..f9790983699c 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -176,8 +176,19 @@ void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
void iommufd_object_finalize(struct iommufd_ctx *ictx,
struct iommufd_object *obj);
-bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
- struct iommufd_object *obj);
+void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj, bool allow_fail);
+static inline void iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj)
+{
+ __iommufd_object_destroy_user(ictx, obj, false);
+}
+static inline void iommufd_object_deref_user(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj)
+{
+ __iommufd_object_destroy_user(ictx, obj, true);
+}
+
struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
size_t size,
enum iommufd_object_type type);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 3fbe636c3d8a..4cf5f73f2708 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -117,13 +117,55 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
}
/*
+ * Remove the given object id from the xarray if the only reference to the
+ * object is held by the xarray. The caller must call ops destroy().
+ */
+static struct iommufd_object *iommufd_object_remove(struct iommufd_ctx *ictx,
+ u32 id, bool extra_put)
+{
+ struct iommufd_object *obj;
+ XA_STATE(xas, &ictx->objects, id);
+
+ xa_lock(&ictx->objects);
+ obj = xas_load(&xas);
+ if (xa_is_zero(obj) || !obj) {
+ obj = ERR_PTR(-ENOENT);
+ goto out_xa;
+ }
+
+ /*
+ * If the caller is holding a ref on obj we put it here under the
+ * spinlock.
+ */
+ if (extra_put)
+ refcount_dec(&obj->users);
+
+ if (!refcount_dec_if_one(&obj->users)) {
+ obj = ERR_PTR(-EBUSY);
+ goto out_xa;
+ }
+
+ xas_store(&xas, NULL);
+ if (ictx->vfio_ioas == container_of(obj, struct iommufd_ioas, obj))
+ ictx->vfio_ioas = NULL;
+
+out_xa:
+ xa_unlock(&ictx->objects);
+
+ /* The returned object reference count is zero */
+ return obj;
+}
+
+/*
* The caller holds a users refcount and wants to destroy the object. Returns
* true if the object was destroyed. In all cases the caller no longer has a
* reference on obj.
*/
-bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
- struct iommufd_object *obj)
+void __iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+ struct iommufd_object *obj, bool allow_fail)
{
+ struct iommufd_object *ret;
+
/*
* The purpose of the destroy_rwsem is to ensure deterministic
* destruction of objects used by external drivers and destroyed by this
@@ -131,22 +173,22 @@ bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
* side of this, such as during ioctl execution.
*/
down_write(&obj->destroy_rwsem);
- xa_lock(&ictx->objects);
- refcount_dec(&obj->users);
- if (!refcount_dec_if_one(&obj->users)) {
- xa_unlock(&ictx->objects);
- up_write(&obj->destroy_rwsem);
- return false;
- }
- __xa_erase(&ictx->objects, obj->id);
- if (ictx->vfio_ioas && &ictx->vfio_ioas->obj == obj)
- ictx->vfio_ioas = NULL;
- xa_unlock(&ictx->objects);
+ ret = iommufd_object_remove(ictx, obj->id, true);
up_write(&obj->destroy_rwsem);
+ if (allow_fail && IS_ERR(ret))
+ return;
+
+ /*
+ * If there is a bug and we couldn't destroy the object then we did put
+ * back the caller's refcount and will eventually try to free it again
+ * during close.
+ */
+ if (WARN_ON(IS_ERR(ret)))
+ return;
+
iommufd_object_ops[obj->type].destroy(obj);
kfree(obj);
- return true;
}
static int iommufd_destroy(struct iommufd_ucmd *ucmd)
@@ -154,13 +196,11 @@ static int iommufd_destroy(struct iommufd_ucmd *ucmd)
struct iommu_destroy *cmd = ucmd->cmd;
struct iommufd_object *obj;
- obj = iommufd_get_object(ucmd->ictx, cmd->id, IOMMUFD_OBJ_ANY);
+ obj = iommufd_object_remove(ucmd->ictx, cmd->id, false);
if (IS_ERR(obj))
return PTR_ERR(obj);
- iommufd_ref_to_users(obj);
- /* See iommufd_ref_to_users() */
- if (!iommufd_object_destroy_user(ucmd->ictx, obj))
- return -EBUSY;
+ iommufd_object_ops[obj->type].destroy(obj);
+ kfree(obj);
return 0;
}
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 412ca96be128..8d9aa297c117 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -297,7 +297,7 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
batch->pfns[0] = batch->pfns[batch->end - 1] +
(batch->npfns[batch->end - 1] - keep_pfns);
batch->npfns[0] = keep_pfns;
- batch->end = 0;
+ batch->end = 1;
}
static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns)
diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
index fa113cb2529a..6341c0167c4a 100644
--- a/drivers/irqchip/irq-bcm6345-l1.c
+++ b/drivers/irqchip/irq-bcm6345-l1.c
@@ -82,6 +82,7 @@ struct bcm6345_l1_chip {
};
struct bcm6345_l1_cpu {
+ struct bcm6345_l1_chip *intc;
void __iomem *map_base;
unsigned int parent_irq;
u32 enable_cache[];
@@ -115,17 +116,11 @@ static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc,
static void bcm6345_l1_irq_handle(struct irq_desc *desc)
{
- struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc);
- struct bcm6345_l1_cpu *cpu;
+ struct bcm6345_l1_cpu *cpu = irq_desc_get_handler_data(desc);
+ struct bcm6345_l1_chip *intc = cpu->intc;
struct irq_chip *chip = irq_desc_get_chip(desc);
unsigned int idx;
-#ifdef CONFIG_SMP
- cpu = intc->cpus[cpu_logical_map(smp_processor_id())];
-#else
- cpu = intc->cpus[0];
-#endif
-
chained_irq_enter(chip, desc);
for (idx = 0; idx < intc->n_words; idx++) {
@@ -253,6 +248,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
if (!cpu)
return -ENOMEM;
+ cpu->intc = intc;
cpu->map_base = ioremap(res.start, sz);
if (!cpu->map_base)
return -ENOMEM;
@@ -271,7 +267,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
return -EINVAL;
}
irq_set_chained_handler_and_data(cpu->parent_irq,
- bcm6345_l1_irq_handle, intc);
+ bcm6345_l1_irq_handle, cpu);
return 0;
}
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index 1994541eaef8..e0c2b10d154d 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -273,13 +273,23 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags)
raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
}
+static struct irq_chip its_vpe_irq_chip;
+
static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
{
- struct its_vlpi_map *map = get_vlpi_map(d);
+ struct its_vpe *vpe = NULL;
int cpu;
- if (map) {
- cpu = vpe_to_cpuid_lock(map->vpe, flags);
+ if (d->chip == &its_vpe_irq_chip) {
+ vpe = irq_data_get_irq_chip_data(d);
+ } else {
+ struct its_vlpi_map *map = get_vlpi_map(d);
+ if (map)
+ vpe = map->vpe;
+ }
+
+ if (vpe) {
+ cpu = vpe_to_cpuid_lock(vpe, flags);
} else {
/* Physical LPIs are already locked via the irq_desc lock */
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
@@ -293,10 +303,18 @@ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
{
- struct its_vlpi_map *map = get_vlpi_map(d);
+ struct its_vpe *vpe = NULL;
+
+ if (d->chip == &its_vpe_irq_chip) {
+ vpe = irq_data_get_irq_chip_data(d);
+ } else {
+ struct its_vlpi_map *map = get_vlpi_map(d);
+ if (map)
+ vpe = map->vpe;
+ }
- if (map)
- vpe_to_cpuid_unlock(map->vpe, flags);
+ if (vpe)
+ vpe_to_cpuid_unlock(vpe, flags);
}
static struct its_collection *valid_col(struct its_collection *col)
@@ -1433,14 +1451,29 @@ static void wait_for_syncr(void __iomem *rdbase)
cpu_relax();
}
-static void direct_lpi_inv(struct irq_data *d)
+static void __direct_lpi_inv(struct irq_data *d, u64 val)
{
- struct its_vlpi_map *map = get_vlpi_map(d);
void __iomem *rdbase;
unsigned long flags;
- u64 val;
int cpu;
+ /* Target the redistributor this LPI is currently routed to */
+ cpu = irq_to_cpuid_lock(d, &flags);
+ raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
+
+ rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
+ gic_write_lpir(val, rdbase + GICR_INVLPIR);
+ wait_for_syncr(rdbase);
+
+ raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
+ irq_to_cpuid_unlock(d, flags);
+}
+
+static void direct_lpi_inv(struct irq_data *d)
+{
+ struct its_vlpi_map *map = get_vlpi_map(d);
+ u64 val;
+
if (map) {
struct its_device *its_dev = irq_data_get_irq_chip_data(d);
@@ -1453,15 +1486,7 @@ static void direct_lpi_inv(struct irq_data *d)
val = d->hwirq;
}
- /* Target the redistributor this LPI is currently routed to */
- cpu = irq_to_cpuid_lock(d, &flags);
- raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
- rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
- gic_write_lpir(val, rdbase + GICR_INVLPIR);
-
- wait_for_syncr(rdbase);
- raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
- irq_to_cpuid_unlock(d, flags);
+ __direct_lpi_inv(d, val);
}
static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
@@ -3953,18 +3978,10 @@ static void its_vpe_send_inv(struct irq_data *d)
{
struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
- if (gic_rdists->has_direct_lpi) {
- void __iomem *rdbase;
-
- /* Target the redistributor this VPE is currently known on */
- raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
- rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
- gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
- wait_for_syncr(rdbase);
- raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
- } else {
+ if (gic_rdists->has_direct_lpi)
+ __direct_lpi_inv(d, d->parent_data->hwirq);
+ else
its_vpe_send_cmd(vpe, its_send_inv);
- }
}
static void its_vpe_mask_irq(struct irq_data *d)
@@ -4727,7 +4744,8 @@ static bool __maybe_unused its_enable_rk3588001(void *data)
{
struct its_node *its = data;
- if (!of_machine_is_compatible("rockchip,rk3588"))
+ if (!of_machine_is_compatible("rockchip,rk3588") &&
+ !of_machine_is_compatible("rockchip,rk3588s"))
return false;
its->flags |= ITS_FLAGS_FORCE_NON_SHAREABLE;
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 0c6c1af9a5b7..eedfa8e9f077 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -69,6 +69,8 @@ struct gic_chip_data {
static void __iomem *t241_dist_base_alias[T241_CHIPS_MAX] __read_mostly;
static DEFINE_STATIC_KEY_FALSE(gic_nvidia_t241_erratum);
+static DEFINE_STATIC_KEY_FALSE(gic_arm64_2941627_erratum);
+
static struct gic_chip_data gic_data __read_mostly;
static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key);
@@ -592,10 +594,39 @@ static void gic_irq_nmi_teardown(struct irq_data *d)
gic_irq_set_prio(d, GICD_INT_DEF_PRI);
}
+static bool gic_arm64_erratum_2941627_needed(struct irq_data *d)
+{
+ enum gic_intid_range range;
+
+ if (!static_branch_unlikely(&gic_arm64_2941627_erratum))
+ return false;
+
+ range = get_intid_range(d);
+
+ /*
+ * The workaround is needed if the IRQ is an SPI and
+ * the target cpu is different from the one we are
+ * executing on.
+ */
+ return (range == SPI_RANGE || range == ESPI_RANGE) &&
+ !cpumask_test_cpu(raw_smp_processor_id(),
+ irq_data_get_effective_affinity_mask(d));
+}
+
static void gic_eoi_irq(struct irq_data *d)
{
write_gicreg(gic_irq(d), ICC_EOIR1_EL1);
isb();
+
+ if (gic_arm64_erratum_2941627_needed(d)) {
+ /*
+ * Make sure the GIC stream deactivate packet
+ * issued by ICC_EOIR1_EL1 has completed before
+ * deactivating through GICD_IACTIVER.
+ */
+ dsb(sy);
+ gic_poke_irq(d, GICD_ICACTIVER);
+ }
}
static void gic_eoimode1_eoi_irq(struct irq_data *d)
@@ -606,7 +637,11 @@ static void gic_eoimode1_eoi_irq(struct irq_data *d)
*/
if (gic_irq(d) >= 8192 || irqd_is_forwarded_to_vcpu(d))
return;
- gic_write_dir(gic_irq(d));
+
+ if (!gic_arm64_erratum_2941627_needed(d))
+ gic_write_dir(gic_irq(d));
+ else
+ gic_poke_irq(d, GICD_ICACTIVER);
}
static int gic_set_type(struct irq_data *d, unsigned int type)
@@ -1816,6 +1851,12 @@ static bool gic_enable_quirk_asr8601(void *data)
return true;
}
+static bool gic_enable_quirk_arm64_2941627(void *data)
+{
+ static_branch_enable(&gic_arm64_2941627_erratum);
+ return true;
+}
+
static const struct gic_quirk gic_quirks[] = {
{
.desc = "GICv3: Qualcomm MSM8996 broken firmware",
@@ -1864,6 +1905,25 @@ static const struct gic_quirk gic_quirks[] = {
.init = gic_enable_quirk_nvidia_t241,
},
{
+ /*
+ * GIC-700: 2941627 workaround - IP variant [0,1]
+ *
+ */
+ .desc = "GICv3: ARM64 erratum 2941627",
+ .iidr = 0x0400043b,
+ .mask = 0xff0e0fff,
+ .init = gic_enable_quirk_arm64_2941627,
+ },
+ {
+ /*
+ * GIC-700: 2941627 workaround - IP variant [2]
+ */
+ .desc = "GICv3: ARM64 erratum 2941627",
+ .iidr = 0x0402043b,
+ .mask = 0xff0f0fff,
+ .init = gic_enable_quirk_arm64_2941627,
+ },
+ {
}
};
diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index c0331b268010..fe391de1aba3 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
*z1t = cpu_to_le16(new_z1); /* now send data */
if (bch->tx_idx < bch->tx_skb->len)
return;
- dev_kfree_skb(bch->tx_skb);
+ dev_kfree_skb_any(bch->tx_skb);
if (get_next_bframe(bch))
goto next_t_frame;
return;
@@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
}
bz->za[new_f1].z1 = cpu_to_le16(new_z1); /* for next buffer */
bz->f1 = new_f1; /* next frame */
- dev_kfree_skb(bch->tx_skb);
+ dev_kfree_skb_any(bch->tx_skb);
get_next_bframe(bch);
}
@@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch)
if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len)
hfcpci_fill_fifo(bch);
else {
- dev_kfree_skb(bch->tx_skb);
+ dev_kfree_skb_any(bch->tx_skb);
if (get_next_bframe(bch))
hfcpci_fill_fifo(bch);
}
@@ -2277,7 +2277,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
return 0;
if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) {
- spin_lock(&hc->lock);
+ spin_lock_irq(&hc->lock);
bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */
main_rec_hfcpci(bch);
@@ -2288,7 +2288,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
main_rec_hfcpci(bch);
tx_birq(bch);
}
- spin_unlock(&hc->lock);
+ spin_unlock_irq(&hc->lock);
}
return 0;
}
diff --git a/drivers/isdn/mISDN/dsp.h b/drivers/isdn/mISDN/dsp.h
index fa09d511a8ed..baf31258f5c9 100644
--- a/drivers/isdn/mISDN/dsp.h
+++ b/drivers/isdn/mISDN/dsp.h
@@ -247,7 +247,7 @@ extern void dsp_cmx_hardware(struct dsp_conf *conf, struct dsp *dsp);
extern int dsp_cmx_conf(struct dsp *dsp, u32 conf_id);
extern void dsp_cmx_receive(struct dsp *dsp, struct sk_buff *skb);
extern void dsp_cmx_hdlc(struct dsp *dsp, struct sk_buff *skb);
-extern void dsp_cmx_send(void *arg);
+extern void dsp_cmx_send(struct timer_list *arg);
extern void dsp_cmx_transmit(struct dsp *dsp, struct sk_buff *skb);
extern int dsp_cmx_del_conf_member(struct dsp *dsp);
extern int dsp_cmx_del_conf(struct dsp_conf *conf);
diff --git a/drivers/isdn/mISDN/dsp_cmx.c b/drivers/isdn/mISDN/dsp_cmx.c
index 357b87592eb4..61cb45c5d0d8 100644
--- a/drivers/isdn/mISDN/dsp_cmx.c
+++ b/drivers/isdn/mISDN/dsp_cmx.c
@@ -1614,7 +1614,7 @@ static u16 dsp_count; /* last sample count */
static int dsp_count_valid; /* if we have last sample count */
void
-dsp_cmx_send(void *arg)
+dsp_cmx_send(struct timer_list *arg)
{
struct dsp_conf *conf;
struct dsp_conf_member *member;
diff --git a/drivers/isdn/mISDN/dsp_core.c b/drivers/isdn/mISDN/dsp_core.c
index 386084530c2f..fae95f166688 100644
--- a/drivers/isdn/mISDN/dsp_core.c
+++ b/drivers/isdn/mISDN/dsp_core.c
@@ -1195,7 +1195,7 @@ static int __init dsp_init(void)
}
/* set sample timer */
- timer_setup(&dsp_spl_tl, (void *)dsp_cmx_send, 0);
+ timer_setup(&dsp_spl_tl, dsp_cmx_send, 0);
dsp_spl_tl.expires = jiffies + dsp_tics;
dsp_spl_jiffies = dsp_spl_tl.expires;
add_timer(&dsp_spl_tl);
diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c
index c9bc5a91ec83..03c58e50cc44 100644
--- a/drivers/leds/trigger/ledtrig-netdev.c
+++ b/drivers/leds/trigger/ledtrig-netdev.c
@@ -406,15 +406,15 @@ static ssize_t interval_store(struct device *dev,
static DEVICE_ATTR_RW(interval);
-static ssize_t hw_control_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t offloaded_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct led_netdev_data *trigger_data = led_trigger_get_drvdata(dev);
return sprintf(buf, "%d\n", trigger_data->hw_control);
}
-static DEVICE_ATTR_RO(hw_control);
+static DEVICE_ATTR_RO(offloaded);
static struct attribute *netdev_trig_attrs[] = {
&dev_attr_device_name.attr,
@@ -427,7 +427,7 @@ static struct attribute *netdev_trig_attrs[] = {
&dev_attr_rx.attr,
&dev_attr_tx.attr,
&dev_attr_interval.attr,
- &dev_attr_hw_control.attr,
+ &dev_attr_offloaded.attr,
NULL
};
ATTRIBUTE_GROUPS(netdev_trig);
diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c
index 493a8715dc8f..8bd2ad743d9a 100644
--- a/drivers/md/dm-cache-policy-smq.c
+++ b/drivers/md/dm-cache-policy-smq.c
@@ -857,7 +857,13 @@ struct smq_policy {
struct background_tracker *bg_work;
- bool migrations_allowed;
+ bool migrations_allowed:1;
+
+ /*
+ * If this is set the policy will try and clean the whole cache
+ * even if the device is not idle.
+ */
+ bool cleaner:1;
};
/*----------------------------------------------------------------*/
@@ -1138,7 +1144,7 @@ static bool clean_target_met(struct smq_policy *mq, bool idle)
* Cache entries may not be populated. So we cannot rely on the
* size of the clean queue.
*/
- if (idle) {
+ if (idle || mq->cleaner) {
/*
* We'd like to clean everything.
*/
@@ -1722,11 +1728,9 @@ static void calc_hotspot_params(sector_t origin_size,
*hotspot_block_size /= 2u;
}
-static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
- sector_t origin_size,
- sector_t cache_block_size,
- bool mimic_mq,
- bool migrations_allowed)
+static struct dm_cache_policy *
+__smq_create(dm_cblock_t cache_size, sector_t origin_size, sector_t cache_block_size,
+ bool mimic_mq, bool migrations_allowed, bool cleaner)
{
unsigned int i;
unsigned int nr_sentinels_per_queue = 2u * NR_CACHE_LEVELS;
@@ -1813,6 +1817,7 @@ static struct dm_cache_policy *__smq_create(dm_cblock_t cache_size,
goto bad_btracker;
mq->migrations_allowed = migrations_allowed;
+ mq->cleaner = cleaner;
return &mq->policy;
@@ -1836,21 +1841,24 @@ static struct dm_cache_policy *smq_create(dm_cblock_t cache_size,
sector_t origin_size,
sector_t cache_block_size)
{
- return __smq_create(cache_size, origin_size, cache_block_size, false, true);
+ return __smq_create(cache_size, origin_size, cache_block_size,
+ false, true, false);
}
static struct dm_cache_policy *mq_create(dm_cblock_t cache_size,
sector_t origin_size,
sector_t cache_block_size)
{
- return __smq_create(cache_size, origin_size, cache_block_size, true, true);
+ return __smq_create(cache_size, origin_size, cache_block_size,
+ true, true, false);
}
static struct dm_cache_policy *cleaner_create(dm_cblock_t cache_size,
sector_t origin_size,
sector_t cache_block_size)
{
- return __smq_create(cache_size, origin_size, cache_block_size, false, false);
+ return __smq_create(cache_size, origin_size, cache_block_size,
+ false, false, true);
}
/*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 3d5c56e0a062..97a8d5fc9ebb 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -2676,6 +2676,7 @@ oom:
recalc_tags = kvmalloc(recalc_tags_size, GFP_NOIO);
if (!recalc_tags) {
vfree(recalc_buffer);
+ recalc_buffer = NULL;
goto oom;
}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 8846bf510a35..becdb689190e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3251,8 +3251,7 @@ size_check:
r = md_start(&rs->md);
if (r) {
ti->error = "Failed to start raid array";
- mddev_unlock(&rs->md);
- goto bad_md_start;
+ goto bad_unlock;
}
/* If raid4/5/6 journal mode explicitly requested (only possible with journal dev) -> set it */
@@ -3260,8 +3259,7 @@ size_check:
r = r5c_journal_mode_set(&rs->md, rs->journal_dev.mode);
if (r) {
ti->error = "Failed to set raid4/5/6 journal mode";
- mddev_unlock(&rs->md);
- goto bad_journal_mode_set;
+ goto bad_unlock;
}
}
@@ -3272,14 +3270,14 @@ size_check:
if (rs_is_raid456(rs)) {
r = rs_set_raid456_stripe_cache(rs);
if (r)
- goto bad_stripe_cache;
+ goto bad_unlock;
}
/* Now do an early reshape check */
if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
r = rs_check_reshape(rs);
if (r)
- goto bad_check_reshape;
+ goto bad_unlock;
/* Restore new, ctr requested layout to perform check */
rs_config_restore(rs, &rs_layout);
@@ -3288,7 +3286,7 @@ size_check:
r = rs->md.pers->check_reshape(&rs->md);
if (r) {
ti->error = "Reshape check failed";
- goto bad_check_reshape;
+ goto bad_unlock;
}
}
}
@@ -3299,11 +3297,9 @@ size_check:
mddev_unlock(&rs->md);
return 0;
-bad_md_start:
-bad_journal_mode_set:
-bad_stripe_cache:
-bad_check_reshape:
+bad_unlock:
md_stop(&rs->md);
+ mddev_unlock(&rs->md);
bad:
raid_set_free(rs);
@@ -3314,7 +3310,9 @@ static void raid_dtr(struct dm_target *ti)
{
struct raid_set *rs = ti->private;
+ mddev_lock_nointr(&rs->md);
md_stop(&rs->md);
+ mddev_unlock(&rs->md);
raid_set_free(rs);
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2e38ef421d69..78be7811a89f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6247,6 +6247,8 @@ static void __md_stop(struct mddev *mddev)
void md_stop(struct mddev *mddev)
{
+ lockdep_assert_held(&mddev->reconfig_mutex);
+
/* stop the array and free an attached data structures.
* This is called from dm-raid
*/
diff --git a/drivers/media/cec/usb/pulse8/pulse8-cec.c b/drivers/media/cec/usb/pulse8/pulse8-cec.c
index 04b13cdc38d2..ba67587bd43e 100644
--- a/drivers/media/cec/usb/pulse8/pulse8-cec.c
+++ b/drivers/media/cec/usb/pulse8/pulse8-cec.c
@@ -809,8 +809,11 @@ static void pulse8_ping_eeprom_work_handler(struct work_struct *work)
mutex_lock(&pulse8->lock);
cmd = MSGCODE_PING;
- pulse8_send_and_wait(pulse8, &cmd, 1,
- MSGCODE_COMMAND_ACCEPTED, 0);
+ if (pulse8_send_and_wait(pulse8, &cmd, 1,
+ MSGCODE_COMMAND_ACCEPTED, 0)) {
+ dev_warn(pulse8->dev, "failed to ping EEPROM\n");
+ goto unlock;
+ }
if (pulse8->vers < 2)
goto unlock;
diff --git a/drivers/media/i2c/tc358746.c b/drivers/media/i2c/tc358746.c
index e9b2d906c177..3f7e147ef594 100644
--- a/drivers/media/i2c/tc358746.c
+++ b/drivers/media/i2c/tc358746.c
@@ -813,8 +813,8 @@ static unsigned long tc358746_find_pll_settings(struct tc358746 *tc358746,
u32 min_delta = 0xffffffff;
u16 prediv_max = 17;
u16 prediv_min = 1;
- u16 m_best, mul;
- u16 p_best, p;
+ u16 m_best = 0, mul;
+ u16 p_best = 1, p;
u8 postdiv;
if (fout > 1000 * HZ_PER_MHZ) {
diff --git a/drivers/media/pci/cx23885/cx23885-dvb.c b/drivers/media/pci/cx23885/cx23885-dvb.c
index 8fd5b6ef2428..7551ca4a322a 100644
--- a/drivers/media/pci/cx23885/cx23885-dvb.c
+++ b/drivers/media/pci/cx23885/cx23885-dvb.c
@@ -2459,16 +2459,10 @@ static int dvb_register(struct cx23885_tsport *port)
request_module("%s", info.type);
client_tuner = i2c_new_client_device(&dev->i2c_bus[1].i2c_adap, &info);
if (!i2c_client_has_driver(client_tuner)) {
- module_put(client_demod->dev.driver->owner);
- i2c_unregister_device(client_demod);
- port->i2c_client_demod = NULL;
goto frontend_detach;
}
if (!try_module_get(client_tuner->dev.driver->owner)) {
i2c_unregister_device(client_tuner);
- module_put(client_demod->dev.driver->owner);
- i2c_unregister_device(client_demod);
- port->i2c_client_demod = NULL;
goto frontend_detach;
}
port->i2c_client_tuner = client_tuner;
@@ -2505,16 +2499,10 @@ static int dvb_register(struct cx23885_tsport *port)
request_module("%s", info.type);
client_tuner = i2c_new_client_device(&dev->i2c_bus[1].i2c_adap, &info);
if (!i2c_client_has_driver(client_tuner)) {
- module_put(client_demod->dev.driver->owner);
- i2c_unregister_device(client_demod);
- port->i2c_client_demod = NULL;
goto frontend_detach;
}
if (!try_module_get(client_tuner->dev.driver->owner)) {
i2c_unregister_device(client_tuner);
- module_put(client_demod->dev.driver->owner);
- i2c_unregister_device(client_demod);
- port->i2c_client_demod = NULL;
goto frontend_detach;
}
port->i2c_client_tuner = client_tuner;
diff --git a/drivers/media/platform/amphion/vpu_core.c b/drivers/media/platform/amphion/vpu_core.c
index 43d85a54268b..7863b7b53494 100644
--- a/drivers/media/platform/amphion/vpu_core.c
+++ b/drivers/media/platform/amphion/vpu_core.c
@@ -826,7 +826,7 @@ static const struct dev_pm_ops vpu_core_pm_ops = {
static struct vpu_core_resources imx8q_enc = {
.type = VPU_CORE_TYPE_ENC,
- .fwname = "vpu/vpu_fw_imx8_enc.bin",
+ .fwname = "amphion/vpu/vpu_fw_imx8_enc.bin",
.stride = 16,
.max_width = 1920,
.max_height = 1920,
@@ -841,7 +841,7 @@ static struct vpu_core_resources imx8q_enc = {
static struct vpu_core_resources imx8q_dec = {
.type = VPU_CORE_TYPE_DEC,
- .fwname = "vpu/vpu_fw_imx8_dec.bin",
+ .fwname = "amphion/vpu/vpu_fw_imx8_dec.bin",
.stride = 256,
.max_width = 8188,
.max_height = 8188,
diff --git a/drivers/media/platform/amphion/vpu_mbox.c b/drivers/media/platform/amphion/vpu_mbox.c
index bf759eb2fd46..b6d5b4844f67 100644
--- a/drivers/media/platform/amphion/vpu_mbox.c
+++ b/drivers/media/platform/amphion/vpu_mbox.c
@@ -46,11 +46,10 @@ static int vpu_mbox_request_channel(struct device *dev, struct vpu_mbox *mbox)
cl->rx_callback = vpu_mbox_rx_callback;
ch = mbox_request_channel_byname(cl, mbox->name);
- if (IS_ERR(ch)) {
- dev_err(dev, "Failed to request mbox chan %s, ret : %ld\n",
- mbox->name, PTR_ERR(ch));
- return PTR_ERR(ch);
- }
+ if (IS_ERR(ch))
+ return dev_err_probe(dev, PTR_ERR(ch),
+ "Failed to request mbox chan %s\n",
+ mbox->name);
mbox->ch = ch;
return 0;
diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
index 4768156181c9..60425c99a2b8 100644
--- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
+++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_core.c
@@ -28,7 +28,6 @@
#include "mtk_jpeg_core.h"
#include "mtk_jpeg_dec_parse.h"
-#if defined(CONFIG_OF)
static struct mtk_jpeg_fmt mtk_jpeg_enc_formats[] = {
{
.fourcc = V4L2_PIX_FMT_JPEG,
@@ -102,7 +101,6 @@ static struct mtk_jpeg_fmt mtk_jpeg_dec_formats[] = {
.flags = MTK_JPEG_FMT_FLAG_CAPTURE,
},
};
-#endif
#define MTK_JPEG_ENC_NUM_FORMATS ARRAY_SIZE(mtk_jpeg_enc_formats)
#define MTK_JPEG_DEC_NUM_FORMATS ARRAY_SIZE(mtk_jpeg_dec_formats)
@@ -1312,6 +1310,8 @@ static int mtk_jpeg_probe(struct platform_device *pdev)
jpeg->dev = &pdev->dev;
jpeg->variant = of_device_get_match_data(jpeg->dev);
+ platform_set_drvdata(pdev, jpeg);
+
ret = devm_of_platform_populate(&pdev->dev);
if (ret) {
v4l2_err(&jpeg->v4l2_dev, "Master of platform populate failed.");
@@ -1383,8 +1383,6 @@ static int mtk_jpeg_probe(struct platform_device *pdev)
jpeg->variant->dev_name, jpeg->vdev->num,
VIDEO_MAJOR, jpeg->vdev->minor);
- platform_set_drvdata(pdev, jpeg);
-
pm_runtime_enable(&pdev->dev);
return 0;
@@ -1455,7 +1453,6 @@ static const struct dev_pm_ops mtk_jpeg_pm_ops = {
SET_RUNTIME_PM_OPS(mtk_jpeg_pm_suspend, mtk_jpeg_pm_resume, NULL)
};
-#if defined(CONFIG_OF)
static int mtk_jpegenc_get_hw(struct mtk_jpeg_ctx *ctx)
{
struct mtk_jpegenc_comp_dev *comp_jpeg;
@@ -1951,14 +1948,13 @@ static const struct of_device_id mtk_jpeg_match[] = {
};
MODULE_DEVICE_TABLE(of, mtk_jpeg_match);
-#endif
static struct platform_driver mtk_jpeg_driver = {
.probe = mtk_jpeg_probe,
.remove_new = mtk_jpeg_remove,
.driver = {
.name = MTK_JPEG_NAME,
- .of_match_table = of_match_ptr(mtk_jpeg_match),
+ .of_match_table = mtk_jpeg_match,
.pm = &mtk_jpeg_pm_ops,
},
};
diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_dec_hw.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_dec_hw.c
index 869068fac5e2..baa7be58ce69 100644
--- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_dec_hw.c
+++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_dec_hw.c
@@ -39,7 +39,6 @@ enum mtk_jpeg_color {
MTK_JPEG_COLOR_400 = 0x00110000
};
-#if defined(CONFIG_OF)
static const struct of_device_id mtk_jpegdec_hw_ids[] = {
{
.compatible = "mediatek,mt8195-jpgdec-hw",
@@ -47,7 +46,6 @@ static const struct of_device_id mtk_jpegdec_hw_ids[] = {
{},
};
MODULE_DEVICE_TABLE(of, mtk_jpegdec_hw_ids);
-#endif
static inline int mtk_jpeg_verify_align(u32 val, int align, u32 reg)
{
@@ -653,7 +651,7 @@ static struct platform_driver mtk_jpegdec_hw_driver = {
.probe = mtk_jpegdec_hw_probe,
.driver = {
.name = "mtk-jpegdec-hw",
- .of_match_table = of_match_ptr(mtk_jpegdec_hw_ids),
+ .of_match_table = mtk_jpegdec_hw_ids,
},
};
diff --git a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_enc_hw.c b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_enc_hw.c
index 71e85b4bbf12..244018365b6f 100644
--- a/drivers/media/platform/mediatek/jpeg/mtk_jpeg_enc_hw.c
+++ b/drivers/media/platform/mediatek/jpeg/mtk_jpeg_enc_hw.c
@@ -46,7 +46,6 @@ static const struct mtk_jpeg_enc_qlt mtk_jpeg_enc_quality[] = {
{.quality_param = 97, .hardware_value = JPEG_ENC_QUALITY_Q97},
};
-#if defined(CONFIG_OF)
static const struct of_device_id mtk_jpegenc_drv_ids[] = {
{
.compatible = "mediatek,mt8195-jpgenc-hw",
@@ -54,7 +53,6 @@ static const struct of_device_id mtk_jpegenc_drv_ids[] = {
{},
};
MODULE_DEVICE_TABLE(of, mtk_jpegenc_drv_ids);
-#endif
void mtk_jpeg_enc_reset(void __iomem *base)
{
@@ -377,7 +375,7 @@ static struct platform_driver mtk_jpegenc_hw_driver = {
.probe = mtk_jpegenc_hw_probe,
.driver = {
.name = "mtk-jpegenc-hw",
- .of_match_table = of_match_ptr(mtk_jpegenc_drv_ids),
+ .of_match_table = mtk_jpegenc_drv_ids,
},
};
diff --git a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c
index 9ff439a50f53..315e97a2450e 100644
--- a/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c
+++ b/drivers/media/platform/mediatek/vcodec/mtk_vcodec_enc.c
@@ -821,6 +821,8 @@ static int vb2ops_venc_queue_setup(struct vb2_queue *vq,
return -EINVAL;
if (*nplanes) {
+ if (*nplanes != q_data->fmt->num_planes)
+ return -EINVAL;
for (i = 0; i < *nplanes; i++)
if (sizes[i] < q_data->sizeimage[i])
return -EINVAL;
diff --git a/drivers/media/platform/mediatek/vcodec/vdec_msg_queue.c b/drivers/media/platform/mediatek/vcodec/vdec_msg_queue.c
index f555341ae708..04e6dc6cfa1d 100644
--- a/drivers/media/platform/mediatek/vcodec/vdec_msg_queue.c
+++ b/drivers/media/platform/mediatek/vcodec/vdec_msg_queue.c
@@ -233,7 +233,8 @@ void vdec_msg_queue_deinit(struct vdec_msg_queue *msg_queue,
kfree(lat_buf->private_data);
}
- cancel_work_sync(&msg_queue->core_work);
+ if (msg_queue->wdma_addr.size)
+ cancel_work_sync(&msg_queue->core_work);
}
static void vdec_msg_queue_core_work(struct work_struct *work)
diff --git a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h
index ed15ea348f97..a2b4fb9e29e7 100644
--- a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h
+++ b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg-hw.h
@@ -58,7 +58,6 @@
#define CAST_OFBSIZE_LO CAST_STATUS18
#define CAST_OFBSIZE_HI CAST_STATUS19
-#define MXC_MAX_SLOTS 1 /* TODO use all 4 slots*/
/* JPEG-Decoder Wrapper Slot Registers 0..3 */
#define SLOT_BASE 0x10000
#define SLOT_STATUS 0x0
diff --git a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c
index c0e49be42450..9512c0a61966 100644
--- a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c
+++ b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.c
@@ -745,87 +745,77 @@ static void notify_src_chg(struct mxc_jpeg_ctx *ctx)
v4l2_event_queue_fh(&ctx->fh, &ev);
}
-static int mxc_get_free_slot(struct mxc_jpeg_slot_data slot_data[], int n)
+static int mxc_get_free_slot(struct mxc_jpeg_slot_data *slot_data)
{
- int free_slot = 0;
-
- while (slot_data[free_slot].used && free_slot < n)
- free_slot++;
-
- return free_slot; /* >=n when there are no more free slots */
+ if (!slot_data->used)
+ return slot_data->slot;
+ return -1;
}
-static bool mxc_jpeg_alloc_slot_data(struct mxc_jpeg_dev *jpeg,
- unsigned int slot)
+static bool mxc_jpeg_alloc_slot_data(struct mxc_jpeg_dev *jpeg)
{
struct mxc_jpeg_desc *desc;
struct mxc_jpeg_desc *cfg_desc;
void *cfg_stm;
- if (jpeg->slot_data[slot].desc)
+ if (jpeg->slot_data.desc)
goto skip_alloc; /* already allocated, reuse it */
/* allocate descriptor for decoding/encoding phase */
desc = dma_alloc_coherent(jpeg->dev,
sizeof(struct mxc_jpeg_desc),
- &jpeg->slot_data[slot].desc_handle,
+ &jpeg->slot_data.desc_handle,
GFP_ATOMIC);
if (!desc)
goto err;
- jpeg->slot_data[slot].desc = desc;
+ jpeg->slot_data.desc = desc;
/* allocate descriptor for configuration phase (encoder only) */
cfg_desc = dma_alloc_coherent(jpeg->dev,
sizeof(struct mxc_jpeg_desc),
- &jpeg->slot_data[slot].cfg_desc_handle,
+ &jpeg->slot_data.cfg_desc_handle,
GFP_ATOMIC);
if (!cfg_desc)
goto err;
- jpeg->slot_data[slot].cfg_desc = cfg_desc;
+ jpeg->slot_data.cfg_desc = cfg_desc;
/* allocate configuration stream */
cfg_stm = dma_alloc_coherent(jpeg->dev,
MXC_JPEG_MAX_CFG_STREAM,
- &jpeg->slot_data[slot].cfg_stream_handle,
+ &jpeg->slot_data.cfg_stream_handle,
GFP_ATOMIC);
if (!cfg_stm)
goto err;
- jpeg->slot_data[slot].cfg_stream_vaddr = cfg_stm;
+ jpeg->slot_data.cfg_stream_vaddr = cfg_stm;
skip_alloc:
- jpeg->slot_data[slot].used = true;
+ jpeg->slot_data.used = true;
return true;
err:
- dev_err(jpeg->dev, "Could not allocate descriptors for slot %d", slot);
+ dev_err(jpeg->dev, "Could not allocate descriptors for slot %d", jpeg->slot_data.slot);
return false;
}
-static void mxc_jpeg_free_slot_data(struct mxc_jpeg_dev *jpeg,
- unsigned int slot)
+static void mxc_jpeg_free_slot_data(struct mxc_jpeg_dev *jpeg)
{
- if (slot >= MXC_MAX_SLOTS) {
- dev_err(jpeg->dev, "Invalid slot %d, nothing to free.", slot);
- return;
- }
-
/* free descriptor for decoding/encoding phase */
dma_free_coherent(jpeg->dev, sizeof(struct mxc_jpeg_desc),
- jpeg->slot_data[slot].desc,
- jpeg->slot_data[slot].desc_handle);
+ jpeg->slot_data.desc,
+ jpeg->slot_data.desc_handle);
/* free descriptor for encoder configuration phase / decoder DHT */
dma_free_coherent(jpeg->dev, sizeof(struct mxc_jpeg_desc),
- jpeg->slot_data[slot].cfg_desc,
- jpeg->slot_data[slot].cfg_desc_handle);
+ jpeg->slot_data.cfg_desc,
+ jpeg->slot_data.cfg_desc_handle);
/* free configuration stream */
dma_free_coherent(jpeg->dev, MXC_JPEG_MAX_CFG_STREAM,
- jpeg->slot_data[slot].cfg_stream_vaddr,
- jpeg->slot_data[slot].cfg_stream_handle);
+ jpeg->slot_data.cfg_stream_vaddr,
+ jpeg->slot_data.cfg_stream_handle);
- jpeg->slot_data[slot].used = false;
+ jpeg->slot_data.used = false;
}
static void mxc_jpeg_check_and_set_last_buffer(struct mxc_jpeg_ctx *ctx,
@@ -855,7 +845,7 @@ static void mxc_jpeg_job_finish(struct mxc_jpeg_ctx *ctx, enum vb2_buffer_state
v4l2_m2m_buf_done(dst_buf, state);
mxc_jpeg_disable_irq(reg, ctx->slot);
- ctx->mxc_jpeg->slot_data[ctx->slot].used = false;
+ jpeg->slot_data.used = false;
if (reset)
mxc_jpeg_sw_reset(reg);
}
@@ -919,7 +909,7 @@ static irqreturn_t mxc_jpeg_dec_irq(int irq, void *priv)
goto job_unlock;
}
- if (!jpeg->slot_data[slot].used)
+ if (!jpeg->slot_data.used)
goto job_unlock;
dec_ret = readl(reg + MXC_SLOT_OFFSET(slot, SLOT_STATUS));
@@ -1179,13 +1169,13 @@ static void mxc_jpeg_config_dec_desc(struct vb2_buffer *out_buf,
struct mxc_jpeg_dev *jpeg = ctx->mxc_jpeg;
void __iomem *reg = jpeg->base_reg;
unsigned int slot = ctx->slot;
- struct mxc_jpeg_desc *desc = jpeg->slot_data[slot].desc;
- struct mxc_jpeg_desc *cfg_desc = jpeg->slot_data[slot].cfg_desc;
- dma_addr_t desc_handle = jpeg->slot_data[slot].desc_handle;
- dma_addr_t cfg_desc_handle = jpeg->slot_data[slot].cfg_desc_handle;
- dma_addr_t cfg_stream_handle = jpeg->slot_data[slot].cfg_stream_handle;
- unsigned int *cfg_size = &jpeg->slot_data[slot].cfg_stream_size;
- void *cfg_stream_vaddr = jpeg->slot_data[slot].cfg_stream_vaddr;
+ struct mxc_jpeg_desc *desc = jpeg->slot_data.desc;
+ struct mxc_jpeg_desc *cfg_desc = jpeg->slot_data.cfg_desc;
+ dma_addr_t desc_handle = jpeg->slot_data.desc_handle;
+ dma_addr_t cfg_desc_handle = jpeg->slot_data.cfg_desc_handle;
+ dma_addr_t cfg_stream_handle = jpeg->slot_data.cfg_stream_handle;
+ unsigned int *cfg_size = &jpeg->slot_data.cfg_stream_size;
+ void *cfg_stream_vaddr = jpeg->slot_data.cfg_stream_vaddr;
struct mxc_jpeg_src_buf *jpeg_src_buf;
jpeg_src_buf = vb2_to_mxc_buf(src_buf);
@@ -1245,18 +1235,18 @@ static void mxc_jpeg_config_enc_desc(struct vb2_buffer *out_buf,
struct mxc_jpeg_dev *jpeg = ctx->mxc_jpeg;
void __iomem *reg = jpeg->base_reg;
unsigned int slot = ctx->slot;
- struct mxc_jpeg_desc *desc = jpeg->slot_data[slot].desc;
- struct mxc_jpeg_desc *cfg_desc = jpeg->slot_data[slot].cfg_desc;
- dma_addr_t desc_handle = jpeg->slot_data[slot].desc_handle;
- dma_addr_t cfg_desc_handle = jpeg->slot_data[slot].cfg_desc_handle;
- void *cfg_stream_vaddr = jpeg->slot_data[slot].cfg_stream_vaddr;
+ struct mxc_jpeg_desc *desc = jpeg->slot_data.desc;
+ struct mxc_jpeg_desc *cfg_desc = jpeg->slot_data.cfg_desc;
+ dma_addr_t desc_handle = jpeg->slot_data.desc_handle;
+ dma_addr_t cfg_desc_handle = jpeg->slot_data.cfg_desc_handle;
+ void *cfg_stream_vaddr = jpeg->slot_data.cfg_stream_vaddr;
struct mxc_jpeg_q_data *q_data;
enum mxc_jpeg_image_format img_fmt;
int w, h;
q_data = mxc_jpeg_get_q_data(ctx, src_buf->vb2_queue->type);
- jpeg->slot_data[slot].cfg_stream_size =
+ jpeg->slot_data.cfg_stream_size =
mxc_jpeg_setup_cfg_stream(cfg_stream_vaddr,
q_data->fmt->fourcc,
q_data->crop.width,
@@ -1265,7 +1255,7 @@ static void mxc_jpeg_config_enc_desc(struct vb2_buffer *out_buf,
/* chain the config descriptor with the encoding descriptor */
cfg_desc->next_descpt_ptr = desc_handle | MXC_NXT_DESCPT_EN;
- cfg_desc->buf_base0 = jpeg->slot_data[slot].cfg_stream_handle;
+ cfg_desc->buf_base0 = jpeg->slot_data.cfg_stream_handle;
cfg_desc->buf_base1 = 0;
cfg_desc->line_pitch = 0;
cfg_desc->stm_bufbase = 0; /* no output expected */
@@ -1408,7 +1398,7 @@ static void mxc_jpeg_device_run_timeout(struct work_struct *work)
unsigned long flags;
spin_lock_irqsave(&ctx->mxc_jpeg->hw_lock, flags);
- if (ctx->slot < MXC_MAX_SLOTS && ctx->mxc_jpeg->slot_data[ctx->slot].used) {
+ if (ctx->mxc_jpeg->slot_data.used) {
dev_warn(jpeg->dev, "%s timeout, cancel it\n",
ctx->mxc_jpeg->mode == MXC_JPEG_DECODE ? "decode" : "encode");
mxc_jpeg_job_finish(ctx, VB2_BUF_STATE_ERROR, true);
@@ -1476,12 +1466,12 @@ static void mxc_jpeg_device_run(void *priv)
mxc_jpeg_enable(reg);
mxc_jpeg_set_l_endian(reg, 1);
- ctx->slot = mxc_get_free_slot(jpeg->slot_data, MXC_MAX_SLOTS);
- if (ctx->slot >= MXC_MAX_SLOTS) {
+ ctx->slot = mxc_get_free_slot(&jpeg->slot_data);
+ if (ctx->slot < 0) {
dev_err(dev, "No more free slots\n");
goto end;
}
- if (!mxc_jpeg_alloc_slot_data(jpeg, ctx->slot)) {
+ if (!mxc_jpeg_alloc_slot_data(jpeg)) {
dev_err(dev, "Cannot allocate slot data\n");
goto end;
}
@@ -2101,7 +2091,7 @@ static int mxc_jpeg_open(struct file *file)
}
ctx->fh.ctrl_handler = &ctx->ctrl_handler;
mxc_jpeg_set_default_params(ctx);
- ctx->slot = MXC_MAX_SLOTS; /* slot not allocated yet */
+ ctx->slot = -1; /* slot not allocated yet */
INIT_DELAYED_WORK(&ctx->task_timer, mxc_jpeg_device_run_timeout);
if (mxc_jpeg->mode == MXC_JPEG_DECODE)
@@ -2677,6 +2667,11 @@ static int mxc_jpeg_attach_pm_domains(struct mxc_jpeg_dev *jpeg)
dev_err(dev, "No power domains defined for jpeg node\n");
return jpeg->num_domains;
}
+ if (jpeg->num_domains == 1) {
+ /* genpd_dev_pm_attach() attach automatically if power domains count is 1 */
+ jpeg->num_domains = 0;
+ return 0;
+ }
jpeg->pd_dev = devm_kmalloc_array(dev, jpeg->num_domains,
sizeof(*jpeg->pd_dev), GFP_KERNEL);
@@ -2718,7 +2713,6 @@ static int mxc_jpeg_probe(struct platform_device *pdev)
int ret;
int mode;
const struct of_device_id *of_id;
- unsigned int slot;
of_id = of_match_node(mxc_jpeg_match, dev->of_node);
if (!of_id)
@@ -2742,19 +2736,22 @@ static int mxc_jpeg_probe(struct platform_device *pdev)
if (IS_ERR(jpeg->base_reg))
return PTR_ERR(jpeg->base_reg);
- for (slot = 0; slot < MXC_MAX_SLOTS; slot++) {
- dec_irq = platform_get_irq(pdev, slot);
- if (dec_irq < 0) {
- ret = dec_irq;
- goto err_irq;
- }
- ret = devm_request_irq(&pdev->dev, dec_irq, mxc_jpeg_dec_irq,
- 0, pdev->name, jpeg);
- if (ret) {
- dev_err(&pdev->dev, "Failed to request irq %d (%d)\n",
- dec_irq, ret);
- goto err_irq;
- }
+ ret = of_property_read_u32_index(pdev->dev.of_node, "slot", 0, &jpeg->slot_data.slot);
+ if (ret)
+ jpeg->slot_data.slot = 0;
+ dev_info(&pdev->dev, "choose slot %d\n", jpeg->slot_data.slot);
+ dec_irq = platform_get_irq(pdev, 0);
+ if (dec_irq < 0) {
+ dev_err(&pdev->dev, "Failed to get irq %d\n", dec_irq);
+ ret = dec_irq;
+ goto err_irq;
+ }
+ ret = devm_request_irq(&pdev->dev, dec_irq, mxc_jpeg_dec_irq,
+ 0, pdev->name, jpeg);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to request irq %d (%d)\n",
+ dec_irq, ret);
+ goto err_irq;
}
jpeg->pdev = pdev;
@@ -2914,11 +2911,9 @@ static const struct dev_pm_ops mxc_jpeg_pm_ops = {
static void mxc_jpeg_remove(struct platform_device *pdev)
{
- unsigned int slot;
struct mxc_jpeg_dev *jpeg = platform_get_drvdata(pdev);
- for (slot = 0; slot < MXC_MAX_SLOTS; slot++)
- mxc_jpeg_free_slot_data(jpeg, slot);
+ mxc_jpeg_free_slot_data(jpeg);
pm_runtime_disable(&pdev->dev);
video_unregister_device(jpeg->dec_vdev);
diff --git a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h
index 87157db78082..d80e94cc9d99 100644
--- a/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h
+++ b/drivers/media/platform/nxp/imx-jpeg/mxc-jpeg.h
@@ -97,7 +97,7 @@ struct mxc_jpeg_ctx {
struct mxc_jpeg_q_data cap_q;
struct v4l2_fh fh;
enum mxc_jpeg_enc_state enc_state;
- unsigned int slot;
+ int slot;
unsigned int source_change;
bool header_parsed;
struct v4l2_ctrl_handler ctrl_handler;
@@ -106,6 +106,7 @@ struct mxc_jpeg_ctx {
};
struct mxc_jpeg_slot_data {
+ int slot;
bool used;
struct mxc_jpeg_desc *desc; // enc/dec descriptor
struct mxc_jpeg_desc *cfg_desc; // configuration descriptor
@@ -128,7 +129,7 @@ struct mxc_jpeg_dev {
struct v4l2_device v4l2_dev;
struct v4l2_m2m_dev *m2m_dev;
struct video_device *dec_vdev;
- struct mxc_jpeg_slot_data slot_data[MXC_MAX_SLOTS];
+ struct mxc_jpeg_slot_data slot_data;
int num_domains;
struct device **pd_dev;
struct device_link **pd_link;
diff --git a/drivers/media/platform/nxp/imx7-media-csi.c b/drivers/media/platform/nxp/imx7-media-csi.c
index 0bd2613b9320..791bde67f439 100644
--- a/drivers/media/platform/nxp/imx7-media-csi.c
+++ b/drivers/media/platform/nxp/imx7-media-csi.c
@@ -9,7 +9,9 @@
#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
+#include <linux/math.h>
#include <linux/mfd/syscon.h>
+#include <linux/minmax.h>
#include <linux/module.h>
#include <linux/of_device.h>
#include <linux/of_graph.h>
@@ -1137,8 +1139,9 @@ __imx7_csi_video_try_fmt(struct v4l2_pix_format *pixfmt,
* TODO: Implement configurable stride support.
*/
walign = 8 * 8 / cc->bpp;
- v4l_bound_align_image(&pixfmt->width, 1, 0xffff, walign,
- &pixfmt->height, 1, 0xffff, 1, 0);
+ pixfmt->width = clamp(round_up(pixfmt->width, walign), walign,
+ round_down(65535U, walign));
+ pixfmt->height = clamp(pixfmt->height, 1U, 65535U);
pixfmt->bytesperline = pixfmt->width * cc->bpp / 8;
pixfmt->sizeimage = pixfmt->bytesperline * pixfmt->height;
diff --git a/drivers/media/platform/qcom/venus/hfi_cmds.c b/drivers/media/platform/qcom/venus/hfi_cmds.c
index 7f0802a5518c..3418d2dd9371 100644
--- a/drivers/media/platform/qcom/venus/hfi_cmds.c
+++ b/drivers/media/platform/qcom/venus/hfi_cmds.c
@@ -251,8 +251,8 @@ int pkt_session_unset_buffers(struct hfi_session_release_buffer_pkt *pkt,
pkt->extradata_size = 0;
pkt->shdr.hdr.size =
- struct_size((struct hfi_session_set_buffers_pkt *)0,
- buffer_info, bd->num_buffers);
+ struct_size_t(struct hfi_session_set_buffers_pkt,
+ buffer_info, bd->num_buffers);
}
pkt->response_req = bd->response_required;
diff --git a/drivers/media/platform/verisilicon/hantro.h b/drivers/media/platform/verisilicon/hantro.h
index 6523ffb74881..77aee9489516 100644
--- a/drivers/media/platform/verisilicon/hantro.h
+++ b/drivers/media/platform/verisilicon/hantro.h
@@ -370,26 +370,26 @@ extern int hantro_debug;
pr_err("%s:%d: " fmt, __func__, __LINE__, ##args)
/* Structure access helpers. */
-static inline struct hantro_ctx *fh_to_ctx(struct v4l2_fh *fh)
+static __always_inline struct hantro_ctx *fh_to_ctx(struct v4l2_fh *fh)
{
return container_of(fh, struct hantro_ctx, fh);
}
/* Register accessors. */
-static inline void vepu_write_relaxed(struct hantro_dev *vpu,
- u32 val, u32 reg)
+static __always_inline void vepu_write_relaxed(struct hantro_dev *vpu,
+ u32 val, u32 reg)
{
vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
writel_relaxed(val, vpu->enc_base + reg);
}
-static inline void vepu_write(struct hantro_dev *vpu, u32 val, u32 reg)
+static __always_inline void vepu_write(struct hantro_dev *vpu, u32 val, u32 reg)
{
vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
writel(val, vpu->enc_base + reg);
}
-static inline u32 vepu_read(struct hantro_dev *vpu, u32 reg)
+static __always_inline u32 vepu_read(struct hantro_dev *vpu, u32 reg)
{
u32 val = readl(vpu->enc_base + reg);
@@ -397,27 +397,27 @@ static inline u32 vepu_read(struct hantro_dev *vpu, u32 reg)
return val;
}
-static inline void vdpu_write_relaxed(struct hantro_dev *vpu,
- u32 val, u32 reg)
+static __always_inline void vdpu_write_relaxed(struct hantro_dev *vpu,
+ u32 val, u32 reg)
{
vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
writel_relaxed(val, vpu->dec_base + reg);
}
-static inline void vdpu_write(struct hantro_dev *vpu, u32 val, u32 reg)
+static __always_inline void vdpu_write(struct hantro_dev *vpu, u32 val, u32 reg)
{
vpu_debug(6, "0x%04x = 0x%08x\n", reg / 4, val);
writel(val, vpu->dec_base + reg);
}
-static inline void hantro_write_addr(struct hantro_dev *vpu,
- unsigned long offset,
- dma_addr_t addr)
+static __always_inline void hantro_write_addr(struct hantro_dev *vpu,
+ unsigned long offset,
+ dma_addr_t addr)
{
vdpu_write(vpu, addr & 0xffffffff, offset);
}
-static inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg)
+static __always_inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg)
{
u32 val = readl(vpu->dec_base + reg);
@@ -425,9 +425,9 @@ static inline u32 vdpu_read(struct hantro_dev *vpu, u32 reg)
return val;
}
-static inline u32 vdpu_read_mask(struct hantro_dev *vpu,
- const struct hantro_reg *reg,
- u32 val)
+static __always_inline u32 vdpu_read_mask(struct hantro_dev *vpu,
+ const struct hantro_reg *reg,
+ u32 val)
{
u32 v;
@@ -437,18 +437,18 @@ static inline u32 vdpu_read_mask(struct hantro_dev *vpu,
return v;
}
-static inline void hantro_reg_write(struct hantro_dev *vpu,
- const struct hantro_reg *reg,
- u32 val)
+static __always_inline void hantro_reg_write(struct hantro_dev *vpu,
+ const struct hantro_reg *reg,
+ u32 val)
{
- vdpu_write_relaxed(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
+ vdpu_write(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
}
-static inline void hantro_reg_write_s(struct hantro_dev *vpu,
- const struct hantro_reg *reg,
- u32 val)
+static __always_inline void hantro_reg_write_relaxed(struct hantro_dev *vpu,
+ const struct hantro_reg *reg,
+ u32 val)
{
- vdpu_write(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
+ vdpu_write_relaxed(vpu, vdpu_read_mask(vpu, reg, val), reg->base);
}
void *hantro_get_ctrl(struct hantro_ctx *ctx, u32 id);
diff --git a/drivers/media/platform/verisilicon/hantro_postproc.c b/drivers/media/platform/verisilicon/hantro_postproc.c
index c977d64105b1..0224ff68ab3f 100644
--- a/drivers/media/platform/verisilicon/hantro_postproc.c
+++ b/drivers/media/platform/verisilicon/hantro_postproc.c
@@ -21,11 +21,11 @@
val); \
}
-#define HANTRO_PP_REG_WRITE_S(vpu, reg_name, val) \
+#define HANTRO_PP_REG_WRITE_RELAXED(vpu, reg_name, val) \
{ \
- hantro_reg_write_s(vpu, \
- &hantro_g1_postproc_regs.reg_name, \
- val); \
+ hantro_reg_write_relaxed(vpu, \
+ &hantro_g1_postproc_regs.reg_name, \
+ val); \
}
#define VPU_PP_IN_YUYV 0x0
@@ -72,7 +72,7 @@ static void hantro_postproc_g1_enable(struct hantro_ctx *ctx)
dma_addr_t dst_dma;
/* Turn on pipeline mode. Must be done first. */
- HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x1);
+ HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x1);
src_pp_fmt = VPU_PP_IN_NV12;
@@ -242,7 +242,7 @@ static void hantro_postproc_g1_disable(struct hantro_ctx *ctx)
{
struct hantro_dev *vpu = ctx->dev;
- HANTRO_PP_REG_WRITE_S(vpu, pipeline_en, 0x0);
+ HANTRO_PP_REG_WRITE(vpu, pipeline_en, 0x0);
}
static void hantro_postproc_g2_disable(struct hantro_ctx *ctx)
diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c
index 5ac2a424b13d..f4988f03640a 100644
--- a/drivers/media/usb/uvc/uvc_v4l2.c
+++ b/drivers/media/usb/uvc/uvc_v4l2.c
@@ -45,7 +45,7 @@ static int uvc_control_add_xu_mapping(struct uvc_video_chain *chain,
map->menu_names = NULL;
map->menu_mapping = NULL;
- map->menu_mask = BIT_MASK(xmap->menu_count);
+ map->menu_mask = GENMASK(xmap->menu_count - 1, 0);
size = xmap->menu_count * sizeof(*map->menu_mapping);
map->menu_mapping = kzalloc(size, GFP_KERNEL);
diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c
index 4a750da1c12a..deb6e65b59af 100644
--- a/drivers/memory/tegra/mc.c
+++ b/drivers/memory/tegra/mc.c
@@ -755,6 +755,43 @@ const char *const tegra_mc_error_names[8] = {
[6] = "SMMU translation error",
};
+struct icc_node *tegra_mc_icc_xlate(struct of_phandle_args *spec, void *data)
+{
+ struct tegra_mc *mc = icc_provider_to_tegra_mc(data);
+ struct icc_node *node;
+
+ list_for_each_entry(node, &mc->provider.nodes, node_list) {
+ if (node->id == spec->args[0])
+ return node;
+ }
+
+ /*
+ * If a client driver calls devm_of_icc_get() before the MC driver
+ * is probed, then return EPROBE_DEFER to the client driver.
+ */
+ return ERR_PTR(-EPROBE_DEFER);
+}
+
+static int tegra_mc_icc_get(struct icc_node *node, u32 *average, u32 *peak)
+{
+ *average = 0;
+ *peak = 0;
+
+ return 0;
+}
+
+static int tegra_mc_icc_set(struct icc_node *src, struct icc_node *dst)
+{
+ return 0;
+}
+
+const struct tegra_mc_icc_ops tegra_mc_icc_ops = {
+ .xlate = tegra_mc_icc_xlate,
+ .aggregate = icc_std_aggregate,
+ .get_bw = tegra_mc_icc_get,
+ .set = tegra_mc_icc_set,
+};
+
/*
* Memory Controller (MC) has few Memory Clients that are issuing memory
* bandwidth allocation requests to the MC interconnect provider. The MC
diff --git a/drivers/memory/tegra/tegra194.c b/drivers/memory/tegra/tegra194.c
index b2416ee3ac26..26035ac3a1eb 100644
--- a/drivers/memory/tegra/tegra194.c
+++ b/drivers/memory/tegra/tegra194.c
@@ -1355,6 +1355,7 @@ const struct tegra_mc_soc tegra194_mc_soc = {
MC_INT_SECURITY_VIOLATION | MC_INT_DECERR_EMEM,
.has_addr_hi_reg = true,
.ops = &tegra186_mc_ops,
+ .icc_ops = &tegra_mc_icc_ops,
.ch_intmask = 0x00000f00,
.global_intstatus_channel_shift = 8,
};
diff --git a/drivers/memory/tegra/tegra234.c b/drivers/memory/tegra/tegra234.c
index 8e873a7bc34f..8fb83b39f5f5 100644
--- a/drivers/memory/tegra/tegra234.c
+++ b/drivers/memory/tegra/tegra234.c
@@ -827,7 +827,7 @@ static int tegra234_mc_icc_set(struct icc_node *src, struct icc_node *dst)
return 0;
if (!mc->bwmgr_mrq_supported)
- return -EINVAL;
+ return 0;
if (!mc->bpmp) {
dev_err(mc->dev, "BPMP reference NULL\n");
@@ -874,7 +874,7 @@ static int tegra234_mc_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
struct tegra_mc *mc = icc_provider_to_tegra_mc(p);
if (!mc->bwmgr_mrq_supported)
- return -EINVAL;
+ return 0;
if (node->id == TEGRA_ICC_MC_CPU_CLUSTER0 ||
node->id == TEGRA_ICC_MC_CPU_CLUSTER1 ||
@@ -889,27 +889,6 @@ static int tegra234_mc_icc_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
return 0;
}
-static struct icc_node*
-tegra234_mc_of_icc_xlate(struct of_phandle_args *spec, void *data)
-{
- struct tegra_mc *mc = icc_provider_to_tegra_mc(data);
- unsigned int cl_id = spec->args[0];
- struct icc_node *node;
-
- list_for_each_entry(node, &mc->provider.nodes, node_list) {
- if (node->id != cl_id)
- continue;
-
- return node;
- }
-
- /*
- * If a client driver calls devm_of_icc_get() before the MC driver
- * is probed, then return EPROBE_DEFER to the client driver.
- */
- return ERR_PTR(-EPROBE_DEFER);
-}
-
static int tegra234_mc_icc_get_init_bw(struct icc_node *node, u32 *avg, u32 *peak)
{
*avg = 0;
@@ -919,7 +898,7 @@ static int tegra234_mc_icc_get_init_bw(struct icc_node *node, u32 *avg, u32 *pea
}
static const struct tegra_mc_icc_ops tegra234_mc_icc_ops = {
- .xlate = tegra234_mc_of_icc_xlate,
+ .xlate = tegra_mc_icc_xlate,
.aggregate = tegra234_mc_icc_aggregate,
.get_bw = tegra234_mc_icc_get_init_bw,
.set = tegra234_mc_icc_set,
diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c
index d676cf63a966..3dae5e3a1697 100644
--- a/drivers/misc/cardreader/rts5227.c
+++ b/drivers/misc/cardreader/rts5227.c
@@ -195,7 +195,7 @@ static int rts5227_extra_init_hw(struct rtsx_pcr *pcr)
}
}
- if (option->force_clkreq_0)
+ if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, PETXCFG,
FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
else
diff --git a/drivers/misc/cardreader/rts5228.c b/drivers/misc/cardreader/rts5228.c
index cfebad51d1d8..f4ab09439da7 100644
--- a/drivers/misc/cardreader/rts5228.c
+++ b/drivers/misc/cardreader/rts5228.c
@@ -435,17 +435,10 @@ static void rts5228_init_from_cfg(struct rtsx_pcr *pcr)
option->ltr_enabled = false;
}
}
-
- if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
- | PM_L1_1_EN | PM_L1_2_EN))
- option->force_clkreq_0 = false;
- else
- option->force_clkreq_0 = true;
}
static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
{
- struct rtsx_cr_option *option = &pcr->option;
rtsx_pci_write_register(pcr, RTS5228_AUTOLOAD_CFG1,
CD_RESUME_EN_MASK, CD_RESUME_EN_MASK);
@@ -476,17 +469,6 @@ static int rts5228_extra_init_hw(struct rtsx_pcr *pcr)
else
rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
- /*
- * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
- * to drive low, and we forcibly request clock.
- */
- if (option->force_clkreq_0)
- rtsx_pci_write_register(pcr, PETXCFG,
- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
- else
- rtsx_pci_write_register(pcr, PETXCFG,
- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
if (pcr->rtd3_en) {
diff --git a/drivers/misc/cardreader/rts5249.c b/drivers/misc/cardreader/rts5249.c
index 91d240dd68fa..47ab72a43256 100644
--- a/drivers/misc/cardreader/rts5249.c
+++ b/drivers/misc/cardreader/rts5249.c
@@ -327,12 +327,11 @@ static int rts5249_extra_init_hw(struct rtsx_pcr *pcr)
}
}
-
/*
* If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
* to drive low, and we forcibly request clock.
*/
- if (option->force_clkreq_0)
+ if (option->force_clkreq_0 && pcr->aspm_mode == ASPM_MODE_CFG)
rtsx_pci_write_register(pcr, PETXCFG,
FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
else
diff --git a/drivers/misc/cardreader/rts5260.c b/drivers/misc/cardreader/rts5260.c
index 9b42b20a3e5a..79b18f6f73a8 100644
--- a/drivers/misc/cardreader/rts5260.c
+++ b/drivers/misc/cardreader/rts5260.c
@@ -517,17 +517,10 @@ static void rts5260_init_from_cfg(struct rtsx_pcr *pcr)
option->ltr_enabled = false;
}
}
-
- if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
- | PM_L1_1_EN | PM_L1_2_EN))
- option->force_clkreq_0 = false;
- else
- option->force_clkreq_0 = true;
}
static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
{
- struct rtsx_cr_option *option = &pcr->option;
/* Set mcu_cnt to 7 to ensure data can be sampled properly */
rtsx_pci_write_register(pcr, 0xFC03, 0x7F, 0x07);
@@ -546,17 +539,6 @@ static int rts5260_extra_init_hw(struct rtsx_pcr *pcr)
rts5260_init_hw(pcr);
- /*
- * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
- * to drive low, and we forcibly request clock.
- */
- if (option->force_clkreq_0)
- rtsx_pci_write_register(pcr, PETXCFG,
- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
- else
- rtsx_pci_write_register(pcr, PETXCFG,
- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
rtsx_pci_write_register(pcr, pcr->reg_pm_ctrl3, 0x10, 0x00);
return 0;
diff --git a/drivers/misc/cardreader/rts5261.c b/drivers/misc/cardreader/rts5261.c
index b1e76030cafd..94af6bf8a25a 100644
--- a/drivers/misc/cardreader/rts5261.c
+++ b/drivers/misc/cardreader/rts5261.c
@@ -498,17 +498,10 @@ static void rts5261_init_from_cfg(struct rtsx_pcr *pcr)
option->ltr_enabled = false;
}
}
-
- if (rtsx_check_dev_flag(pcr, ASPM_L1_1_EN | ASPM_L1_2_EN
- | PM_L1_1_EN | PM_L1_2_EN))
- option->force_clkreq_0 = false;
- else
- option->force_clkreq_0 = true;
}
static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
{
- struct rtsx_cr_option *option = &pcr->option;
u32 val;
rtsx_pci_write_register(pcr, RTS5261_AUTOLOAD_CFG1,
@@ -554,17 +547,6 @@ static int rts5261_extra_init_hw(struct rtsx_pcr *pcr)
else
rtsx_pci_write_register(pcr, PETXCFG, 0x30, 0x00);
- /*
- * If u_force_clkreq_0 is enabled, CLKREQ# PIN will be forced
- * to drive low, and we forcibly request clock.
- */
- if (option->force_clkreq_0)
- rtsx_pci_write_register(pcr, PETXCFG,
- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_LOW);
- else
- rtsx_pci_write_register(pcr, PETXCFG,
- FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
-
rtsx_pci_write_register(pcr, PWD_SUSPEND_EN, 0xFF, 0xFB);
if (pcr->rtd3_en) {
diff --git a/drivers/misc/cardreader/rtsx_pcr.c b/drivers/misc/cardreader/rtsx_pcr.c
index 32b7783e9d4f..a3f4b52bb159 100644
--- a/drivers/misc/cardreader/rtsx_pcr.c
+++ b/drivers/misc/cardreader/rtsx_pcr.c
@@ -1326,8 +1326,11 @@ static int rtsx_pci_init_hw(struct rtsx_pcr *pcr)
return err;
}
- if (pcr->aspm_mode == ASPM_MODE_REG)
+ if (pcr->aspm_mode == ASPM_MODE_REG) {
rtsx_pci_write_register(pcr, ASPM_FORCE_CTL, 0x30, 0x30);
+ rtsx_pci_write_register(pcr, PETXCFG,
+ FORCE_CLKREQ_DELINK_MASK, FORCE_CLKREQ_HIGH);
+ }
/* No CD interrupt if probing driver with card inserted.
* So we need to initialize pcr->card_exist here.
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c
index 35fec1bf1b3d..5867af9f592c 100644
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -139,7 +139,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
if (ret) {
ret->i_ino = get_next_ino();
ret->i_mode = mode;
- ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+ ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
}
return ret;
}
diff --git a/drivers/misc/ibmvmc.c b/drivers/misc/ibmvmc.c
index cbaf6d35e854..2101eb12bcba 100644
--- a/drivers/misc/ibmvmc.c
+++ b/drivers/misc/ibmvmc.c
@@ -1124,7 +1124,7 @@ static ssize_t ibmvmc_write(struct file *file, const char *buffer,
goto out;
inode = file_inode(file);
- inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
dev_dbg(adapter->dev, "write: file = 0x%lx, count = 0x%lx\n",
diff --git a/drivers/misc/sram.c b/drivers/misc/sram.c
index 5757adf418b1..61209739dc43 100644
--- a/drivers/misc/sram.c
+++ b/drivers/misc/sram.c
@@ -236,7 +236,7 @@ static int sram_reserve_regions(struct sram_dev *sram, struct resource *res)
}
if (!label)
block->label = devm_kasprintf(sram->dev, GFP_KERNEL,
- "%s", dev_name(sram->dev));
+ "%s", of_node_full_name(child));
else
block->label = devm_kstrdup(sram->dev,
label, GFP_KERNEL);
diff --git a/drivers/misc/tps6594-esm.c b/drivers/misc/tps6594-esm.c
index b488f704f104..05e2c151e632 100644
--- a/drivers/misc/tps6594-esm.c
+++ b/drivers/misc/tps6594-esm.c
@@ -13,6 +13,8 @@
#include <linux/mfd/tps6594.h>
+#define TPS6594_DEV_REV_1 0x08
+
static irqreturn_t tps6594_esm_isr(int irq, void *dev_id)
{
struct platform_device *pdev = dev_id;
@@ -32,11 +34,26 @@ static int tps6594_esm_probe(struct platform_device *pdev)
{
struct tps6594 *tps = dev_get_drvdata(pdev->dev.parent);
struct device *dev = &pdev->dev;
+ unsigned int rev;
int irq;
int ret;
int i;
- for (i = 0 ; i < pdev->num_resources ; i++) {
+ /*
+ * Due to a bug in revision 1 of the PMIC, the GPIO3 used for the
+ * SoC ESM function is used to power the load switch instead.
+ * As a consequence, ESM can not be used on those PMIC.
+ * Check the version and return an error in case of revision 1.
+ */
+ ret = regmap_read(tps->regmap, TPS6594_REG_DEV_REV, &rev);
+ if (ret)
+ return dev_err_probe(dev, ret,
+ "Failed to read PMIC revision\n");
+ if (rev == TPS6594_DEV_REV_1)
+ return dev_err_probe(dev, -ENODEV,
+ "ESM not supported for revision 1 PMIC\n");
+
+ for (i = 0; i < pdev->num_resources; i++) {
irq = platform_get_irq_byname(pdev, pdev->resource[i].name);
if (irq < 0)
return dev_err_probe(dev, irq, "Failed to get %s irq\n",
diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c
index f701efb1fa78..b6f4be25b31b 100644
--- a/drivers/mmc/core/block.c
+++ b/drivers/mmc/core/block.c
@@ -2097,14 +2097,14 @@ static void mmc_blk_mq_poll_completion(struct mmc_queue *mq,
mmc_blk_urgent_bkops(mq, mqrq);
}
-static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
+static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, enum mmc_issue_type issue_type)
{
unsigned long flags;
bool put_card;
spin_lock_irqsave(&mq->lock, flags);
- mq->in_flight[mmc_issue_type(mq, req)] -= 1;
+ mq->in_flight[issue_type] -= 1;
put_card = (mmc_tot_in_flight(mq) == 0);
@@ -2117,6 +2117,7 @@ static void mmc_blk_mq_dec_in_flight(struct mmc_queue *mq, struct request *req)
static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req,
bool can_sleep)
{
+ enum mmc_issue_type issue_type = mmc_issue_type(mq, req);
struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
struct mmc_request *mrq = &mqrq->brq.mrq;
struct mmc_host *host = mq->card->host;
@@ -2136,7 +2137,7 @@ static void mmc_blk_mq_post_req(struct mmc_queue *mq, struct request *req,
blk_mq_complete_request(req);
}
- mmc_blk_mq_dec_in_flight(mq, req);
+ mmc_blk_mq_dec_in_flight(mq, issue_type);
}
void mmc_blk_mq_recovery(struct mmc_queue *mq)
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
index 2d002c81dcf3..d0d6ffcf78d4 100644
--- a/drivers/mmc/host/moxart-mmc.c
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -338,13 +338,7 @@ static void moxart_transfer_pio(struct moxart_host *host)
return;
}
for (len = 0; len < remain && len < host->fifo_width;) {
- /* SCR data must be read in big endian. */
- if (data->mrq->cmd->opcode == SD_APP_SEND_SCR)
- *sgp = ioread32be(host->base +
- REG_DATA_WINDOW);
- else
- *sgp = ioread32(host->base +
- REG_DATA_WINDOW);
+ *sgp = ioread32(host->base + REG_DATA_WINDOW);
sgp++;
len += 4;
}
diff --git a/drivers/mmc/host/sdhci_f_sdh30.c b/drivers/mmc/host/sdhci_f_sdh30.c
index a202a69a4b08..3215063bcf86 100644
--- a/drivers/mmc/host/sdhci_f_sdh30.c
+++ b/drivers/mmc/host/sdhci_f_sdh30.c
@@ -29,9 +29,16 @@ struct f_sdhost_priv {
bool enable_cmd_dat_delay;
};
+static void *sdhci_f_sdhost_priv(struct sdhci_host *host)
+{
+ struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+
+ return sdhci_pltfm_priv(pltfm_host);
+}
+
static void sdhci_f_sdh30_soft_voltage_switch(struct sdhci_host *host)
{
- struct f_sdhost_priv *priv = sdhci_priv(host);
+ struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
u32 ctrl = 0;
usleep_range(2500, 3000);
@@ -64,7 +71,7 @@ static unsigned int sdhci_f_sdh30_get_min_clock(struct sdhci_host *host)
static void sdhci_f_sdh30_reset(struct sdhci_host *host, u8 mask)
{
- struct f_sdhost_priv *priv = sdhci_priv(host);
+ struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
u32 ctl;
if (sdhci_readw(host, SDHCI_CLOCK_CONTROL) == 0)
@@ -95,30 +102,32 @@ static const struct sdhci_ops sdhci_f_sdh30_ops = {
.set_uhs_signaling = sdhci_set_uhs_signaling,
};
+static const struct sdhci_pltfm_data sdhci_f_sdh30_pltfm_data = {
+ .ops = &sdhci_f_sdh30_ops,
+ .quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC
+ | SDHCI_QUIRK_INVERTED_WRITE_PROTECT,
+ .quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE
+ | SDHCI_QUIRK2_TUNING_WORK_AROUND,
+};
+
static int sdhci_f_sdh30_probe(struct platform_device *pdev)
{
struct sdhci_host *host;
struct device *dev = &pdev->dev;
- int irq, ctrl = 0, ret = 0;
+ int ctrl = 0, ret = 0;
struct f_sdhost_priv *priv;
+ struct sdhci_pltfm_host *pltfm_host;
u32 reg = 0;
- irq = platform_get_irq(pdev, 0);
- if (irq < 0)
- return irq;
-
- host = sdhci_alloc_host(dev, sizeof(struct f_sdhost_priv));
+ host = sdhci_pltfm_init(pdev, &sdhci_f_sdh30_pltfm_data,
+ sizeof(struct f_sdhost_priv));
if (IS_ERR(host))
return PTR_ERR(host);
- priv = sdhci_priv(host);
+ pltfm_host = sdhci_priv(host);
+ priv = sdhci_pltfm_priv(pltfm_host);
priv->dev = dev;
- host->quirks = SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC |
- SDHCI_QUIRK_INVERTED_WRITE_PROTECT;
- host->quirks2 = SDHCI_QUIRK2_SUPPORT_SINGLE |
- SDHCI_QUIRK2_TUNING_WORK_AROUND;
-
priv->enable_cmd_dat_delay = device_property_read_bool(dev,
"fujitsu,cmd-dat-delay-select");
@@ -126,18 +135,6 @@ static int sdhci_f_sdh30_probe(struct platform_device *pdev)
if (ret)
goto err;
- platform_set_drvdata(pdev, host);
-
- host->hw_name = "f_sdh30";
- host->ops = &sdhci_f_sdh30_ops;
- host->irq = irq;
-
- host->ioaddr = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(host->ioaddr)) {
- ret = PTR_ERR(host->ioaddr);
- goto err;
- }
-
if (dev_of_node(dev)) {
sdhci_get_of_property(pdev);
@@ -204,24 +201,24 @@ err_rst:
err_clk:
clk_disable_unprepare(priv->clk_iface);
err:
- sdhci_free_host(host);
+ sdhci_pltfm_free(pdev);
+
return ret;
}
static int sdhci_f_sdh30_remove(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
- struct f_sdhost_priv *priv = sdhci_priv(host);
+ struct f_sdhost_priv *priv = sdhci_f_sdhost_priv(host);
+ struct clk *clk_iface = priv->clk_iface;
+ struct reset_control *rst = priv->rst;
+ struct clk *clk = priv->clk;
- sdhci_remove_host(host, readl(host->ioaddr + SDHCI_INT_STATUS) ==
- 0xffffffff);
-
- reset_control_assert(priv->rst);
- clk_disable_unprepare(priv->clk);
- clk_disable_unprepare(priv->clk_iface);
+ sdhci_pltfm_unregister(pdev);
- sdhci_free_host(host);
- platform_set_drvdata(pdev, NULL);
+ reset_control_assert(rst);
+ clk_disable_unprepare(clk);
+ clk_disable_unprepare(clk_iface);
return 0;
}
diff --git a/drivers/mmc/host/sunplus-mmc.c b/drivers/mmc/host/sunplus-mmc.c
index db5e0dcdfa7f..2bdebeb1f8e4 100644
--- a/drivers/mmc/host/sunplus-mmc.c
+++ b/drivers/mmc/host/sunplus-mmc.c
@@ -863,11 +863,9 @@ static int spmmc_drv_probe(struct platform_device *pdev)
struct spmmc_host *host;
int ret = 0;
- mmc = mmc_alloc_host(sizeof(*host), &pdev->dev);
- if (!mmc) {
- ret = -ENOMEM;
- goto probe_free_host;
- }
+ mmc = devm_mmc_alloc_host(&pdev->dev, sizeof(struct spmmc_host));
+ if (!mmc)
+ return -ENOMEM;
host = mmc_priv(mmc);
host->mmc = mmc;
@@ -902,7 +900,7 @@ static int spmmc_drv_probe(struct platform_device *pdev)
ret = mmc_of_parse(mmc);
if (ret)
- goto probe_free_host;
+ goto clk_disable;
mmc->ops = &spmmc_ops;
mmc->f_min = SPMMC_MIN_CLK;
@@ -911,7 +909,7 @@ static int spmmc_drv_probe(struct platform_device *pdev)
ret = mmc_regulator_get_supply(mmc);
if (ret)
- goto probe_free_host;
+ goto clk_disable;
if (!mmc->ocr_avail)
mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
@@ -927,14 +925,17 @@ static int spmmc_drv_probe(struct platform_device *pdev)
host->tuning_info.enable_tuning = 1;
pm_runtime_set_active(&pdev->dev);
pm_runtime_enable(&pdev->dev);
- mmc_add_host(mmc);
+ ret = mmc_add_host(mmc);
+ if (ret)
+ goto pm_disable;
- return ret;
+ return 0;
-probe_free_host:
- if (mmc)
- mmc_free_host(mmc);
+pm_disable:
+ pm_runtime_disable(&pdev->dev);
+clk_disable:
+ clk_disable_unprepare(host->clk);
return ret;
}
@@ -948,7 +949,6 @@ static int spmmc_drv_remove(struct platform_device *dev)
pm_runtime_put_noidle(&dev->dev);
pm_runtime_disable(&dev->dev);
platform_set_drvdata(dev, NULL);
- mmc_free_host(host->mmc);
return 0;
}
diff --git a/drivers/mmc/host/wbsd.c b/drivers/mmc/host/wbsd.c
index 521af9251f33..bf2a92fba0ed 100644
--- a/drivers/mmc/host/wbsd.c
+++ b/drivers/mmc/host/wbsd.c
@@ -1705,8 +1705,6 @@ static int wbsd_init(struct device *dev, int base, int irq, int dma,
wbsd_release_resources(host);
wbsd_free_mmc(dev);
-
- mmc_free_host(mmc);
return ret;
}
diff --git a/drivers/mtd/nand/raw/fsl_upm.c b/drivers/mtd/nand/raw/fsl_upm.c
index 086426139173..7366e85c09fd 100644
--- a/drivers/mtd/nand/raw/fsl_upm.c
+++ b/drivers/mtd/nand/raw/fsl_upm.c
@@ -135,7 +135,7 @@ static int fun_exec_op(struct nand_chip *chip, const struct nand_operation *op,
unsigned int i;
int ret;
- if (op->cs > NAND_MAX_CHIPS)
+ if (op->cs >= NAND_MAX_CHIPS)
return -EINVAL;
if (check_only)
diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c
index d3faf8086631..b10011dec1e6 100644
--- a/drivers/mtd/nand/raw/meson_nand.c
+++ b/drivers/mtd/nand/raw/meson_nand.c
@@ -1278,7 +1278,6 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
struct meson_nfc *nfc = nand_get_controller_data(nand);
struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
struct mtd_info *mtd = nand_to_mtd(nand);
- int nsectors = mtd->writesize / 1024;
int raw_writesize;
int ret;
@@ -1304,7 +1303,7 @@ static int meson_nand_attach_chip(struct nand_chip *nand)
nand->options |= NAND_NO_SUBPAGE_WRITE;
ret = nand_ecc_choose_conf(nand, nfc->data->ecc_caps,
- mtd->oobsize - 2 * nsectors);
+ mtd->oobsize - 2);
if (ret) {
dev_err(nfc->dev, "failed to ECC init\n");
return -EINVAL;
diff --git a/drivers/mtd/nand/raw/omap_elm.c b/drivers/mtd/nand/raw/omap_elm.c
index 6e1eac6644a6..4a97d4a76454 100644
--- a/drivers/mtd/nand/raw/omap_elm.c
+++ b/drivers/mtd/nand/raw/omap_elm.c
@@ -177,17 +177,17 @@ static void elm_load_syndrome(struct elm_info *info,
switch (info->bch_type) {
case BCH8_ECC:
/* syndrome fragment 0 = ecc[9-12B] */
- val = cpu_to_be32(*(u32 *) &ecc[9]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[9]);
elm_write_reg(info, offset, val);
/* syndrome fragment 1 = ecc[5-8B] */
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[5]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[5]);
elm_write_reg(info, offset, val);
/* syndrome fragment 2 = ecc[1-4B] */
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[1]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[1]);
elm_write_reg(info, offset, val);
/* syndrome fragment 3 = ecc[0B] */
@@ -197,35 +197,35 @@ static void elm_load_syndrome(struct elm_info *info,
break;
case BCH4_ECC:
/* syndrome fragment 0 = ecc[20-52b] bits */
- val = (cpu_to_be32(*(u32 *) &ecc[3]) >> 4) |
+ val = ((__force u32)cpu_to_be32(*(u32 *)&ecc[3]) >> 4) |
((ecc[2] & 0xf) << 28);
elm_write_reg(info, offset, val);
/* syndrome fragment 1 = ecc[0-20b] bits */
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[0]) >> 12;
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 12;
elm_write_reg(info, offset, val);
break;
case BCH16_ECC:
- val = cpu_to_be32(*(u32 *) &ecc[22]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[22]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[18]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[18]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[14]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[14]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[10]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[10]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[6]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[6]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[2]);
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[2]);
elm_write_reg(info, offset, val);
offset += 4;
- val = cpu_to_be32(*(u32 *) &ecc[0]) >> 16;
+ val = (__force u32)cpu_to_be32(*(u32 *)&ecc[0]) >> 16;
elm_write_reg(info, offset, val);
break;
default:
diff --git a/drivers/mtd/nand/raw/rockchip-nand-controller.c b/drivers/mtd/nand/raw/rockchip-nand-controller.c
index 2312e27362cb..5a04680342c3 100644
--- a/drivers/mtd/nand/raw/rockchip-nand-controller.c
+++ b/drivers/mtd/nand/raw/rockchip-nand-controller.c
@@ -562,9 +562,10 @@ static int rk_nfc_write_page_raw(struct nand_chip *chip, const u8 *buf,
* BBM OOB1 OOB2 OOB3 |......| PA0 PA1 PA2 PA3
*
* The rk_nfc_ooblayout_free() function already has reserved
- * these 4 bytes with:
+ * these 4 bytes together with 2 bytes for BBM
+ * by reducing it's length:
*
- * oob_region->offset = NFC_SYS_DATA_SIZE + 2;
+ * oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2;
*/
if (!i)
memcpy(rk_nfc_oob_ptr(chip, i),
@@ -597,7 +598,7 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
int pages_per_blk = mtd->erasesize / mtd->writesize;
int ret = 0, i, boot_rom_mode = 0;
dma_addr_t dma_data, dma_oob;
- u32 reg;
+ u32 tmp;
u8 *oob;
nand_prog_page_begin_op(chip, page, 0, NULL, 0);
@@ -624,6 +625,13 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
*
* 0xFF 0xFF 0xFF 0xFF | BBM OOB1 OOB2 OOB3 | ...
*
+ * The code here just swaps the first 4 bytes with the last
+ * 4 bytes without losing any data.
+ *
+ * The chip->oob_poi data layout:
+ *
+ * BBM OOB1 OOB2 OOB3 |......| PA0 PA1 PA2 PA3
+ *
* Configure the ECC algorithm supported by the boot ROM.
*/
if ((page < (pages_per_blk * rknand->boot_blks)) &&
@@ -634,21 +642,17 @@ static int rk_nfc_write_page_hwecc(struct nand_chip *chip, const u8 *buf,
}
for (i = 0; i < ecc->steps; i++) {
- if (!i) {
- reg = 0xFFFFFFFF;
- } else {
+ if (!i)
+ oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE;
+ else
oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
- reg = oob[0] | oob[1] << 8 | oob[2] << 16 |
- oob[3] << 24;
- }
- if (!i && boot_rom_mode)
- reg = (page & (pages_per_blk - 1)) * 4;
+ tmp = oob[0] | oob[1] << 8 | oob[2] << 16 | oob[3] << 24;
if (nfc->cfg->type == NFC_V9)
- nfc->oob_buf[i] = reg;
+ nfc->oob_buf[i] = tmp;
else
- nfc->oob_buf[i * (oob_step / 4)] = reg;
+ nfc->oob_buf[i * (oob_step / 4)] = tmp;
}
dma_data = dma_map_single(nfc->dev, (void *)nfc->page_buf,
@@ -811,12 +815,17 @@ static int rk_nfc_read_page_hwecc(struct nand_chip *chip, u8 *buf, int oob_on,
goto timeout_err;
}
- for (i = 1; i < ecc->steps; i++) {
- oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
+ for (i = 0; i < ecc->steps; i++) {
+ if (!i)
+ oob = chip->oob_poi + (ecc->steps - 1) * NFC_SYS_DATA_SIZE;
+ else
+ oob = chip->oob_poi + (i - 1) * NFC_SYS_DATA_SIZE;
+
if (nfc->cfg->type == NFC_V9)
tmp = nfc->oob_buf[i];
else
tmp = nfc->oob_buf[i * (oob_step / 4)];
+
*oob++ = (u8)tmp;
*oob++ = (u8)(tmp >> 8);
*oob++ = (u8)(tmp >> 16);
@@ -933,12 +942,8 @@ static int rk_nfc_ooblayout_free(struct mtd_info *mtd, int section,
if (section)
return -ERANGE;
- /*
- * The beginning of the OOB area stores the reserved data for the NFC,
- * the size of the reserved data is NFC_SYS_DATA_SIZE bytes.
- */
oob_region->length = rknand->metadata_size - NFC_SYS_DATA_SIZE - 2;
- oob_region->offset = NFC_SYS_DATA_SIZE + 2;
+ oob_region->offset = 2;
return 0;
}
diff --git a/drivers/mtd/nand/spi/toshiba.c b/drivers/mtd/nand/spi/toshiba.c
index 7380b1ebaccd..a80427c13121 100644
--- a/drivers/mtd/nand/spi/toshiba.c
+++ b/drivers/mtd/nand/spi/toshiba.c
@@ -73,7 +73,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
{
struct nand_device *nand = spinand_to_nand(spinand);
u8 mbf = 0;
- struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf);
+ struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf);
switch (status & STATUS_ECC_MASK) {
case STATUS_ECC_NO_BITFLIPS:
@@ -92,7 +92,7 @@ static int tx58cxgxsxraix_ecc_get_status(struct spinand_device *spinand,
if (spi_mem_exec_op(spinand->spimem, &op))
return nanddev_get_ecc_conf(nand)->strength;
- mbf >>= 4;
+ mbf = *(spinand->scratchbuf) >> 4;
if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
return nanddev_get_ecc_conf(nand)->strength;
diff --git a/drivers/mtd/nand/spi/winbond.c b/drivers/mtd/nand/spi/winbond.c
index 3ad58cd284d8..f507e3759301 100644
--- a/drivers/mtd/nand/spi/winbond.c
+++ b/drivers/mtd/nand/spi/winbond.c
@@ -108,7 +108,7 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand,
{
struct nand_device *nand = spinand_to_nand(spinand);
u8 mbf = 0;
- struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf);
+ struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf);
switch (status & STATUS_ECC_MASK) {
case STATUS_ECC_NO_BITFLIPS:
@@ -126,7 +126,7 @@ static int w25n02kv_ecc_get_status(struct spinand_device *spinand,
if (spi_mem_exec_op(spinand->spimem, &op))
return nanddev_get_ecc_conf(nand)->strength;
- mbf >>= 4;
+ mbf = *(spinand->scratchbuf) >> 4;
if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
return nanddev_get_ecc_conf(nand)->strength;
diff --git a/drivers/mtd/spi-nor/spansion.c b/drivers/mtd/spi-nor/spansion.c
index 36876aa849ed..15f9a80c10b9 100644
--- a/drivers/mtd/spi-nor/spansion.c
+++ b/drivers/mtd/spi-nor/spansion.c
@@ -361,7 +361,7 @@ static int cypress_nor_determine_addr_mode_by_sr1(struct spi_nor *nor,
*/
static int cypress_nor_set_addr_mode_nbytes(struct spi_nor *nor)
{
- struct spi_mem_op op = {};
+ struct spi_mem_op op;
u8 addr_mode;
int ret;
@@ -492,7 +492,7 @@ s25fs256t_post_bfpt_fixup(struct spi_nor *nor,
const struct sfdp_parameter_header *bfpt_header,
const struct sfdp_bfpt *bfpt)
{
- struct spi_mem_op op = {};
+ struct spi_mem_op op;
int ret;
ret = cypress_nor_set_addr_mode_nbytes(nor);
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index b9dbad3a8af8..fc5da5d7744d 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -660,10 +660,10 @@ static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
return NULL;
arp = (struct arp_pkt *)skb_network_header(skb);
- /* Don't modify or load balance ARPs that do not originate locally
- * (e.g.,arrive via a bridge).
+ /* Don't modify or load balance ARPs that do not originate
+ * from the bond itself or a VLAN directly above the bond.
*/
- if (!bond_slave_has_mac_rx(bond, arp->mac_src))
+ if (!bond_slave_has_mac_rcu(bond, arp->mac_src))
return NULL;
dev = ip_dev_find(dev_net(bond->dev), arp->ip_src);
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7a0f25301f7e..447b06ea4fc9 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1508,6 +1508,11 @@ static void bond_setup_by_slave(struct net_device *bond_dev,
memcpy(bond_dev->broadcast, slave_dev->broadcast,
slave_dev->addr_len);
+
+ if (slave_dev->flags & IFF_POINTOPOINT) {
+ bond_dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
+ bond_dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
+ }
}
/* On bonding slaves other than the currently active slave, suppress
@@ -5896,7 +5901,9 @@ void bond_setup(struct net_device *bond_dev)
bond_dev->hw_features = BOND_VLAN_FEATURES |
NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER;
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_FILTER;
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
bond_dev->features |= bond_dev->hw_features;
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
index 68df6d4641b5..eebf967f4711 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
@@ -227,6 +227,8 @@ static int
__mcp251xfd_chip_set_mode(const struct mcp251xfd_priv *priv,
const u8 mode_req, bool nowait)
{
+ const struct can_bittiming *bt = &priv->can.bittiming;
+ unsigned long timeout_us = MCP251XFD_POLL_TIMEOUT_US;
u32 con = 0, con_reqop, osc = 0;
u8 mode;
int err;
@@ -246,12 +248,16 @@ __mcp251xfd_chip_set_mode(const struct mcp251xfd_priv *priv,
if (mode_req == MCP251XFD_REG_CON_MODE_SLEEP || nowait)
return 0;
+ if (bt->bitrate)
+ timeout_us = max_t(unsigned long, timeout_us,
+ MCP251XFD_FRAME_LEN_MAX_BITS * USEC_PER_SEC /
+ bt->bitrate);
+
err = regmap_read_poll_timeout(priv->map_reg, MCP251XFD_REG_CON, con,
!mcp251xfd_reg_invalid(con) &&
FIELD_GET(MCP251XFD_REG_CON_OPMOD_MASK,
con) == mode_req,
- MCP251XFD_POLL_SLEEP_US,
- MCP251XFD_POLL_TIMEOUT_US);
+ MCP251XFD_POLL_SLEEP_US, timeout_us);
if (err != -ETIMEDOUT && err != -EBADMSG)
return err;
diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
index 7024ff0cc2c0..24510b3b8020 100644
--- a/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
+++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd.h
@@ -387,6 +387,7 @@ static_assert(MCP251XFD_TIMESTAMP_WORK_DELAY_SEC <
#define MCP251XFD_OSC_STAB_TIMEOUT_US (10 * MCP251XFD_OSC_STAB_SLEEP_US)
#define MCP251XFD_POLL_SLEEP_US (10)
#define MCP251XFD_POLL_TIMEOUT_US (USEC_PER_MSEC)
+#define MCP251XFD_FRAME_LEN_MAX_BITS (736)
/* Misc */
#define MCP251XFD_NAPI_WEIGHT 32
diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index d476c2884008..bd9eb066ecf1 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -303,12 +303,6 @@ struct gs_can {
struct can_bittiming_const bt_const, data_bt_const;
unsigned int channel; /* channel number */
- /* time counter for hardware timestamps */
- struct cyclecounter cc;
- struct timecounter tc;
- spinlock_t tc_lock; /* spinlock to guard access tc->cycle_last */
- struct delayed_work timestamp;
-
u32 feature;
unsigned int hf_size_tx;
@@ -325,6 +319,13 @@ struct gs_usb {
struct gs_can *canch[GS_MAX_INTF];
struct usb_anchor rx_submitted;
struct usb_device *udev;
+
+ /* time counter for hardware timestamps */
+ struct cyclecounter cc;
+ struct timecounter tc;
+ spinlock_t tc_lock; /* spinlock to guard access tc->cycle_last */
+ struct delayed_work timestamp;
+
unsigned int hf_size_rx;
u8 active_channels;
};
@@ -388,15 +389,15 @@ static int gs_cmd_reset(struct gs_can *dev)
GFP_KERNEL);
}
-static inline int gs_usb_get_timestamp(const struct gs_can *dev,
+static inline int gs_usb_get_timestamp(const struct gs_usb *parent,
u32 *timestamp_p)
{
__le32 timestamp;
int rc;
- rc = usb_control_msg_recv(dev->udev, 0, GS_USB_BREQ_TIMESTAMP,
+ rc = usb_control_msg_recv(parent->udev, 0, GS_USB_BREQ_TIMESTAMP,
USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE,
- dev->channel, 0,
+ 0, 0,
&timestamp, sizeof(timestamp),
USB_CTRL_GET_TIMEOUT,
GFP_KERNEL);
@@ -410,20 +411,20 @@ static inline int gs_usb_get_timestamp(const struct gs_can *dev,
static u64 gs_usb_timestamp_read(const struct cyclecounter *cc) __must_hold(&dev->tc_lock)
{
- struct gs_can *dev = container_of(cc, struct gs_can, cc);
+ struct gs_usb *parent = container_of(cc, struct gs_usb, cc);
u32 timestamp = 0;
int err;
- lockdep_assert_held(&dev->tc_lock);
+ lockdep_assert_held(&parent->tc_lock);
/* drop lock for synchronous USB transfer */
- spin_unlock_bh(&dev->tc_lock);
- err = gs_usb_get_timestamp(dev, &timestamp);
- spin_lock_bh(&dev->tc_lock);
+ spin_unlock_bh(&parent->tc_lock);
+ err = gs_usb_get_timestamp(parent, &timestamp);
+ spin_lock_bh(&parent->tc_lock);
if (err)
- netdev_err(dev->netdev,
- "Error %d while reading timestamp. HW timestamps may be inaccurate.",
- err);
+ dev_err(&parent->udev->dev,
+ "Error %d while reading timestamp. HW timestamps may be inaccurate.",
+ err);
return timestamp;
}
@@ -431,14 +432,14 @@ static u64 gs_usb_timestamp_read(const struct cyclecounter *cc) __must_hold(&dev
static void gs_usb_timestamp_work(struct work_struct *work)
{
struct delayed_work *delayed_work = to_delayed_work(work);
- struct gs_can *dev;
+ struct gs_usb *parent;
- dev = container_of(delayed_work, struct gs_can, timestamp);
- spin_lock_bh(&dev->tc_lock);
- timecounter_read(&dev->tc);
- spin_unlock_bh(&dev->tc_lock);
+ parent = container_of(delayed_work, struct gs_usb, timestamp);
+ spin_lock_bh(&parent->tc_lock);
+ timecounter_read(&parent->tc);
+ spin_unlock_bh(&parent->tc_lock);
- schedule_delayed_work(&dev->timestamp,
+ schedule_delayed_work(&parent->timestamp,
GS_USB_TIMESTAMP_WORK_DELAY_SEC * HZ);
}
@@ -446,37 +447,38 @@ static void gs_usb_skb_set_timestamp(struct gs_can *dev,
struct sk_buff *skb, u32 timestamp)
{
struct skb_shared_hwtstamps *hwtstamps = skb_hwtstamps(skb);
+ struct gs_usb *parent = dev->parent;
u64 ns;
- spin_lock_bh(&dev->tc_lock);
- ns = timecounter_cyc2time(&dev->tc, timestamp);
- spin_unlock_bh(&dev->tc_lock);
+ spin_lock_bh(&parent->tc_lock);
+ ns = timecounter_cyc2time(&parent->tc, timestamp);
+ spin_unlock_bh(&parent->tc_lock);
hwtstamps->hwtstamp = ns_to_ktime(ns);
}
-static void gs_usb_timestamp_init(struct gs_can *dev)
+static void gs_usb_timestamp_init(struct gs_usb *parent)
{
- struct cyclecounter *cc = &dev->cc;
+ struct cyclecounter *cc = &parent->cc;
cc->read = gs_usb_timestamp_read;
cc->mask = CYCLECOUNTER_MASK(32);
cc->shift = 32 - bits_per(NSEC_PER_SEC / GS_USB_TIMESTAMP_TIMER_HZ);
cc->mult = clocksource_hz2mult(GS_USB_TIMESTAMP_TIMER_HZ, cc->shift);
- spin_lock_init(&dev->tc_lock);
- spin_lock_bh(&dev->tc_lock);
- timecounter_init(&dev->tc, &dev->cc, ktime_get_real_ns());
- spin_unlock_bh(&dev->tc_lock);
+ spin_lock_init(&parent->tc_lock);
+ spin_lock_bh(&parent->tc_lock);
+ timecounter_init(&parent->tc, &parent->cc, ktime_get_real_ns());
+ spin_unlock_bh(&parent->tc_lock);
- INIT_DELAYED_WORK(&dev->timestamp, gs_usb_timestamp_work);
- schedule_delayed_work(&dev->timestamp,
+ INIT_DELAYED_WORK(&parent->timestamp, gs_usb_timestamp_work);
+ schedule_delayed_work(&parent->timestamp,
GS_USB_TIMESTAMP_WORK_DELAY_SEC * HZ);
}
-static void gs_usb_timestamp_stop(struct gs_can *dev)
+static void gs_usb_timestamp_stop(struct gs_usb *parent)
{
- cancel_delayed_work_sync(&dev->timestamp);
+ cancel_delayed_work_sync(&parent->timestamp);
}
static void gs_update_state(struct gs_can *dev, struct can_frame *cf)
@@ -560,6 +562,9 @@ static void gs_usb_receive_bulk_callback(struct urb *urb)
if (!netif_device_present(netdev))
return;
+ if (!netif_running(netdev))
+ goto resubmit_urb;
+
if (hf->echo_id == -1) { /* normal rx */
if (hf->flags & GS_CAN_FLAG_FD) {
skb = alloc_canfd_skb(dev->netdev, &cfd);
@@ -833,6 +838,7 @@ static int gs_can_open(struct net_device *netdev)
.mode = cpu_to_le32(GS_CAN_MODE_START),
};
struct gs_host_frame *hf;
+ struct urb *urb = NULL;
u32 ctrlmode;
u32 flags = 0;
int rc, i;
@@ -855,14 +861,18 @@ static int gs_can_open(struct net_device *netdev)
}
if (!parent->active_channels) {
+ if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
+ gs_usb_timestamp_init(parent);
+
for (i = 0; i < GS_MAX_RX_URBS; i++) {
- struct urb *urb;
u8 *buf;
/* alloc rx urb */
urb = usb_alloc_urb(0, GFP_KERNEL);
- if (!urb)
- return -ENOMEM;
+ if (!urb) {
+ rc = -ENOMEM;
+ goto out_usb_kill_anchored_urbs;
+ }
/* alloc rx buffer */
buf = kmalloc(dev->parent->hf_size_rx,
@@ -870,8 +880,8 @@ static int gs_can_open(struct net_device *netdev)
if (!buf) {
netdev_err(netdev,
"No memory left for USB buffer\n");
- usb_free_urb(urb);
- return -ENOMEM;
+ rc = -ENOMEM;
+ goto out_usb_free_urb;
}
/* fill, anchor, and submit rx urb */
@@ -894,9 +904,7 @@ static int gs_can_open(struct net_device *netdev)
netdev_err(netdev,
"usb_submit failed (err=%d)\n", rc);
- usb_unanchor_urb(urb);
- usb_free_urb(urb);
- break;
+ goto out_usb_unanchor_urb;
}
/* Drop reference,
@@ -926,13 +934,9 @@ static int gs_can_open(struct net_device *netdev)
flags |= GS_CAN_MODE_FD;
/* if hardware supports timestamps, enable it */
- if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP) {
+ if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
flags |= GS_CAN_MODE_HW_TIMESTAMP;
- /* start polling timestamp */
- gs_usb_timestamp_init(dev);
- }
-
/* finally start device */
dev->can.state = CAN_STATE_ERROR_ACTIVE;
dm.flags = cpu_to_le32(flags);
@@ -942,10 +946,9 @@ static int gs_can_open(struct net_device *netdev)
GFP_KERNEL);
if (rc) {
netdev_err(netdev, "Couldn't start device (err=%d)\n", rc);
- if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
- gs_usb_timestamp_stop(dev);
dev->can.state = CAN_STATE_STOPPED;
- return rc;
+
+ goto out_usb_kill_anchored_urbs;
}
parent->active_channels++;
@@ -953,6 +956,22 @@ static int gs_can_open(struct net_device *netdev)
netif_start_queue(netdev);
return 0;
+
+out_usb_unanchor_urb:
+ usb_unanchor_urb(urb);
+out_usb_free_urb:
+ usb_free_urb(urb);
+out_usb_kill_anchored_urbs:
+ if (!parent->active_channels) {
+ usb_kill_anchored_urbs(&dev->tx_submitted);
+
+ if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
+ gs_usb_timestamp_stop(parent);
+ }
+
+ close_candev(netdev);
+
+ return rc;
}
static int gs_usb_get_state(const struct net_device *netdev,
@@ -998,20 +1017,21 @@ static int gs_can_close(struct net_device *netdev)
netif_stop_queue(netdev);
- /* stop polling timestamp */
- if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
- gs_usb_timestamp_stop(dev);
-
/* Stop polling */
parent->active_channels--;
if (!parent->active_channels) {
usb_kill_anchored_urbs(&parent->rx_submitted);
+
+ if (dev->feature & GS_CAN_FEATURE_HW_TIMESTAMP)
+ gs_usb_timestamp_stop(parent);
}
/* Stop sending URBs */
usb_kill_anchored_urbs(&dev->tx_submitted);
atomic_set(&dev->active_tx_urbs, 0);
+ dev->can.state = CAN_STATE_STOPPED;
+
/* reset the device */
rc = gs_cmd_reset(dev);
if (rc < 0)
diff --git a/drivers/net/can/vxcan.c b/drivers/net/can/vxcan.c
index 4068d962203d..98c669ad5141 100644
--- a/drivers/net/can/vxcan.c
+++ b/drivers/net/can/vxcan.c
@@ -192,12 +192,7 @@ static int vxcan_newlink(struct net *net, struct net_device *dev,
nla_peer = data[VXCAN_INFO_PEER];
ifmp = nla_data(nla_peer);
- err = rtnl_nla_parse_ifla(peer_tb,
- nla_data(nla_peer) +
- sizeof(struct ifinfomsg),
- nla_len(nla_peer) -
- sizeof(struct ifinfomsg),
- NULL);
+ err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
if (err < 0)
return err;
diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
index cde253d27bd0..72374b066f64 100644
--- a/drivers/net/dsa/bcm_sf2.c
+++ b/drivers/net/dsa/bcm_sf2.c
@@ -1436,7 +1436,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
if (IS_ERR(priv->clk))
return PTR_ERR(priv->clk);
- clk_prepare_enable(priv->clk);
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ return ret;
priv->clk_mdiv = devm_clk_get_optional(&pdev->dev, "sw_switch_mdiv");
if (IS_ERR(priv->clk_mdiv)) {
@@ -1444,7 +1446,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
goto out_clk;
}
- clk_prepare_enable(priv->clk_mdiv);
+ ret = clk_prepare_enable(priv->clk_mdiv);
+ if (ret)
+ goto out_clk;
ret = bcm_sf2_sw_rst(priv);
if (ret) {
diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
index 84d502589f8e..91aba470fb2f 100644
--- a/drivers/net/dsa/microchip/ksz8795.c
+++ b/drivers/net/dsa/microchip/ksz8795.c
@@ -506,7 +506,13 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
(data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
shifts[STATIC_MAC_FWD_PORTS];
alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
- data_hi >>= 1;
+
+ /* KSZ8795 family switches have STATIC_MAC_TABLE_USE_FID and
+ * STATIC_MAC_TABLE_FID definitions off by 1 when doing read on the
+ * static MAC table compared to doing write.
+ */
+ if (ksz_is_ksz87xx(dev))
+ data_hi >>= 1;
alu->is_static = true;
alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 813b91a816bb..6c0623f88654 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -331,13 +331,13 @@ static const u32 ksz8795_masks[] = {
[STATIC_MAC_TABLE_VALID] = BIT(21),
[STATIC_MAC_TABLE_USE_FID] = BIT(23),
[STATIC_MAC_TABLE_FID] = GENMASK(30, 24),
- [STATIC_MAC_TABLE_OVERRIDE] = BIT(26),
- [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(24, 20),
+ [STATIC_MAC_TABLE_OVERRIDE] = BIT(22),
+ [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(20, 16),
[DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(6, 0),
- [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(8),
+ [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(7),
[DYNAMIC_MAC_TABLE_NOT_READY] = BIT(7),
[DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 29),
- [DYNAMIC_MAC_TABLE_FID] = GENMASK(26, 20),
+ [DYNAMIC_MAC_TABLE_FID] = GENMASK(22, 16),
[DYNAMIC_MAC_TABLE_SRC_PORT] = GENMASK(26, 24),
[DYNAMIC_MAC_TABLE_TIMESTAMP] = GENMASK(28, 27),
[P_MII_TX_FLOW_CTRL] = BIT(5),
@@ -635,10 +635,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
regmap_reg_range(0x1030, 0x1030),
regmap_reg_range(0x1100, 0x1115),
regmap_reg_range(0x111a, 0x111f),
- regmap_reg_range(0x1122, 0x1127),
- regmap_reg_range(0x112a, 0x112b),
- regmap_reg_range(0x1136, 0x1139),
- regmap_reg_range(0x113e, 0x113f),
+ regmap_reg_range(0x1120, 0x112b),
+ regmap_reg_range(0x1134, 0x113b),
+ regmap_reg_range(0x113c, 0x113f),
regmap_reg_range(0x1400, 0x1401),
regmap_reg_range(0x1403, 0x1403),
regmap_reg_range(0x1410, 0x1417),
@@ -669,10 +668,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
regmap_reg_range(0x2030, 0x2030),
regmap_reg_range(0x2100, 0x2115),
regmap_reg_range(0x211a, 0x211f),
- regmap_reg_range(0x2122, 0x2127),
- regmap_reg_range(0x212a, 0x212b),
- regmap_reg_range(0x2136, 0x2139),
- regmap_reg_range(0x213e, 0x213f),
+ regmap_reg_range(0x2120, 0x212b),
+ regmap_reg_range(0x2134, 0x213b),
+ regmap_reg_range(0x213c, 0x213f),
regmap_reg_range(0x2400, 0x2401),
regmap_reg_range(0x2403, 0x2403),
regmap_reg_range(0x2410, 0x2417),
@@ -703,10 +701,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
regmap_reg_range(0x3030, 0x3030),
regmap_reg_range(0x3100, 0x3115),
regmap_reg_range(0x311a, 0x311f),
- regmap_reg_range(0x3122, 0x3127),
- regmap_reg_range(0x312a, 0x312b),
- regmap_reg_range(0x3136, 0x3139),
- regmap_reg_range(0x313e, 0x313f),
+ regmap_reg_range(0x3120, 0x312b),
+ regmap_reg_range(0x3134, 0x313b),
+ regmap_reg_range(0x313c, 0x313f),
regmap_reg_range(0x3400, 0x3401),
regmap_reg_range(0x3403, 0x3403),
regmap_reg_range(0x3410, 0x3417),
@@ -737,10 +734,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
regmap_reg_range(0x4030, 0x4030),
regmap_reg_range(0x4100, 0x4115),
regmap_reg_range(0x411a, 0x411f),
- regmap_reg_range(0x4122, 0x4127),
- regmap_reg_range(0x412a, 0x412b),
- regmap_reg_range(0x4136, 0x4139),
- regmap_reg_range(0x413e, 0x413f),
+ regmap_reg_range(0x4120, 0x412b),
+ regmap_reg_range(0x4134, 0x413b),
+ regmap_reg_range(0x413c, 0x413f),
regmap_reg_range(0x4400, 0x4401),
regmap_reg_range(0x4403, 0x4403),
regmap_reg_range(0x4410, 0x4417),
@@ -771,10 +767,9 @@ static const struct regmap_range ksz9477_valid_regs[] = {
regmap_reg_range(0x5030, 0x5030),
regmap_reg_range(0x5100, 0x5115),
regmap_reg_range(0x511a, 0x511f),
- regmap_reg_range(0x5122, 0x5127),
- regmap_reg_range(0x512a, 0x512b),
- regmap_reg_range(0x5136, 0x5139),
- regmap_reg_range(0x513e, 0x513f),
+ regmap_reg_range(0x5120, 0x512b),
+ regmap_reg_range(0x5134, 0x513b),
+ regmap_reg_range(0x513c, 0x513f),
regmap_reg_range(0x5400, 0x5401),
regmap_reg_range(0x5403, 0x5403),
regmap_reg_range(0x5410, 0x5417),
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index 28444e5924f9..a4de58847dea 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -601,6 +601,13 @@ static inline void ksz_regmap_unlock(void *__mtx)
mutex_unlock(mtx);
}
+static inline bool ksz_is_ksz87xx(struct ksz_device *dev)
+{
+ return dev->chip_id == KSZ8795_CHIP_ID ||
+ dev->chip_id == KSZ8794_CHIP_ID ||
+ dev->chip_id == KSZ8765_CHIP_ID;
+}
+
static inline bool ksz_is_ksz88x3(struct ksz_device *dev)
{
return dev->chip_id == KSZ8830_CHIP_ID;
diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 38b3c6dda386..b8bb9f3b3609 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1006,6 +1006,10 @@ mt753x_trap_frames(struct mt7530_priv *priv)
mt7530_rmw(priv, MT753X_BPC, MT753X_BPDU_PORT_FW_MASK,
MT753X_BPDU_CPU_ONLY);
+ /* Trap 802.1X PAE frames to the CPU port(s) */
+ mt7530_rmw(priv, MT753X_BPC, MT753X_PAE_PORT_FW_MASK,
+ MT753X_PAE_PORT_FW(MT753X_BPDU_CPU_ONLY));
+
/* Trap LLDP frames with :0E MAC DA to the CPU port(s) */
mt7530_rmw(priv, MT753X_RGAC2, MT753X_R0E_PORT_FW_MASK,
MT753X_R0E_PORT_FW(MT753X_BPDU_CPU_ONLY));
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 08045b035e6a..17e42d30fff4 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -66,6 +66,8 @@ enum mt753x_id {
/* Registers for BPDU and PAE frame control*/
#define MT753X_BPC 0x24
#define MT753X_BPDU_PORT_FW_MASK GENMASK(2, 0)
+#define MT753X_PAE_PORT_FW_MASK GENMASK(18, 16)
+#define MT753X_PAE_PORT_FW(x) FIELD_PREP(MT753X_PAE_PORT_FW_MASK, x)
/* Register for :03 and :0E MAC DA frame control */
#define MT753X_RGAC2 0x2c
diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 8b51756bd805..7af2f08a62f1 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -109,6 +109,13 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
usleep_range(1000, 2000);
}
+ err = mv88e6xxx_read(chip, addr, reg, &data);
+ if (err)
+ return err;
+
+ if ((data & mask) == val)
+ return 0;
+
dev_err(chip->dev, "Timeout while waiting for switch\n");
return -ETIMEDOUT;
}
@@ -3027,6 +3034,14 @@ static void mv88e6xxx_hardware_reset(struct mv88e6xxx_chip *chip)
/* If there is a GPIO connected to the reset pin, toggle it */
if (gpiod) {
+ /* If the switch has just been reset and not yet completed
+ * loading EEPROM, the reset may interrupt the I2C transaction
+ * mid-byte, causing the first EEPROM read after the reset
+ * from the wrong location resulting in the switch booting
+ * to wrong mode and inoperable.
+ */
+ mv88e6xxx_g1_wait_eeprom_done(chip);
+
gpiod_set_value_cansleep(gpiod, 1);
usleep_range(10000, 20000);
gpiod_set_value_cansleep(gpiod, 0);
diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 8da46d284e35..bef879c6d500 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -1625,8 +1625,10 @@ static void felix_teardown(struct dsa_switch *ds)
struct felix *felix = ocelot_to_felix(ocelot);
struct dsa_port *dp;
+ rtnl_lock();
if (felix->tag_proto_ops)
felix->tag_proto_ops->teardown(ds);
+ rtnl_unlock();
dsa_switch_for_each_available_port(dp, ds)
ocelot_deinit_port(ocelot, dp->index);
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 1c113957fcf4..f16daa9b1765 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1069,6 +1069,9 @@ static u64 vsc9959_tas_remaining_gate_len_ps(u64 gate_len_ns)
if (gate_len_ns == U64_MAX)
return U64_MAX;
+ if (gate_len_ns < VSC9959_TAS_MIN_GATE_LEN_NS)
+ return 0;
+
return (gate_len_ns - VSC9959_TAS_MIN_GATE_LEN_NS) * PSEC_PER_NSEC;
}
diff --git a/drivers/net/dsa/qca/ar9331.c b/drivers/net/dsa/qca/ar9331.c
index b2bf78ac485e..3b0937031499 100644
--- a/drivers/net/dsa/qca/ar9331.c
+++ b/drivers/net/dsa/qca/ar9331.c
@@ -1002,6 +1002,8 @@ static const struct regmap_config ar9331_mdio_regmap_config = {
.val_bits = 32,
.reg_stride = 4,
.max_register = AR9331_SW_REG_PAGE,
+ .use_single_read = true,
+ .use_single_write = true,
.ranges = ar9331_regmap_range,
.num_ranges = ARRAY_SIZE(ar9331_regmap_range),
@@ -1018,8 +1020,6 @@ static struct regmap_bus ar9331_sw_bus = {
.val_format_endian_default = REGMAP_ENDIAN_NATIVE,
.read = ar9331_mdio_read,
.write = ar9331_sw_bus_write,
- .max_raw_read = 4,
- .max_raw_write = 4,
};
static int ar9331_sw_probe(struct mdio_device *mdiodev)
diff --git a/drivers/net/dsa/qca/qca8k-8xxx.c b/drivers/net/dsa/qca/qca8k-8xxx.c
index 09b80644c11b..efe9380d4a15 100644
--- a/drivers/net/dsa/qca/qca8k-8xxx.c
+++ b/drivers/net/dsa/qca/qca8k-8xxx.c
@@ -576,8 +576,11 @@ static struct regmap_config qca8k_regmap_config = {
.rd_table = &qca8k_readable_table,
.disable_locking = true, /* Locking is handled by qca8k read/write */
.cache_type = REGCACHE_NONE, /* Explicitly disable CACHE */
- .max_raw_read = 32, /* mgmt eth can read/write up to 8 registers at time */
- .max_raw_write = 32,
+ .max_raw_read = 32, /* mgmt eth can read up to 8 registers at time */
+ /* ATU regs suffer from a bug where some data are not correctly
+ * written. Disable bulk write to correctly write ATU entry.
+ */
+ .use_single_write = true,
};
static int
diff --git a/drivers/net/dsa/qca/qca8k-common.c b/drivers/net/dsa/qca/qca8k-common.c
index 8c2dc0e48ff4..13b8452ce5b2 100644
--- a/drivers/net/dsa/qca/qca8k-common.c
+++ b/drivers/net/dsa/qca/qca8k-common.c
@@ -244,7 +244,7 @@ void qca8k_fdb_flush(struct qca8k_priv *priv)
}
static int qca8k_fdb_search_and_insert(struct qca8k_priv *priv, u8 port_mask,
- const u8 *mac, u16 vid)
+ const u8 *mac, u16 vid, u8 aging)
{
struct qca8k_fdb fdb = { 0 };
int ret;
@@ -261,10 +261,12 @@ static int qca8k_fdb_search_and_insert(struct qca8k_priv *priv, u8 port_mask,
goto exit;
/* Rule exist. Delete first */
- if (!fdb.aging) {
+ if (fdb.aging) {
ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1);
if (ret)
goto exit;
+ } else {
+ fdb.aging = aging;
}
/* Add port to fdb portmask */
@@ -291,6 +293,10 @@ static int qca8k_fdb_search_and_del(struct qca8k_priv *priv, u8 port_mask,
if (ret < 0)
goto exit;
+ ret = qca8k_fdb_read(priv, &fdb);
+ if (ret < 0)
+ goto exit;
+
/* Rule doesn't exist. Why delete? */
if (!fdb.aging) {
ret = -EINVAL;
@@ -810,7 +816,11 @@ int qca8k_port_mdb_add(struct dsa_switch *ds, int port,
const u8 *addr = mdb->addr;
u16 vid = mdb->vid;
- return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid);
+ if (!vid)
+ vid = QCA8K_PORT_VID_DEF;
+
+ return qca8k_fdb_search_and_insert(priv, BIT(port), addr, vid,
+ QCA8K_ATU_STATUS_STATIC);
}
int qca8k_port_mdb_del(struct dsa_switch *ds, int port,
@@ -821,6 +831,9 @@ int qca8k_port_mdb_del(struct dsa_switch *ds, int port,
const u8 *addr = mdb->addr;
u16 vid = mdb->vid;
+ if (!vid)
+ vid = QCA8K_PORT_VID_DEF;
+
return qca8k_fdb_search_and_del(priv, BIT(port), addr, vid);
}
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 4a288799633f..940c5d1ff9cf 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2094,8 +2094,11 @@ static int atl1c_tso_csum(struct atl1c_adapter *adapter,
real_len = (((unsigned char *)ip_hdr(skb) - skb->data)
+ ntohs(ip_hdr(skb)->tot_len));
- if (real_len < skb->len)
- pskb_trim(skb, real_len);
+ if (real_len < skb->len) {
+ err = pskb_trim(skb, real_len);
+ if (err)
+ return err;
+ }
hdr_len = skb_tcp_all_headers(skb);
if (unlikely(skb->len == hdr_len)) {
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 5db0f3495a32..5935be190b9e 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -1641,8 +1641,11 @@ static int atl1e_tso_csum(struct atl1e_adapter *adapter,
real_len = (((unsigned char *)ip_hdr(skb) - skb->data)
+ ntohs(ip_hdr(skb)->tot_len));
- if (real_len < skb->len)
- pskb_trim(skb, real_len);
+ if (real_len < skb->len) {
+ err = pskb_trim(skb, real_len);
+ if (err)
+ return err;
+ }
hdr_len = skb_tcp_all_headers(skb);
if (unlikely(skb->len == hdr_len)) {
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index c8444bcdf527..02aa6fd8ebc2 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -2113,8 +2113,11 @@ static int atl1_tso(struct atl1_adapter *adapter, struct sk_buff *skb,
real_len = (((unsigned char *)iph - skb->data) +
ntohs(iph->tot_len));
- if (real_len < skb->len)
- pskb_trim(skb, real_len);
+ if (real_len < skb->len) {
+ err = pskb_trim(skb, real_len);
+ if (err)
+ return err;
+ }
hdr_len = skb_tcp_all_headers(skb);
if (skb->len == hdr_len) {
iph->check = 0;
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 392ec09a1d8a..3e4fb3c3e834 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -1793,11 +1793,9 @@ static int b44_nway_reset(struct net_device *dev)
b44_readphy(bp, MII_BMCR, &bmcr);
b44_readphy(bp, MII_BMCR, &bmcr);
r = -EINVAL;
- if (bmcr & BMCR_ANENABLE) {
- b44_writephy(bp, MII_BMCR,
- bmcr | BMCR_ANRESTART);
- r = 0;
- }
+ if (bmcr & BMCR_ANENABLE)
+ r = b44_writephy(bp, MII_BMCR,
+ bmcr | BMCR_ANRESTART);
spin_unlock_irq(&bp->lock);
return r;
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 10c7c232cc4e..52ee3751187a 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1448,7 +1448,7 @@ int bgmac_phy_connect_direct(struct bgmac *bgmac)
int err;
phy_dev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
- if (!phy_dev || IS_ERR(phy_dev)) {
+ if (IS_ERR(phy_dev)) {
dev_err(bgmac->dev, "Failed to register fixed PHY device\n");
return -ENODEV;
}
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 8bcde0a6e011..e2a4e1088b7f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1508,6 +1508,8 @@ struct bnx2x {
bool cnic_loaded;
struct cnic_eth_dev *(*cnic_probe)(struct net_device *);
+ bool nic_stopped;
+
/* Flag that indicates that we can start looking for FCoE L2 queue
* completions in the default status block.
*/
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 6ea5521074d3..e9c1e1bb5580 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -2715,6 +2715,7 @@ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
bnx2x_add_all_napi(bp);
DP(NETIF_MSG_IFUP, "napi added\n");
bnx2x_napi_enable(bp);
+ bp->nic_stopped = false;
if (IS_PF(bp)) {
/* set pf load just before approaching the MCP */
@@ -2960,6 +2961,7 @@ load_error2:
load_error1:
bnx2x_napi_disable(bp);
bnx2x_del_all_napi(bp);
+ bp->nic_stopped = true;
/* clear pf_load status, as it was already set */
if (IS_PF(bp))
@@ -3095,14 +3097,17 @@ int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
if (!CHIP_IS_E1x(bp))
bnx2x_pf_disable(bp);
- /* Disable HW interrupts, NAPI */
- bnx2x_netif_stop(bp, 1);
- /* Delete all NAPI objects */
- bnx2x_del_all_napi(bp);
- if (CNIC_LOADED(bp))
- bnx2x_del_all_napi_cnic(bp);
- /* Release IRQs */
- bnx2x_free_irq(bp);
+ if (!bp->nic_stopped) {
+ /* Disable HW interrupts, NAPI */
+ bnx2x_netif_stop(bp, 1);
+ /* Delete all NAPI objects */
+ bnx2x_del_all_napi(bp);
+ if (CNIC_LOADED(bp))
+ bnx2x_del_all_napi_cnic(bp);
+ /* Release IRQs */
+ bnx2x_free_irq(bp);
+ bp->nic_stopped = true;
+ }
/* Report UNLOAD_DONE to MCP */
bnx2x_send_unload_done(bp, false);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 1e7a6f1d4223..0d8e61c63c7c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -9474,15 +9474,18 @@ unload_error:
}
}
- /* Disable HW interrupts, NAPI */
- bnx2x_netif_stop(bp, 1);
- /* Delete all NAPI objects */
- bnx2x_del_all_napi(bp);
- if (CNIC_LOADED(bp))
- bnx2x_del_all_napi_cnic(bp);
+ if (!bp->nic_stopped) {
+ /* Disable HW interrupts, NAPI */
+ bnx2x_netif_stop(bp, 1);
+ /* Delete all NAPI objects */
+ bnx2x_del_all_napi(bp);
+ if (CNIC_LOADED(bp))
+ bnx2x_del_all_napi_cnic(bp);
- /* Release IRQs */
- bnx2x_free_irq(bp);
+ /* Release IRQs */
+ bnx2x_free_irq(bp);
+ bp->nic_stopped = true;
+ }
/* Reset the chip, unless PCI function is offline. If we reach this
* point following a PCI error handling, it means device is really
@@ -14238,13 +14241,16 @@ static pci_ers_result_t bnx2x_io_slot_reset(struct pci_dev *pdev)
}
bnx2x_drain_tx_queues(bp);
bnx2x_send_unload_req(bp, UNLOAD_RECOVERY);
- bnx2x_netif_stop(bp, 1);
- bnx2x_del_all_napi(bp);
+ if (!bp->nic_stopped) {
+ bnx2x_netif_stop(bp, 1);
+ bnx2x_del_all_napi(bp);
- if (CNIC_LOADED(bp))
- bnx2x_del_all_napi_cnic(bp);
+ if (CNIC_LOADED(bp))
+ bnx2x_del_all_napi_cnic(bp);
- bnx2x_free_irq(bp);
+ bnx2x_free_irq(bp);
+ bp->nic_stopped = true;
+ }
/* Report UNLOAD_DONE to MCP */
bnx2x_send_unload_done(bp, true);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 0657a0f5170f..8946a931e87e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -529,13 +529,16 @@ void bnx2x_vfpf_close_vf(struct bnx2x *bp)
bnx2x_vfpf_finalize(bp, &req->first_tlv);
free_irq:
- /* Disable HW interrupts, NAPI */
- bnx2x_netif_stop(bp, 0);
- /* Delete all NAPI objects */
- bnx2x_del_all_napi(bp);
-
- /* Release IRQs */
- bnx2x_free_irq(bp);
+ if (!bp->nic_stopped) {
+ /* Disable HW interrupts, NAPI */
+ bnx2x_netif_stop(bp, 0);
+ /* Delete all NAPI objects */
+ bnx2x_del_all_napi(bp);
+
+ /* Release IRQs */
+ bnx2x_free_irq(bp);
+ bp->nic_stopped = true;
+ }
}
static void bnx2x_leading_vfq_init(struct bnx2x *bp, struct bnx2x_virtf *vf,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e5b54e6025be..1eb490c48c52 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -633,12 +633,13 @@ tx_kick_pending:
return NETDEV_TX_OK;
}
-static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
{
struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index);
u16 cons = txr->tx_cons;
struct pci_dev *pdev = bp->pdev;
+ int nr_pkts = bnapi->tx_pkts;
int i;
unsigned int tx_bytes = 0;
@@ -688,6 +689,7 @@ next_tx_int:
dev_kfree_skb_any(skb);
}
+ bnapi->tx_pkts = 0;
WRITE_ONCE(txr->tx_cons, cons);
__netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
@@ -697,17 +699,24 @@ next_tx_int:
static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
struct bnxt_rx_ring_info *rxr,
+ unsigned int *offset,
gfp_t gfp)
{
struct device *dev = &bp->pdev->dev;
struct page *page;
- page = page_pool_dev_alloc_pages(rxr->page_pool);
+ if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) {
+ page = page_pool_dev_alloc_frag(rxr->page_pool, offset,
+ BNXT_RX_PAGE_SIZE);
+ } else {
+ page = page_pool_dev_alloc_pages(rxr->page_pool);
+ *offset = 0;
+ }
if (!page)
return NULL;
- *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
- DMA_ATTR_WEAK_ORDERING);
+ *mapping = dma_map_page_attrs(dev, page, *offset, BNXT_RX_PAGE_SIZE,
+ bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
if (dma_mapping_error(dev, *mapping)) {
page_pool_recycle_direct(rxr->page_pool, page);
return NULL;
@@ -747,15 +756,16 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
dma_addr_t mapping;
if (BNXT_RX_PAGE_MODE(bp)) {
+ unsigned int offset;
struct page *page =
- __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
+ __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);
if (!page)
return -ENOMEM;
mapping += bp->rx_dma_offset;
rx_buf->data = page;
- rx_buf->data_ptr = page_address(page) + bp->rx_offset;
+ rx_buf->data_ptr = page_address(page) + offset + bp->rx_offset;
} else {
u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, gfp);
@@ -815,7 +825,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
unsigned int offset = 0;
if (BNXT_RX_PAGE_MODE(bp)) {
- page = __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
+ page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);
if (!page)
return -ENOMEM;
@@ -962,15 +972,15 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
return NULL;
}
dma_addr -= bp->rx_dma_offset;
- dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
- DMA_ATTR_WEAK_ORDERING);
- skb = build_skb(page_address(page), PAGE_SIZE);
+ dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE,
+ bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
+ skb = build_skb(data_ptr - bp->rx_offset, BNXT_RX_PAGE_SIZE);
if (!skb) {
page_pool_recycle_direct(rxr->page_pool, page);
return NULL;
}
skb_mark_for_recycle(skb);
- skb_reserve(skb, bp->rx_dma_offset);
+ skb_reserve(skb, bp->rx_offset);
__skb_put(skb, len);
return skb;
@@ -996,8 +1006,8 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
return NULL;
}
dma_addr -= bp->rx_dma_offset;
- dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
- DMA_ATTR_WEAK_ORDERING);
+ dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE,
+ bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
if (unlikely(!payload))
payload = eth_get_headlen(bp->dev, data_ptr, len);
@@ -1010,7 +1020,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
skb_mark_for_recycle(skb);
off = (void *)data_ptr - page_address(page);
- skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE);
+ skb_add_rx_frag(skb, 0, page, off, len, BNXT_RX_PAGE_SIZE);
memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN,
payload + NET_IP_ALIGN);
@@ -1141,7 +1151,7 @@ static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp,
skb->data_len += total_frag_len;
skb->len += total_frag_len;
- skb->truesize += PAGE_SIZE * agg_bufs;
+ skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs;
return skb;
}
@@ -2569,12 +2579,11 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
return rx_pkts;
}
-static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
+static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi,
+ int budget)
{
- if (bnapi->tx_pkts) {
- bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
- bnapi->tx_pkts = 0;
- }
+ if (bnapi->tx_pkts)
+ bnapi->tx_int(bp, bnapi, budget);
if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) {
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
@@ -2603,7 +2612,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
*/
bnxt_db_cq(bp, &cpr->cp_db, cpr->cp_raw_cons);
- __bnxt_poll_work_done(bp, bnapi);
+ __bnxt_poll_work_done(bp, bnapi, budget);
return rx_pkts;
}
@@ -2734,7 +2743,7 @@ static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
}
static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
- u64 dbr_type)
+ u64 dbr_type, int budget)
{
struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
int i;
@@ -2750,7 +2759,7 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
cpr2->had_work_done = 0;
}
}
- __bnxt_poll_work_done(bp, bnapi);
+ __bnxt_poll_work_done(bp, bnapi, budget);
}
static int bnxt_poll_p5(struct napi_struct *napi, int budget)
@@ -2780,7 +2789,8 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
if (cpr->has_more_work)
break;
- __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL);
+ __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL,
+ budget);
cpr->cp_raw_cons = raw_cons;
if (napi_complete_done(napi, work_done))
BNXT_DB_NQ_ARM_P5(&cpr->cp_db,
@@ -2810,7 +2820,7 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
}
raw_cons = NEXT_RAW_CMP(raw_cons);
}
- __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ);
+ __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ, budget);
if (raw_cons != cpr->cp_raw_cons) {
cpr->cp_raw_cons = raw_cons;
BNXT_DB_NQ_P5(&cpr->cp_db, raw_cons);
@@ -2943,8 +2953,8 @@ skip_rx_tpa_free:
rx_buf->data = NULL;
if (BNXT_RX_PAGE_MODE(bp)) {
mapping -= bp->rx_dma_offset;
- dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
- bp->rx_dir,
+ dma_unmap_page_attrs(&pdev->dev, mapping,
+ BNXT_RX_PAGE_SIZE, bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);
page_pool_recycle_direct(rxr->page_pool, data);
} else {
@@ -3213,6 +3223,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
pp.napi = &rxr->bnapi->napi;
pp.dev = &bp->pdev->dev;
pp.dma_dir = DMA_BIDIRECTIONAL;
+ if (PAGE_SIZE > BNXT_RX_PAGE_SIZE)
+ pp.flags |= PP_FLAG_PAGE_FRAG;
rxr->page_pool = page_pool_create(&pp);
if (IS_ERR(rxr->page_pool)) {
@@ -3989,26 +4001,29 @@ void bnxt_set_ring_params(struct bnxt *bp)
*/
int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
{
+ struct net_device *dev = bp->dev;
+
if (page_mode) {
bp->flags &= ~BNXT_FLAG_AGG_RINGS;
bp->flags |= BNXT_FLAG_RX_PAGE_MODE;
- if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
+ if (bp->xdp_prog->aux->xdp_has_frags)
+ dev->max_mtu = min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
+ else
+ dev->max_mtu =
+ min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
+ if (dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
bp->flags |= BNXT_FLAG_JUMBO;
bp->rx_skb_func = bnxt_rx_multi_page_skb;
- bp->dev->max_mtu =
- min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
} else {
bp->flags |= BNXT_FLAG_NO_AGG_RINGS;
bp->rx_skb_func = bnxt_rx_page_skb;
- bp->dev->max_mtu =
- min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
}
bp->rx_dir = DMA_BIDIRECTIONAL;
/* Disable LRO or GRO_HW */
- netdev_update_features(bp->dev);
+ netdev_update_features(dev);
} else {
- bp->dev->max_mtu = bp->max_mtu;
+ dev->max_mtu = bp->max_mtu;
bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
bp->rx_dir = DMA_FROM_DEVICE;
bp->rx_skb_func = bnxt_rx_skb;
@@ -9429,6 +9444,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
cpr->sw_stats.rx.rx_resets++;
bnapi->in_reset = false;
+ bnapi->tx_pkts = 0;
+
if (bnapi->rx_ring) {
INIT_WORK(&cpr->dim.work, bnxt_dim_work);
cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 080e73496066..bb95c3dc5270 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1005,7 +1005,7 @@ struct bnxt_napi {
struct bnxt_tx_ring_info *tx_ring;
void (*tx_int)(struct bnxt *, struct bnxt_napi *,
- int);
+ int budget);
int tx_pkts;
u8 events;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index 4efa5fe6972b..fb43232310b2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -125,16 +125,20 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp,
dma_unmap_len_set(tx_buf, len, 0);
}
-void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
{
struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
bool rx_doorbell_needed = false;
+ int nr_pkts = bnapi->tx_pkts;
struct bnxt_sw_tx_bd *tx_buf;
u16 tx_cons = txr->tx_cons;
u16 last_tx_cons = tx_cons;
int i, j, frags;
+ if (!budget)
+ return;
+
for (i = 0; i < nr_pkts; i++) {
tx_buf = &txr->tx_buf_ring[tx_cons];
@@ -161,6 +165,8 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
}
tx_cons = NEXT_TX(tx_cons);
}
+
+ bnapi->tx_pkts = 0;
WRITE_ONCE(txr->tx_cons, tx_cons);
if (rx_doorbell_needed) {
tx_buf = &txr->tx_buf_ring[last_tx_cons];
@@ -180,8 +186,8 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
u16 cons, u8 *data_ptr, unsigned int len,
struct xdp_buff *xdp)
{
+ u32 buflen = BNXT_RX_PAGE_SIZE;
struct bnxt_sw_rx_bd *rx_buf;
- u32 buflen = PAGE_SIZE;
struct pci_dev *pdev;
dma_addr_t mapping;
u32 offset;
@@ -297,7 +303,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
rx_buf = &rxr->rx_buf_ring[cons];
mapping = rx_buf->mapping - bp->rx_dma_offset;
dma_unmap_page_attrs(&pdev->dev, mapping,
- PAGE_SIZE, bp->rx_dir,
+ BNXT_RX_PAGE_SIZE, bp->rx_dir,
DMA_ATTR_WEAK_ORDERING);
/* if we are unable to allocate a new buffer, abort and reuse */
@@ -480,7 +486,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
}
xdp_update_skb_shared_info(skb, num_frags,
sinfo->xdp_frags_size,
- PAGE_SIZE * sinfo->nr_frags,
+ BNXT_RX_PAGE_SIZE * sinfo->nr_frags,
xdp_buff_is_frag_pfmemalloc(xdp));
return skb;
}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
index ea430d6961df..5e412c5655ba 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
@@ -16,7 +16,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
struct bnxt_tx_ring_info *txr,
dma_addr_t mapping, u32 len,
struct xdp_buff *xdp);
-void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
+void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget);
bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
struct xdp_buff xdp, struct page *page, u8 **data_ptr,
unsigned int *len, u8 *event);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 0092e46c46f8..cc3afb605b1e 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -617,7 +617,7 @@ static int bcmgenet_mii_pd_init(struct bcmgenet_priv *priv)
};
phydev = fixed_phy_register(PHY_POLL, &fphy_status, NULL);
- if (!phydev || IS_ERR(phydev)) {
+ if (IS_ERR(phydev)) {
dev_err(kdev, "failed to register fixed PHY device\n");
return -ENODEV;
}
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 5ef073a79ce9..cb2810f175cc 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6881,7 +6881,10 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
ri->data = NULL;
- skb = build_skb(data, frag_size);
+ if (frag_size)
+ skb = build_skb(data, frag_size);
+ else
+ skb = slab_build_skb(data);
if (!skb) {
tg3_frag_free(frag_size != 0, data);
goto drop_it_no_recycle;
diff --git a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
index 04ad0f2b9677..7246e13dd559 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_debugfs.c
@@ -512,11 +512,6 @@ bnad_debugfs_init(struct bnad *bnad)
if (!bnad->port_debugfs_root) {
bnad->port_debugfs_root =
debugfs_create_dir(name, bna_debugfs_root);
- if (!bnad->port_debugfs_root) {
- netdev_warn(bnad->netdev,
- "debugfs root dir creation failed\n");
- return;
- }
atomic_inc(&bna_debugfs_port_count);
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index f6a0f12a6d52..82929ee76739 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -5194,6 +5194,9 @@ static int __maybe_unused macb_suspend(struct device *dev)
unsigned int q;
int err;
+ if (!device_may_wakeup(&bp->dev->dev))
+ phy_exit(bp->sgmii_phy);
+
if (!netif_running(netdev))
return 0;
@@ -5254,7 +5257,6 @@ static int __maybe_unused macb_suspend(struct device *dev)
if (!(bp->wol & MACB_WOL_ENABLED)) {
rtnl_lock();
phylink_stop(bp->phylink);
- phy_exit(bp->sgmii_phy);
rtnl_unlock();
spin_lock_irqsave(&bp->lock, flags);
macb_reset_hw(bp);
@@ -5284,6 +5286,9 @@ static int __maybe_unused macb_resume(struct device *dev)
unsigned int q;
int err;
+ if (!device_may_wakeup(&bp->dev->dev))
+ phy_init(bp->sgmii_phy);
+
if (!netif_running(netdev))
return 0;
@@ -5344,8 +5349,6 @@ static int __maybe_unused macb_resume(struct device *dev)
macb_set_rx_mode(netdev);
macb_restore_features(bp);
rtnl_lock();
- if (!device_may_wakeup(&bp->dev->dev))
- phy_init(bp->sgmii_phy);
phylink_start(bp->phylink);
rtnl_unlock();
diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
index c2e7037c7ba1..7750702900fa 100644
--- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
+++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c
@@ -1466,7 +1466,7 @@ static void make_established(struct sock *sk, u32 snd_isn, unsigned int opt)
tp->write_seq = snd_isn;
tp->snd_nxt = snd_isn;
tp->snd_una = snd_isn;
- inet_sk(sk)->inet_id = get_random_u16();
+ atomic_set(&inet_sk(sk)->inet_id, get_random_u16());
assign_rxopt(sk, opt);
if (tp->rcv_wnd > (RCV_BUFSIZ_M << 10))
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 18c2fc880d09..0616b5fe241c 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1138,7 +1138,8 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
(lancer_chip(adapter) || BE3_chip(adapter) ||
skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
ip = (struct iphdr *)ip_hdr(skb);
- pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
+ if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len))))
+ goto tx_drop;
}
/* If vlan tag is already inlined in the packet, skip HW VLAN
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 1416262d4296..e0a4cb7e3f50 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -1186,14 +1186,9 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
{
- struct device_node *node = pdev->dev.of_node;
struct platform_device *ierb_pdev;
struct device_node *ierb_node;
- /* Don't register with the IERB if the PF itself is disabled */
- if (!node || !of_device_is_available(node))
- return 0;
-
ierb_node = of_find_compatible_node(NULL, NULL,
"fsl,ls1028a-enetc-ierb");
if (!ierb_node || !of_device_is_available(ierb_node))
@@ -1208,56 +1203,81 @@ static int enetc_pf_register_with_ierb(struct pci_dev *pdev)
return enetc_ierb_register_pf(ierb_pdev, pdev);
}
-static int enetc_pf_probe(struct pci_dev *pdev,
- const struct pci_device_id *ent)
+static struct enetc_si *enetc_psi_create(struct pci_dev *pdev)
{
- struct device_node *node = pdev->dev.of_node;
- struct enetc_ndev_priv *priv;
- struct net_device *ndev;
struct enetc_si *si;
- struct enetc_pf *pf;
int err;
- err = enetc_pf_register_with_ierb(pdev);
- if (err == -EPROBE_DEFER)
- return err;
- if (err)
- dev_warn(&pdev->dev,
- "Could not register with IERB driver: %pe, please update the device tree\n",
- ERR_PTR(err));
-
- err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
- if (err)
- return dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+ err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(struct enetc_pf));
+ if (err) {
+ dev_err_probe(&pdev->dev, err, "PCI probing failed\n");
+ goto out;
+ }
si = pci_get_drvdata(pdev);
if (!si->hw.port || !si->hw.global) {
err = -ENODEV;
dev_err(&pdev->dev, "could not map PF space, probing a VF?\n");
- goto err_map_pf_space;
+ goto out_pci_remove;
}
err = enetc_setup_cbdr(&pdev->dev, &si->hw, ENETC_CBDR_DEFAULT_SIZE,
&si->cbd_ring);
if (err)
- goto err_setup_cbdr;
+ goto out_pci_remove;
err = enetc_init_port_rfs_memory(si);
if (err) {
dev_err(&pdev->dev, "Failed to initialize RFS memory\n");
- goto err_init_port_rfs;
+ goto out_teardown_cbdr;
}
err = enetc_init_port_rss_memory(si);
if (err) {
dev_err(&pdev->dev, "Failed to initialize RSS memory\n");
- goto err_init_port_rss;
+ goto out_teardown_cbdr;
}
- if (node && !of_device_is_available(node)) {
- dev_info(&pdev->dev, "device is disabled, skipping\n");
- err = -ENODEV;
- goto err_device_disabled;
+ return si;
+
+out_teardown_cbdr:
+ enetc_teardown_cbdr(&si->cbd_ring);
+out_pci_remove:
+ enetc_pci_remove(pdev);
+out:
+ return ERR_PTR(err);
+}
+
+static void enetc_psi_destroy(struct pci_dev *pdev)
+{
+ struct enetc_si *si = pci_get_drvdata(pdev);
+
+ enetc_teardown_cbdr(&si->cbd_ring);
+ enetc_pci_remove(pdev);
+}
+
+static int enetc_pf_probe(struct pci_dev *pdev,
+ const struct pci_device_id *ent)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct enetc_ndev_priv *priv;
+ struct net_device *ndev;
+ struct enetc_si *si;
+ struct enetc_pf *pf;
+ int err;
+
+ err = enetc_pf_register_with_ierb(pdev);
+ if (err == -EPROBE_DEFER)
+ return err;
+ if (err)
+ dev_warn(&pdev->dev,
+ "Could not register with IERB driver: %pe, please update the device tree\n",
+ ERR_PTR(err));
+
+ si = enetc_psi_create(pdev);
+ if (IS_ERR(si)) {
+ err = PTR_ERR(si);
+ goto err_psi_create;
}
pf = enetc_si_priv(si);
@@ -1339,15 +1359,9 @@ err_alloc_si_res:
si->ndev = NULL;
free_netdev(ndev);
err_alloc_netdev:
-err_init_port_rss:
-err_init_port_rfs:
-err_device_disabled:
err_setup_mac_addresses:
- enetc_teardown_cbdr(&si->cbd_ring);
-err_setup_cbdr:
-err_map_pf_space:
- enetc_pci_remove(pdev);
-
+ enetc_psi_destroy(pdev);
+err_psi_create:
return err;
}
@@ -1370,12 +1384,29 @@ static void enetc_pf_remove(struct pci_dev *pdev)
enetc_free_msix(priv);
enetc_free_si_resources(priv);
- enetc_teardown_cbdr(&si->cbd_ring);
free_netdev(si->ndev);
- enetc_pci_remove(pdev);
+ enetc_psi_destroy(pdev);
+}
+
+static void enetc_fixup_clear_rss_rfs(struct pci_dev *pdev)
+{
+ struct device_node *node = pdev->dev.of_node;
+ struct enetc_si *si;
+
+ /* Only apply quirk for disabled functions. For the ones
+ * that are enabled, enetc_pf_probe() will apply it.
+ */
+ if (node && of_device_is_available(node))
+ return;
+
+ si = enetc_psi_create(pdev);
+ if (si)
+ enetc_psi_destroy(pdev);
}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF,
+ enetc_fixup_clear_rss_rfs);
static const struct pci_device_id enetc_pf_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_FREESCALE, ENETC_DEV_ID_PF) },
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index ec9e4bdb0c06..66b5cbdb43b9 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1372,7 +1372,7 @@ fec_enet_hwtstamp(struct fec_enet_private *fep, unsigned ts,
}
static void
-fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
+fec_enet_tx_queue(struct net_device *ndev, u16 queue_id, int budget)
{
struct fec_enet_private *fep;
struct xdp_frame *xdpf;
@@ -1416,6 +1416,14 @@ fec_enet_tx_queue(struct net_device *ndev, u16 queue_id)
if (!skb)
goto tx_buf_done;
} else {
+ /* Tx processing cannot call any XDP (or page pool) APIs if
+ * the "budget" is 0. Because NAPI is called with budget of
+ * 0 (such as netpoll) indicates we may be in an IRQ context,
+ * however, we can't use the page pool from IRQ context.
+ */
+ if (unlikely(!budget))
+ break;
+
xdpf = txq->tx_buf[index].xdp;
if (bdp->cbd_bufaddr)
dma_unmap_single(&fep->pdev->dev,
@@ -1508,14 +1516,14 @@ tx_buf_done:
writel(0, txq->bd.reg_desc_active);
}
-static void fec_enet_tx(struct net_device *ndev)
+static void fec_enet_tx(struct net_device *ndev, int budget)
{
struct fec_enet_private *fep = netdev_priv(ndev);
int i;
/* Make sure that AVB queues are processed first. */
for (i = fep->num_tx_queues - 1; i >= 0; i--)
- fec_enet_tx_queue(ndev, i);
+ fec_enet_tx_queue(ndev, i, budget);
}
static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
@@ -1858,7 +1866,7 @@ static int fec_enet_rx_napi(struct napi_struct *napi, int budget)
do {
done += fec_enet_rx(ndev, budget - done);
- fec_enet_tx(ndev);
+ fec_enet_tx(ndev, budget);
} while ((done < budget) && fec_enet_collect_events(fep));
if (done < budget) {
@@ -3916,6 +3924,8 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
__netif_tx_lock(nq, cpu);
+ /* Avoid tx timeout as XDP shares the queue with kernel stack */
+ txq_trans_cond_update(nq);
for (i = 0; i < num_frames; i++) {
if (fec_enet_txq_xmit_frame(fep, txq, frames[i]) < 0)
break;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index b99d75260d59..514a20bce4f4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -31,6 +31,7 @@
#include <linux/pci.h>
#include <linux/pkt_sched.h>
#include <linux/types.h>
+#include <linux/bitmap.h>
#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
@@ -101,6 +102,7 @@ enum HNAE3_DEV_CAP_BITS {
HNAE3_DEV_SUPPORT_FEC_STATS_B,
HNAE3_DEV_SUPPORT_LANE_NUM_B,
HNAE3_DEV_SUPPORT_WOL_B,
+ HNAE3_DEV_SUPPORT_TM_FLUSH_B,
};
#define hnae3_ae_dev_fd_supported(ae_dev) \
@@ -172,6 +174,9 @@ enum HNAE3_DEV_CAP_BITS {
#define hnae3_ae_dev_wol_supported(ae_dev) \
test_bit(HNAE3_DEV_SUPPORT_WOL_B, (ae_dev)->caps)
+#define hnae3_ae_dev_tm_flush_supported(hdev) \
+ test_bit(HNAE3_DEV_SUPPORT_TM_FLUSH_B, (hdev)->ae_dev->caps)
+
enum HNAE3_PF_CAP_BITS {
HNAE3_PF_SUPPORT_VLAN_FLTR_MDF_B = 0,
};
@@ -407,7 +412,7 @@ struct hnae3_ae_dev {
unsigned long hw_err_reset_req;
struct hnae3_dev_specs dev_specs;
u32 dev_version;
- unsigned long caps[BITS_TO_LONGS(HNAE3_DEV_CAPS_MAX_NUM)];
+ DECLARE_BITMAP(caps, HNAE3_DEV_CAPS_MAX_NUM);
void *priv;
};
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
index b85c412683dd..dcecb23daac6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.c
@@ -156,6 +156,7 @@ static const struct hclge_comm_caps_bit_map hclge_pf_cmd_caps[] = {
{HCLGE_COMM_CAP_FEC_STATS_B, HNAE3_DEV_SUPPORT_FEC_STATS_B},
{HCLGE_COMM_CAP_LANE_NUM_B, HNAE3_DEV_SUPPORT_LANE_NUM_B},
{HCLGE_COMM_CAP_WOL_B, HNAE3_DEV_SUPPORT_WOL_B},
+ {HCLGE_COMM_CAP_TM_FLUSH_B, HNAE3_DEV_SUPPORT_TM_FLUSH_B},
};
static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = {
@@ -172,6 +173,20 @@ static const struct hclge_comm_caps_bit_map hclge_vf_cmd_caps[] = {
};
static void
+hclge_comm_capability_to_bitmap(unsigned long *bitmap, __le32 *caps)
+{
+ const unsigned int words = HCLGE_COMM_QUERY_CAP_LENGTH;
+ u32 val[HCLGE_COMM_QUERY_CAP_LENGTH];
+ unsigned int i;
+
+ for (i = 0; i < words; i++)
+ val[i] = __le32_to_cpu(caps[i]);
+
+ bitmap_from_arr32(bitmap, val,
+ HCLGE_COMM_QUERY_CAP_LENGTH * BITS_PER_TYPE(u32));
+}
+
+static void
hclge_comm_parse_capability(struct hnae3_ae_dev *ae_dev, bool is_pf,
struct hclge_comm_query_version_cmd *cmd)
{
@@ -179,11 +194,12 @@ hclge_comm_parse_capability(struct hnae3_ae_dev *ae_dev, bool is_pf,
is_pf ? hclge_pf_cmd_caps : hclge_vf_cmd_caps;
u32 size = is_pf ? ARRAY_SIZE(hclge_pf_cmd_caps) :
ARRAY_SIZE(hclge_vf_cmd_caps);
- u32 caps, i;
+ DECLARE_BITMAP(caps, HCLGE_COMM_QUERY_CAP_LENGTH * BITS_PER_TYPE(u32));
+ u32 i;
- caps = __le32_to_cpu(cmd->caps[0]);
+ hclge_comm_capability_to_bitmap(caps, cmd->caps);
for (i = 0; i < size; i++)
- if (hnae3_get_bit(caps, caps_map[i].imp_bit))
+ if (test_bit(caps_map[i].imp_bit, caps))
set_bit(caps_map[i].local_bit, ae_dev->caps);
}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
index 18f1b4bf362d..2b7197ce0ae8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_common/hclge_comm_cmd.h
@@ -153,6 +153,7 @@ enum hclge_opcode_type {
HCLGE_OPC_TM_INTERNAL_STS = 0x0850,
HCLGE_OPC_TM_INTERNAL_CNT = 0x0851,
HCLGE_OPC_TM_INTERNAL_STS_1 = 0x0852,
+ HCLGE_OPC_TM_FLUSH = 0x0872,
/* Packet buffer allocate commands */
HCLGE_OPC_TX_BUFF_ALLOC = 0x0901,
@@ -349,6 +350,7 @@ enum HCLGE_COMM_CAP_BITS {
HCLGE_COMM_CAP_FEC_STATS_B = 25,
HCLGE_COMM_CAP_LANE_NUM_B = 27,
HCLGE_COMM_CAP_WOL_B = 28,
+ HCLGE_COMM_CAP_TM_FLUSH_B = 31,
};
enum HCLGE_COMM_API_CAP_BITS {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index 6546cfe7f7cc..f276b5ecb431 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -411,6 +411,9 @@ static struct hns3_dbg_cap_info hns3_dbg_cap[] = {
}, {
.name = "support wake on lan",
.cap_bit = HNAE3_DEV_SUPPORT_WOL_B,
+ }, {
+ .name = "support tm flush",
+ .cap_bit = HNAE3_DEV_SUPPORT_TM_FLUSH_B,
}
};
@@ -461,9 +464,9 @@ static void hns3_dbg_fill_content(char *content, u16 len,
if (result) {
if (item_len < strlen(result[i]))
break;
- strscpy(pos, result[i], strlen(result[i]));
+ memcpy(pos, result[i], strlen(result[i]));
} else {
- strscpy(pos, items[i].name, strlen(items[i].name));
+ memcpy(pos, items[i].name, strlen(items[i].name));
}
pos += item_len;
len -= item_len;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 9f6890059666..b7b51e56b030 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -5854,6 +5854,9 @@ void hns3_external_lb_prepare(struct net_device *ndev, bool if_running)
if (!if_running)
return;
+ if (test_and_set_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+ return;
+
netif_carrier_off(ndev);
netif_tx_disable(ndev);
@@ -5882,7 +5885,16 @@ void hns3_external_lb_restore(struct net_device *ndev, bool if_running)
if (!if_running)
return;
- hns3_nic_reset_all_ring(priv->ae_handle);
+ if (hns3_nic_resetting(ndev))
+ return;
+
+ if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state))
+ return;
+
+ if (hns3_nic_reset_all_ring(priv->ae_handle))
+ return;
+
+ clear_bit(HNS3_NIC_STATE_DOWN, &priv->state);
for (i = 0; i < priv->vector_num; i++)
hns3_vector_enable(&priv->tqp_vector[i]);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
index c4aded65e848..fad5a5ff3cda 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_dcb.c
@@ -52,7 +52,10 @@ static void hclge_tm_info_to_ieee_ets(struct hclge_dev *hdev,
for (i = 0; i < HNAE3_MAX_TC; i++) {
ets->prio_tc[i] = hdev->tm_info.prio_tc[i];
- ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
+ if (i < hdev->tm_info.num_tc)
+ ets->tc_tx_bw[i] = hdev->tm_info.pg_info[0].tc_dwrr[i];
+ else
+ ets->tc_tx_bw[i] = 0;
if (hdev->tm_info.tc_info[i].tc_sch_mode ==
HCLGE_SCH_MODE_SP)
@@ -123,7 +126,8 @@ static u8 hclge_ets_tc_changed(struct hclge_dev *hdev, struct ieee_ets *ets,
}
static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev,
- struct ieee_ets *ets, bool *changed)
+ struct ieee_ets *ets, bool *changed,
+ u8 tc_num)
{
bool has_ets_tc = false;
u32 total_ets_bw = 0;
@@ -137,6 +141,13 @@ static int hclge_ets_sch_mode_validate(struct hclge_dev *hdev,
*changed = true;
break;
case IEEE_8021QAZ_TSA_ETS:
+ if (i >= tc_num) {
+ dev_err(&hdev->pdev->dev,
+ "tc%u is disabled, cannot set ets bw\n",
+ i);
+ return -EINVAL;
+ }
+
/* The hardware will switch to sp mode if bandwidth is
* 0, so limit ets bandwidth must be greater than 0.
*/
@@ -176,7 +187,7 @@ static int hclge_ets_validate(struct hclge_dev *hdev, struct ieee_ets *ets,
if (ret)
return ret;
- ret = hclge_ets_sch_mode_validate(hdev, ets, changed);
+ ret = hclge_ets_sch_mode_validate(hdev, ets, changed, tc_num);
if (ret)
return ret;
@@ -216,6 +227,10 @@ static int hclge_notify_down_uinit(struct hclge_dev *hdev)
if (ret)
return ret;
+ ret = hclge_tm_flush_cfg(hdev, true);
+ if (ret)
+ return ret;
+
return hclge_notify_client(hdev, HNAE3_UNINIT_CLIENT);
}
@@ -227,6 +242,10 @@ static int hclge_notify_init_up(struct hclge_dev *hdev)
if (ret)
return ret;
+ ret = hclge_tm_flush_cfg(hdev, false);
+ if (ret)
+ return ret;
+
return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
}
@@ -313,6 +332,7 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
struct net_device *netdev = h->kinfo.netdev;
struct hclge_dev *hdev = vport->back;
u8 i, j, pfc_map, *prio_tc;
+ int last_bad_ret = 0;
int ret;
if (!(hdev->dcbx_cap & DCB_CAP_DCBX_VER_IEEE))
@@ -350,13 +370,28 @@ static int hclge_ieee_setpfc(struct hnae3_handle *h, struct ieee_pfc *pfc)
if (ret)
return ret;
- ret = hclge_buffer_alloc(hdev);
- if (ret) {
- hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+ ret = hclge_tm_flush_cfg(hdev, true);
+ if (ret)
return ret;
- }
- return hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+ /* No matter whether the following operations are performed
+ * successfully or not, disabling the tm flush and notify
+ * the network status to up are necessary.
+ * Do not return immediately.
+ */
+ ret = hclge_buffer_alloc(hdev);
+ if (ret)
+ last_bad_ret = ret;
+
+ ret = hclge_tm_flush_cfg(hdev, false);
+ if (ret)
+ last_bad_ret = ret;
+
+ ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT);
+ if (ret)
+ last_bad_ret = ret;
+
+ return last_bad_ret;
}
static int hclge_ieee_setapp(struct hnae3_handle *h, struct dcb_app *app)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 233c132dc513..0fb2eaee3e8a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -111,9 +111,9 @@ static void hclge_dbg_fill_content(char *content, u16 len,
if (result) {
if (item_len < strlen(result[i]))
break;
- strscpy(pos, result[i], strlen(result[i]));
+ memcpy(pos, result[i], strlen(result[i]));
} else {
- strscpy(pos, items[i].name, strlen(items[i].name));
+ memcpy(pos, items[i].name, strlen(items[i].name));
}
pos += item_len;
len -= item_len;
@@ -693,8 +693,7 @@ static int hclge_dbg_dump_tc(struct hclge_dev *hdev, char *buf, int len)
for (i = 0; i < HNAE3_MAX_TC; i++) {
sch_mode_str = ets_weight->tc_weight[i] ? "dwrr" : "sp";
pos += scnprintf(buf + pos, len - pos, "%u %4s %3u\n",
- i, sch_mode_str,
- hdev->tm_info.pg_info[0].tc_dwrr[i]);
+ i, sch_mode_str, ets_weight->tc_weight[i]);
}
return 0;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index bf675c15fbb9..a940e35aef29 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -72,6 +72,8 @@ static void hclge_restore_hw_table(struct hclge_dev *hdev);
static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
static void hclge_sync_fd_table(struct hclge_dev *hdev);
static void hclge_update_fec_stats(struct hclge_dev *hdev);
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+ int wait_cnt);
static struct hnae3_ae_algo ae_algo;
@@ -7558,6 +7560,8 @@ static void hclge_enable_fd(struct hnae3_handle *handle, bool enable)
static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
{
+#define HCLGE_LINK_STATUS_WAIT_CNT 3
+
struct hclge_desc desc;
struct hclge_config_mac_mode_cmd *req =
(struct hclge_config_mac_mode_cmd *)desc.data;
@@ -7582,9 +7586,15 @@ static void hclge_cfg_mac_mode(struct hclge_dev *hdev, bool enable)
req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
ret = hclge_cmd_send(&hdev->hw, &desc, 1);
- if (ret)
+ if (ret) {
dev_err(&hdev->pdev->dev,
"mac enable fail, ret =%d.\n", ret);
+ return;
+ }
+
+ if (!enable)
+ hclge_mac_link_status_wait(hdev, HCLGE_LINK_STATUS_DOWN,
+ HCLGE_LINK_STATUS_WAIT_CNT);
}
static int hclge_config_switch_param(struct hclge_dev *hdev, int vfid,
@@ -7647,10 +7657,9 @@ static void hclge_phy_link_status_wait(struct hclge_dev *hdev,
} while (++i < HCLGE_PHY_LINK_STATUS_NUM);
}
-static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
+static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret,
+ int wait_cnt)
{
-#define HCLGE_MAC_LINK_STATUS_NUM 100
-
int link_status;
int i = 0;
int ret;
@@ -7663,13 +7672,15 @@ static int hclge_mac_link_status_wait(struct hclge_dev *hdev, int link_ret)
return 0;
msleep(HCLGE_LINK_STATUS_MS);
- } while (++i < HCLGE_MAC_LINK_STATUS_NUM);
+ } while (++i < wait_cnt);
return -EBUSY;
}
static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
bool is_phy)
{
+#define HCLGE_MAC_LINK_STATUS_NUM 100
+
int link_ret;
link_ret = en ? HCLGE_LINK_STATUS_UP : HCLGE_LINK_STATUS_DOWN;
@@ -7677,7 +7688,8 @@ static int hclge_mac_phy_link_status_wait(struct hclge_dev *hdev, bool en,
if (is_phy)
hclge_phy_link_status_wait(hdev, link_ret);
- return hclge_mac_link_status_wait(hdev, link_ret);
+ return hclge_mac_link_status_wait(hdev, link_ret,
+ HCLGE_MAC_LINK_STATUS_NUM);
}
static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
@@ -10915,9 +10927,12 @@ int hclge_cfg_flowctrl(struct hclge_dev *hdev)
u32 rx_pause, tx_pause;
u8 flowctl;
- if (!phydev->link || !phydev->autoneg)
+ if (!phydev->link)
return 0;
+ if (!phydev->autoneg)
+ return hclge_mac_pause_setup_hw(hdev);
+
local_advertising = linkmode_adv_to_lcl_adv_t(phydev->advertising);
if (phydev->pause)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
index 922c0da3660c..c58c31221762 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.c
@@ -785,6 +785,7 @@ static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
{
#define BW_PERCENT 100
+#define DEFAULT_BW_WEIGHT 1
u8 i;
@@ -806,7 +807,7 @@ static void hclge_tm_pg_info_init(struct hclge_dev *hdev)
for (k = 0; k < hdev->tm_info.num_tc; k++)
hdev->tm_info.pg_info[i].tc_dwrr[k] = BW_PERCENT;
for (; k < HNAE3_MAX_TC; k++)
- hdev->tm_info.pg_info[i].tc_dwrr[k] = 0;
+ hdev->tm_info.pg_info[i].tc_dwrr[k] = DEFAULT_BW_WEIGHT;
}
}
@@ -1484,7 +1485,11 @@ int hclge_tm_schd_setup_hw(struct hclge_dev *hdev)
return ret;
/* Cfg schd mode for each level schd */
- return hclge_tm_schd_mode_hw(hdev);
+ ret = hclge_tm_schd_mode_hw(hdev);
+ if (ret)
+ return ret;
+
+ return hclge_tm_flush_cfg(hdev, false);
}
static int hclge_pause_param_setup_hw(struct hclge_dev *hdev)
@@ -1548,7 +1553,7 @@ static int hclge_bp_setup_hw(struct hclge_dev *hdev, u8 tc)
return 0;
}
-static int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev)
{
bool tx_en, rx_en;
@@ -2113,3 +2118,28 @@ int hclge_tm_get_port_shaper(struct hclge_dev *hdev,
return 0;
}
+
+int hclge_tm_flush_cfg(struct hclge_dev *hdev, bool enable)
+{
+ struct hclge_desc desc;
+ int ret;
+
+ if (!hnae3_ae_dev_tm_flush_supported(hdev))
+ return 0;
+
+ hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TM_FLUSH, false);
+
+ desc.data[0] = cpu_to_le32(enable ? HCLGE_TM_FLUSH_EN_MSK : 0);
+
+ ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+ if (ret) {
+ dev_err(&hdev->pdev->dev,
+ "failed to config tm flush, ret = %d\n", ret);
+ return ret;
+ }
+
+ if (enable)
+ msleep(HCLGE_TM_FLUSH_TIME_MS);
+
+ return ret;
+}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
index dd6f1fd486cf..53eec6df5194 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_tm.h
@@ -33,6 +33,9 @@ enum hclge_opcode_type;
#define HCLGE_DSCP_MAP_TC_BD_NUM 2
#define HCLGE_DSCP_TC_SHIFT(n) (((n) & 1) * 4)
+#define HCLGE_TM_FLUSH_TIME_MS 10
+#define HCLGE_TM_FLUSH_EN_MSK BIT(0)
+
struct hclge_pg_to_pri_link_cmd {
u8 pg_id;
u8 rsvd1[3];
@@ -242,6 +245,7 @@ int hclge_pfc_pause_en_cfg(struct hclge_dev *hdev, u8 tx_rx_bitmap,
u8 pfc_bitmap);
int hclge_mac_pause_en_cfg(struct hclge_dev *hdev, bool tx, bool rx);
int hclge_pause_addr_cfg(struct hclge_dev *hdev, const u8 *mac_addr);
+int hclge_mac_pause_setup_hw(struct hclge_dev *hdev);
void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
@@ -272,4 +276,5 @@ int hclge_tm_get_port_shaper(struct hclge_dev *hdev,
struct hclge_tm_shaper_para *para);
int hclge_up_to_tc_map(struct hclge_dev *hdev);
int hclge_dscp_to_tc_map(struct hclge_dev *hdev);
+int hclge_tm_flush_cfg(struct hclge_dev *hdev, bool enable);
#endif
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index 113fcb3e353e..832a2ae01950 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -203,7 +203,7 @@ static inline void ibmveth_flush_buffer(void *addr, unsigned long length)
unsigned long offset;
for (offset = 0; offset < length; offset += SMP_CACHE_BYTES)
- asm("dcbfl %0,%1" :: "b" (addr), "r" (offset));
+ asm("dcbf %0,%1,1" :: "b" (addr), "r" (offset));
}
/* replenish the buffers for a pool. note that we don't need to
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 763d613adbcc..df76cdaddcfb 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -97,6 +97,8 @@ static int pending_scrq(struct ibmvnic_adapter *,
static union sub_crq *ibmvnic_next_scrq(struct ibmvnic_adapter *,
struct ibmvnic_sub_crq_queue *);
static int ibmvnic_poll(struct napi_struct *napi, int data);
+static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter);
+static inline void reinit_init_done(struct ibmvnic_adapter *adapter);
static void send_query_map(struct ibmvnic_adapter *adapter);
static int send_request_map(struct ibmvnic_adapter *, dma_addr_t, u32, u8);
static int send_request_unmap(struct ibmvnic_adapter *, u8);
@@ -114,6 +116,7 @@ static void ibmvnic_tx_scrq_clean_buffer(struct ibmvnic_adapter *adapter,
static void free_long_term_buff(struct ibmvnic_adapter *adapter,
struct ibmvnic_long_term_buff *ltb);
static void ibmvnic_disable_irqs(struct ibmvnic_adapter *adapter);
+static void flush_reset_queue(struct ibmvnic_adapter *adapter);
struct ibmvnic_stat {
char name[ETH_GSTRING_LEN];
@@ -1505,8 +1508,8 @@ static const char *adapter_state_to_string(enum vnic_state state)
static int ibmvnic_login(struct net_device *netdev)
{
+ unsigned long flags, timeout = msecs_to_jiffies(20000);
struct ibmvnic_adapter *adapter = netdev_priv(netdev);
- unsigned long timeout = msecs_to_jiffies(20000);
int retry_count = 0;
int retries = 10;
bool retry;
@@ -1527,11 +1530,9 @@ static int ibmvnic_login(struct net_device *netdev)
if (!wait_for_completion_timeout(&adapter->init_done,
timeout)) {
- netdev_warn(netdev, "Login timed out, retrying...\n");
- retry = true;
- adapter->init_done_rc = 0;
- retry_count++;
- continue;
+ netdev_warn(netdev, "Login timed out\n");
+ adapter->login_pending = false;
+ goto partial_reset;
}
if (adapter->init_done_rc == ABORTED) {
@@ -1573,10 +1574,69 @@ static int ibmvnic_login(struct net_device *netdev)
"SCRQ irq initialization failed\n");
return rc;
}
+ /* Default/timeout error handling, reset and start fresh */
} else if (adapter->init_done_rc) {
netdev_warn(netdev, "Adapter login failed, init_done_rc = %d\n",
adapter->init_done_rc);
- return -EIO;
+
+partial_reset:
+ /* adapter login failed, so free any CRQs or sub-CRQs
+ * and register again before attempting to login again.
+ * If we don't do this then the VIOS may think that
+ * we are already logged in and reject any subsequent
+ * attempts
+ */
+ netdev_warn(netdev,
+ "Freeing and re-registering CRQs before attempting to login again\n");
+ retry = true;
+ adapter->init_done_rc = 0;
+ release_sub_crqs(adapter, true);
+ /* Much of this is similar logic as ibmvnic_probe(),
+ * we are essentially re-initializing communication
+ * with the server. We really should not run any
+ * resets/failovers here because this is already a form
+ * of reset and we do not want parallel resets occurring
+ */
+ do {
+ reinit_init_done(adapter);
+ /* Clear any failovers we got in the previous
+ * pass since we are re-initializing the CRQ
+ */
+ adapter->failover_pending = false;
+ release_crq_queue(adapter);
+ /* If we don't sleep here then we risk an
+ * unnecessary failover event from the VIOS.
+ * This is a known VIOS issue caused by a vnic
+ * device freeing and registering a CRQ too
+ * quickly.
+ */
+ msleep(1500);
+ /* Avoid any resets, since we are currently
+ * resetting.
+ */
+ spin_lock_irqsave(&adapter->rwi_lock, flags);
+ flush_reset_queue(adapter);
+ spin_unlock_irqrestore(&adapter->rwi_lock,
+ flags);
+
+ rc = init_crq_queue(adapter);
+ if (rc) {
+ netdev_err(netdev, "login recovery: init CRQ failed %d\n",
+ rc);
+ return -EIO;
+ }
+
+ rc = ibmvnic_reset_init(adapter, false);
+ if (rc)
+ netdev_err(netdev, "login recovery: Reset init failed %d\n",
+ rc);
+ /* IBMVNIC_CRQ_INIT will return EAGAIN if it
+ * fails, since ibmvnic_reset_init will free
+ * irq's in failure, we won't be able to receive
+ * new CRQs so we need to keep trying. probe()
+ * handles this similarly.
+ */
+ } while (rc == -EAGAIN && retry_count++ < retries);
}
} while (retry);
@@ -1588,12 +1648,22 @@ static int ibmvnic_login(struct net_device *netdev)
static void release_login_buffer(struct ibmvnic_adapter *adapter)
{
+ if (!adapter->login_buf)
+ return;
+
+ dma_unmap_single(&adapter->vdev->dev, adapter->login_buf_token,
+ adapter->login_buf_sz, DMA_TO_DEVICE);
kfree(adapter->login_buf);
adapter->login_buf = NULL;
}
static void release_login_rsp_buffer(struct ibmvnic_adapter *adapter)
{
+ if (!adapter->login_rsp_buf)
+ return;
+
+ dma_unmap_single(&adapter->vdev->dev, adapter->login_rsp_buf_token,
+ adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
kfree(adapter->login_rsp_buf);
adapter->login_rsp_buf = NULL;
}
@@ -4830,11 +4900,14 @@ static int send_login(struct ibmvnic_adapter *adapter)
if (rc) {
adapter->login_pending = false;
netdev_err(adapter->netdev, "Failed to send login, rc=%d\n", rc);
- goto buf_rsp_map_failed;
+ goto buf_send_failed;
}
return 0;
+buf_send_failed:
+ dma_unmap_single(dev, rsp_buffer_token, rsp_buffer_size,
+ DMA_FROM_DEVICE);
buf_rsp_map_failed:
kfree(login_rsp_buffer);
adapter->login_rsp_buf = NULL;
@@ -5396,6 +5469,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
int num_tx_pools;
int num_rx_pools;
u64 *size_array;
+ u32 rsp_len;
int i;
/* CHECK: Test/set of login_pending does not need to be atomic
@@ -5407,11 +5481,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
}
adapter->login_pending = false;
- dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz,
- DMA_TO_DEVICE);
- dma_unmap_single(dev, adapter->login_rsp_buf_token,
- adapter->login_rsp_buf_sz, DMA_FROM_DEVICE);
-
/* If the number of queues requested can't be allocated by the
* server, the login response will return with code 1. We will need
* to resend the login buffer with fewer queues requested.
@@ -5447,6 +5516,23 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq,
ibmvnic_reset(adapter, VNIC_RESET_FATAL);
return -EIO;
}
+
+ rsp_len = be32_to_cpu(login_rsp->len);
+ if (be32_to_cpu(login->login_rsp_len) < rsp_len ||
+ rsp_len <= be32_to_cpu(login_rsp->off_txsubm_subcrqs) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_subcrqs) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_rxadd_buff_size) ||
+ rsp_len <= be32_to_cpu(login_rsp->off_supp_tx_desc)) {
+ /* This can happen if a login request times out and there are
+ * 2 outstanding login requests sent, the LOGIN_RSP crq
+ * could have been for the older login request. So we are
+ * parsing the newer response buffer which may be incomplete
+ */
+ dev_err(dev, "FATAL: Login rsp offsets/lengths invalid\n");
+ ibmvnic_reset(adapter, VNIC_RESET_FATAL);
+ return -EIO;
+ }
+
size_array = (u64 *)((u8 *)(adapter->login_rsp_buf) +
be32_to_cpu(adapter->login_rsp_buf->off_rxadd_buff_size));
/* variable buffer sizes are not supported, so just read the
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 9954493cd448..62497f5565c5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1839,7 +1839,7 @@ void i40e_dbg_pf_exit(struct i40e_pf *pf)
void i40e_dbg_init(void)
{
i40e_dbg_root = debugfs_create_dir(i40e_driver_name, NULL);
- if (!i40e_dbg_root)
+ if (IS_ERR(i40e_dbg_root))
pr_info("init of debugfs failed\n");
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 29ad1797adce..a86bfa3bba74 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2609,7 +2609,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
retval = i40e_correct_mac_vlan_filters
(vsi, &tmp_add_list, &tmp_del_list,
vlan_filters);
- else
+ else if (pf->vf)
retval = i40e_correct_vf_mac_vlan_filters
(vsi, &tmp_add_list, &tmp_del_list,
vlan_filters, pf->vf[vsi->vf_id].trusted);
@@ -2782,7 +2782,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
}
/* if the VF is not trusted do not do promisc */
- if ((vsi->type == I40E_VSI_SRIOV) && !pf->vf[vsi->vf_id].trusted) {
+ if (vsi->type == I40E_VSI_SRIOV && pf->vf &&
+ !pf->vf[vsi->vf_id].trusted) {
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
goto out;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 9da0c87f0328..f99c1f7fec40 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -210,11 +210,11 @@ read_nvm_exit:
* @hw: pointer to the HW structure.
* @module_pointer: module pointer location in words from the NVM beginning
* @offset: offset in words from module start
- * @words: number of words to write
- * @data: buffer with words to write to the Shadow RAM
+ * @words: number of words to read
+ * @data: buffer with words to read to the Shadow RAM
* @last_command: tells the AdminQ that this is the last command
*
- * Writes a 16 bit words buffer to the Shadow RAM using the admin command.
+ * Reads a 16 bit words buffer to the Shadow RAM using the admin command.
**/
static int i40e_read_nvm_aq(struct i40e_hw *hw,
u8 module_pointer, u32 offset,
@@ -234,18 +234,18 @@ static int i40e_read_nvm_aq(struct i40e_hw *hw,
*/
if ((offset + words) > hw->nvm.sr_size)
i40e_debug(hw, I40E_DEBUG_NVM,
- "NVM write error: offset %d beyond Shadow RAM limit %d\n",
+ "NVM read error: offset %d beyond Shadow RAM limit %d\n",
(offset + words), hw->nvm.sr_size);
else if (words > I40E_SR_SECTOR_SIZE_IN_WORDS)
- /* We can write only up to 4KB (one sector), in one AQ write */
+ /* We can read only up to 4KB (one sector), in one AQ write */
i40e_debug(hw, I40E_DEBUG_NVM,
- "NVM write fail error: tried to write %d words, limit is %d.\n",
+ "NVM read fail error: tried to read %d words, limit is %d.\n",
words, I40E_SR_SECTOR_SIZE_IN_WORDS);
else if (((offset + (words - 1)) / I40E_SR_SECTOR_SIZE_IN_WORDS)
!= (offset / I40E_SR_SECTOR_SIZE_IN_WORDS))
- /* A single write cannot spread over two sectors */
+ /* A single read cannot spread over two sectors */
i40e_debug(hw, I40E_DEBUG_NVM,
- "NVM write error: cannot spread over two sectors in a single write offset=%d words=%d\n",
+ "NVM read error: cannot spread over two sectors in a single read offset=%d words=%d\n",
offset, words);
else
ret_code = i40e_aq_read_nvm(hw, module_pointer,
diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
index f80f2735e688..8cbdebc5b698 100644
--- a/drivers/net/ethernet/intel/iavf/iavf.h
+++ b/drivers/net/ethernet/intel/iavf/iavf.h
@@ -255,8 +255,10 @@ struct iavf_adapter {
struct workqueue_struct *wq;
struct work_struct reset_task;
struct work_struct adminq_task;
+ struct work_struct finish_config;
struct delayed_work client_task;
wait_queue_head_t down_waitqueue;
+ wait_queue_head_t reset_waitqueue;
wait_queue_head_t vc_waitqueue;
struct iavf_q_vector *q_vectors;
struct list_head vlan_filter_list;
@@ -518,8 +520,9 @@ int iavf_up(struct iavf_adapter *adapter);
void iavf_down(struct iavf_adapter *adapter);
int iavf_process_config(struct iavf_adapter *adapter);
int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter);
-void iavf_schedule_reset(struct iavf_adapter *adapter);
+void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags);
void iavf_schedule_request_stats(struct iavf_adapter *adapter);
+void iavf_schedule_finish_config(struct iavf_adapter *adapter);
void iavf_reset(struct iavf_adapter *adapter);
void iavf_set_ethtool_ops(struct net_device *netdev);
void iavf_update_stats(struct iavf_adapter *adapter);
@@ -582,4 +585,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
const u8 *macaddr);
+int iavf_wait_for_reset(struct iavf_adapter *adapter);
#endif /* _IAVF_H_ */
diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
index 6f171d1d85b7..a34303ad057d 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
@@ -484,6 +484,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
u32 orig_flags, new_flags, changed_flags;
+ int ret = 0;
u32 i;
orig_flags = READ_ONCE(adapter->flags);
@@ -531,12 +532,14 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
/* issue a reset to force legacy-rx change to take effect */
if (changed_flags & IAVF_FLAG_LEGACY_RX) {
if (netif_running(netdev)) {
- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
- queue_work(adapter->wq, &adapter->reset_task);
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+ netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset");
}
}
- return 0;
+ return ret;
}
/**
@@ -627,6 +630,7 @@ static int iavf_set_ringparam(struct net_device *netdev,
{
struct iavf_adapter *adapter = netdev_priv(netdev);
u32 new_rx_count, new_tx_count;
+ int ret = 0;
if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
return -EINVAL;
@@ -671,11 +675,13 @@ static int iavf_set_ringparam(struct net_device *netdev,
}
if (netif_running(netdev)) {
- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
- queue_work(adapter->wq, &adapter->reset_task);
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+ netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset");
}
- return 0;
+ return ret;
}
/**
@@ -1283,6 +1289,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
fltr->ip_mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
fltr->ip_mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
fltr->ip_mask.tos = fsp->m_u.tcp_ip4_spec.tos;
+ fltr->ip_ver = 4;
break;
case AH_V4_FLOW:
case ESP_V4_FLOW:
@@ -1294,6 +1301,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
fltr->ip_mask.v4_addrs.dst_ip = fsp->m_u.ah_ip4_spec.ip4dst;
fltr->ip_mask.spi = fsp->m_u.ah_ip4_spec.spi;
fltr->ip_mask.tos = fsp->m_u.ah_ip4_spec.tos;
+ fltr->ip_ver = 4;
break;
case IPV4_USER_FLOW:
fltr->ip_data.v4_addrs.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
@@ -1306,6 +1314,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
fltr->ip_mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
fltr->ip_mask.tos = fsp->m_u.usr_ip4_spec.tos;
fltr->ip_mask.proto = fsp->m_u.usr_ip4_spec.proto;
+ fltr->ip_ver = 4;
break;
case TCP_V6_FLOW:
case UDP_V6_FLOW:
@@ -1324,6 +1333,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
fltr->ip_mask.src_port = fsp->m_u.tcp_ip6_spec.psrc;
fltr->ip_mask.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
fltr->ip_mask.tclass = fsp->m_u.tcp_ip6_spec.tclass;
+ fltr->ip_ver = 6;
break;
case AH_V6_FLOW:
case ESP_V6_FLOW:
@@ -1339,6 +1349,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
sizeof(struct in6_addr));
fltr->ip_mask.spi = fsp->m_u.ah_ip6_spec.spi;
fltr->ip_mask.tclass = fsp->m_u.ah_ip6_spec.tclass;
+ fltr->ip_ver = 6;
break;
case IPV6_USER_FLOW:
memcpy(&fltr->ip_data.v6_addrs.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
@@ -1355,6 +1366,7 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
fltr->ip_mask.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
fltr->ip_mask.tclass = fsp->m_u.usr_ip6_spec.tclass;
fltr->ip_mask.proto = fsp->m_u.usr_ip6_spec.l4_proto;
+ fltr->ip_ver = 6;
break;
case ETHER_FLOW:
fltr->eth_data.etype = fsp->h_u.ether_spec.h_proto;
@@ -1365,6 +1377,10 @@ iavf_add_fdir_fltr_info(struct iavf_adapter *adapter, struct ethtool_rx_flow_spe
return -EINVAL;
}
+ err = iavf_validate_fdir_fltr_masks(adapter, fltr);
+ if (err)
+ return err;
+
if (iavf_fdir_is_dup_fltr(adapter, fltr))
return -EEXIST;
@@ -1395,14 +1411,15 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx
if (fsp->flow_type & FLOW_MAC_EXT)
return -EINVAL;
+ spin_lock_bh(&adapter->fdir_fltr_lock);
if (adapter->fdir_active_fltr >= IAVF_MAX_FDIR_FILTERS) {
+ spin_unlock_bh(&adapter->fdir_fltr_lock);
dev_err(&adapter->pdev->dev,
"Unable to add Flow Director filter because VF reached the limit of max allowed filters (%u)\n",
IAVF_MAX_FDIR_FILTERS);
return -ENOSPC;
}
- spin_lock_bh(&adapter->fdir_fltr_lock);
if (iavf_find_fdir_fltr_by_loc(adapter, fsp->location)) {
dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, it already exists\n");
spin_unlock_bh(&adapter->fdir_fltr_lock);
@@ -1775,7 +1792,9 @@ static int iavf_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
case ETHTOOL_GRXCLSRLCNT:
if (!FDIR_FLTR_SUPPORT(adapter))
break;
+ spin_lock_bh(&adapter->fdir_fltr_lock);
cmd->rule_cnt = adapter->fdir_active_fltr;
+ spin_unlock_bh(&adapter->fdir_fltr_lock);
cmd->data = IAVF_MAX_FDIR_FILTERS;
ret = 0;
break;
@@ -1830,7 +1849,7 @@ static int iavf_set_channels(struct net_device *netdev,
{
struct iavf_adapter *adapter = netdev_priv(netdev);
u32 num_req = ch->combined_count;
- int i;
+ int ret = 0;
if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
adapter->num_tc) {
@@ -1852,22 +1871,13 @@ static int iavf_set_channels(struct net_device *netdev,
adapter->num_req_queues = num_req;
adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
- iavf_schedule_reset(adapter);
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
- /* wait for the reset is done */
- for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) {
- msleep(IAVF_RESET_WAIT_MS);
- if (adapter->flags & IAVF_FLAG_RESET_PENDING)
- continue;
- break;
- }
- if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
- adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
- adapter->num_active_queues = num_req;
- return -EOPNOTSUPP;
- }
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+ netdev_warn(netdev, "Changing channel count timeout or interrupted waiting for reset");
- return 0;
+ return ret;
}
/**
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.c b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
index 6146203efd84..03e774bd2a5b 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.c
@@ -18,6 +18,79 @@ static const struct in6_addr ipv6_addr_full_mask = {
}
};
+static const struct in6_addr ipv6_addr_zero_mask = {
+ .in6_u = {
+ .u6_addr8 = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ }
+ }
+};
+
+/**
+ * iavf_validate_fdir_fltr_masks - validate Flow Director filter fields masks
+ * @adapter: pointer to the VF adapter structure
+ * @fltr: Flow Director filter data structure
+ *
+ * Returns 0 if all masks of packet fields are either full or empty. Returns
+ * error on at least one partial mask.
+ */
+int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
+ struct iavf_fdir_fltr *fltr)
+{
+ if (fltr->eth_mask.etype && fltr->eth_mask.etype != htons(U16_MAX))
+ goto partial_mask;
+
+ if (fltr->ip_ver == 4) {
+ if (fltr->ip_mask.v4_addrs.src_ip &&
+ fltr->ip_mask.v4_addrs.src_ip != htonl(U32_MAX))
+ goto partial_mask;
+
+ if (fltr->ip_mask.v4_addrs.dst_ip &&
+ fltr->ip_mask.v4_addrs.dst_ip != htonl(U32_MAX))
+ goto partial_mask;
+
+ if (fltr->ip_mask.tos && fltr->ip_mask.tos != U8_MAX)
+ goto partial_mask;
+ } else if (fltr->ip_ver == 6) {
+ if (memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_zero_mask,
+ sizeof(struct in6_addr)) &&
+ memcmp(&fltr->ip_mask.v6_addrs.src_ip, &ipv6_addr_full_mask,
+ sizeof(struct in6_addr)))
+ goto partial_mask;
+
+ if (memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_zero_mask,
+ sizeof(struct in6_addr)) &&
+ memcmp(&fltr->ip_mask.v6_addrs.dst_ip, &ipv6_addr_full_mask,
+ sizeof(struct in6_addr)))
+ goto partial_mask;
+
+ if (fltr->ip_mask.tclass && fltr->ip_mask.tclass != U8_MAX)
+ goto partial_mask;
+ }
+
+ if (fltr->ip_mask.proto && fltr->ip_mask.proto != U8_MAX)
+ goto partial_mask;
+
+ if (fltr->ip_mask.src_port && fltr->ip_mask.src_port != htons(U16_MAX))
+ goto partial_mask;
+
+ if (fltr->ip_mask.dst_port && fltr->ip_mask.dst_port != htons(U16_MAX))
+ goto partial_mask;
+
+ if (fltr->ip_mask.spi && fltr->ip_mask.spi != htonl(U32_MAX))
+ goto partial_mask;
+
+ if (fltr->ip_mask.l4_header &&
+ fltr->ip_mask.l4_header != htonl(U32_MAX))
+ goto partial_mask;
+
+ return 0;
+
+partial_mask:
+ dev_err(&adapter->pdev->dev, "Failed to add Flow Director filter, partial masks are not supported\n");
+ return -EOPNOTSUPP;
+}
+
/**
* iavf_pkt_udp_no_pay_len - the length of UDP packet without payload
* @fltr: Flow Director filter data structure
@@ -263,8 +336,6 @@ iavf_fill_fdir_ip4_hdr(struct iavf_fdir_fltr *fltr,
VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV4, DST);
}
- fltr->ip_ver = 4;
-
return 0;
}
@@ -309,8 +380,6 @@ iavf_fill_fdir_ip6_hdr(struct iavf_fdir_fltr *fltr,
VIRTCHNL_ADD_PROTO_HDR_FIELD_BIT(hdr, IPV6, DST);
}
- fltr->ip_ver = 6;
-
return 0;
}
@@ -722,7 +791,9 @@ void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *f
bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr)
{
struct iavf_fdir_fltr *tmp;
+ bool ret = false;
+ spin_lock_bh(&adapter->fdir_fltr_lock);
list_for_each_entry(tmp, &adapter->fdir_list_head, list) {
if (tmp->flow_type != fltr->flow_type)
continue;
@@ -732,11 +803,14 @@ bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *
!memcmp(&tmp->ip_data, &fltr->ip_data,
sizeof(fltr->ip_data)) &&
!memcmp(&tmp->ext_data, &fltr->ext_data,
- sizeof(fltr->ext_data)))
- return true;
+ sizeof(fltr->ext_data))) {
+ ret = true;
+ break;
+ }
}
+ spin_unlock_bh(&adapter->fdir_fltr_lock);
- return false;
+ return ret;
}
/**
diff --git a/drivers/net/ethernet/intel/iavf/iavf_fdir.h b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
index 33c55c366315..9eb9f73f6adf 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_fdir.h
+++ b/drivers/net/ethernet/intel/iavf/iavf_fdir.h
@@ -110,6 +110,8 @@ struct iavf_fdir_fltr {
struct virtchnl_fdir_add vc_add_msg;
};
+int iavf_validate_fdir_fltr_masks(struct iavf_adapter *adapter,
+ struct iavf_fdir_fltr *fltr);
int iavf_fill_fdir_add_msg(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
void iavf_print_fdir_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
bool iavf_fdir_is_dup_fltr(struct iavf_adapter *adapter, struct iavf_fdir_fltr *fltr);
diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
index a483eb185c99..9610ca770349 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
@@ -167,6 +167,45 @@ static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev)
}
/**
+ * iavf_is_reset_in_progress - Check if a reset is in progress
+ * @adapter: board private structure
+ */
+static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter)
+{
+ if (adapter->state == __IAVF_RESETTING ||
+ adapter->flags & (IAVF_FLAG_RESET_PENDING |
+ IAVF_FLAG_RESET_NEEDED))
+ return true;
+
+ return false;
+}
+
+/**
+ * iavf_wait_for_reset - Wait for reset to finish.
+ * @adapter: board private structure
+ *
+ * Returns 0 if reset finished successfully, negative on timeout or interrupt.
+ */
+int iavf_wait_for_reset(struct iavf_adapter *adapter)
+{
+ int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue,
+ !iavf_is_reset_in_progress(adapter),
+ msecs_to_jiffies(5000));
+
+ /* If ret < 0 then it means wait was interrupted.
+ * If ret == 0 then it means we got a timeout while waiting
+ * for reset to finish.
+ * If ret > 0 it means reset has finished.
+ */
+ if (ret > 0)
+ return 0;
+ else if (ret < 0)
+ return -EINTR;
+ else
+ return -EBUSY;
+}
+
+/**
* iavf_allocate_dma_mem_d - OS specific memory alloc for shared code
* @hw: pointer to the HW structure
* @mem: ptr to mem struct to fill out
@@ -262,12 +301,14 @@ static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
/**
* iavf_schedule_reset - Set the flags and schedule a reset event
* @adapter: board private structure
+ * @flags: IAVF_FLAG_RESET_PENDING or IAVF_FLAG_RESET_NEEDED
**/
-void iavf_schedule_reset(struct iavf_adapter *adapter)
+void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags)
{
- if (!(adapter->flags &
- (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+ if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
+ !(adapter->flags &
+ (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
+ adapter->flags |= flags;
queue_work(adapter->wq, &adapter->reset_task);
}
}
@@ -295,7 +336,7 @@ static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
struct iavf_adapter *adapter = netdev_priv(netdev);
adapter->tx_timeout_count++;
- iavf_schedule_reset(adapter);
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
}
/**
@@ -1651,10 +1692,10 @@ static int iavf_set_interrupt_capability(struct iavf_adapter *adapter)
adapter->msix_entries[vector].entry = vector;
err = iavf_acquire_msix_vectors(adapter, v_budget);
+ if (!err)
+ iavf_schedule_finish_config(adapter);
out:
- netif_set_real_num_rx_queues(adapter->netdev, pairs);
- netif_set_real_num_tx_queues(adapter->netdev, pairs);
return err;
}
@@ -1828,19 +1869,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
static void iavf_free_q_vectors(struct iavf_adapter *adapter)
{
int q_idx, num_q_vectors;
- int napi_vectors;
if (!adapter->q_vectors)
return;
num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
- napi_vectors = adapter->num_active_queues;
for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
- if (q_idx < napi_vectors)
- netif_napi_del(&q_vector->napi);
+ netif_napi_del(&q_vector->napi);
}
kfree(adapter->q_vectors);
adapter->q_vectors = NULL;
@@ -1877,9 +1915,7 @@ static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
goto err_alloc_queues;
}
- rtnl_lock();
err = iavf_set_interrupt_capability(adapter);
- rtnl_unlock();
if (err) {
dev_err(&adapter->pdev->dev,
"Unable to setup interrupt capabilities\n");
@@ -1932,15 +1968,16 @@ static void iavf_free_rss(struct iavf_adapter *adapter)
/**
* iavf_reinit_interrupt_scheme - Reallocate queues and vectors
* @adapter: board private structure
+ * @running: true if adapter->state == __IAVF_RUNNING
*
* Returns 0 on success, negative on failure
**/
-static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter)
+static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool running)
{
struct net_device *netdev = adapter->netdev;
int err;
- if (netif_running(netdev))
+ if (running)
iavf_free_traffic_irqs(adapter);
iavf_free_misc_irq(adapter);
iavf_reset_interrupt_capability(adapter);
@@ -1965,6 +2002,78 @@ err:
}
/**
+ * iavf_finish_config - do all netdev work that needs RTNL
+ * @work: our work_struct
+ *
+ * Do work that needs both RTNL and crit_lock.
+ **/
+static void iavf_finish_config(struct work_struct *work)
+{
+ struct iavf_adapter *adapter;
+ int pairs, err;
+
+ adapter = container_of(work, struct iavf_adapter, finish_config);
+
+ /* Always take RTNL first to prevent circular lock dependency */
+ rtnl_lock();
+ mutex_lock(&adapter->crit_lock);
+
+ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
+ adapter->netdev_registered &&
+ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
+ netdev_update_features(adapter->netdev);
+ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+ }
+
+ switch (adapter->state) {
+ case __IAVF_DOWN:
+ if (!adapter->netdev_registered) {
+ err = register_netdevice(adapter->netdev);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
+ err);
+
+ /* go back and try again.*/
+ iavf_free_rss(adapter);
+ iavf_free_misc_irq(adapter);
+ iavf_reset_interrupt_capability(adapter);
+ iavf_change_state(adapter,
+ __IAVF_INIT_CONFIG_ADAPTER);
+ goto out;
+ }
+ adapter->netdev_registered = true;
+ }
+
+ /* Set the real number of queues when reset occurs while
+ * state == __IAVF_DOWN
+ */
+ fallthrough;
+ case __IAVF_RUNNING:
+ pairs = adapter->num_active_queues;
+ netif_set_real_num_rx_queues(adapter->netdev, pairs);
+ netif_set_real_num_tx_queues(adapter->netdev, pairs);
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ mutex_unlock(&adapter->crit_lock);
+ rtnl_unlock();
+}
+
+/**
+ * iavf_schedule_finish_config - Set the flags and schedule a reset event
+ * @adapter: board private structure
+ **/
+void iavf_schedule_finish_config(struct iavf_adapter *adapter)
+{
+ if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+ queue_work(adapter->wq, &adapter->finish_config);
+}
+
+/**
* iavf_process_aq_command - process aq_required flags
* and sends aq command
* @adapter: pointer to iavf adapter structure
@@ -2371,7 +2480,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
adapter->vsi_res->num_queue_pairs);
adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED;
adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
- iavf_schedule_reset(adapter);
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
return -EAGAIN;
}
@@ -2601,22 +2710,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
netif_carrier_off(netdev);
adapter->link_up = false;
-
- /* set the semaphore to prevent any callbacks after device registration
- * up to time when state of driver will be set to __IAVF_DOWN
- */
- rtnl_lock();
- if (!adapter->netdev_registered) {
- err = register_netdevice(netdev);
- if (err) {
- rtnl_unlock();
- goto err_register;
- }
- }
-
- adapter->netdev_registered = true;
-
netif_tx_stop_all_queues(netdev);
+
if (CLIENT_ALLOWED(adapter)) {
err = iavf_lan_add_device(adapter);
if (err)
@@ -2629,7 +2724,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
iavf_change_state(adapter, __IAVF_DOWN);
set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
- rtnl_unlock();
iavf_misc_irq_enable(adapter);
wake_up(&adapter->down_waitqueue);
@@ -2649,10 +2743,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
/* request initial VLAN offload settings */
iavf_set_vlan_offload_features(adapter, 0, netdev->features);
+ iavf_schedule_finish_config(adapter);
return;
+
err_mem:
iavf_free_rss(adapter);
-err_register:
iavf_free_misc_irq(adapter);
err_sw_init:
iavf_reset_interrupt_capability(adapter);
@@ -2679,26 +2774,9 @@ static void iavf_watchdog_task(struct work_struct *work)
goto restart_watchdog;
}
- if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
- adapter->netdev_registered &&
- !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
- rtnl_trylock()) {
- netdev_update_features(adapter->netdev);
- rtnl_unlock();
- adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
- }
-
if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
iavf_change_state(adapter, __IAVF_COMM_FAILED);
- if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
- adapter->aq_required = 0;
- adapter->current_op = VIRTCHNL_OP_UNKNOWN;
- mutex_unlock(&adapter->crit_lock);
- queue_work(adapter->wq, &adapter->reset_task);
- return;
- }
-
switch (adapter->state) {
case __IAVF_STARTUP:
iavf_startup(adapter);
@@ -2826,11 +2904,10 @@ static void iavf_watchdog_task(struct work_struct *work)
/* check for hw reset */
reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
if (!reg_val) {
- adapter->flags |= IAVF_FLAG_RESET_PENDING;
adapter->aq_required = 0;
adapter->current_op = VIRTCHNL_OP_UNKNOWN;
dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
- queue_work(adapter->wq, &adapter->reset_task);
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
mutex_unlock(&adapter->crit_lock);
queue_delayed_work(adapter->wq,
&adapter->watchdog_task, HZ * 2);
@@ -2940,11 +3017,6 @@ static void iavf_reset_task(struct work_struct *work)
int i = 0, err;
bool running;
- /* Detach interface to avoid subsequent NDO callbacks */
- rtnl_lock();
- netif_device_detach(netdev);
- rtnl_unlock();
-
/* When device is being removed it doesn't make sense to run the reset
* task, just return in such a case.
*/
@@ -2952,7 +3024,7 @@ static void iavf_reset_task(struct work_struct *work)
if (adapter->state != __IAVF_REMOVE)
queue_work(adapter->wq, &adapter->reset_task);
- goto reset_finish;
+ return;
}
while (!mutex_trylock(&adapter->client_lock))
@@ -3010,11 +3082,6 @@ static void iavf_reset_task(struct work_struct *work)
iavf_disable_vf(adapter);
mutex_unlock(&adapter->client_lock);
mutex_unlock(&adapter->crit_lock);
- if (netif_running(netdev)) {
- rtnl_lock();
- dev_close(netdev);
- rtnl_unlock();
- }
return; /* Do not attempt to reinit. It's dead, Jim. */
}
@@ -3056,7 +3123,7 @@ continue_reset:
if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
(adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
- err = iavf_reinit_interrupt_scheme(adapter);
+ err = iavf_reinit_interrupt_scheme(adapter, running);
if (err)
goto reset_err;
}
@@ -3151,10 +3218,11 @@ continue_reset:
adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+ wake_up(&adapter->reset_waitqueue);
mutex_unlock(&adapter->client_lock);
mutex_unlock(&adapter->crit_lock);
- goto reset_finish;
+ return;
reset_err:
if (running) {
set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
@@ -3164,21 +3232,7 @@ reset_err:
mutex_unlock(&adapter->client_lock);
mutex_unlock(&adapter->crit_lock);
-
- if (netif_running(netdev)) {
- /* Close device to ensure that Tx queues will not be started
- * during netif_device_attach() at the end of the reset task.
- */
- rtnl_lock();
- dev_close(netdev);
- rtnl_unlock();
- }
-
dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
-reset_finish:
- rtnl_lock();
- netif_device_attach(netdev);
- rtnl_unlock();
}
/**
@@ -3196,9 +3250,6 @@ static void iavf_adminq_task(struct work_struct *work)
u32 val, oldval;
u16 pending;
- if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
- goto out;
-
if (!mutex_trylock(&adapter->crit_lock)) {
if (adapter->state == __IAVF_REMOVE)
return;
@@ -3207,10 +3258,13 @@ static void iavf_adminq_task(struct work_struct *work)
goto out;
}
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+ goto unlock;
+
event.buf_len = IAVF_MAX_AQ_BUF_SIZE;
event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
if (!event.msg_buf)
- goto out;
+ goto unlock;
do {
ret = iavf_clean_arq_element(hw, &event, &pending);
@@ -3225,11 +3279,8 @@ static void iavf_adminq_task(struct work_struct *work)
if (pending != 0)
memset(event.msg_buf, 0, IAVF_MAX_AQ_BUF_SIZE);
} while (pending);
- mutex_unlock(&adapter->crit_lock);
- if ((adapter->flags &
- (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
- adapter->state == __IAVF_RESETTING)
+ if (iavf_is_reset_in_progress(adapter))
goto freedom;
/* check for error indications */
@@ -3271,6 +3322,8 @@ static void iavf_adminq_task(struct work_struct *work)
freedom:
kfree(event.msg_buf);
+unlock:
+ mutex_unlock(&adapter->crit_lock);
out:
/* re-enable Admin queue interrupt cause */
iavf_misc_irq_enable(adapter);
@@ -4315,6 +4368,7 @@ static int iavf_close(struct net_device *netdev)
static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
+ int ret = 0;
netdev_dbg(netdev, "changing MTU from %d to %d\n",
netdev->mtu, new_mtu);
@@ -4325,11 +4379,15 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
}
if (netif_running(netdev)) {
- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
- queue_work(adapter->wq, &adapter->reset_task);
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret < 0)
+ netdev_warn(netdev, "MTU change interrupted waiting for reset");
+ else if (ret)
+ netdev_warn(netdev, "MTU change timed out waiting for reset");
}
- return 0;
+ return ret;
}
#define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \
@@ -4922,6 +4980,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
INIT_WORK(&adapter->reset_task, iavf_reset_task);
INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
+ INIT_WORK(&adapter->finish_config, iavf_finish_config);
INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
queue_delayed_work(adapter->wq, &adapter->watchdog_task,
@@ -4930,6 +4989,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
/* Setup the wait queue for indicating transition to down status */
init_waitqueue_head(&adapter->down_waitqueue);
+ /* Setup the wait queue for indicating transition to running state */
+ init_waitqueue_head(&adapter->reset_waitqueue);
+
/* Setup the wait queue for indicating virtchannel events */
init_waitqueue_head(&adapter->vc_waitqueue);
@@ -5061,13 +5123,15 @@ static void iavf_remove(struct pci_dev *pdev)
usleep_range(500, 1000);
}
cancel_delayed_work_sync(&adapter->watchdog_task);
+ cancel_work_sync(&adapter->finish_config);
+ rtnl_lock();
if (adapter->netdev_registered) {
- rtnl_lock();
unregister_netdevice(netdev);
adapter->netdev_registered = false;
- rtnl_unlock();
}
+ rtnl_unlock();
+
if (CLIENT_ALLOWED(adapter)) {
err = iavf_lan_del_device(adapter);
if (err)
diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
index 7c0578b5457b..be3c007ce90a 100644
--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
@@ -1961,9 +1961,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
case VIRTCHNL_EVENT_RESET_IMPENDING:
dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n");
if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
- adapter->flags |= IAVF_FLAG_RESET_PENDING;
dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
- queue_work(adapter->wq, &adapter->reset_task);
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
}
break;
default:
@@ -2237,6 +2236,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
iavf_process_config(adapter);
adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
+ iavf_schedule_finish_config(adapter);
iavf_set_queue_vlan_tag_loc(adapter);
@@ -2285,6 +2285,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
case VIRTCHNL_OP_ENABLE_QUEUES:
/* enable transmits */
iavf_irq_enable(adapter, true);
+ wake_up(&adapter->reset_waitqueue);
adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED;
break;
case VIRTCHNL_OP_DISABLE_QUEUES:
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 4a12316f7b46..074bf9403cd1 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -435,7 +435,8 @@ static int ice_setup_rx_ctx(struct ice_rx_ring *ring)
/* Receive Packet Data Buffer Size.
* The Packet Data Buffer Size is defined in 128 byte units.
*/
- rlan_ctx.dbuf = ring->rx_buf_len >> ICE_RLAN_CTX_DBUF_S;
+ rlan_ctx.dbuf = DIV_ROUND_UP(ring->rx_buf_len,
+ BIT_ULL(ICE_RLAN_CTX_DBUF_S));
/* use 32 byte descriptors */
rlan_ctx.dsize = 1;
@@ -800,6 +801,8 @@ void ice_vsi_free_q_vectors(struct ice_vsi *vsi)
ice_for_each_q_vector(vsi, v_idx)
ice_free_q_vector(vsi, v_idx);
+
+ vsi->num_q_vectors = 0;
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c
index ad0a007b7398..8f232c41a89e 100644
--- a/drivers/net/ethernet/intel/ice/ice_eswitch.c
+++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c
@@ -538,6 +538,12 @@ ice_eswitch_mode_set(struct devlink *devlink, u16 mode,
break;
case DEVLINK_ESWITCH_MODE_SWITCHDEV:
{
+ if (ice_is_adq_active(pf)) {
+ dev_err(ice_pf_to_dev(pf), "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't change eswitch mode to switchdev - ADQ is active. Delete ADQ configs and try again, e.g. tc qdisc del dev $PF root");
+ return -EOPNOTSUPP;
+ }
+
dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev",
pf->hw.pf_id);
NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev");
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 8d5cbbd0b3d5..ad4d4702129f 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2681,8 +2681,13 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
ring->rx_max_pending = ICE_MAX_NUM_DESC;
ring->tx_max_pending = ICE_MAX_NUM_DESC;
- ring->rx_pending = vsi->rx_rings[0]->count;
- ring->tx_pending = vsi->tx_rings[0]->count;
+ if (vsi->tx_rings && vsi->rx_rings) {
+ ring->rx_pending = vsi->rx_rings[0]->count;
+ ring->tx_pending = vsi->tx_rings[0]->count;
+ } else {
+ ring->rx_pending = 0;
+ ring->tx_pending = 0;
+ }
/* Rx mini and jumbo rings are not supported */
ring->rx_mini_max_pending = 0;
@@ -2716,6 +2721,10 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring,
return -EINVAL;
}
+ /* Return if there is no rings (device is reloading) */
+ if (!vsi->tx_rings || !vsi->rx_rings)
+ return -EBUSY;
+
new_tx_cnt = ALIGN(ring->tx_pending, ICE_REQ_DESC_MULTIPLE);
if (new_tx_cnt != ring->tx_pending)
netdev_info(netdev, "Requested Tx descriptor count rounded up to %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index ead6d50fc0ad..8c6e13f87b7d 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -1281,16 +1281,21 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
ICE_FLOW_FLD_OFF_INVAL);
}
- /* add filter for outer headers */
fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT);
+
+ assign_bit(fltr_idx, hw->fdir_perfect_fltr, perfect_filter);
+
+ /* add filter for outer headers */
ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx,
ICE_FD_HW_SEG_NON_TUN);
- if (ret == -EEXIST)
- /* Rule already exists, free memory and continue */
- devm_kfree(dev, seg);
- else if (ret)
+ if (ret == -EEXIST) {
+ /* Rule already exists, free memory and count as success */
+ ret = 0;
+ goto err_exit;
+ } else if (ret) {
/* could not write filter, free memory */
goto err_exit;
+ }
/* make tunneled filter HW entries if possible */
memcpy(&tun_seg[1], seg, sizeof(*seg));
@@ -1305,18 +1310,13 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
devm_kfree(dev, tun_seg);
}
- if (perfect_filter)
- set_bit(fltr_idx, hw->fdir_perfect_fltr);
- else
- clear_bit(fltr_idx, hw->fdir_perfect_fltr);
-
return ret;
err_exit:
devm_kfree(dev, tun_seg);
devm_kfree(dev, seg);
- return -EOPNOTSUPP;
+ return ret;
}
/**
@@ -1914,7 +1914,9 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
input->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
/* input struct is added to the HW filter list */
- ice_fdir_update_list_entry(pf, input, fsp->location);
+ ret = ice_fdir_update_list_entry(pf, input, fsp->location);
+ if (ret)
+ goto release_lock;
ret = ice_fdir_write_all_fltr(pf, input, true);
if (ret)
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 00e3afd507a4..0054d7e64ec3 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2972,39 +2972,12 @@ int ice_vsi_release(struct ice_vsi *vsi)
return -ENODEV;
pf = vsi->back;
- /* do not unregister while driver is in the reset recovery pending
- * state. Since reset/rebuild happens through PF service task workqueue,
- * it's not a good idea to unregister netdev that is associated to the
- * PF that is running the work queue items currently. This is done to
- * avoid check_flush_dependency() warning on this wq
- */
- if (vsi->netdev && !ice_is_reset_in_progress(pf->state) &&
- (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state))) {
- unregister_netdev(vsi->netdev);
- clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
- }
-
- if (vsi->type == ICE_VSI_PF)
- ice_devlink_destroy_pf_port(pf);
-
if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
ice_rss_clean(vsi);
ice_vsi_close(vsi);
ice_vsi_decfg(vsi);
- if (vsi->netdev) {
- if (test_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state)) {
- unregister_netdev(vsi->netdev);
- clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state);
- }
- if (test_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state)) {
- free_netdev(vsi->netdev);
- vsi->netdev = NULL;
- clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state);
- }
- }
-
/* retain SW VSI data structure since it is needed to unregister and
* free VSI netdev when PF is not in reset recovery pending state,\
* for ex: during rmmod.
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 19a5e7f3a075..b40dfe6ae321 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4430,9 +4430,9 @@ static int ice_start_eth(struct ice_vsi *vsi)
if (err)
return err;
- rtnl_lock();
err = ice_vsi_open(vsi);
- rtnl_unlock();
+ if (err)
+ ice_fltr_remove_all(vsi);
return err;
}
@@ -4895,6 +4895,7 @@ int ice_load(struct ice_pf *pf)
params = ice_vsi_to_params(vsi);
params.flags = ICE_VSI_FLAG_INIT;
+ rtnl_lock();
err = ice_vsi_cfg(vsi, &params);
if (err)
goto err_vsi_cfg;
@@ -4902,6 +4903,7 @@ int ice_load(struct ice_pf *pf)
err = ice_start_eth(ice_get_main_vsi(pf));
if (err)
goto err_start_eth;
+ rtnl_unlock();
err = ice_init_rdma(pf);
if (err)
@@ -4916,9 +4918,11 @@ int ice_load(struct ice_pf *pf)
err_init_rdma:
ice_vsi_close(ice_get_main_vsi(pf));
+ rtnl_lock();
err_start_eth:
ice_vsi_decfg(ice_get_main_vsi(pf));
err_vsi_cfg:
+ rtnl_unlock();
ice_deinit_dev(pf);
return err;
}
@@ -4931,8 +4935,10 @@ void ice_unload(struct ice_pf *pf)
{
ice_deinit_features(pf);
ice_deinit_rdma(pf);
+ rtnl_lock();
ice_stop_eth(ice_get_main_vsi(pf));
ice_vsi_decfg(ice_get_main_vsi(pf));
+ rtnl_unlock();
ice_deinit_dev(pf);
}
@@ -8807,6 +8813,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
{
struct ice_netdev_priv *np = netdev_priv(netdev);
struct ice_pf *pf = np->vsi->back;
+ bool locked = false;
int err;
switch (type) {
@@ -8816,10 +8823,32 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
ice_setup_tc_block_cb,
np, np, true);
case TC_SETUP_QDISC_MQPRIO:
+ if (ice_is_eswitch_mode_switchdev(pf)) {
+ netdev_err(netdev, "TC MQPRIO offload not supported, switchdev is enabled\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (pf->adev) {
+ mutex_lock(&pf->adev_mutex);
+ device_lock(&pf->adev->dev);
+ locked = true;
+ if (pf->adev->dev.driver) {
+ netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
+ err = -EBUSY;
+ goto adev_unlock;
+ }
+ }
+
/* setup traffic classifier for receive side */
mutex_lock(&pf->tc_mutex);
err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
mutex_unlock(&pf->tc_mutex);
+
+adev_unlock:
+ if (locked) {
+ device_unlock(&pf->adev->dev);
+ mutex_unlock(&pf->adev_mutex);
+ }
return err;
default:
return -EOPNOTSUPP;
diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c
index 1f66914c7a20..31314e7540f8 100644
--- a/drivers/net/ethernet/intel/ice/ice_sriov.c
+++ b/drivers/net/ethernet/intel/ice/ice_sriov.c
@@ -1131,7 +1131,7 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
if (!vf)
return -EINVAL;
- ret = ice_check_vf_ready_for_reset(vf);
+ ret = ice_check_vf_ready_for_cfg(vf);
if (ret)
goto out_put_vf;
@@ -1246,7 +1246,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
goto out_put_vf;
}
- ret = ice_check_vf_ready_for_reset(vf);
+ ret = ice_check_vf_ready_for_cfg(vf);
if (ret)
goto out_put_vf;
@@ -1300,7 +1300,7 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted)
return -EOPNOTSUPP;
}
- ret = ice_check_vf_ready_for_reset(vf);
+ ret = ice_check_vf_ready_for_cfg(vf);
if (ret)
goto out_put_vf;
@@ -1613,7 +1613,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
if (!vf)
return -EINVAL;
- ret = ice_check_vf_ready_for_reset(vf);
+ ret = ice_check_vf_ready_for_cfg(vf);
if (ret)
goto out_put_vf;
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
index b26ce4425f45..ea3310be8354 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c
@@ -186,25 +186,6 @@ int ice_check_vf_ready_for_cfg(struct ice_vf *vf)
}
/**
- * ice_check_vf_ready_for_reset - check if VF is ready to be reset
- * @vf: VF to check if it's ready to be reset
- *
- * The purpose of this function is to ensure that the VF is not in reset,
- * disabled, and is both initialized and active, thus enabling us to safely
- * initialize another reset.
- */
-int ice_check_vf_ready_for_reset(struct ice_vf *vf)
-{
- int ret;
-
- ret = ice_check_vf_ready_for_cfg(vf);
- if (!ret && !test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
- ret = -EAGAIN;
-
- return ret;
-}
-
-/**
* ice_trigger_vf_reset - Reset a VF on HW
* @vf: pointer to the VF structure
* @is_vflr: true if VFLR was issued, false if not
@@ -631,11 +612,17 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
return 0;
}
+ if (flags & ICE_VF_RESET_LOCK)
+ mutex_lock(&vf->cfg_lock);
+ else
+ lockdep_assert_held(&vf->cfg_lock);
+
if (ice_is_vf_disabled(vf)) {
vsi = ice_get_vf_vsi(vf);
if (!vsi) {
dev_dbg(dev, "VF is already removed\n");
- return -EINVAL;
+ err = -EINVAL;
+ goto out_unlock;
}
ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
@@ -644,14 +631,9 @@ int ice_reset_vf(struct ice_vf *vf, u32 flags)
dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
vf->vf_id);
- return 0;
+ goto out_unlock;
}
- if (flags & ICE_VF_RESET_LOCK)
- mutex_lock(&vf->cfg_lock);
- else
- lockdep_assert_held(&vf->cfg_lock);
-
/* Set VF disable bit state here, before triggering reset */
set_bit(ICE_VF_STATE_DIS, vf->vf_states);
ice_trigger_vf_reset(vf, flags & ICE_VF_RESET_VFLR, false);
diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
index 67172fdd9bc2..48fea6fa0362 100644
--- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h
@@ -215,7 +215,6 @@ u16 ice_get_num_vfs(struct ice_pf *pf);
struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf);
bool ice_is_vf_disabled(struct ice_vf *vf);
int ice_check_vf_ready_for_cfg(struct ice_vf *vf);
-int ice_check_vf_ready_for_reset(struct ice_vf *vf);
void ice_set_vf_state_dis(struct ice_vf *vf);
bool ice_is_any_vf_in_unicast_promisc(struct ice_pf *pf);
void
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
index efbc2968a7bf..dcf628b1fccd 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl.c
@@ -3947,7 +3947,6 @@ error_handler:
ice_vc_notify_vf_link_state(vf);
break;
case VIRTCHNL_OP_RESET_VF:
- clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
ops->reset_vf(vf);
break;
case VIRTCHNL_OP_ADD_ETH_ADDR:
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index 405886ee5261..319c544b9f04 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -1385,18 +1385,6 @@ void igb_ptp_init(struct igb_adapter *adapter)
return;
}
- spin_lock_init(&adapter->tmreg_lock);
- INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
-
- if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
- INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
- igb_ptp_overflow_check);
-
- adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
- adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
-
- igb_ptp_reset(adapter);
-
adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps,
&adapter->pdev->dev);
if (IS_ERR(adapter->ptp_clock)) {
@@ -1406,6 +1394,18 @@ void igb_ptp_init(struct igb_adapter *adapter)
dev_info(&adapter->pdev->dev, "added PHC on %s\n",
adapter->netdev->name);
adapter->ptp_flags |= IGB_PTP_ENABLED;
+
+ spin_lock_init(&adapter->tmreg_lock);
+ INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work);
+
+ if (adapter->ptp_flags & IGB_PTP_OVERFLOW_CHECK)
+ INIT_DELAYED_WORK(&adapter->ptp_overflow_work,
+ igb_ptp_overflow_check);
+
+ adapter->tstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+ adapter->tstamp_config.tx_type = HWTSTAMP_TX_OFF;
+
+ igb_ptp_reset(adapter);
}
}
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 9db384f66a8e..38901d2a4680 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -195,6 +195,10 @@ struct igc_adapter {
u32 qbv_config_change_errors;
bool qbv_transition;
unsigned int qbv_count;
+ /* Access to oper_gate_closed, admin_gate_closed and qbv_transition
+ * are protected by the qbv_tx_lock.
+ */
+ spinlock_t qbv_tx_lock;
/* OS defined structs */
struct pci_dev *pdev;
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 44a507029946..2f780cc90883 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -546,7 +546,7 @@
#define IGC_PTM_CTRL_START_NOW BIT(29) /* Start PTM Now */
#define IGC_PTM_CTRL_EN BIT(30) /* Enable PTM */
#define IGC_PTM_CTRL_TRIG BIT(31) /* PTM Cycle trigger */
-#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x2f) << 2)
+#define IGC_PTM_CTRL_SHRT_CYC(usec) (((usec) & 0x3f) << 2)
#define IGC_PTM_CTRL_PTM_TO(usec) (((usec) & 0xff) << 8)
#define IGC_PTM_SHORT_CYC_DEFAULT 10 /* Default Short/interrupted cycle interval */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 9f93f0f4f752..6f557e843e49 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -316,6 +316,33 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
igc_clean_tx_ring(adapter->tx_ring[i]);
}
+static void igc_disable_tx_ring_hw(struct igc_ring *ring)
+{
+ struct igc_hw *hw = &ring->q_vector->adapter->hw;
+ u8 idx = ring->reg_idx;
+ u32 txdctl;
+
+ txdctl = rd32(IGC_TXDCTL(idx));
+ txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
+ txdctl |= IGC_TXDCTL_SWFLUSH;
+ wr32(IGC_TXDCTL(idx), txdctl);
+}
+
+/**
+ * igc_disable_all_tx_rings_hw - Disable all transmit queue operation
+ * @adapter: board private structure
+ */
+static void igc_disable_all_tx_rings_hw(struct igc_adapter *adapter)
+{
+ int i;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *tx_ring = adapter->tx_ring[i];
+
+ igc_disable_tx_ring_hw(tx_ring);
+ }
+}
+
/**
* igc_setup_tx_resources - allocate Tx resources (Descriptors)
* @tx_ring: tx descriptor ring (for a specific queue) to setup
@@ -2828,9 +2855,8 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
struct netdev_queue *nq = txring_txq(ring);
union igc_adv_tx_desc *tx_desc = NULL;
int cpu = smp_processor_id();
- u16 ntu = ring->next_to_use;
struct xdp_desc xdp_desc;
- u16 budget;
+ u16 budget, ntu;
if (!netif_carrier_ok(ring->netdev))
return;
@@ -2840,6 +2866,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
/* Avoid transmit queue timeout since we share it with the slow path */
txq_trans_cond_update(nq);
+ ntu = ring->next_to_use;
budget = igc_desc_unused(ring);
while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
@@ -4774,6 +4801,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
adapter->nfc_rule_count = 0;
spin_lock_init(&adapter->stats64_lock);
+ spin_lock_init(&adapter->qbv_tx_lock);
/* Assume MSI-X interrupts, will be checked during IRQ allocation */
adapter->flags |= IGC_FLAG_HAS_MSIX;
@@ -5058,6 +5086,7 @@ void igc_down(struct igc_adapter *adapter)
/* clear VLAN promisc flag so VFTA will be updated if necessary */
adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
+ igc_disable_all_tx_rings_hw(adapter);
igc_clean_all_tx_rings(adapter);
igc_clean_all_rx_rings(adapter);
}
@@ -6091,15 +6120,15 @@ static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
return igc_tsn_offload_apply(adapter);
}
-static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+static int igc_qbv_clear_schedule(struct igc_adapter *adapter)
{
+ unsigned long flags;
int i;
adapter->base_time = 0;
adapter->cycle_time = NSEC_PER_SEC;
adapter->taprio_offload_enable = false;
adapter->qbv_config_change_errors = 0;
- adapter->qbv_transition = false;
adapter->qbv_count = 0;
for (i = 0; i < adapter->num_tx_queues; i++) {
@@ -6108,10 +6137,28 @@ static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
ring->start_time = 0;
ring->end_time = NSEC_PER_SEC;
ring->max_sdu = 0;
+ }
+
+ spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
+ adapter->qbv_transition = false;
+
+ for (i = 0; i < adapter->num_tx_queues; i++) {
+ struct igc_ring *ring = adapter->tx_ring[i];
+
ring->oper_gate_closed = false;
ring->admin_gate_closed = false;
}
+ spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
+ return 0;
+}
+
+static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
+{
+ igc_qbv_clear_schedule(adapter);
+
return 0;
}
@@ -6122,6 +6169,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
struct igc_hw *hw = &adapter->hw;
u32 start_time = 0, end_time = 0;
struct timespec64 now;
+ unsigned long flags;
size_t n;
int i;
@@ -6189,6 +6237,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
start_time += e->interval;
}
+ spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
/* Check whether a queue gets configured.
* If not, set the start and end time to be end time.
*/
@@ -6213,6 +6263,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
}
}
+ spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
for (i = 0; i < adapter->num_tx_queues; i++) {
struct igc_ring *ring = adapter->tx_ring[i];
struct net_device *dev = adapter->netdev;
@@ -6591,8 +6643,11 @@ static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
{
struct igc_adapter *adapter = container_of(timer, struct igc_adapter,
hrtimer);
+ unsigned long flags;
unsigned int i;
+ spin_lock_irqsave(&adapter->qbv_tx_lock, flags);
+
adapter->qbv_transition = true;
for (i = 0; i < adapter->num_tx_queues; i++) {
struct igc_ring *tx_ring = adapter->tx_ring[i];
@@ -6605,6 +6660,9 @@ static enum hrtimer_restart igc_qbv_scheduling_timer(struct hrtimer *timer)
}
}
adapter->qbv_transition = false;
+
+ spin_unlock_irqrestore(&adapter->qbv_tx_lock, flags);
+
return HRTIMER_NORESTART;
}
@@ -7290,18 +7348,6 @@ void igc_enable_rx_ring(struct igc_ring *ring)
igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
}
-static void igc_disable_tx_ring_hw(struct igc_ring *ring)
-{
- struct igc_hw *hw = &ring->q_vector->adapter->hw;
- u8 idx = ring->reg_idx;
- u32 txdctl;
-
- txdctl = rd32(IGC_TXDCTL(idx));
- txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
- txdctl |= IGC_TXDCTL_SWFLUSH;
- wr32(IGC_TXDCTL(idx), txdctl);
-}
-
void igc_disable_tx_ring(struct igc_ring *ring)
{
igc_disable_tx_ring_hw(ring);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 1726297f2e0d..8eb9839a3ca6 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8479,7 +8479,7 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
struct ixgbe_adapter *adapter = q_vector->adapter;
if (unlikely(skb_tail_pointer(skb) < hdr.network +
- VXLAN_HEADROOM))
+ vxlan_headroom(0)))
return;
/* verify the port is recognized as VXLAN */
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 2b9335cb4bb3..8537578e1cf1 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1302,11 +1302,10 @@ static int korina_probe(struct platform_device *pdev)
else if (of_get_ethdev_address(pdev->dev.of_node, dev) < 0)
eth_hw_addr_random(dev);
- clk = devm_clk_get_optional(&pdev->dev, "mdioclk");
+ clk = devm_clk_get_optional_enabled(&pdev->dev, "mdioclk");
if (IS_ERR(clk))
return PTR_ERR(clk);
if (clk) {
- clk_prepare_enable(clk);
lp->mii_clock_freq = clk_get_rate(clk);
} else {
lp->mii_clock_freq = 200000000; /* max possible input clk */
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
index 035ead7935c7..dab61cc1acb5 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
@@ -98,6 +98,9 @@ int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox)
writeq(OCTEP_CTRL_MBOX_STATUS_INIT,
OCTEP_CTRL_MBOX_INFO_HOST_STATUS(mbox->barmem));
+ mutex_init(&mbox->h2fq_lock);
+ mutex_init(&mbox->f2hq_lock);
+
mbox->h2fq.sz = readl(OCTEP_CTRL_MBOX_H2FQ_SZ(mbox->barmem));
mbox->h2fq.hw_prod = OCTEP_CTRL_MBOX_H2FQ_PROD(mbox->barmem);
mbox->h2fq.hw_cons = OCTEP_CTRL_MBOX_H2FQ_CONS(mbox->barmem);
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
index 1cc6af2feb38..565320ec24f8 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_net.c
@@ -55,7 +55,7 @@ static int octep_send_mbox_req(struct octep_device *oct,
list_add_tail(&d->list, &oct->ctrl_req_wait_list);
ret = wait_event_interruptible_timeout(oct->ctrl_req_wait_q,
(d->done != 0),
- jiffies + msecs_to_jiffies(500));
+ msecs_to_jiffies(500));
list_del(&d->list);
if (ret == 0 || ret == 1)
return -EAGAIN;
diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
index 43eb6e871351..4424de2ffd70 100644
--- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
+++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c
@@ -1038,6 +1038,10 @@ static void octep_device_cleanup(struct octep_device *oct)
{
int i;
+ oct->poll_non_ioq_intr = false;
+ cancel_delayed_work_sync(&oct->intr_poll_task);
+ cancel_work_sync(&oct->ctrl_mbox_task);
+
dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n");
for (i = 0; i < OCTEP_MAX_VF; i++) {
@@ -1200,14 +1204,11 @@ static void octep_remove(struct pci_dev *pdev)
if (!oct)
return;
- cancel_work_sync(&oct->tx_timeout_task);
- cancel_work_sync(&oct->ctrl_mbox_task);
netdev = oct->netdev;
if (netdev->reg_state == NETREG_REGISTERED)
unregister_netdev(netdev);
- oct->poll_non_ioq_intr = false;
- cancel_delayed_work_sync(&oct->intr_poll_task);
+ cancel_work_sync(&oct->tx_timeout_task);
octep_device_cleanup(oct);
pci_release_mem_regions(pdev);
free_netdev(netdev);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 04b0e885f9d2..c2f68678e947 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -4270,9 +4270,10 @@ rx_frscfg:
if (link < 0)
return NIX_AF_ERR_RX_LINK_INVALID;
- nix_find_link_frs(rvu, req, pcifunc);
linkcfg:
+ nix_find_link_frs(rvu, req, pcifunc);
+
cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_LINKX_CFG(link));
cfg = (cfg & ~(0xFFFFULL << 16)) | ((u64)req->maxlen << 16);
if (req->update_minlen)
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
index 6fe67f3a7f6f..7e20282c12d0 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.c
@@ -218,13 +218,54 @@ void npc_config_secret_key(struct rvu *rvu, int blkaddr)
void npc_program_mkex_hash(struct rvu *rvu, int blkaddr)
{
+ struct npc_mcam_kex_hash *mh = rvu->kpu.mkex_hash;
struct hw_cap *hwcap = &rvu->hw->cap;
+ u8 intf, ld, hdr_offset, byte_len;
struct rvu_hwinfo *hw = rvu->hw;
- u8 intf;
+ u64 cfg;
+ /* Check if hardware supports hash extraction */
if (!hwcap->npc_hash_extract)
return;
+ /* Check if IPv6 source/destination address
+ * should be hash enabled.
+ * Hashing reduces 128bit SIP/DIP fields to 32bit
+ * so that 224 bit X2 key can be used for IPv6 based filters as well,
+ * which in turn results in more number of MCAM entries available for
+ * use.
+ *
+ * Hashing of IPV6 SIP/DIP is enabled in below scenarios
+ * 1. If the silicon variant supports hashing feature
+ * 2. If the number of bytes of IP addr being extracted is 4 bytes ie
+ * 32bit. The assumption here is that if user wants 8bytes of LSB of
+ * IP addr or full 16 bytes then his intention is not to use 32bit
+ * hash.
+ */
+ for (intf = 0; intf < hw->npc_intfs; intf++) {
+ for (ld = 0; ld < NPC_MAX_LD; ld++) {
+ cfg = rvu_read64(rvu, blkaddr,
+ NPC_AF_INTFX_LIDX_LTX_LDX_CFG(intf,
+ NPC_LID_LC,
+ NPC_LT_LC_IP6,
+ ld));
+ hdr_offset = FIELD_GET(NPC_HDR_OFFSET, cfg);
+ byte_len = FIELD_GET(NPC_BYTESM, cfg);
+ /* Hashing of IPv6 source/destination address should be
+ * enabled if,
+ * hdr_offset == 8 (offset of source IPv6 address) or
+ * hdr_offset == 24 (offset of destination IPv6)
+ * address) and the number of byte to be
+ * extracted is 4. As per hardware configuration
+ * byte_len should be == actual byte_len - 1.
+ * Hence byte_len is checked against 3 but nor 4.
+ */
+ if ((hdr_offset == 8 || hdr_offset == 24) && byte_len == 3)
+ mh->lid_lt_ld_hash_en[intf][NPC_LID_LC][NPC_LT_LC_IP6][ld] = true;
+ }
+ }
+
+ /* Update hash configuration if the field is hash enabled */
for (intf = 0; intf < hw->npc_intfs; intf++) {
npc_program_mkex_hash_rx(rvu, blkaddr, intf);
npc_program_mkex_hash_tx(rvu, blkaddr, intf);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h
index a1c3d987b804..57a09328d46b 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc_hash.h
@@ -70,8 +70,8 @@ static struct npc_mcam_kex_hash npc_mkex_hash_default __maybe_unused = {
[NIX_INTF_RX] = {
[NPC_LID_LC] = {
[NPC_LT_LC_IP6] = {
- true,
- true,
+ false,
+ false,
},
},
},
@@ -79,8 +79,8 @@ static struct npc_mcam_kex_hash npc_mkex_hash_default __maybe_unused = {
[NIX_INTF_TX] = {
[NPC_LID_LC] = {
[NPC_LT_LC_IP6] = {
- true,
- true,
+ false,
+ false,
},
},
},
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
index 6e2fb24be8c1..59b138214af2 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_macsec.c
@@ -4,6 +4,7 @@
* Copyright (C) 2022 Marvell.
*/
+#include <crypto/skcipher.h>
#include <linux/rtnetlink.h>
#include <linux/bitfield.h>
#include "otx2_common.h"
@@ -42,6 +43,56 @@
#define MCS_TCI_E 0x08 /* encryption */
#define MCS_TCI_C 0x04 /* changed text */
+#define CN10K_MAX_HASH_LEN 16
+#define CN10K_MAX_SAK_LEN 32
+
+static int cn10k_ecb_aes_encrypt(struct otx2_nic *pfvf, u8 *sak,
+ u16 sak_len, u8 *hash)
+{
+ u8 data[CN10K_MAX_HASH_LEN] = { 0 };
+ struct skcipher_request *req = NULL;
+ struct scatterlist sg_src, sg_dst;
+ struct crypto_skcipher *tfm;
+ DECLARE_CRYPTO_WAIT(wait);
+ int err;
+
+ tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
+ if (IS_ERR(tfm)) {
+ dev_err(pfvf->dev, "failed to allocate transform for ecb-aes\n");
+ return PTR_ERR(tfm);
+ }
+
+ req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ dev_err(pfvf->dev, "failed to allocate request for skcipher\n");
+ err = -ENOMEM;
+ goto free_tfm;
+ }
+
+ err = crypto_skcipher_setkey(tfm, sak, sak_len);
+ if (err) {
+ dev_err(pfvf->dev, "failed to set key for skcipher\n");
+ goto free_req;
+ }
+
+ /* build sg list */
+ sg_init_one(&sg_src, data, CN10K_MAX_HASH_LEN);
+ sg_init_one(&sg_dst, hash, CN10K_MAX_HASH_LEN);
+
+ skcipher_request_set_callback(req, 0, crypto_req_done, &wait);
+ skcipher_request_set_crypt(req, &sg_src, &sg_dst,
+ CN10K_MAX_HASH_LEN, NULL);
+
+ err = crypto_skcipher_encrypt(req);
+ err = crypto_wait_req(err, &wait);
+
+free_req:
+ skcipher_request_free(req);
+free_tfm:
+ crypto_free_skcipher(tfm);
+ return err;
+}
+
static struct cn10k_mcs_txsc *cn10k_mcs_get_txsc(struct cn10k_mcs_cfg *cfg,
struct macsec_secy *secy)
{
@@ -330,19 +381,53 @@ fail:
return ret;
}
+static int cn10k_mcs_write_keys(struct otx2_nic *pfvf,
+ struct macsec_secy *secy,
+ struct mcs_sa_plcy_write_req *req,
+ u8 *sak, u8 *salt, ssci_t ssci)
+{
+ u8 hash_rev[CN10K_MAX_HASH_LEN];
+ u8 sak_rev[CN10K_MAX_SAK_LEN];
+ u8 salt_rev[MACSEC_SALT_LEN];
+ u8 hash[CN10K_MAX_HASH_LEN];
+ u32 ssci_63_32;
+ int err, i;
+
+ err = cn10k_ecb_aes_encrypt(pfvf, sak, secy->key_len, hash);
+ if (err) {
+ dev_err(pfvf->dev, "Generating hash using ECB(AES) failed\n");
+ return err;
+ }
+
+ for (i = 0; i < secy->key_len; i++)
+ sak_rev[i] = sak[secy->key_len - 1 - i];
+
+ for (i = 0; i < CN10K_MAX_HASH_LEN; i++)
+ hash_rev[i] = hash[CN10K_MAX_HASH_LEN - 1 - i];
+
+ for (i = 0; i < MACSEC_SALT_LEN; i++)
+ salt_rev[i] = salt[MACSEC_SALT_LEN - 1 - i];
+
+ ssci_63_32 = (__force u32)cpu_to_be32((__force u32)ssci);
+
+ memcpy(&req->plcy[0][0], sak_rev, secy->key_len);
+ memcpy(&req->plcy[0][4], hash_rev, CN10K_MAX_HASH_LEN);
+ memcpy(&req->plcy[0][6], salt_rev, MACSEC_SALT_LEN);
+ req->plcy[0][7] |= (u64)ssci_63_32 << 32;
+
+ return 0;
+}
+
static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf,
struct macsec_secy *secy,
struct cn10k_mcs_rxsc *rxsc,
u8 assoc_num, bool sa_in_use)
{
- unsigned char *src = rxsc->sa_key[assoc_num];
struct mcs_sa_plcy_write_req *plcy_req;
- u8 *salt_p = rxsc->salt[assoc_num];
+ u8 *sak = rxsc->sa_key[assoc_num];
+ u8 *salt = rxsc->salt[assoc_num];
struct mcs_rx_sc_sa_map *map_req;
struct mbox *mbox = &pfvf->mbox;
- u64 ssci_salt_95_64 = 0;
- u8 reg, key_len;
- u64 salt_63_0;
int ret;
mutex_lock(&mbox->lock);
@@ -360,20 +445,10 @@ static int cn10k_mcs_write_rx_sa_plcy(struct otx2_nic *pfvf,
goto fail;
}
- for (reg = 0, key_len = 0; key_len < secy->key_len; key_len += 8) {
- memcpy((u8 *)&plcy_req->plcy[0][reg],
- (src + reg * 8), 8);
- reg++;
- }
-
- if (secy->xpn) {
- memcpy((u8 *)&salt_63_0, salt_p, 8);
- memcpy((u8 *)&ssci_salt_95_64, salt_p + 8, 4);
- ssci_salt_95_64 |= (__force u64)rxsc->ssci[assoc_num] << 32;
-
- plcy_req->plcy[0][6] = salt_63_0;
- plcy_req->plcy[0][7] = ssci_salt_95_64;
- }
+ ret = cn10k_mcs_write_keys(pfvf, secy, plcy_req, sak,
+ salt, rxsc->ssci[assoc_num]);
+ if (ret)
+ goto fail;
plcy_req->sa_index[0] = rxsc->hw_sa_id[assoc_num];
plcy_req->sa_cnt = 1;
@@ -586,13 +661,10 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf,
struct cn10k_mcs_txsc *txsc,
u8 assoc_num)
{
- unsigned char *src = txsc->sa_key[assoc_num];
struct mcs_sa_plcy_write_req *plcy_req;
- u8 *salt_p = txsc->salt[assoc_num];
+ u8 *sak = txsc->sa_key[assoc_num];
+ u8 *salt = txsc->salt[assoc_num];
struct mbox *mbox = &pfvf->mbox;
- u64 ssci_salt_95_64 = 0;
- u8 reg, key_len;
- u64 salt_63_0;
int ret;
mutex_lock(&mbox->lock);
@@ -603,19 +675,10 @@ static int cn10k_mcs_write_tx_sa_plcy(struct otx2_nic *pfvf,
goto fail;
}
- for (reg = 0, key_len = 0; key_len < secy->key_len; key_len += 8) {
- memcpy((u8 *)&plcy_req->plcy[0][reg], (src + reg * 8), 8);
- reg++;
- }
-
- if (secy->xpn) {
- memcpy((u8 *)&salt_63_0, salt_p, 8);
- memcpy((u8 *)&ssci_salt_95_64, salt_p + 8, 4);
- ssci_salt_95_64 |= (__force u64)txsc->ssci[assoc_num] << 32;
-
- plcy_req->plcy[0][6] = salt_63_0;
- plcy_req->plcy[0][7] = ssci_salt_95_64;
- }
+ ret = cn10k_mcs_write_keys(pfvf, secy, plcy_req, sak,
+ salt, txsc->ssci[assoc_num]);
+ if (ret)
+ goto fail;
plcy_req->plcy[0][8] = assoc_num;
plcy_req->sa_index[0] = txsc->hw_sa_id[assoc_num];
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index fe8ea4e531b7..9551b422622a 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1454,8 +1454,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf)
if (err)
goto err_free_npa_lf;
- /* Enable backpressure */
- otx2_nix_config_bp(pf, true);
+ /* Enable backpressure for CGX mapped PF/VFs */
+ if (!is_otx2_lbkvf(pf->pdev))
+ otx2_nix_config_bp(pf, true);
/* Init Auras and pools used by NIX RQ, for free buffer ptrs */
err = otx2_rq_aura_pool_init(pf);
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
index f328d957b2db..35857dc19542 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
@@ -727,7 +727,8 @@ pick_fw_ver:
err = request_firmware_direct(&fw->bin, fw_path, fw->dev.dev);
if (err) {
- if (ver_maj == PRESTERA_SUPP_FW_MAJ_VER) {
+ if (ver_maj != PRESTERA_PREV_FW_MAJ_VER ||
+ ver_min != PRESTERA_PREV_FW_MIN_VER) {
ver_maj = PRESTERA_PREV_FW_MAJ_VER;
ver_min = PRESTERA_PREV_FW_MIN_VER;
diff --git a/drivers/net/ethernet/marvell/prestera/prestera_router.c b/drivers/net/ethernet/marvell/prestera/prestera_router.c
index a9a1028cb17b..de317179a7dc 100644
--- a/drivers/net/ethernet/marvell/prestera/prestera_router.c
+++ b/drivers/net/ethernet/marvell/prestera/prestera_router.c
@@ -166,11 +166,11 @@ prestera_util_neigh2nc_key(struct prestera_switch *sw, struct neighbour *n,
static bool __prestera_fi_is_direct(struct fib_info *fi)
{
- struct fib_nh *fib_nh;
+ struct fib_nh_common *fib_nhc;
if (fib_info_num_path(fi) == 1) {
- fib_nh = fib_info_nh(fi, 0);
- if (fib_nh->fib_nh_gw_family == AF_UNSPEC)
+ fib_nhc = fib_info_nhc(fi, 0);
+ if (fib_nhc->nhc_gw_family == AF_UNSPEC)
return true;
}
@@ -261,7 +261,7 @@ static bool
__prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
struct net_device *dev)
{
- struct fib_nh *fib_nh;
+ struct fib_nh_common *fib_nhc;
struct fib_result res;
bool reachable;
@@ -269,8 +269,8 @@ __prestera_util_kern_n_is_reachable_v4(u32 tb_id, __be32 *addr,
if (!prestera_util_kern_get_route(&res, tb_id, addr))
if (prestera_fi_is_direct(res.fi)) {
- fib_nh = fib_info_nh(res.fi, 0);
- if (dev == fib_nh->fib_nh_dev)
+ fib_nhc = fib_info_nhc(res.fi, 0);
+ if (dev == fib_nhc->nhc_dev)
reachable = true;
}
@@ -324,7 +324,7 @@ prestera_kern_fib_info_nhc(struct fib_notifier_info *info, int n)
if (info->family == AF_INET) {
fen4_info = container_of(info, struct fib_entry_notifier_info,
info);
- return &fib_info_nh(fen4_info->fi, n)->nh_common;
+ return fib_info_nhc(fen4_info->fi, n);
} else if (info->family == AF_INET6) {
fen6_info = container_of(info, struct fib6_entry_notifier_info,
info);
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 834c644b67db..2d15342c260a 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3846,23 +3846,6 @@ static int mtk_hw_deinit(struct mtk_eth *eth)
return 0;
}
-static int __init mtk_init(struct net_device *dev)
-{
- struct mtk_mac *mac = netdev_priv(dev);
- struct mtk_eth *eth = mac->hw;
- int ret;
-
- ret = of_get_ethdev_address(mac->of_node, dev);
- if (ret) {
- /* If the mac address is invalid, use random mac address */
- eth_hw_addr_random(dev);
- dev_err(eth->dev, "generated random MAC address %pM\n",
- dev->dev_addr);
- }
-
- return 0;
-}
-
static void mtk_uninit(struct net_device *dev)
{
struct mtk_mac *mac = netdev_priv(dev);
@@ -4278,7 +4261,6 @@ static const struct ethtool_ops mtk_ethtool_ops = {
};
static const struct net_device_ops mtk_netdev_ops = {
- .ndo_init = mtk_init,
.ndo_uninit = mtk_uninit,
.ndo_open = mtk_open,
.ndo_stop = mtk_stop,
@@ -4340,6 +4322,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
mac->hw = eth;
mac->of_node = np;
+ err = of_get_ethdev_address(mac->of_node, eth->netdev[id]);
+ if (err == -EPROBE_DEFER)
+ return err;
+
+ if (err) {
+ /* If the mac address is invalid, use random mac address */
+ eth_hw_addr_random(eth->netdev[id]);
+ dev_err(eth->dev, "generated random MAC address %pM\n",
+ eth->netdev[id]->dev_addr);
+ }
+
memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
mac->hwlro_ip_cnt = 0;
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
index 316fe2e70fea..1a97feca77f2 100644
--- a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -98,7 +98,7 @@ mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
acct = mtk_foe_entry_get_mib(ppe, i, NULL);
- type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ type = mtk_get_ib1_pkt_type(ppe->eth, entry->ib1);
seq_printf(m, "%05x %s %7s", i,
mtk_foe_entry_state_str(state),
mtk_foe_pkt_type_str(type));
diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c
index 985cff910f30..3b651efcc25e 100644
--- a/drivers/net/ethernet/mediatek/mtk_wed.c
+++ b/drivers/net/ethernet/mediatek/mtk_wed.c
@@ -221,9 +221,13 @@ void mtk_wed_fe_reset(void)
for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
struct mtk_wed_hw *hw = hw_list[i];
- struct mtk_wed_device *dev = hw->wed_dev;
+ struct mtk_wed_device *dev;
int err;
+ if (!hw)
+ break;
+
+ dev = hw->wed_dev;
if (!dev || !dev->wlan.reset)
continue;
@@ -244,8 +248,12 @@ void mtk_wed_fe_reset_complete(void)
for (i = 0; i < ARRAY_SIZE(hw_list); i++) {
struct mtk_wed_hw *hw = hw_list[i];
- struct mtk_wed_device *dev = hw->wed_dev;
+ struct mtk_wed_device *dev;
+
+ if (!hw)
+ break;
+ dev = hw->wed_dev;
if (!dev || !dev->wlan.reset_complete)
continue;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
index b0128336ff01..e869c65d8e90 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */
#include "reporter_vnic.h"
+#include "en_stats.h"
#include "devlink.h"
#define VNIC_ENV_GET64(vnic_env_stats, c) \
@@ -36,55 +37,72 @@ int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev,
if (err)
return err;
- err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues",
- VNIC_ENV_GET64(&vnic, total_error_queues));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow",
- VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun",
- VNIC_ENV_GET64(&vnic, comp_eq_overrun));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun",
- VNIC_ENV_GET64(&vnic, async_eq_overrun));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun",
- VNIC_ENV_GET64(&vnic, cq_overrun));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command",
- VNIC_ENV_GET64(&vnic, invalid_command));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command",
- VNIC_ENV_GET64(&vnic, quota_exceeded_command));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
- VNIC_ENV_GET64(&vnic, nic_receive_steering_discard));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
- VNIC_ENV_GET64(&vnic, generated_pkt_steering_fail));
- if (err)
- return err;
-
- err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
- VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
- if (err)
- return err;
+ if (MLX5_CAP_GEN(dev, vnic_env_queue_counters)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "total_error_queues",
+ VNIC_ENV_GET(&vnic, total_error_queues));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "send_queue_priority_update_flow",
+ VNIC_ENV_GET(&vnic,
+ send_queue_priority_update_flow));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, eq_overrun_count)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "comp_eq_overrun",
+ VNIC_ENV_GET(&vnic, comp_eq_overrun));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_pair_put(fmsg, "async_eq_overrun",
+ VNIC_ENV_GET(&vnic, async_eq_overrun));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vnic_env_cq_overrun)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "cq_overrun",
+ VNIC_ENV_GET(&vnic, cq_overrun));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, invalid_command_count)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "invalid_command",
+ VNIC_ENV_GET(&vnic, invalid_command));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, quota_exceeded_count)) {
+ err = devlink_fmsg_u32_pair_put(fmsg, "quota_exceeded_command",
+ VNIC_ENV_GET(&vnic, quota_exceeded_command));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, nic_receive_steering_discard)) {
+ err = devlink_fmsg_u64_pair_put(fmsg, "nic_receive_steering_discard",
+ VNIC_ENV_GET64(&vnic,
+ nic_receive_steering_discard));
+ if (err)
+ return err;
+ }
+
+ if (MLX5_CAP_GEN(dev, vnic_env_cnt_steering_fail)) {
+ err = devlink_fmsg_u64_pair_put(fmsg, "generated_pkt_steering_fail",
+ VNIC_ENV_GET64(&vnic,
+ generated_pkt_steering_fail));
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_pair_put(fmsg, "handled_pkt_steering_fail",
+ VNIC_ENV_GET64(&vnic, handled_pkt_steering_fail));
+ if (err)
+ return err;
+ }
err = devlink_fmsg_obj_nest_end(fmsg);
if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
index f0c3464f037f..1730f6a716ee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
@@ -1030,9 +1030,6 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
int out_index;
int err = 0;
- if (!mlx5e_is_eswitch_flow(flow))
- return 0;
-
parse_attr = attr->parse_attr;
esw_attr = attr->esw_attr;
*vf_tun = false;
@@ -1464,10 +1461,12 @@ static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
attr = mlx5e_tc_get_encap_attr(flow);
esw_attr = attr->esw_attr;
- if (flow_flag_test(flow, SLOW))
+ if (flow_flag_test(flow, SLOW)) {
mlx5e_tc_unoffload_from_slow_path(esw, flow);
- else
+ } else {
mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
+ mlx5e_tc_unoffload_flow_post_acts(flow);
+ }
mlx5e_tc_detach_mod_hdr(priv, flow, attr);
attr->modify_hdr = NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 9e8e6184f9e4..ecfe93a479da 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -84,6 +84,8 @@ enum mlx5e_xdp_xmit_mode {
* MLX5E_XDP_XMIT_MODE_XSK:
* none.
*/
+#define MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO 4
+
union mlx5e_xdp_info {
enum mlx5e_xdp_xmit_mode mode;
union {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index d97e6df66f45..b8dd74453655 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -323,8 +323,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
net_prefetch(mxbuf->xdp.data);
prog = rcu_dereference(rq->xdp_prog);
- if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf)))
+ if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
+ if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
+ wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
return NULL; /* page/packet was consumed by XDP */
+ }
/* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
* will be handled by mlx5e_free_rx_wqe.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
index dbe87bf89c0d..832d36be4a17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
@@ -808,9 +808,9 @@ static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upsp
}
if (upspec->sport) {
- MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport,
+ MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_sport,
upspec->sport_mask);
- MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->sport);
+ MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_sport, upspec->sport);
}
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index eab5bc718771..8d995e304869 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -58,7 +58,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
trailer_len = alen + plen + 2;
- pskb_trim(skb, skb->len - trailer_len);
+ ret = pskb_trim(skb, skb->len - trailer_len);
+ if (unlikely(ret))
+ return ret;
if (skb->protocol == htons(ETH_P_IP)) {
ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
ip_send_check(ipv4hdr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index cf704f106b7c..984fa04bd331 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -188,7 +188,6 @@ static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls,
int mlx5e_ktls_init(struct mlx5e_priv *priv)
{
- struct mlx5_crypto_dek_pool *dek_pool;
struct mlx5e_tls *tls;
if (!mlx5e_is_ktls_device(priv->mdev))
@@ -199,12 +198,6 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv)
return -ENOMEM;
tls->mdev = priv->mdev;
- dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY);
- if (IS_ERR(dek_pool)) {
- kfree(tls);
- return PTR_ERR(dek_pool);
- }
- tls->dek_pool = dek_pool;
priv->tls = tls;
mlx5e_tls_debugfs_init(tls, priv->dfs_root);
@@ -222,7 +215,6 @@ void mlx5e_ktls_cleanup(struct mlx5e_priv *priv)
debugfs_remove_recursive(tls->debugfs.dfs);
tls->debugfs.dfs = NULL;
- mlx5_crypto_dek_pool_destroy(tls->dek_pool);
kfree(priv->tls);
priv->tls = NULL;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index efb2cf74ad6a..d61be26a4df1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -908,28 +908,51 @@ static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls,
int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
{
+ struct mlx5_crypto_dek_pool *dek_pool;
struct mlx5e_tls *tls = priv->tls;
+ int err;
+
+ if (!mlx5e_is_ktls_device(priv->mdev))
+ return 0;
+
+ /* DEK pool could be used by either or both of TX and RX. But we have to
+ * put the creation here to avoid syndrome when doing devlink reload.
+ */
+ dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY);
+ if (IS_ERR(dek_pool))
+ return PTR_ERR(dek_pool);
+ tls->dek_pool = dek_pool;
if (!mlx5e_is_ktls_tx(priv->mdev))
return 0;
priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats);
- if (!priv->tls->tx_pool)
- return -ENOMEM;
+ if (!priv->tls->tx_pool) {
+ err = -ENOMEM;
+ goto err_tx_pool_init;
+ }
mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs);
return 0;
+
+err_tx_pool_init:
+ mlx5_crypto_dek_pool_destroy(dek_pool);
+ return err;
}
void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
{
if (!mlx5e_is_ktls_tx(priv->mdev))
- return;
+ goto dek_pool_destroy;
debugfs_remove_recursive(priv->tls->debugfs.dfs_tx);
priv->tls->debugfs.dfs_tx = NULL;
mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool);
priv->tls->tx_pool = NULL;
+
+dek_pool_destroy:
+ if (mlx5e_is_ktls_device(priv->mdev))
+ mlx5_crypto_dek_pool_destroy(priv->tls->dek_pool);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
index 7fc901a6ec5f..414e28584881 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
@@ -161,6 +161,7 @@ static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
if (!in) {
kfree(ft->g);
+ ft->g = NULL;
return -ENOMEM;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index 933a7772a7a3..5aa51d74f8b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -135,6 +135,16 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs);
int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
{
+ /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile
+ * cleanup_rx callback and it is not recreated when
+ * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering()
+ * is not called by the uplink_rep profile init_rx callback. Thus, if
+ * ntuple is set, moving to switchdev flow will enter this function
+ * with fs->arfs nullified.
+ */
+ if (!mlx5e_fs_get_arfs(fs))
+ return 0;
+
arfs_del_rules(fs);
return arfs_disable(fs);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index defb1efccb78..f7b494125eee 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s
return err;
}
-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq)
+{
+ struct mlx5_cqwq *cqwq = &rq->cq.wq;
+ struct mlx5_cqe64 *cqe;
+
+ if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) {
+ while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq)))
+ mlx5_cqwq_pop(cqwq);
+ } else {
+ while ((cqe = mlx5_cqwq_get_cqe(cqwq)))
+ mlx5_cqwq_pop(cqwq);
+ }
+
+ mlx5_cqwq_update_db_record(cqwq);
+}
+
+int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
{
struct net_device *dev = rq->netdev;
int err;
@@ -1046,6 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
return err;
}
+
+ mlx5e_free_rx_descs(rq);
+ mlx5e_flush_rq_cq(rq);
+
err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
if (err) {
netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
@@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
return 0;
}
-int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
-{
- mlx5e_free_rx_descs(rq);
-
- return mlx5e_rq_to_ready(rq, curr_state);
-}
-
static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
{
struct mlx5_core_dev *mdev = rq->mdev;
@@ -1285,11 +1298,13 @@ static int mlx5e_alloc_xdpsq_fifo(struct mlx5e_xdpsq *sq, int numa)
{
struct mlx5e_xdp_info_fifo *xdpi_fifo = &sq->db.xdpi_fifo;
int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
- int entries = wq_sz * MLX5_SEND_WQEBB_NUM_DS * 2; /* upper bound for maximum num of
- * entries of all xmit_modes.
- */
+ int entries;
size_t size;
+ /* upper bound for maximum num of entries of all xmit_modes. */
+ entries = roundup_pow_of_two(wq_sz * MLX5_SEND_WQEBB_NUM_DS *
+ MLX5E_XDP_FIFO_ENTRIES2DS_MAX_RATIO);
+
size = array_size(sizeof(*xdpi_fifo->xi), entries);
xdpi_fifo->xi = kvzalloc_node(size, GFP_KERNEL, numa);
if (!xdpi_fifo->xi)
@@ -5253,6 +5268,7 @@ void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
struct net_device *netdev)
{
+ const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED;
struct mlx5e_priv *priv = netdev_priv(netdev);
struct mlx5e_flow_steering *fs;
int err;
@@ -5281,9 +5297,19 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
mlx5_core_err(mdev, "TLS initialization failed, %d\n", err);
mlx5e_health_create_reporters(priv);
+
+ /* If netdev is already registered (e.g. move from uplink to nic profile),
+ * RTNL lock must be held before triggering netdev notifiers.
+ */
+ if (take_rtnl)
+ rtnl_lock();
+
/* update XDP supported features */
mlx5e_set_xdp_feature(netdev);
+ if (take_rtnl)
+ rtnl_unlock();
+
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 152b62138450..99b3843396f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1012,7 +1012,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
if (err) {
mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
- return err;
+ goto err_rx_res_free;
}
err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
@@ -1046,6 +1046,7 @@ err_destroy_rx_res:
mlx5e_rx_res_destroy(priv->rx_res);
err_close_drop_rq:
mlx5e_close_drop_rq(&priv->drop_rq);
+err_rx_res_free:
mlx5e_rx_res_free(priv->rx_res);
priv->rx_res = NULL;
err_free_fs:
@@ -1159,6 +1160,10 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
return err;
}
+ err = mlx5e_rep_neigh_init(rpriv);
+ if (err)
+ goto err_neigh_init;
+
if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
err = mlx5e_init_uplink_rep_tx(rpriv);
if (err)
@@ -1175,6 +1180,8 @@ err_ht_init:
if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
mlx5e_cleanup_uplink_rep_tx(rpriv);
err_init_tx:
+ mlx5e_rep_neigh_cleanup(rpriv);
+err_neigh_init:
mlx5e_destroy_tises(priv);
return err;
}
@@ -1188,22 +1195,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
mlx5e_cleanup_uplink_rep_tx(rpriv);
+ mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_destroy_tises(priv);
}
static void mlx5e_rep_enable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
mlx5e_set_netdev_mtu_boundaries(priv);
- mlx5e_rep_neigh_init(rpriv);
}
static void mlx5e_rep_disable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
- mlx5e_rep_neigh_cleanup(rpriv);
}
static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
@@ -1253,7 +1255,6 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event
static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct net_device *netdev = priv->netdev;
struct mlx5_core_dev *mdev = priv->mdev;
u16 max_mtu;
@@ -1275,7 +1276,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
mlx5_notifier_register(mdev, &priv->events_nb);
mlx5e_dcbnl_initialize(priv);
mlx5e_dcbnl_init_app(priv);
- mlx5e_rep_neigh_init(rpriv);
mlx5e_rep_bridge_init(priv);
netdev->wanted_features |= NETIF_F_HW_TC;
@@ -1290,7 +1290,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
{
- struct mlx5e_rep_priv *rpriv = priv->ppriv;
struct mlx5_core_dev *mdev = priv->mdev;
rtnl_lock();
@@ -1300,7 +1299,6 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
rtnl_unlock();
mlx5e_rep_bridge_cleanup(priv);
- mlx5e_rep_neigh_cleanup(rpriv);
mlx5e_dcbnl_delete_app(priv);
mlx5_notifier_unregister(mdev, &priv->events_nb);
mlx5e_rep_tc_disable(priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 8d0a3f69693e..31708d5aa608 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1725,6 +1725,19 @@ verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
return 0;
}
+static bool
+has_encap_dests(struct mlx5_flow_attr *attr)
+{
+ struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
+ int out_index;
+
+ for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
+ if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
+ return true;
+
+ return false;
+}
+
static int
post_process_attr(struct mlx5e_tc_flow *flow,
struct mlx5_flow_attr *attr,
@@ -1737,9 +1750,11 @@ post_process_attr(struct mlx5e_tc_flow *flow,
if (err)
goto err_out;
- err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
- if (err)
- goto err_out;
+ if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) {
+ err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
+ if (err)
+ goto err_out;
+ }
if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
@@ -1928,9 +1943,7 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
{
struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
struct mlx5_flow_attr *attr = flow->attr;
- struct mlx5_esw_flow_attr *esw_attr;
- esw_attr = attr->esw_attr;
mlx5e_put_flow_tunnel_id(flow);
remove_unready_flow(flow);
@@ -1951,12 +1964,6 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
- if (esw_attr->int_port)
- mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
-
- if (esw_attr->dest_int_port)
- mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
-
if (flow_flag_test(flow, L3_TO_L2_DECAP))
mlx5e_detach_decap(priv, flow);
@@ -4253,6 +4260,7 @@ static void
mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
{
struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
+ struct mlx5_esw_flow_attr *esw_attr;
if (!attr)
return;
@@ -4270,6 +4278,18 @@ mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *a
mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
}
+ if (mlx5e_is_eswitch_flow(flow)) {
+ esw_attr = attr->esw_attr;
+
+ if (esw_attr->int_port)
+ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+ esw_attr->int_port);
+
+ if (esw_attr->dest_int_port)
+ mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(flow->priv),
+ esw_attr->dest_int_port);
+ }
+
mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
free_branch_attr(flow, attr->branch_true);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c
index b6a45eff28f5..dbd7cbe6cbf3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_debugfs.c
@@ -64,7 +64,7 @@ void mlx5_esw_bridge_debugfs_init(struct net_device *br_netdev, struct mlx5_esw_
bridge->debugfs_dir = debugfs_create_dir(br_netdev->name,
bridge->br_offloads->debugfs_root);
- debugfs_create_file("fdb", 0444, bridge->debugfs_dir, bridge,
+ debugfs_create_file("fdb", 0400, bridge->debugfs_dir, bridge,
&mlx5_esw_bridge_debugfs_fops);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
index af779c700278..fdf2be548e85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c
@@ -60,7 +60,7 @@ static struct devlink_port *mlx5_esw_dl_port_alloc(struct mlx5_eswitch *esw, u16
} else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) {
memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len);
dl_port->attrs.switch_id.id_len = ppid.id_len;
- devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum,
+ devlink_port_attrs_pci_vf_set(dl_port, 0, pfnum,
vport_num - 1, false);
}
return dl_port;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index bdfe609cc9ec..e59380ee1ead 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1436,7 +1436,6 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
esw_init_chains_offload_flags(esw, &attr.flags);
attr.ns = MLX5_FLOW_NAMESPACE_FDB;
- attr.fs_base_prio = FDB_TC_OFFLOAD;
attr.max_grp_num = esw->params.large_group_num;
attr.default_ft = miss_fdb;
attr.mapping = esw->offloads.reg_c0_obj_pool;
@@ -2779,9 +2778,9 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
struct mlx5_eswitch *peer_esw,
bool pair)
{
- u8 peer_idx = mlx5_get_dev_index(peer_esw->dev);
+ u16 peer_vhca_id = MLX5_CAP_GEN(peer_esw->dev, vhca_id);
+ u16 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
struct mlx5_flow_root_namespace *peer_ns;
- u8 idx = mlx5_get_dev_index(esw->dev);
struct mlx5_flow_root_namespace *ns;
int err;
@@ -2789,18 +2788,18 @@ static int mlx5_esw_offloads_set_ns_peer(struct mlx5_eswitch *esw,
ns = esw->dev->priv.steering->fdb_root_ns;
if (pair) {
- err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_idx);
+ err = mlx5_flow_namespace_set_peer(ns, peer_ns, peer_vhca_id);
if (err)
return err;
- err = mlx5_flow_namespace_set_peer(peer_ns, ns, idx);
+ err = mlx5_flow_namespace_set_peer(peer_ns, ns, vhca_id);
if (err) {
- mlx5_flow_namespace_set_peer(ns, NULL, peer_idx);
+ mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id);
return err;
}
} else {
- mlx5_flow_namespace_set_peer(ns, NULL, peer_idx);
- mlx5_flow_namespace_set_peer(peer_ns, NULL, idx);
+ mlx5_flow_namespace_set_peer(ns, NULL, peer_vhca_id);
+ mlx5_flow_namespace_set_peer(peer_ns, NULL, vhca_id);
}
return 0;
@@ -4196,7 +4195,7 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
}
hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
- MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1);
+ MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable);
err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport,
MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 91dcb0dcad10..244cfd470903 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -140,7 +140,7 @@ static void mlx5_cmd_stub_modify_header_dealloc(struct mlx5_flow_root_namespace
static int mlx5_cmd_stub_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns,
- u8 peer_idx)
+ u16 peer_vhca_id)
{
return 0;
}
@@ -245,12 +245,20 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
mlx5_lag_is_shared_fdb(dev) &&
mlx5_lag_is_master(dev)) {
struct mlx5_core_dev *peer_dev;
- int i;
+ int i, j;
mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
err = mlx5_cmd_set_slave_root_fdb(dev, peer_dev, !disconnect,
(!disconnect) ? ft->id : 0);
if (err && !disconnect) {
+ mlx5_lag_for_each_peer_mdev(dev, peer_dev, j) {
+ if (j < i)
+ mlx5_cmd_set_slave_root_fdb(dev, peer_dev, 1,
+ ns->root_ft->id);
+ else
+ break;
+ }
+
MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
MLX5_SET(set_flow_table_root_in, in, table_id,
ns->root_ft->id);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index b6b9a5a20591..7790ae5531e1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -94,7 +94,7 @@ struct mlx5_flow_cmds {
int (*set_peer)(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns,
- u8 peer_idx);
+ u16 peer_vhca_id);
int (*create_ns)(struct mlx5_flow_root_namespace *ns);
int (*destroy_ns)(struct mlx5_flow_root_namespace *ns);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 4ef04aa28771..6b069fa411c5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -889,7 +889,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
struct fs_node *iter = list_entry(start, struct fs_node, list);
struct mlx5_flow_table *ft = NULL;
- if (!root || root->type == FS_TYPE_PRIO_CHAINS)
+ if (!root)
return NULL;
list_for_each_advance_continue(iter, &root->children, reverse) {
@@ -905,20 +905,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node *root,
return ft;
}
-/* If reverse is false then return the first flow table in next priority of
- * prio in the tree, else return the last flow table in the previous priority
- * of prio in the tree.
+static struct fs_node *find_prio_chains_parent(struct fs_node *parent,
+ struct fs_node **child)
+{
+ struct fs_node *node = NULL;
+
+ while (parent && parent->type != FS_TYPE_PRIO_CHAINS) {
+ node = parent;
+ parent = parent->parent;
+ }
+
+ if (child)
+ *child = node;
+
+ return parent;
+}
+
+/* If reverse is false then return the first flow table next to the passed node
+ * in the tree, else return the last flow table before the node in the tree.
+ * If skip is true, skip the flow tables in the same prio_chains prio.
*/
-static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
+static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse,
+ bool skip)
{
+ struct fs_node *prio_chains_parent = NULL;
struct mlx5_flow_table *ft = NULL;
struct fs_node *curr_node;
struct fs_node *parent;
- parent = prio->node.parent;
- curr_node = &prio->node;
+ if (skip)
+ prio_chains_parent = find_prio_chains_parent(node, NULL);
+ parent = node->parent;
+ curr_node = node;
while (!ft && parent) {
- ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
+ if (parent != prio_chains_parent)
+ ft = find_closest_ft_recursive(parent, &curr_node->list,
+ reverse);
curr_node = parent;
parent = curr_node->parent;
}
@@ -926,15 +948,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers
}
/* Assuming all the tree is locked by mutex chain lock */
-static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
+static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node)
{
- return find_closest_ft(prio, false);
+ return find_closest_ft(node, false, true);
}
/* Assuming all the tree is locked by mutex chain lock */
-static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
+static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node)
{
- return find_closest_ft(prio, true);
+ return find_closest_ft(node, true, true);
}
static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
@@ -946,7 +968,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
- return find_next_chained_ft(prio);
+ return find_next_chained_ft(&prio->node);
}
static int connect_fts_in_prio(struct mlx5_core_dev *dev,
@@ -970,21 +992,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
return 0;
}
+static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node,
+ struct fs_node *parent,
+ struct fs_node **child,
+ bool reverse)
+{
+ struct mlx5_flow_table *ft;
+
+ ft = find_closest_ft(node, reverse, false);
+
+ if (ft && parent == find_prio_chains_parent(&ft->node, child))
+ return ft;
+
+ return NULL;
+}
+
/* Connect flow tables from previous priority of prio to ft */
static int connect_prev_fts(struct mlx5_core_dev *dev,
struct mlx5_flow_table *ft,
struct fs_prio *prio)
{
+ struct fs_node *prio_parent, *parent = NULL, *child, *node;
struct mlx5_flow_table *prev_ft;
+ int err = 0;
+
+ prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+ /* return directly if not under the first sub ns of prio_chains prio */
+ if (prio_parent && !list_is_first(&child->list, &prio_parent->children))
+ return 0;
- prev_ft = find_prev_chained_ft(prio);
- if (prev_ft) {
+ prev_ft = find_prev_chained_ft(&prio->node);
+ while (prev_ft) {
struct fs_prio *prev_prio;
fs_get_obj(prev_prio, prev_ft->node.parent);
- return connect_fts_in_prio(dev, prev_prio, ft);
+ err = connect_fts_in_prio(dev, prev_prio, ft);
+ if (err)
+ break;
+
+ if (!parent) {
+ parent = find_prio_chains_parent(&prev_prio->node, &child);
+ if (!parent)
+ break;
+ }
+
+ node = child;
+ prev_ft = find_closet_ft_prio_chains(node, parent, &child, true);
}
- return 0;
+ return err;
}
static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
@@ -1123,7 +1179,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table
if (err)
return err;
- next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
+ next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node);
err = connect_fwd_rules(dev, ft, next_ft);
if (err)
return err;
@@ -1198,7 +1254,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
next_ft = unmanaged ? ft_attr->next_ft :
- find_next_chained_ft(fs_prio);
+ find_next_chained_ft(&fs_prio->node);
ft->def_miss_action = ns->def_miss_action;
ft->ns = ns;
err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
@@ -2195,13 +2251,20 @@ EXPORT_SYMBOL(mlx5_del_flow_rules);
/* Assuming prio->node.children(flow tables) is sorted by level */
static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
{
+ struct fs_node *prio_parent, *child;
struct fs_prio *prio;
fs_get_obj(prio, ft->node.parent);
if (!list_is_last(&ft->node.list, &prio->node.children))
return list_next_entry(ft, node.list);
- return find_next_chained_ft(prio);
+
+ prio_parent = find_prio_chains_parent(&prio->node, &child);
+
+ if (prio_parent && list_is_first(&child->list, &prio_parent->children))
+ return find_closest_ft(&prio->node, false, false);
+
+ return find_next_chained_ft(&prio->node);
}
static int update_root_ft_destroy(struct mlx5_flow_table *ft)
@@ -3621,7 +3684,7 @@ void mlx5_destroy_match_definer(struct mlx5_core_dev *dev,
int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns,
- u8 peer_idx)
+ u16 peer_vhca_id)
{
if (peer_ns && ns->mode != peer_ns->mode) {
mlx5_core_err(ns->dev,
@@ -3629,7 +3692,7 @@ int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
return -EINVAL;
}
- return ns->cmds->set_peer(ns, peer_ns, peer_idx);
+ return ns->cmds->set_peer(ns, peer_ns, peer_vhca_id);
}
/* This function should be called only at init stage of the namespace.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 03e64c4c245d..4aed1768b85f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -303,7 +303,7 @@ const struct mlx5_flow_cmds *mlx5_fs_cmd_get_fw_cmds(void);
int mlx5_flow_namespace_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns,
- u8 peer_idx);
+ u16 peer_vhca_id);
int mlx5_flow_namespace_set_mode(struct mlx5_flow_namespace *ns,
enum mlx5_flow_steering_mode mode);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
index d3a3fe4ce670..7d9bbb494d95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c
@@ -574,7 +574,7 @@ static int __mlx5_lag_modify_definers_destinations(struct mlx5_lag *ldev,
for (i = 0; i < ldev->ports; i++) {
for (j = 0; j < ldev->buckets; j++) {
idx = i * ldev->buckets + j;
- if (ldev->v2p_map[i] == ports[i])
+ if (ldev->v2p_map[idx] == ports[idx])
continue;
dest.vport.vhca_id = MLX5_CAP_GEN(ldev->pf[ports[idx] - 1].dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 973babfaff25..377372f0578a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -227,10 +227,15 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
clock = container_of(timer, struct mlx5_clock, timer);
mdev = container_of(clock, struct mlx5_core_dev, clock);
+ if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+ goto out;
+
write_seqlock_irqsave(&clock->lock, flags);
timecounter_read(&timer->tc);
mlx5_update_clock_info_page(mdev);
write_sequnlock_irqrestore(&clock->lock, flags);
+
+out:
schedule_delayed_work(&timer->overflow_work, timer->overflow_period);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
index db9df9798ffa..a80ecb672f33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
@@ -178,7 +178,7 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
if (!mlx5_chains_ignore_flow_level_supported(chains) ||
(chain == 0 && prio == 1 && level == 0)) {
ft_attr.level = chains->fs_base_level;
- ft_attr.prio = chains->fs_base_prio;
+ ft_attr.prio = chains->fs_base_prio + prio - 1;
ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
mlx5_get_fdb_sub_ns(chains->dev, chain) :
mlx5_get_flow_namespace(chains->dev, chains->ns);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 88dbea6631d5..72ae560a1c68 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1506,6 +1506,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
mlx5_core_warn(dev, "%s: interface is down, NOP\n",
__func__);
+ mlx5_devlink_params_unregister(priv_to_devlink(dev));
mlx5_cleanup_once(dev);
goto out;
}
@@ -1988,7 +1989,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev,
mlx5_enter_error_state(dev, false);
mlx5_error_sw_reset(dev);
- mlx5_unload_one(dev, true);
+ mlx5_unload_one(dev, false);
mlx5_drain_health_wq(dev);
mlx5_pci_disable_device(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index c4be257c043d..682d3dc00dd1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -361,7 +361,7 @@ static inline bool mlx5_core_is_ec_vf_vport(const struct mlx5_core_dev *dev, u16
static inline int mlx5_vport_to_func_id(const struct mlx5_core_dev *dev, u16 vport, bool ec_vf_func)
{
- return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev)
+ return ec_vf_func ? vport - mlx5_core_ec_vf_vport_base(dev) + 1
: vport;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
index 4e42a3b9b8ee..a2fc937d5461 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c
@@ -285,8 +285,7 @@ static u16 mlx5_get_max_vfs(struct mlx5_core_dev *dev)
host_total_vfs = MLX5_GET(query_esw_functions_out, out,
host_params_context.host_total_vfs);
kvfree(out);
- if (host_total_vfs)
- return host_total_vfs;
+ return host_total_vfs;
}
done:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index e739ec6cdf90..54bb0866ed72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -2079,7 +2079,7 @@ mlx5dr_action_create_dest_vport(struct mlx5dr_domain *dmn,
peer_vport = vhca_id_valid && mlx5_core_is_pf(dmn->mdev) &&
(vhca_id != dmn->info.caps.gvmi);
- vport_dmn = peer_vport ? dmn->peer_dmn[vhca_id] : dmn;
+ vport_dmn = peer_vport ? xa_load(&dmn->peer_dmn_xa, vhca_id) : dmn;
if (!vport_dmn) {
mlx5dr_dbg(dmn, "No peer vport domain for given vhca_id\n");
return NULL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 7491911ebcb5..8c2a34a0d6be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -564,11 +564,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
if (err)
- return err;
+ goto err_free_in;
*reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
- kvfree(in);
+err_free_in:
+ kvfree(in);
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 75dc85dc24ef..3d74109f8230 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -475,6 +475,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
mutex_init(&dmn->info.rx.mutex);
mutex_init(&dmn->info.tx.mutex);
xa_init(&dmn->definers_xa);
+ xa_init(&dmn->peer_dmn_xa);
if (dr_domain_caps_init(mdev, dmn)) {
mlx5dr_err(dmn, "Failed init domain, no caps\n");
@@ -507,6 +508,7 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
uninit_caps:
dr_domain_caps_uninit(dmn);
def_xa_destroy:
+ xa_destroy(&dmn->peer_dmn_xa);
xa_destroy(&dmn->definers_xa);
kfree(dmn);
return NULL;
@@ -547,6 +549,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
dr_domain_uninit_csum_recalc_fts(dmn);
dr_domain_uninit_resources(dmn);
dr_domain_caps_uninit(dmn);
+ xa_destroy(&dmn->peer_dmn_xa);
xa_destroy(&dmn->definers_xa);
mutex_destroy(&dmn->info.tx.mutex);
mutex_destroy(&dmn->info.rx.mutex);
@@ -556,17 +559,21 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn,
- u8 peer_idx)
+ u16 peer_vhca_id)
{
+ struct mlx5dr_domain *peer;
+
mlx5dr_domain_lock(dmn);
- if (dmn->peer_dmn[peer_idx])
- refcount_dec(&dmn->peer_dmn[peer_idx]->refcount);
+ peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id);
+ if (peer)
+ refcount_dec(&peer->refcount);
- dmn->peer_dmn[peer_idx] = peer_dmn;
+ WARN_ON(xa_err(xa_store(&dmn->peer_dmn_xa, peer_vhca_id, peer_dmn, GFP_KERNEL)));
- if (dmn->peer_dmn[peer_idx])
- refcount_inc(&dmn->peer_dmn[peer_idx]->refcount);
+ peer = xa_load(&dmn->peer_dmn_xa, peer_vhca_id);
+ if (peer)
+ refcount_inc(&peer->refcount);
mlx5dr_domain_unlock(dmn);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
index d6947fe13d56..8ca534ef5d03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ptrn.c
@@ -82,7 +82,7 @@ dr_ptrn_alloc_pattern(struct mlx5dr_ptrn_mgr *mgr,
u32 chunk_size;
u32 index;
- chunk_size = ilog2(num_of_actions);
+ chunk_size = ilog2(roundup_pow_of_two(num_of_actions));
/* HW modify action index granularity is at least 64B */
chunk_size = max_t(u32, chunk_size, DR_CHUNK_SIZE_8);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
index 69d7a8f3c402..f708b029425a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v0.c
@@ -1652,17 +1652,18 @@ dr_ste_v0_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
struct mlx5dr_domain *dmn = sb->dmn;
struct mlx5dr_domain *vport_dmn;
u8 *bit_mask = sb->bit_mask;
+ struct mlx5dr_domain *peer;
bool source_gvmi_set;
DR_STE_SET_TAG(src_gvmi_qp, tag, source_qp, misc, source_sqn);
if (sb->vhca_id_valid) {
+ peer = xa_load(&dmn->peer_dmn_xa, id);
/* Find port GVMI based on the eswitch_owner_vhca_id */
if (id == dmn->info.caps.gvmi)
vport_dmn = dmn;
- else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] &&
- (id == dmn->peer_dmn[id]->info.caps.gvmi))
- vport_dmn = dmn->peer_dmn[id];
+ else if (peer && (id == peer->info.caps.gvmi))
+ vport_dmn = peer;
else
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
index f4ef0b22b991..dd856cde188d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
@@ -1984,16 +1984,17 @@ static int dr_ste_v1_build_src_gvmi_qpn_tag(struct mlx5dr_match_param *value,
struct mlx5dr_domain *dmn = sb->dmn;
struct mlx5dr_domain *vport_dmn;
u8 *bit_mask = sb->bit_mask;
+ struct mlx5dr_domain *peer;
DR_STE_SET_TAG(src_gvmi_qp_v1, tag, source_qp, misc, source_sqn);
if (sb->vhca_id_valid) {
+ peer = xa_load(&dmn->peer_dmn_xa, id);
/* Find port GVMI based on the eswitch_owner_vhca_id */
if (id == dmn->info.caps.gvmi)
vport_dmn = dmn;
- else if (id < MLX5_MAX_PORTS && dmn->peer_dmn[id] &&
- (id == dmn->peer_dmn[id]->info.caps.gvmi))
- vport_dmn = dmn->peer_dmn[id];
+ else if (peer && (id == peer->info.caps.gvmi))
+ vport_dmn = peer;
else
return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 1622dbbe6b97..6c59de3e28f6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -935,7 +935,6 @@ struct mlx5dr_domain_info {
};
struct mlx5dr_domain {
- struct mlx5dr_domain *peer_dmn[MLX5_MAX_PORTS];
struct mlx5_core_dev *mdev;
u32 pdn;
struct mlx5_uars_page *uar;
@@ -956,6 +955,7 @@ struct mlx5dr_domain {
struct list_head dbg_tbl_list;
struct mlx5dr_dbg_dump_info dump_info;
struct xarray definers_xa;
+ struct xarray peer_dmn_xa;
/* memory management statistics */
u32 num_buddies[DR_ICM_TYPE_MAX];
};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 6aac5f006bf8..feb307fb3440 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -781,14 +781,14 @@ restore_fte:
static int mlx5_cmd_dr_set_peer(struct mlx5_flow_root_namespace *ns,
struct mlx5_flow_root_namespace *peer_ns,
- u8 peer_idx)
+ u16 peer_vhca_id)
{
struct mlx5dr_domain *peer_domain = NULL;
if (peer_ns)
peer_domain = peer_ns->fs_dr_domain.dr_domain;
mlx5dr_domain_set_peer(ns->fs_dr_domain.dr_domain,
- peer_domain, peer_idx);
+ peer_domain, peer_vhca_id);
return 0;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
index 24cbb33ecd6c..89fced86936f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/mlx5dr.h
@@ -49,7 +49,7 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags);
void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
struct mlx5dr_domain *peer_dmn,
- u8 peer_idx);
+ u16 peer_vhca_id);
struct mlx5dr_table *
mlx5dr_table_create(struct mlx5dr_domain *domain, u32 level, u32 flags,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
index f0b2963ebac3..973de2adc943 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_acl_flex_keys.c
@@ -32,8 +32,8 @@ static const struct mlxsw_afk_element_info mlxsw_afk_element_infos[] = {
MLXSW_AFK_ELEMENT_INFO_U32(IP_TTL_, 0x18, 0, 8),
MLXSW_AFK_ELEMENT_INFO_U32(IP_ECN, 0x18, 9, 2),
MLXSW_AFK_ELEMENT_INFO_U32(IP_DSCP, 0x18, 11, 6),
- MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 3),
- MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 20, 8),
+ MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_MSB, 0x18, 17, 4),
+ MLXSW_AFK_ELEMENT_INFO_U32(VIRT_ROUTER_LSB, 0x18, 21, 8),
MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_96_127, 0x20, 4),
MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_64_95, 0x24, 4),
MLXSW_AFK_ELEMENT_INFO_BUF(SRC_IP_32_63, 0x28, 4),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index c968309657dd..51eea1f0529c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -517,11 +517,15 @@ static void mlxsw_pci_skb_cb_ts_set(struct mlxsw_pci *mlxsw_pci,
struct sk_buff *skb,
enum mlxsw_pci_cqe_v cqe_v, char *cqe)
{
+ u8 ts_type;
+
if (cqe_v != MLXSW_PCI_CQE_V2)
return;
- if (mlxsw_pci_cqe2_time_stamp_type_get(cqe) !=
- MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC)
+ ts_type = mlxsw_pci_cqe2_time_stamp_type_get(cqe);
+
+ if (ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_UTC &&
+ ts_type != MLXSW_PCI_CQE_TIME_STAMP_TYPE_MIRROR_UTC)
return;
mlxsw_skb_cb(skb)->cqe_ts.sec = mlxsw_pci_cqe2_time_stamp_sec_get(cqe);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 8165bf31a99a..17160e867bef 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -97,14 +97,6 @@ MLXSW_ITEM32(reg, sspr, m, 0x00, 31, 1);
*/
MLXSW_ITEM32_LP(reg, sspr, 0x00, 16, 0x00, 12);
-/* reg_sspr_sub_port
- * Virtual port within the physical port.
- * Should be set to 0 when virtual ports are not enabled on the port.
- *
- * Access: RW
- */
-MLXSW_ITEM32(reg, sspr, sub_port, 0x00, 8, 8);
-
/* reg_sspr_system_port
* Unique identifier within the stacking domain that represents all the ports
* that are available in the system (external ports).
@@ -120,7 +112,6 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u16 local_port)
MLXSW_REG_ZERO(sspr, payload);
mlxsw_reg_sspr_m_set(payload, 1);
mlxsw_reg_sspr_local_port_set(payload, local_port);
- mlxsw_reg_sspr_sub_port_set(payload, 0);
mlxsw_reg_sspr_system_port_set(payload, local_port);
}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
index e4f4cded2b6f..b1178b7a7f51 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
@@ -193,7 +193,7 @@ mlxsw_sp2_mr_tcam_rule_parse(struct mlxsw_sp_acl_rule *rule,
key->vrid, GENMASK(7, 0));
mlxsw_sp_acl_rulei_keymask_u32(rulei,
MLXSW_AFK_ELEMENT_VIRT_ROUTER_MSB,
- key->vrid >> 8, GENMASK(2, 0));
+ key->vrid >> 8, GENMASK(3, 0));
switch (key->proto) {
case MLXSW_SP_L3_PROTO_IPV4:
return mlxsw_sp2_mr_tcam_rule_parse4(rulei, key);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
index 4dea39f2b304..ae2d6f12b799 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_keys.c
@@ -171,7 +171,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_2[] = {
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4[] = {
MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 24, 8),
- MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x00, 0, 3),
+ MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x00, 0, 3, 0, true),
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_0[] = {
@@ -321,7 +321,7 @@ static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_mac_5b[] = {
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv4_4b[] = {
MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_LSB, 0x04, 13, 8),
- MLXSW_AFK_ELEMENT_INST_EXT_U32(VIRT_ROUTER_MSB, 0x04, 21, 4, 0, true),
+ MLXSW_AFK_ELEMENT_INST_U32(VIRT_ROUTER_MSB, 0x04, 21, 4),
};
static struct mlxsw_afk_element_inst mlxsw_sp_afk_element_info_ipv6_2b[] = {
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index a499e460594b..c2ad0921e893 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -8,6 +8,7 @@
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/mm.h>
+#include <linux/pci.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
@@ -2345,9 +2346,12 @@ int mana_attach(struct net_device *ndev)
static int mana_dealloc_queues(struct net_device *ndev)
{
struct mana_port_context *apc = netdev_priv(ndev);
+ unsigned long timeout = jiffies + 120 * HZ;
struct gdma_dev *gd = apc->ac->gdma_dev;
struct mana_txq *txq;
+ struct sk_buff *skb;
int i, err;
+ u32 tsleep;
if (apc->port_is_up)
return -EINVAL;
@@ -2363,15 +2367,40 @@ static int mana_dealloc_queues(struct net_device *ndev)
* to false, but it doesn't matter since mana_start_xmit() drops any
* new packets due to apc->port_is_up being false.
*
- * Drain all the in-flight TX packets
+ * Drain all the in-flight TX packets.
+ * A timeout of 120 seconds for all the queues is used.
+ * This will break the while loop when h/w is not responding.
+ * This value of 120 has been decided here considering max
+ * number of queues.
*/
+
for (i = 0; i < apc->num_queues; i++) {
txq = &apc->tx_qp[i].txq;
-
- while (atomic_read(&txq->pending_sends) > 0)
- usleep_range(1000, 2000);
+ tsleep = 1000;
+ while (atomic_read(&txq->pending_sends) > 0 &&
+ time_before(jiffies, timeout)) {
+ usleep_range(tsleep, tsleep + 1000);
+ tsleep <<= 1;
+ }
+ if (atomic_read(&txq->pending_sends)) {
+ err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
+ if (err) {
+ netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
+ err, atomic_read(&txq->pending_sends),
+ txq->gdma_txq_id);
+ }
+ break;
+ }
}
+ for (i = 0; i < apc->num_queues; i++) {
+ txq = &apc->tx_qp[i].txq;
+ while ((skb = skb_dequeue(&txq->pending_skbs))) {
+ mana_unmap_skb(skb, apc);
+ dev_kfree_skb_any(skb);
+ }
+ atomic_set(&txq->pending_sends, 0);
+ }
/* We're 100% sure the queues can no longer be woken up, because
* we're sure now mana_poll_tx_cq() can't be running.
*/
diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c
index 8e3894cf5f7c..83a3ce0c568e 100644
--- a/drivers/net/ethernet/mscc/ocelot_fdma.c
+++ b/drivers/net/ethernet/mscc/ocelot_fdma.c
@@ -368,7 +368,8 @@ static bool ocelot_fdma_receive_skb(struct ocelot *ocelot, struct sk_buff *skb)
if (unlikely(!ndev))
return false;
- pskb_trim(skb, skb->len - ETH_FCS_LEN);
+ if (pskb_trim(skb, skb->len - ETH_FCS_LEN))
+ return false;
skb->dev = ndev;
skb->protocol = eth_type_trans(skb, skb->dev);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 612b0015dc43..432fb93aa801 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1817,6 +1817,7 @@ static int ionic_change_mtu(struct net_device *netdev, int new_mtu)
static void ionic_tx_timeout_work(struct work_struct *ws)
{
struct ionic_lif *lif = container_of(ws, struct ionic_lif, tx_timeout_work);
+ int err;
if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
return;
@@ -1829,8 +1830,11 @@ static void ionic_tx_timeout_work(struct work_struct *ws)
mutex_lock(&lif->queue_lock);
ionic_stop_queues_reconfig(lif);
- ionic_start_queues_reconfig(lif);
+ err = ionic_start_queues_reconfig(lif);
mutex_unlock(&lif->queue_lock);
+
+ if (err)
+ dev_err(lif->ionic->dev, "%s: Restarting queues failed\n", __func__);
}
static void ionic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
@@ -2800,17 +2804,22 @@ static int ionic_cmb_reconfig(struct ionic_lif *lif,
if (err) {
dev_err(lif->ionic->dev,
"CMB restore failed: %d\n", err);
- goto errout;
+ goto err_out;
}
}
- ionic_start_queues_reconfig(lif);
- } else {
- /* This was detached in ionic_stop_queues_reconfig() */
- netif_device_attach(lif->netdev);
+ err = ionic_start_queues_reconfig(lif);
+ if (err) {
+ dev_err(lif->ionic->dev,
+ "CMB reconfig failed: %d\n", err);
+ goto err_out;
+ }
}
-errout:
+err_out:
+ /* This was detached in ionic_stop_queues_reconfig() */
+ netif_device_attach(lif->netdev);
+
return err;
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
index f8682356d0cf..94d4f9413ab7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
@@ -194,6 +194,22 @@ void qed_hw_remove(struct qed_dev *cdev);
struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
/**
+ * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
+ * atomicy.
+ *
+ * @p_hwfn: HW device data.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: struct qed_ptt.
+ *
+ * Should be called at the entry point to the driver
+ * (at the beginning of an exported function).
+ */
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
+ bool is_atomic);
+
+/**
* qed_ptt_release(): Release PTT Window.
*
* @p_hwfn: HW device data.
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
index 3764190b948e..04602ac94708 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
@@ -693,13 +693,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
}
static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
- struct qed_fcoe_stats *p_stats)
+ struct qed_fcoe_stats *p_stats,
+ bool is_atomic)
{
struct qed_ptt *p_ptt;
memset(p_stats, 0, sizeof(*p_stats));
- p_ptt = qed_ptt_acquire(p_hwfn);
+ p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
if (!p_ptt) {
DP_ERR(p_hwfn, "Failed to acquire ptt\n");
@@ -973,19 +974,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
QED_SPQ_MODE_EBLOCK, NULL);
}
+static int qed_fcoe_stats_context(struct qed_dev *cdev,
+ struct qed_fcoe_stats *stats,
+ bool is_atomic)
+{
+ return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
+}
+
static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
{
- return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats);
+ return qed_fcoe_stats_context(cdev, stats, false);
}
void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats)
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic)
{
struct qed_fcoe_stats proto_stats;
/* Retrieve FW statistics */
memset(&proto_stats, 0, sizeof(proto_stats));
- if (qed_fcoe_stats(cdev, &proto_stats)) {
+ if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
DP_VERBOSE(cdev, QED_MSG_STORAGE,
"Failed to collect FCoE statistics\n");
return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
index 19c85adf4ceb..214e8299ecb4 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
@@ -28,8 +28,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
void qed_fcoe_setup(struct qed_hwfn *p_hwfn);
void qed_fcoe_free(struct qed_hwfn *p_hwfn);
+/**
+ * qed_get_protocol_stats_fcoe(): Fills provided statistics
+ * struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: Void.
+ */
void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats);
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic);
#else /* CONFIG_QED_FCOE */
static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
{
@@ -40,7 +52,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}
static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
- struct qed_mcp_fcoe_stats *stats)
+ struct qed_mcp_fcoe_stats *stats,
+ bool is_atomic)
{
}
#endif /* CONFIG_QED_FCOE */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 554f30b0cfd5..6263f847b6b9 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -23,7 +23,10 @@
#include "qed_reg_addr.h"
#include "qed_sriov.h"
-#define QED_BAR_ACQUIRE_TIMEOUT 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP 1000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT 100000
+#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY 10
/* Invalid values */
#define QED_BAR_INVALID_OFFSET (cpu_to_le32(-1))
@@ -85,11 +88,21 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
{
+ return qed_ptt_acquire_context(p_hwfn, false);
+}
+
+struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic)
+{
struct qed_ptt *p_ptt;
- unsigned int i;
+ unsigned int i, count;
+
+ if (is_atomic)
+ count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
+ else
+ count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;
/* Take the free PTT from the list */
- for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
+ for (i = 0; i < count; i++) {
spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
@@ -105,7 +118,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
}
spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
- usleep_range(1000, 2000);
+
+ if (is_atomic)
+ udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
+ else
+ usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
+ QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
}
DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
index 511ab214eb9c..980e7289b481 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -999,13 +999,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
}
static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
- struct qed_iscsi_stats *stats)
+ struct qed_iscsi_stats *stats,
+ bool is_atomic)
{
struct qed_ptt *p_ptt;
memset(stats, 0, sizeof(*stats));
- p_ptt = qed_ptt_acquire(p_hwfn);
+ p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
if (!p_ptt) {
DP_ERR(p_hwfn, "Failed to acquire ptt\n");
return -EAGAIN;
@@ -1336,9 +1337,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
QED_SPQ_MODE_EBLOCK, NULL);
}
+static int qed_iscsi_stats_context(struct qed_dev *cdev,
+ struct qed_iscsi_stats *stats,
+ bool is_atomic)
+{
+ return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
+}
+
static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
{
- return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats);
+ return qed_iscsi_stats_context(cdev, stats, false);
}
static int qed_iscsi_change_mac(struct qed_dev *cdev,
@@ -1358,13 +1366,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
}
void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats)
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic)
{
struct qed_iscsi_stats proto_stats;
/* Retrieve FW statistics */
memset(&proto_stats, 0, sizeof(proto_stats));
- if (qed_iscsi_stats(cdev, &proto_stats)) {
+ if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
DP_VERBOSE(cdev, QED_MSG_STORAGE,
"Failed to collect ISCSI statistics\n");
return;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
index dec2b00259d4..974cb8d26608 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
@@ -39,11 +39,14 @@ void qed_iscsi_free(struct qed_hwfn *p_hwfn);
*
* @cdev: Qed dev pointer.
* @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
*
+ * Context: The function should not sleep in case is_atomic == true.
* Return: Void.
*/
void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats);
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic);
#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
{
@@ -56,7 +59,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}
static inline void
qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
- struct qed_mcp_iscsi_stats *stats) {}
+ struct qed_mcp_iscsi_stats *stats,
+ bool is_atomic) {}
#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 7776d3bdd459..970b9aabbc3d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -1863,7 +1863,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
}
static void _qed_get_vport_stats(struct qed_dev *cdev,
- struct qed_eth_stats *stats)
+ struct qed_eth_stats *stats,
+ bool is_atomic)
{
u8 fw_vport = 0;
int i;
@@ -1872,10 +1873,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
for_each_hwfn(cdev, i) {
struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
- struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
- : NULL;
+ struct qed_ptt *p_ptt;
bool b_get_port_stats;
+ p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
+ : NULL;
if (IS_PF(cdev)) {
/* The main vport index is relative first */
if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
@@ -1901,6 +1903,13 @@ out:
void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
{
+ qed_get_vport_stats_context(cdev, stats, false);
+}
+
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+ struct qed_eth_stats *stats,
+ bool is_atomic)
+{
u32 i;
if (!cdev || cdev->recov_in_prog) {
@@ -1908,7 +1917,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
return;
}
- _qed_get_vport_stats(cdev, stats);
+ _qed_get_vport_stats(cdev, stats, is_atomic);
if (!cdev->reset_stats)
return;
@@ -1960,7 +1969,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
if (!cdev->reset_stats) {
DP_INFO(cdev, "Reset stats not allocated\n");
} else {
- _qed_get_vport_stats(cdev, cdev->reset_stats);
+ _qed_get_vport_stats(cdev, cdev->reset_stats, false);
cdev->reset_stats->common.link_change_count = 0;
}
}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index a538cf478c14..2d2f82c785ad 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -249,8 +249,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
enum spq_mode comp_mode,
struct qed_spq_comp_cb *p_comp_data);
+/**
+ * qed_get_vport_stats(): Fills provided statistics
+ * struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ *
+ * Return: Void.
+ */
void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
+/**
+ * qed_get_vport_stats_context(): Fills provided statistics
+ * struct with statistics.
+ *
+ * @cdev: Qed dev pointer.
+ * @stats: Points to struct that will be filled with statistics.
+ * @is_atomic: Hint from the caller - if the func can sleep or not.
+ *
+ * Context: The function should not sleep in case is_atomic == true.
+ * Return: Void.
+ */
+void qed_get_vport_stats_context(struct qed_dev *cdev,
+ struct qed_eth_stats *stats,
+ bool is_atomic);
+
void qed_reset_vport_stats(struct qed_dev *cdev);
/**
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index f5af83342856..c278f8893042 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -3092,7 +3092,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
switch (type) {
case QED_MCP_LAN_STATS:
- qed_get_vport_stats(cdev, &eth_stats);
+ qed_get_vport_stats_context(cdev, &eth_stats, true);
stats->lan_stats.ucast_rx_pkts =
eth_stats.common.rx_ucast_pkts;
stats->lan_stats.ucast_tx_pkts =
@@ -3100,10 +3100,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
stats->lan_stats.fcs_err = -1;
break;
case QED_MCP_FCOE_STATS:
- qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
+ qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
break;
case QED_MCP_ISCSI_STATS:
- qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
+ qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
break;
default:
DP_VERBOSE(cdev, QED_MSG_SP,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 4b004a728190..99df00c30b8c 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -176,6 +176,15 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
}
#endif
+static int __maybe_unused qede_suspend(struct device *dev)
+{
+ dev_info(dev, "Device does not support suspend operation\n");
+
+ return -EOPNOTSUPP;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(qede_pm_ops, qede_suspend, NULL);
+
static const struct pci_error_handlers qede_err_handler = {
.error_detected = qede_io_error_detected,
};
@@ -190,6 +199,7 @@ static struct pci_driver qede_pci_driver = {
.sriov_configure = qede_sriov_configure,
#endif
.err_handler = &qede_err_handler,
+ .driver.pm = &qede_pm_ops,
};
static struct qed_eth_cb_ops qede_ll_ops = {
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 0d80447d4d3b..d5c688a8d7be 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -1260,8 +1260,11 @@ static int emac_tso_csum(struct emac_adapter *adpt,
if (skb->protocol == htons(ETH_P_IP)) {
u32 pkt_len = ((unsigned char *)ip_hdr(skb) - skb->data)
+ ntohs(ip_hdr(skb)->tot_len);
- if (skb->len > pkt_len)
- pskb_trim(skb, pkt_len);
+ if (skb->len > pkt_len) {
+ ret = pskb_trim(skb, pkt_len);
+ if (unlikely(ret))
+ return ret;
+ }
}
hdr_len = skb_tcp_all_headers(skb);
diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 9445f04f8d48..5eb50b265c0b 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -623,6 +623,7 @@ struct rtl8169_private {
int cfg9346_usage_count;
unsigned supports_gmii:1;
+ unsigned aspm_manageable:1;
dma_addr_t counters_phys_addr;
struct rtl8169_counters *counters;
struct rtl8169_tc_offsets tc_offset;
@@ -2746,7 +2747,15 @@ static void rtl_hw_aspm_clkreq_enable(struct rtl8169_private *tp, bool enable)
if (tp->mac_version < RTL_GIGA_MAC_VER_32)
return;
- if (enable) {
+ /* Don't enable ASPM in the chip if OS can't control ASPM */
+ if (enable && tp->aspm_manageable) {
+ /* On these chip versions ASPM can even harm
+ * bus communication of other PCI devices.
+ */
+ if (tp->mac_version == RTL_GIGA_MAC_VER_42 ||
+ tp->mac_version == RTL_GIGA_MAC_VER_43)
+ return;
+
rtl_mod_config5(tp, 0, ASPM_en);
rtl_mod_config2(tp, 0, ClkReqEn);
@@ -4514,10 +4523,6 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
}
if (napi_schedule_prep(&tp->napi)) {
- rtl_unlock_config_regs(tp);
- rtl_hw_aspm_clkreq_enable(tp, false);
- rtl_lock_config_regs(tp);
-
rtl_irq_disable(tp);
__napi_schedule(&tp->napi);
}
@@ -4577,14 +4582,9 @@ static int rtl8169_poll(struct napi_struct *napi, int budget)
work_done = rtl_rx(dev, tp, budget);
- if (work_done < budget && napi_complete_done(napi, work_done)) {
+ if (work_done < budget && napi_complete_done(napi, work_done))
rtl_irq_enable(tp);
- rtl_unlock_config_regs(tp);
- rtl_hw_aspm_clkreq_enable(tp, true);
- rtl_lock_config_regs(tp);
- }
-
return work_done;
}
@@ -5158,6 +5158,16 @@ done:
rtl_rar_set(tp, mac_addr);
}
+/* register is set if system vendor successfully tested ASPM 1.2 */
+static bool rtl_aspm_is_safe(struct rtl8169_private *tp)
+{
+ if (tp->mac_version >= RTL_GIGA_MAC_VER_61 &&
+ r8168_mac_ocp_read(tp, 0xc0b2) & 0xf)
+ return true;
+
+ return false;
+}
+
static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
{
struct rtl8169_private *tp;
@@ -5227,6 +5237,19 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
xid);
tp->mac_version = chipset;
+ /* Disable ASPM L1 as that cause random device stop working
+ * problems as well as full system hangs for some PCIe devices users.
+ * Chips from RTL8168h partially have issues with L1.2, but seem
+ * to work fine with L1 and L1.1.
+ */
+ if (rtl_aspm_is_safe(tp))
+ rc = 0;
+ else if (tp->mac_version >= RTL_GIGA_MAC_VER_46)
+ rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1_2);
+ else
+ rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1);
+ tp->aspm_manageable = !rc;
+
tp->dash_type = rtl_check_dash(tp);
tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
diff --git a/drivers/net/ethernet/sfc/ef100_nic.c b/drivers/net/ethernet/sfc/ef100_nic.c
index 7adde9639c8a..35d8e9811998 100644
--- a/drivers/net/ethernet/sfc/ef100_nic.c
+++ b/drivers/net/ethernet/sfc/ef100_nic.c
@@ -1194,7 +1194,7 @@ int ef100_probe_netdev_pf(struct efx_nic *efx)
net_dev->features |= NETIF_F_HW_TC;
efx->fixed_features |= NETIF_F_HW_TC;
}
- return rc;
+ return 0;
}
int ef100_probe_vf(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/falcon/selftest.c b/drivers/net/ethernet/sfc/falcon/selftest.c
index 9e5ce2a13787..c3dc88e6c26c 100644
--- a/drivers/net/ethernet/sfc/falcon/selftest.c
+++ b/drivers/net/ethernet/sfc/falcon/selftest.c
@@ -40,15 +40,16 @@
*/
struct ef4_loopback_payload {
char pad[2]; /* Ensures ip is 4-byte aligned */
- struct ethhdr header;
- struct iphdr ip;
- struct udphdr udp;
- __be16 iteration;
- char msg[64];
+ struct_group_attr(packet, __packed,
+ struct ethhdr header;
+ struct iphdr ip;
+ struct udphdr udp;
+ __be16 iteration;
+ char msg[64];
+ );
} __packed __aligned(4);
-#define EF4_LOOPBACK_PAYLOAD_LEN (sizeof(struct ef4_loopback_payload) - \
- offsetof(struct ef4_loopback_payload, \
- header))
+#define EF4_LOOPBACK_PAYLOAD_LEN \
+ sizeof_field(struct ef4_loopback_payload, packet)
/* Loopback test source MAC address */
static const u8 payload_source[ETH_ALEN] __aligned(2) = {
@@ -299,7 +300,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx,
payload = &state->payload;
- memcpy(&received.header, buf_ptr,
+ memcpy(&received.packet, buf_ptr,
min_t(int, pkt_len, EF4_LOOPBACK_PAYLOAD_LEN));
received.ip.saddr = payload->ip.saddr;
if (state->offload_csum)
@@ -370,7 +371,7 @@ void ef4_loopback_rx_packet(struct ef4_nic *efx,
buf_ptr, pkt_len, 0);
netif_err(efx, drv, efx->net_dev, "expected packet:\n");
print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
- &state->payload.header, EF4_LOOPBACK_PAYLOAD_LEN,
+ &state->payload.packet, EF4_LOOPBACK_PAYLOAD_LEN,
0);
}
#endif
@@ -427,7 +428,7 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue)
for (i = 0; i < state->packet_count; i++) {
/* Allocate an skb, holding an extra reference for
* transmit completion counting */
- skb = alloc_skb(EF4_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+ skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
if (!skb)
return -ENOMEM;
state->skbs[i] = skb;
@@ -440,6 +441,8 @@ static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue)
payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
/* Strip off the leading padding */
skb_pull(skb, offsetof(struct ef4_loopback_payload, header));
+ /* Strip off the trailing padding */
+ skb_trim(skb, EF4_LOOPBACK_PAYLOAD_LEN);
/* Ensure everything we've written is visible to the
* interrupt handler. */
diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c
index 96d856b9043c..563c1e317ce9 100644
--- a/drivers/net/ethernet/sfc/selftest.c
+++ b/drivers/net/ethernet/sfc/selftest.c
@@ -43,15 +43,16 @@
*/
struct efx_loopback_payload {
char pad[2]; /* Ensures ip is 4-byte aligned */
- struct ethhdr header;
- struct iphdr ip;
- struct udphdr udp;
- __be16 iteration;
- char msg[64];
+ struct_group_attr(packet, __packed,
+ struct ethhdr header;
+ struct iphdr ip;
+ struct udphdr udp;
+ __be16 iteration;
+ char msg[64];
+ );
} __packed __aligned(4);
-#define EFX_LOOPBACK_PAYLOAD_LEN (sizeof(struct efx_loopback_payload) - \
- offsetof(struct efx_loopback_payload, \
- header))
+#define EFX_LOOPBACK_PAYLOAD_LEN \
+ sizeof_field(struct efx_loopback_payload, packet)
/* Loopback test source MAC address */
static const u8 payload_source[ETH_ALEN] __aligned(2) = {
@@ -297,7 +298,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx,
payload = &state->payload;
- memcpy(&received.header, buf_ptr,
+ memcpy(&received.packet, buf_ptr,
min_t(int, pkt_len, EFX_LOOPBACK_PAYLOAD_LEN));
received.ip.saddr = payload->ip.saddr;
if (state->offload_csum)
@@ -368,7 +369,7 @@ void efx_loopback_rx_packet(struct efx_nic *efx,
buf_ptr, pkt_len, 0);
netif_err(efx, drv, efx->net_dev, "expected packet:\n");
print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
- &state->payload.header, EFX_LOOPBACK_PAYLOAD_LEN,
+ &state->payload.packet, EFX_LOOPBACK_PAYLOAD_LEN,
0);
}
#endif
@@ -425,7 +426,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
for (i = 0; i < state->packet_count; i++) {
/* Allocate an skb, holding an extra reference for
* transmit completion counting */
- skb = alloc_skb(EFX_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+ skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
if (!skb)
return -ENOMEM;
state->skbs[i] = skb;
@@ -438,6 +439,8 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
/* Strip off the leading padding */
skb_pull(skb, offsetof(struct efx_loopback_payload, header));
+ /* Strip off the trailing padding */
+ skb_trim(skb, EFX_LOOPBACK_PAYLOAD_LEN);
/* Ensure everything we've written is visible to the
* interrupt handler. */
diff --git a/drivers/net/ethernet/sfc/siena/selftest.c b/drivers/net/ethernet/sfc/siena/selftest.c
index 111ac17194a5..526da43d4b61 100644
--- a/drivers/net/ethernet/sfc/siena/selftest.c
+++ b/drivers/net/ethernet/sfc/siena/selftest.c
@@ -43,15 +43,16 @@
*/
struct efx_loopback_payload {
char pad[2]; /* Ensures ip is 4-byte aligned */
- struct ethhdr header;
- struct iphdr ip;
- struct udphdr udp;
- __be16 iteration;
- char msg[64];
+ struct_group_attr(packet, __packed,
+ struct ethhdr header;
+ struct iphdr ip;
+ struct udphdr udp;
+ __be16 iteration;
+ char msg[64];
+ );
} __packed __aligned(4);
-#define EFX_LOOPBACK_PAYLOAD_LEN (sizeof(struct efx_loopback_payload) - \
- offsetof(struct efx_loopback_payload, \
- header))
+#define EFX_LOOPBACK_PAYLOAD_LEN \
+ sizeof_field(struct efx_loopback_payload, packet)
/* Loopback test source MAC address */
static const u8 payload_source[ETH_ALEN] __aligned(2) = {
@@ -297,7 +298,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx,
payload = &state->payload;
- memcpy(&received.header, buf_ptr,
+ memcpy(&received.packet, buf_ptr,
min_t(int, pkt_len, EFX_LOOPBACK_PAYLOAD_LEN));
received.ip.saddr = payload->ip.saddr;
if (state->offload_csum)
@@ -368,7 +369,7 @@ void efx_siena_loopback_rx_packet(struct efx_nic *efx,
buf_ptr, pkt_len, 0);
netif_err(efx, drv, efx->net_dev, "expected packet:\n");
print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
- &state->payload.header, EFX_LOOPBACK_PAYLOAD_LEN,
+ &state->payload.packet, EFX_LOOPBACK_PAYLOAD_LEN,
0);
}
#endif
@@ -425,7 +426,7 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
for (i = 0; i < state->packet_count; i++) {
/* Allocate an skb, holding an extra reference for
* transmit completion counting */
- skb = alloc_skb(EFX_LOOPBACK_PAYLOAD_LEN, GFP_KERNEL);
+ skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
if (!skb)
return -ENOMEM;
state->skbs[i] = skb;
@@ -438,6 +439,8 @@ static int efx_begin_loopback(struct efx_tx_queue *tx_queue)
payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
/* Strip off the leading padding */
skb_pull(skb, offsetof(struct efx_loopback_payload, header));
+ /* Strip off the trailing padding */
+ skb_trim(skb, EFX_LOOPBACK_PAYLOAD_LEN);
/* Ensure everything we've written is visible to the
* interrupt handler. */
diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c
index 15ebd3973922..fe268b6c1cac 100644
--- a/drivers/net/ethernet/sfc/tc.c
+++ b/drivers/net/ethernet/sfc/tc.c
@@ -1657,10 +1657,10 @@ int efx_init_tc(struct efx_nic *efx)
rc = efx_tc_configure_fallback_acts_reps(efx);
if (rc)
return rc;
- efx->tc->up = true;
rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
if (rc)
return rc;
+ efx->tc->up = true;
return 0;
}
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index 2d7347b71c41..0dcd6a568b06 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -1851,6 +1851,17 @@ static int netsec_of_probe(struct platform_device *pdev,
return err;
}
+ /*
+ * SynQuacer is physically configured with TX and RX delays
+ * but the standard firmware claimed otherwise for a long
+ * time, ignore it.
+ */
+ if (of_machine_is_compatible("socionext,developer-box") &&
+ priv->phy_interface != PHY_INTERFACE_MODE_RGMII_ID) {
+ dev_warn(&pdev->dev, "Outdated firmware reports incorrect PHY mode, overriding\n");
+ priv->phy_interface = PHY_INTERFACE_MODE_RGMII_ID;
+ }
+
priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
if (!priv->phy_np) {
dev_err(&pdev->dev, "missing required property 'phy-handle'\n");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
index f8367c5b490b..fbb0ccf84afc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
@@ -234,7 +234,8 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
res.addr = mgbe->regs;
res.irq = irq;
- mgbe->clks = devm_kzalloc(&pdev->dev, sizeof(*mgbe->clks), GFP_KERNEL);
+ mgbe->clks = devm_kcalloc(&pdev->dev, ARRAY_SIZE(mgbe_clks),
+ sizeof(*mgbe->clks), GFP_KERNEL);
if (!mgbe->clks)
return -ENOMEM;
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
index df41eac54058..03ceb6a94073 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
@@ -240,13 +240,15 @@ void stmmac_dwmac4_set_mac_addr(void __iomem *ioaddr, const u8 addr[6],
void stmmac_dwmac4_set_mac(void __iomem *ioaddr, bool enable)
{
u32 value = readl(ioaddr + GMAC_CONFIG);
+ u32 old_val = value;
if (enable)
value |= GMAC_CONFIG_RE | GMAC_CONFIG_TE;
else
value &= ~(GMAC_CONFIG_TE | GMAC_CONFIG_RE);
- writel(value, ioaddr + GMAC_CONFIG);
+ if (value != old_val)
+ writel(value, ioaddr + GMAC_CONFIG);
}
void stmmac_dwmac4_get_mac_addr(void __iomem *ioaddr, unsigned char *addr,
diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
index 0c5e783e574c..64bf22cd860c 100644
--- a/drivers/net/ethernet/ti/cpsw_ale.c
+++ b/drivers/net/ethernet/ti/cpsw_ale.c
@@ -106,23 +106,37 @@ struct cpsw_ale_dev_id {
static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
{
- int idx;
+ int idx, idx2;
+ u32 hi_val = 0;
idx = start / 32;
+ idx2 = (start + bits - 1) / 32;
+ /* Check if bits to be fetched exceed a word */
+ if (idx != idx2) {
+ idx2 = 2 - idx2; /* flip */
+ hi_val = ale_entry[idx2] << ((idx2 * 32) - start);
+ }
start -= idx * 32;
idx = 2 - idx; /* flip */
- return (ale_entry[idx] >> start) & BITMASK(bits);
+ return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits);
}
static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
u32 value)
{
- int idx;
+ int idx, idx2;
value &= BITMASK(bits);
- idx = start / 32;
+ idx = start / 32;
+ idx2 = (start + bits - 1) / 32;
+ /* Check if bits to be set exceed a word */
+ if (idx != idx2) {
+ idx2 = 2 - idx2; /* flip */
+ ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32)));
+ ale_entry[idx2] |= (value >> ((idx2 * 32) - start));
+ }
start -= idx * 32;
- idx = 2 - idx; /* flip */
+ idx = 2 - idx; /* flip */
ale_entry[idx] &= ~(BITMASK(bits) << start);
ale_entry[idx] |= (value << start);
}
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 39a9aeee7aab..6321178fc814 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -1511,7 +1511,6 @@ static void wx_configure_rx(struct wx *wx)
psrtype = WX_RDB_PL_CFG_L4HDR |
WX_RDB_PL_CFG_L3HDR |
WX_RDB_PL_CFG_L2HDR |
- WX_RDB_PL_CFG_TUN_TUNHDR |
WX_RDB_PL_CFG_TUN_TUNHDR;
wr32(wx, WX_RDB_PL_CFG(0), psrtype);
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index e0ac1bcd9925..49f303353ecb 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1567,12 +1567,16 @@ static int temac_probe(struct platform_device *pdev)
}
/* Error handle returned DMA RX and TX interrupts */
- if (lp->rx_irq < 0)
- return dev_err_probe(&pdev->dev, lp->rx_irq,
+ if (lp->rx_irq <= 0) {
+ rc = lp->rx_irq ?: -EINVAL;
+ return dev_err_probe(&pdev->dev, rc,
"could not get DMA RX irq\n");
- if (lp->tx_irq < 0)
- return dev_err_probe(&pdev->dev, lp->tx_irq,
+ }
+ if (lp->tx_irq <= 0) {
+ rc = lp->tx_irq ?: -EINVAL;
+ return dev_err_probe(&pdev->dev, rc,
"could not get DMA TX irq\n");
+ }
if (temac_np) {
/* Retrieve the MAC address */
diff --git a/drivers/net/ipa/ipa_table.c b/drivers/net/ipa/ipa_table.c
index f0529c31d0b6..7b637bb8b41c 100644
--- a/drivers/net/ipa/ipa_table.c
+++ b/drivers/net/ipa/ipa_table.c
@@ -273,16 +273,15 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
if (ret)
return ret;
- ret = ipa_filter_reset_table(ipa, true, false, modem);
- if (ret)
+ ret = ipa_filter_reset_table(ipa, false, true, modem);
+ if (ret || !ipa_table_hash_support(ipa))
return ret;
- ret = ipa_filter_reset_table(ipa, false, true, modem);
+ ret = ipa_filter_reset_table(ipa, true, false, modem);
if (ret)
return ret;
- ret = ipa_filter_reset_table(ipa, true, true, modem);
- return ret;
+ return ipa_filter_reset_table(ipa, true, true, modem);
}
/* The AP routes and modem routes are each contiguous within the
@@ -291,12 +290,13 @@ static int ipa_filter_reset(struct ipa *ipa, bool modem)
* */
static int ipa_route_reset(struct ipa *ipa, bool modem)
{
+ bool hash_support = ipa_table_hash_support(ipa);
u32 modem_route_count = ipa->modem_route_count;
struct gsi_trans *trans;
u16 first;
u16 count;
- trans = ipa_cmd_trans_alloc(ipa, 4);
+ trans = ipa_cmd_trans_alloc(ipa, hash_support ? 4 : 2);
if (!trans) {
dev_err(&ipa->pdev->dev,
"no transaction for %s route reset\n",
@@ -313,10 +313,12 @@ static int ipa_route_reset(struct ipa *ipa, bool modem)
}
ipa_table_reset_add(trans, false, false, false, first, count);
- ipa_table_reset_add(trans, false, true, false, first, count);
-
ipa_table_reset_add(trans, false, false, true, first, count);
- ipa_table_reset_add(trans, false, true, true, first, count);
+
+ if (hash_support) {
+ ipa_table_reset_add(trans, false, true, false, first, count);
+ ipa_table_reset_add(trans, false, true, true, first, count);
+ }
gsi_trans_commit_wait(trans);
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index b15dd9a3ad54..1b55928e89b8 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -748,7 +748,8 @@ static int ipvlan_device_event(struct notifier_block *unused,
write_pnet(&port->pnet, newnet);
- ipvlan_migrate_l3s_hook(oldnet, newnet);
+ if (port->mode == IPVLAN_MODE_L3S)
+ ipvlan_migrate_l3s_hook(oldnet, newnet);
break;
}
case NETDEV_UNREGISTER:
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 984dfa5d6c11..144ec756c796 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -743,7 +743,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
u64_stats_update_begin(&rxsc_stats->syncp);
rxsc_stats->stats.InPktsLate++;
u64_stats_update_end(&rxsc_stats->syncp);
- secy->netdev->stats.rx_dropped++;
+ DEV_STATS_INC(secy->netdev, rx_dropped);
return false;
}
@@ -767,7 +767,7 @@ static bool macsec_post_decrypt(struct sk_buff *skb, struct macsec_secy *secy, u
rxsc_stats->stats.InPktsNotValid++;
u64_stats_update_end(&rxsc_stats->syncp);
this_cpu_inc(rx_sa->stats->InPktsNotValid);
- secy->netdev->stats.rx_errors++;
+ DEV_STATS_INC(secy->netdev, rx_errors);
return false;
}
@@ -1069,7 +1069,7 @@ static enum rx_handler_result handle_not_macsec(struct sk_buff *skb)
u64_stats_update_begin(&secy_stats->syncp);
secy_stats->stats.InPktsNoTag++;
u64_stats_update_end(&secy_stats->syncp);
- macsec->secy.netdev->stats.rx_dropped++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
continue;
}
@@ -1179,7 +1179,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
u64_stats_update_begin(&secy_stats->syncp);
secy_stats->stats.InPktsBadTag++;
u64_stats_update_end(&secy_stats->syncp);
- secy->netdev->stats.rx_errors++;
+ DEV_STATS_INC(secy->netdev, rx_errors);
goto drop_nosa;
}
@@ -1196,7 +1196,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
u64_stats_update_begin(&rxsc_stats->syncp);
rxsc_stats->stats.InPktsNotUsingSA++;
u64_stats_update_end(&rxsc_stats->syncp);
- secy->netdev->stats.rx_errors++;
+ DEV_STATS_INC(secy->netdev, rx_errors);
if (active_rx_sa)
this_cpu_inc(active_rx_sa->stats->InPktsNotUsingSA);
goto drop_nosa;
@@ -1230,7 +1230,7 @@ static rx_handler_result_t macsec_handle_frame(struct sk_buff **pskb)
u64_stats_update_begin(&rxsc_stats->syncp);
rxsc_stats->stats.InPktsLate++;
u64_stats_update_end(&rxsc_stats->syncp);
- macsec->secy.netdev->stats.rx_dropped++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
goto drop;
}
}
@@ -1271,7 +1271,7 @@ deliver:
if (ret == NET_RX_SUCCESS)
count_rx(dev, len);
else
- macsec->secy.netdev->stats.rx_dropped++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
rcu_read_unlock();
@@ -1308,7 +1308,7 @@ nosci:
u64_stats_update_begin(&secy_stats->syncp);
secy_stats->stats.InPktsNoSCI++;
u64_stats_update_end(&secy_stats->syncp);
- macsec->secy.netdev->stats.rx_errors++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_errors);
continue;
}
@@ -1327,7 +1327,7 @@ nosci:
secy_stats->stats.InPktsUnknownSCI++;
u64_stats_update_end(&secy_stats->syncp);
} else {
- macsec->secy.netdev->stats.rx_dropped++;
+ DEV_STATS_INC(macsec->secy.netdev, rx_dropped);
}
}
@@ -3422,7 +3422,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
if (!secy->operational) {
kfree_skb(skb);
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return NETDEV_TX_OK;
}
@@ -3430,7 +3430,7 @@ static netdev_tx_t macsec_start_xmit(struct sk_buff *skb,
skb = macsec_encrypt(skb, dev);
if (IS_ERR(skb)) {
if (PTR_ERR(skb) != -EINPROGRESS)
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
return NETDEV_TX_OK;
}
@@ -3667,9 +3667,9 @@ static void macsec_get_stats64(struct net_device *dev,
dev_fetch_sw_netstats(s, dev->tstats);
- s->rx_dropped = dev->stats.rx_dropped;
- s->tx_dropped = dev->stats.tx_dropped;
- s->rx_errors = dev->stats.rx_errors;
+ s->rx_dropped = atomic_long_read(&dev->stats.__rx_dropped);
+ s->tx_dropped = atomic_long_read(&dev->stats.__tx_dropped);
+ s->rx_errors = atomic_long_read(&dev->stats.__rx_errors);
}
static int macsec_get_iflink(const struct net_device *dev)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 4a53debf9d7c..ed908165a8b4 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -1746,6 +1746,7 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = {
[IFLA_MACVLAN_MACADDR_COUNT] = { .type = NLA_U32 },
[IFLA_MACVLAN_BC_QUEUE_LEN] = { .type = NLA_U32 },
[IFLA_MACVLAN_BC_QUEUE_LEN_USED] = { .type = NLA_REJECT },
+ [IFLA_MACVLAN_BC_CUTOFF] = { .type = NLA_S32 },
};
int macvlan_link_register(struct rtnl_link_ops *ops)
diff --git a/drivers/net/mdio/mdio-bitbang.c b/drivers/net/mdio/mdio-bitbang.c
index b83932562be2..81b7748c10ce 100644
--- a/drivers/net/mdio/mdio-bitbang.c
+++ b/drivers/net/mdio/mdio-bitbang.c
@@ -186,7 +186,7 @@ int mdiobb_read_c45(struct mii_bus *bus, int phy, int devad, int reg)
struct mdiobb_ctrl *ctrl = bus->priv;
mdiobb_cmd_addr(ctrl, phy, devad, reg);
- mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg);
+ mdiobb_cmd(ctrl, MDIO_C45_READ, phy, devad);
return mdiobb_read_common(bus, phy);
}
@@ -222,7 +222,7 @@ int mdiobb_write_c45(struct mii_bus *bus, int phy, int devad, int reg, u16 val)
struct mdiobb_ctrl *ctrl = bus->priv;
mdiobb_cmd_addr(ctrl, phy, devad, reg);
- mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, reg);
+ mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, devad);
return mdiobb_write_common(bus, val);
}
diff --git a/drivers/net/pcs/pcs-rzn1-miic.c b/drivers/net/pcs/pcs-rzn1-miic.c
index 323bec5e57f8..356099169003 100644
--- a/drivers/net/pcs/pcs-rzn1-miic.c
+++ b/drivers/net/pcs/pcs-rzn1-miic.c
@@ -313,15 +313,21 @@ struct phylink_pcs *miic_create(struct device *dev, struct device_node *np)
pdev = of_find_device_by_node(pcs_np);
of_node_put(pcs_np);
- if (!pdev || !platform_get_drvdata(pdev))
+ if (!pdev || !platform_get_drvdata(pdev)) {
+ if (pdev)
+ put_device(&pdev->dev);
return ERR_PTR(-EPROBE_DEFER);
+ }
miic_port = kzalloc(sizeof(*miic_port), GFP_KERNEL);
- if (!miic_port)
+ if (!miic_port) {
+ put_device(&pdev->dev);
return ERR_PTR(-ENOMEM);
+ }
miic = platform_get_drvdata(pdev);
device_link_add(dev, miic->dev, DL_FLAG_AUTOREMOVE_CONSUMER);
+ put_device(&pdev->dev);
miic_port->miic = miic;
miic_port->port = port - 1;
diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index c1f307d90518..8a77ec33b417 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -459,21 +459,27 @@ static int at803x_set_wol(struct phy_device *phydev,
phy_write_mmd(phydev, MDIO_MMD_PCS, offsets[i],
mac[(i * 2) + 1] | (mac[(i * 2)] << 8));
- /* Enable WOL function */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
- 0, AT803X_WOL_EN);
- if (ret)
- return ret;
+ /* Enable WOL function for 1588 */
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ 0, AT803X_WOL_EN);
+ if (ret)
+ return ret;
+ }
/* Enable WOL interrupt */
ret = phy_modify(phydev, AT803X_INTR_ENABLE, 0, AT803X_INTR_ENABLE_WOL);
if (ret)
return ret;
} else {
- /* Disable WoL function */
- ret = phy_modify_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL,
- AT803X_WOL_EN, 0);
- if (ret)
- return ret;
+ /* Disable WoL function for 1588 */
+ if (phydev->drv->phy_id == ATH8031_PHY_ID) {
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ AT803X_WOL_EN, 0);
+ if (ret)
+ return ret;
+ }
/* Disable WOL interrupt */
ret = phy_modify(phydev, AT803X_INTR_ENABLE, AT803X_INTR_ENABLE_WOL, 0);
if (ret)
@@ -508,11 +514,11 @@ static void at803x_get_wol(struct phy_device *phydev,
wol->supported = WAKE_MAGIC;
wol->wolopts = 0;
- value = phy_read_mmd(phydev, MDIO_MMD_PCS, AT803X_PHY_MMD3_WOL_CTRL);
+ value = phy_read(phydev, AT803X_INTR_ENABLE);
if (value < 0)
return;
- if (value & AT803X_WOL_EN)
+ if (value & AT803X_INTR_ENABLE_WOL)
wol->wolopts |= WAKE_MAGIC;
}
@@ -858,9 +864,6 @@ static int at803x_probe(struct phy_device *phydev)
if (phydev->drv->phy_id == ATH8031_PHY_ID) {
int ccr = phy_read(phydev, AT803X_REG_CHIP_CONFIG);
int mode_cfg;
- struct ethtool_wolinfo wol = {
- .wolopts = 0,
- };
if (ccr < 0)
return ccr;
@@ -877,12 +880,14 @@ static int at803x_probe(struct phy_device *phydev)
break;
}
- /* Disable WOL by default */
- ret = at803x_set_wol(phydev, &wol);
- if (ret < 0) {
- phydev_err(phydev, "failed to disable WOL on probe: %d\n", ret);
+ /* Disable WoL in 1588 register which is enabled
+ * by default
+ */
+ ret = phy_modify_mmd(phydev, MDIO_MMD_PCS,
+ AT803X_PHY_MMD3_WOL_CTRL,
+ AT803X_WOL_EN, 0);
+ if (ret)
return ret;
- }
}
return 0;
@@ -2059,8 +2064,6 @@ static struct phy_driver at803x_driver[] = {
.flags = PHY_POLL_CABLE_TEST,
.config_init = at803x_config_init,
.link_change_notify = at803x_link_change_notify,
- .set_wol = at803x_set_wol,
- .get_wol = at803x_get_wol,
.suspend = at803x_suspend,
.resume = at803x_resume,
/* PHY_BASIC_FEATURES */
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 59cae0d808aa..04b2e6eeb195 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -542,6 +542,17 @@ static int bcm54xx_resume(struct phy_device *phydev)
return bcm54xx_config_init(phydev);
}
+static int bcm54810_read_mmd(struct phy_device *phydev, int devnum, u16 regnum)
+{
+ return -EOPNOTSUPP;
+}
+
+static int bcm54810_write_mmd(struct phy_device *phydev, int devnum, u16 regnum,
+ u16 val)
+{
+ return -EOPNOTSUPP;
+}
+
static int bcm54811_config_init(struct phy_device *phydev)
{
int err, reg;
@@ -1103,6 +1114,8 @@ static struct phy_driver broadcom_drivers[] = {
.get_strings = bcm_phy_get_strings,
.get_stats = bcm54xx_get_stats,
.probe = bcm54xx_phy_probe,
+ .read_mmd = bcm54810_read_mmd,
+ .write_mmd = bcm54810_write_mmd,
.config_init = bcm54xx_config_init,
.config_aneg = bcm5481_config_aneg,
.config_intr = bcm_phy_config_intr,
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 55d9d7acc32e..d4bb90d76881 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -328,6 +328,13 @@ static int mv3310_power_up(struct phy_device *phydev)
ret = phy_clear_bits_mmd(phydev, MDIO_MMD_VEND2, MV_V2_PORT_CTRL,
MV_V2_PORT_CTRL_PWRDOWN);
+ /* Sometimes, the power down bit doesn't clear immediately, and
+ * a read of this register causes the bit not to clear. Delay
+ * 100us to allow the PHY to come out of power down mode before
+ * the next access.
+ */
+ udelay(100);
+
if (phydev->drv->phy_id != MARVELL_PHY_ID_88X3310 ||
priv->firmware_ver < 0x00030000)
return ret;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index bdf00b2b2c1d..a9ecfdd19624 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -1184,9 +1184,11 @@ void phy_stop_machine(struct phy_device *phydev)
static void phy_process_error(struct phy_device *phydev)
{
- mutex_lock(&phydev->lock);
+ /* phydev->lock must be held for the state change to be safe */
+ if (!mutex_is_locked(&phydev->lock))
+ phydev_err(phydev, "PHY-device data unsafe context\n");
+
phydev->state = PHY_ERROR;
- mutex_unlock(&phydev->lock);
phy_trigger_machine(phydev);
}
@@ -1195,7 +1197,9 @@ static void phy_error_precise(struct phy_device *phydev,
const void *func, int err)
{
WARN(1, "%pS: returned: %d\n", func, err);
+ mutex_lock(&phydev->lock);
phy_process_error(phydev);
+ mutex_unlock(&phydev->lock);
}
/**
@@ -1204,8 +1208,7 @@ static void phy_error_precise(struct phy_device *phydev,
*
* Moves the PHY to the ERROR state in response to a read
* or write error, and tells the controller the link is down.
- * Must not be called from interrupt context, or while the
- * phydev->lock is held.
+ * Must be called with phydev->lock held.
*/
void phy_error(struct phy_device *phydev)
{
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 0c2014accba7..c7cf61fe41cf 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -3216,6 +3216,8 @@ static int phy_probe(struct device *dev)
goto out;
}
+ phy_disable_interrupts(phydev);
+
/* Start out supporting everything. Eventually,
* a controller will attach, and may modify one
* or both of these values
@@ -3333,16 +3335,6 @@ static int phy_remove(struct device *dev)
return 0;
}
-static void phy_shutdown(struct device *dev)
-{
- struct phy_device *phydev = to_phy_device(dev);
-
- if (phydev->state == PHY_READY || !phydev->attached_dev)
- return;
-
- phy_disable_interrupts(phydev);
-}
-
/**
* phy_driver_register - register a phy_driver with the PHY layer
* @new_driver: new phy_driver to register
@@ -3376,7 +3368,6 @@ int phy_driver_register(struct phy_driver *new_driver, struct module *owner)
new_driver->mdiodrv.driver.bus = &mdio_bus_type;
new_driver->mdiodrv.driver.probe = phy_probe;
new_driver->mdiodrv.driver.remove = phy_remove;
- new_driver->mdiodrv.driver.shutdown = phy_shutdown;
new_driver->mdiodrv.driver.owner = owner;
new_driver->mdiodrv.driver.probe_type = PROBE_FORCE_SYNCHRONOUS;
@@ -3451,23 +3442,30 @@ static int __init phy_init(void)
{
int rc;
+ ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
+
rc = mdio_bus_init();
if (rc)
- return rc;
+ goto err_ethtool_phy_ops;
- ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
features_init();
rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE);
if (rc)
- goto err_c45;
+ goto err_mdio_bus;
rc = phy_driver_register(&genphy_driver, THIS_MODULE);
- if (rc) {
- phy_driver_unregister(&genphy_c45_driver);
+ if (rc)
+ goto err_c45;
+
+ return 0;
+
err_c45:
- mdio_bus_exit();
- }
+ phy_driver_unregister(&genphy_c45_driver);
+err_mdio_bus:
+ mdio_bus_exit();
+err_ethtool_phy_ops:
+ ethtool_set_ethtool_phy_ops(NULL);
return rc;
}
diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c
index e8dd47bffe43..208a9393c2df 100644
--- a/drivers/net/phy/sfp-bus.c
+++ b/drivers/net/phy/sfp-bus.c
@@ -258,6 +258,16 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id,
switch (id->base.extended_cc) {
case SFF8024_ECC_UNSPEC:
break;
+ case SFF8024_ECC_100G_25GAUI_C2M_AOC:
+ if (br_min <= 28000 && br_max >= 25000) {
+ /* 25GBASE-R, possibly with FEC */
+ __set_bit(PHY_INTERFACE_MODE_25GBASER, interfaces);
+ /* There is currently no link mode for 25000base
+ * with unspecified range, reuse SR.
+ */
+ phylink_set(modes, 25000baseSR_Full);
+ }
+ break;
case SFF8024_ECC_100GBASE_SR4_25GBASE_SR:
phylink_set(modes, 100000baseSR4_Full);
phylink_set(modes, 25000baseSR_Full);
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 9137fb8c1c42..49d1d6acf95e 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -534,7 +534,7 @@ static int tap_open(struct inode *inode, struct file *file)
q->sock.state = SS_CONNECTED;
q->sock.file = file;
q->sock.ops = &tap_socket_ops;
- sock_init_data_uid(&q->sock, &q->sk, inode->i_uid);
+ sock_init_data_uid(&q->sock, &q->sk, current_fsuid());
q->sk.sk_write_space = tap_sock_write_space;
q->sk.sk_destruct = tap_sock_destruct;
q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 555b0b1e9a78..382756c3fb83 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2135,6 +2135,15 @@ static void team_setup_by_port(struct net_device *dev,
dev->mtu = port_dev->mtu;
memcpy(dev->broadcast, port_dev->broadcast, port_dev->addr_len);
eth_hw_addr_inherit(dev, port_dev);
+
+ if (port_dev->flags & IFF_POINTOPOINT) {
+ dev->flags &= ~(IFF_BROADCAST | IFF_MULTICAST);
+ dev->flags |= (IFF_POINTOPOINT | IFF_NOARP);
+ } else if ((port_dev->flags & (IFF_BROADCAST | IFF_MULTICAST)) ==
+ (IFF_BROADCAST | IFF_MULTICAST)) {
+ dev->flags |= (IFF_BROADCAST | IFF_MULTICAST);
+ dev->flags &= ~(IFF_POINTOPOINT | IFF_NOARP);
+ }
}
static int team_dev_type_check_change(struct net_device *dev,
@@ -2191,7 +2200,9 @@ static void team_setup(struct net_device *dev)
dev->hw_features = TEAM_VLAN_FEATURES |
NETIF_F_HW_VLAN_CTAG_RX |
- NETIF_F_HW_VLAN_CTAG_FILTER;
+ NETIF_F_HW_VLAN_CTAG_FILTER |
+ NETIF_F_HW_VLAN_STAG_RX |
+ NETIF_F_HW_VLAN_STAG_FILTER;
dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
dev->features |= dev->hw_features;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d75456adc62a..100339bc8b04 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1594,7 +1594,7 @@ static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
if (zerocopy)
return false;
- if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ if (SKB_DATA_ALIGN(len + TUN_RX_PAD + XDP_PACKET_HEADROOM) +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
return false;
@@ -3469,7 +3469,7 @@ static int tun_chr_open(struct inode *inode, struct file * file)
tfile->socket.file = file;
tfile->socket.ops = &tun_socket_ops;
- sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid);
+ sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());
tfile->sk.sk_write_space = tun_sock_write_space;
tfile->sk.sk_sndbuf = INT_MAX;
diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c
index c00a89b24df9..6d61052353f0 100644
--- a/drivers/net/usb/cdc_ether.c
+++ b/drivers/net/usb/cdc_ether.c
@@ -618,6 +618,13 @@ static const struct usb_device_id products[] = {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
+ .idProduct = 0x8005, /* A-300 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = 0,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
.idProduct = 0x8006, /* B-500/SL-5600 */
ZAURUS_MASTER_INTERFACE,
.driver_info = 0,
@@ -625,11 +632,25 @@ static const struct usb_device_id products[] = {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
+ .idProduct = 0x8006, /* B-500/SL-5600 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = 0,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
.idProduct = 0x8007, /* C-700 */
ZAURUS_MASTER_INTERFACE,
.driver_info = 0,
}, {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
+ .idProduct = 0x8007, /* C-700 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = 0,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
.idProduct = 0x9031, /* C-750 C-760 */
diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
index c458c030fadf..59cde06aa7f6 100644
--- a/drivers/net/usb/lan78xx.c
+++ b/drivers/net/usb/lan78xx.c
@@ -4224,8 +4224,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
if (!dev)
return;
- set_bit(EVENT_DEV_DISCONNECT, &dev->flags);
-
netif_napi_del(&dev->napi);
udev = interface_to_usbdev(intf);
@@ -4233,6 +4231,8 @@ static void lan78xx_disconnect(struct usb_interface *intf)
unregister_netdev(net);
+ timer_shutdown_sync(&dev->stat_monitor);
+ set_bit(EVENT_DEV_DISCONNECT, &dev->flags);
cancel_delayed_work_sync(&dev->wq);
phydev = net->phydev;
@@ -4247,9 +4247,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
usb_scuttle_anchored_urbs(&dev->deferred);
- if (timer_pending(&dev->stat_monitor))
- del_timer_sync(&dev->stat_monitor);
-
lan78xx_unbind(dev, intf);
lan78xx_free_tx_resources(dev);
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 417f7ea1fffa..344af3c5c836 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1423,6 +1423,7 @@ static const struct usb_device_id products[] = {
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0191, 4)}, /* Quectel EG91 */
{QMI_QUIRK_SET_DTR(0x2c7c, 0x0195, 4)}, /* Quectel EG95 */
{QMI_FIXED_INTF(0x2c7c, 0x0296, 4)}, /* Quectel BG96 */
+ {QMI_QUIRK_SET_DTR(0x2c7c, 0x030e, 4)}, /* Quectel EM05GV2 */
{QMI_QUIRK_SET_DTR(0x2cb7, 0x0104, 4)}, /* Fibocom NL678 series */
{QMI_FIXED_INTF(0x0489, 0xe0b4, 0)}, /* Foxconn T77W968 LTE */
{QMI_FIXED_INTF(0x0489, 0xe0b5, 0)}, /* Foxconn T77W968 LTE with eSIM support*/
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index 283ffddda821..2d14b0d78541 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1775,6 +1775,10 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
} else if (!info->in || !info->out)
status = usbnet_get_endpoints (dev, udev);
else {
+ u8 ep_addrs[3] = {
+ info->in + USB_DIR_IN, info->out + USB_DIR_OUT, 0
+ };
+
dev->in = usb_rcvbulkpipe (xdev, info->in);
dev->out = usb_sndbulkpipe (xdev, info->out);
if (!(info->flags & FLAG_NO_SETINT))
@@ -1784,6 +1788,8 @@ usbnet_probe (struct usb_interface *udev, const struct usb_device_id *prod)
else
status = 0;
+ if (status == 0 && !usb_check_bulk_endpoints(udev, ep_addrs))
+ status = -EINVAL;
}
if (status >= 0 && dev->status)
status = init_status (dev, udev);
diff --git a/drivers/net/usb/zaurus.c b/drivers/net/usb/zaurus.c
index 7984f2157d22..df3617c4c44e 100644
--- a/drivers/net/usb/zaurus.c
+++ b/drivers/net/usb/zaurus.c
@@ -289,11 +289,25 @@ static const struct usb_device_id products [] = {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
+ .idProduct = 0x8005, /* A-300 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = (unsigned long)&bogus_mdlm_info,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
.idProduct = 0x8006, /* B-500/SL-5600 */
ZAURUS_MASTER_INTERFACE,
.driver_info = ZAURUS_PXA_INFO,
}, {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
+ .idProduct = 0x8006, /* B-500/SL-5600 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = (unsigned long)&bogus_mdlm_info,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
.idProduct = 0x8007, /* C-700 */
@@ -301,6 +315,13 @@ static const struct usb_device_id products [] = {
.driver_info = ZAURUS_PXA_INFO,
}, {
.match_flags = USB_DEVICE_ID_MATCH_INT_INFO
+ | USB_DEVICE_ID_MATCH_DEVICE,
+ .idVendor = 0x04DD,
+ .idProduct = 0x8007, /* C-700 */
+ ZAURUS_FAKE_INTERFACE,
+ .driver_info = (unsigned long)&bogus_mdlm_info,
+}, {
+ .match_flags = USB_DEVICE_ID_MATCH_INT_INFO
| USB_DEVICE_ID_MATCH_DEVICE,
.idVendor = 0x04DD,
.idProduct = 0x9031, /* C-750 C-760 */
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 614f3e3efab0..ef8eacb596f7 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -1081,8 +1081,9 @@ static int __veth_napi_enable_range(struct net_device *dev, int start, int end)
err_xdp_ring:
for (i--; i >= start; i--)
ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);
+ i = end;
err_page_pool:
- for (i = start; i < end; i++) {
+ for (i--; i >= start; i--) {
page_pool_destroy(priv->rq[i].page_pool);
priv->rq[i].page_pool = NULL;
}
@@ -1860,10 +1861,7 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
nla_peer = data[VETH_INFO_PEER];
ifmp = nla_data(nla_peer);
- err = rtnl_nla_parse_ifla(peer_tb,
- nla_data(nla_peer) + sizeof(struct ifinfomsg),
- nla_len(nla_peer) - sizeof(struct ifinfomsg),
- NULL);
+ err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack);
if (err < 0)
return err;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 0db14f6b87d3..8e9f4cfe941f 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -2761,7 +2761,7 @@ static void virtnet_init_default_rss(struct virtnet_info *vi)
vi->ctrl->rss.indirection_table[i] = indir_val;
}
- vi->ctrl->rss.max_tx_vq = vi->curr_queue_pairs;
+ vi->ctrl->rss.max_tx_vq = vi->has_rss ? vi->curr_queue_pairs : 0;
vi->ctrl->rss.hash_key_length = vi->rss_key_size;
netdev_rss_key_fill(vi->ctrl->rss.key, vi->rss_key_size);
@@ -4231,6 +4231,8 @@ static int virtnet_probe(struct virtio_device *vdev)
virtio_device_ready(vdev);
+ _virtnet_set_queues(vi, vi->curr_queue_pairs);
+
/* a random MAC address has been assigned, notify the device.
* We don't fail probe if VIRTIO_NET_F_CTRL_MAC_ADDR is not there
* because many devices work fine without getting MAC explicitly
@@ -4257,8 +4259,6 @@ static int virtnet_probe(struct virtio_device *vdev)
goto free_unregister_netdev;
}
- virtnet_set_queues(vi, vi->curr_queue_pairs);
-
/* Assume link up if device can't report link status,
otherwise get link status from config. */
netif_carrier_off(dev);
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index bdb3a76a352e..6043e63b42f9 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -664,7 +664,7 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
skb->protocol = htons(ETH_P_IPV6);
skb->dev = dev;
- rcu_read_lock_bh();
+ rcu_read_lock();
nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
@@ -672,10 +672,10 @@ static int vrf_finish_output6(struct net *net, struct sock *sk,
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
IP6_INC_STATS(dev_net(dst->dev),
ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
@@ -889,7 +889,7 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
}
}
- rcu_read_lock_bh();
+ rcu_read_lock();
neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
if (!IS_ERR(neigh)) {
@@ -898,11 +898,11 @@ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
sock_confirm_neigh(skb, neigh);
/* if crossing protocols, can not use the cached header */
ret = neigh_output(neigh, skb, is_v6gw);
- rcu_read_unlock_bh();
+ rcu_read_unlock();
return ret;
}
- rcu_read_unlock_bh();
+ rcu_read_unlock();
vrf_tx_error(skb->dev, skb);
return -EINVAL;
}
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 78744549c1b3..c9a9373733c0 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -623,6 +623,32 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
return 1;
}
+static bool vxlan_parse_gpe_proto(struct vxlanhdr *hdr, __be16 *protocol)
+{
+ struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)hdr;
+
+ /* Need to have Next Protocol set for interfaces in GPE mode. */
+ if (!gpe->np_applied)
+ return false;
+ /* "The initial version is 0. If a receiver does not support the
+ * version indicated it MUST drop the packet.
+ */
+ if (gpe->version != 0)
+ return false;
+ /* "When the O bit is set to 1, the packet is an OAM packet and OAM
+ * processing MUST occur." However, we don't implement OAM
+ * processing, thus drop the packet.
+ */
+ if (gpe->oam_flag)
+ return false;
+
+ *protocol = tun_p_to_eth_p(gpe->next_protocol);
+ if (!*protocol)
+ return false;
+
+ return true;
+}
+
static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
unsigned int off,
struct vxlanhdr *vh, size_t hdrlen,
@@ -649,26 +675,24 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
return vh;
}
-static struct sk_buff *vxlan_gro_receive(struct sock *sk,
- struct list_head *head,
- struct sk_buff *skb)
+static struct vxlanhdr *vxlan_gro_prepare_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb,
+ struct gro_remcsum *grc)
{
- struct sk_buff *pp = NULL;
struct sk_buff *p;
struct vxlanhdr *vh, *vh2;
unsigned int hlen, off_vx;
- int flush = 1;
struct vxlan_sock *vs = rcu_dereference_sk_user_data(sk);
__be32 flags;
- struct gro_remcsum grc;
- skb_gro_remcsum_init(&grc);
+ skb_gro_remcsum_init(grc);
off_vx = skb_gro_offset(skb);
hlen = off_vx + sizeof(*vh);
vh = skb_gro_header(skb, hlen, off_vx);
if (unlikely(!vh))
- goto out;
+ return NULL;
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
@@ -676,12 +700,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
- vh->vx_vni, &grc,
+ vh->vx_vni, grc,
!!(vs->flags &
VXLAN_F_REMCSUM_NOPARTIAL));
if (!vh)
- goto out;
+ return NULL;
}
skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
@@ -698,12 +722,48 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
}
}
- pp = call_gro_receive(eth_gro_receive, head, skb);
- flush = 0;
+ return vh;
+}
-out:
+static struct sk_buff *vxlan_gro_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ struct sk_buff *pp = NULL;
+ struct gro_remcsum grc;
+ int flush = 1;
+
+ if (vxlan_gro_prepare_receive(sk, head, skb, &grc)) {
+ pp = call_gro_receive(eth_gro_receive, head, skb);
+ flush = 0;
+ }
skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
+ return pp;
+}
+
+static struct sk_buff *vxlan_gpe_gro_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ const struct packet_offload *ptype;
+ struct sk_buff *pp = NULL;
+ struct gro_remcsum grc;
+ struct vxlanhdr *vh;
+ __be16 protocol;
+ int flush = 1;
+ vh = vxlan_gro_prepare_receive(sk, head, skb, &grc);
+ if (vh) {
+ if (!vxlan_parse_gpe_proto(vh, &protocol))
+ goto out;
+ ptype = gro_find_receive_by_type(protocol);
+ if (!ptype)
+ goto out;
+ pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+ flush = 0;
+ }
+out:
+ skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
return pp;
}
@@ -715,6 +775,21 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
+static int vxlan_gpe_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
+{
+ struct vxlanhdr *vh = (struct vxlanhdr *)(skb->data + nhoff);
+ const struct packet_offload *ptype;
+ int err = -ENOSYS;
+ __be16 protocol;
+
+ if (!vxlan_parse_gpe_proto(vh, &protocol))
+ return err;
+ ptype = gro_find_complete_by_type(protocol);
+ if (ptype)
+ err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
+ return err;
+}
+
static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
__u16 state, __be32 src_vni,
__u16 ndm_flags)
@@ -1525,35 +1600,6 @@ out:
unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}
-static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
- __be16 *protocol,
- struct sk_buff *skb, u32 vxflags)
-{
- struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
-
- /* Need to have Next Protocol set for interfaces in GPE mode. */
- if (!gpe->np_applied)
- return false;
- /* "The initial version is 0. If a receiver does not support the
- * version indicated it MUST drop the packet.
- */
- if (gpe->version != 0)
- return false;
- /* "When the O bit is set to 1, the packet is an OAM packet and OAM
- * processing MUST occur." However, we don't implement OAM
- * processing, thus drop the packet.
- */
- if (gpe->oam_flag)
- return false;
-
- *protocol = tun_p_to_eth_p(gpe->next_protocol);
- if (!*protocol)
- return false;
-
- unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
- return true;
-}
-
static bool vxlan_set_mac(struct vxlan_dev *vxlan,
struct vxlan_sock *vs,
struct sk_buff *skb, __be32 vni)
@@ -1655,8 +1701,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
* used by VXLAN extensions if explicitly requested.
*/
if (vs->flags & VXLAN_F_GPE) {
- if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
+ if (!vxlan_parse_gpe_proto(&unparsed, &protocol))
goto drop;
+ unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS;
raw_proto = true;
}
@@ -2516,7 +2563,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ndst = &rt->dst;
- err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+ err = skb_tunnel_check_pmtu(skb, ndst, vxlan_headroom(flags & VXLAN_F_GPE),
netif_is_any_bridge_port(dev));
if (err < 0) {
goto tx_error;
@@ -2577,7 +2624,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto out_unlock;
}
- err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+ err = skb_tunnel_check_pmtu(skb, ndst,
+ vxlan_headroom((flags & VXLAN_F_GPE) | VXLAN_F_IPV6),
netif_is_any_bridge_port(dev));
if (err < 0) {
goto tx_error;
@@ -2989,14 +3037,12 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
struct vxlan_rdst *dst = &vxlan->default_dst;
struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
dst->remote_ifindex);
- bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6);
/* This check is different than dev->max_mtu, because it looks at
* the lowerdev->mtu, rather than the static dev->max_mtu
*/
if (lowerdev) {
- int max_mtu = lowerdev->mtu -
- (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+ int max_mtu = lowerdev->mtu - vxlan_headroom(vxlan->cfg.flags);
if (new_mtu > max_mtu)
return -EINVAL;
}
@@ -3379,8 +3425,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
tunnel_cfg.encap_rcv = vxlan_rcv;
tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
tunnel_cfg.encap_destroy = NULL;
- tunnel_cfg.gro_receive = vxlan_gro_receive;
- tunnel_cfg.gro_complete = vxlan_gro_complete;
+ if (vs->flags & VXLAN_F_GPE) {
+ tunnel_cfg.gro_receive = vxlan_gpe_gro_receive;
+ tunnel_cfg.gro_complete = vxlan_gpe_gro_complete;
+ } else {
+ tunnel_cfg.gro_receive = vxlan_gro_receive;
+ tunnel_cfg.gro_complete = vxlan_gro_complete;
+ }
setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
@@ -3644,11 +3695,11 @@ static void vxlan_config_apply(struct net_device *dev,
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *dst = &vxlan->default_dst;
unsigned short needed_headroom = ETH_HLEN;
- bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6);
int max_mtu = ETH_MAX_MTU;
+ u32 flags = conf->flags;
if (!changelink) {
- if (conf->flags & VXLAN_F_GPE)
+ if (flags & VXLAN_F_GPE)
vxlan_raw_setup(dev);
else
vxlan_ether_setup(dev);
@@ -3673,8 +3724,7 @@ static void vxlan_config_apply(struct net_device *dev,
dev->needed_tailroom = lowerdev->needed_tailroom;
- max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
- VXLAN_HEADROOM);
+ max_mtu = lowerdev->mtu - vxlan_headroom(flags);
if (max_mtu < ETH_MIN_MTU)
max_mtu = ETH_MIN_MTU;
@@ -3685,10 +3735,9 @@ static void vxlan_config_apply(struct net_device *dev,
if (dev->mtu > max_mtu)
dev->mtu = max_mtu;
- if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
- needed_headroom += VXLAN6_HEADROOM;
- else
- needed_headroom += VXLAN_HEADROOM;
+ if (flags & VXLAN_F_COLLECT_METADATA)
+ flags |= VXLAN_F_IPV6;
+ needed_headroom += vxlan_headroom(flags);
dev->needed_headroom = needed_headroom;
memcpy(&vxlan->cfg, conf, sizeof(*conf));
diff --git a/drivers/net/vxlan/vxlan_vnifilter.c b/drivers/net/vxlan/vxlan_vnifilter.c
index a3de081cda5e..c3ff30ab782e 100644
--- a/drivers/net/vxlan/vxlan_vnifilter.c
+++ b/drivers/net/vxlan/vxlan_vnifilter.c
@@ -713,6 +713,12 @@ static struct vxlan_vni_node *vxlan_vni_alloc(struct vxlan_dev *vxlan,
return vninode;
}
+static void vxlan_vni_free(struct vxlan_vni_node *vninode)
+{
+ free_percpu(vninode->stats);
+ kfree(vninode);
+}
+
static int vxlan_vni_add(struct vxlan_dev *vxlan,
struct vxlan_vni_group *vg,
u32 vni, union vxlan_addr *group,
@@ -740,7 +746,7 @@ static int vxlan_vni_add(struct vxlan_dev *vxlan,
&vninode->vnode,
vxlan_vni_rht_params);
if (err) {
- kfree(vninode);
+ vxlan_vni_free(vninode);
return err;
}
@@ -763,8 +769,7 @@ static void vxlan_vni_node_rcu_free(struct rcu_head *rcu)
struct vxlan_vni_node *v;
v = container_of(rcu, struct vxlan_vni_node, rcu);
- free_percpu(v->stats);
- kfree(v);
+ vxlan_vni_free(v);
}
static int vxlan_vni_del(struct vxlan_dev *vxlan,
diff --git a/drivers/net/wireguard/allowedips.c b/drivers/net/wireguard/allowedips.c
index 5bf7822c53f1..0ba714ca5185 100644
--- a/drivers/net/wireguard/allowedips.c
+++ b/drivers/net/wireguard/allowedips.c
@@ -6,7 +6,7 @@
#include "allowedips.h"
#include "peer.h"
-enum { MAX_ALLOWEDIPS_BITS = 128 };
+enum { MAX_ALLOWEDIPS_DEPTH = 129 };
static struct kmem_cache *node_cache;
@@ -42,7 +42,7 @@ static void push_rcu(struct allowedips_node **stack,
struct allowedips_node __rcu *p, unsigned int *len)
{
if (rcu_access_pointer(p)) {
- if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_BITS))
+ if (WARN_ON(IS_ENABLED(DEBUG) && *len >= MAX_ALLOWEDIPS_DEPTH))
return;
stack[(*len)++] = rcu_dereference_raw(p);
}
@@ -55,7 +55,7 @@ static void node_free_rcu(struct rcu_head *rcu)
static void root_free_rcu(struct rcu_head *rcu)
{
- struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = {
+ struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = {
container_of(rcu, struct allowedips_node, rcu) };
unsigned int len = 1;
@@ -68,7 +68,7 @@ static void root_free_rcu(struct rcu_head *rcu)
static void root_remove_peer_lists(struct allowedips_node *root)
{
- struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_BITS] = { root };
+ struct allowedips_node *node, *stack[MAX_ALLOWEDIPS_DEPTH] = { root };
unsigned int len = 1;
while (len > 0 && (node = stack[--len])) {
diff --git a/drivers/net/wireguard/selftest/allowedips.c b/drivers/net/wireguard/selftest/allowedips.c
index 78ebe2892a78..3d1f64ff2e12 100644
--- a/drivers/net/wireguard/selftest/allowedips.c
+++ b/drivers/net/wireguard/selftest/allowedips.c
@@ -593,16 +593,20 @@ bool __init wg_allowedips_selftest(void)
wg_allowedips_remove_by_peer(&t, a, &mutex);
test_negative(4, a, 192, 168, 0, 1);
- /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_BITS) in free_node
+ /* These will hit the WARN_ON(len >= MAX_ALLOWEDIPS_DEPTH) in free_node
* if something goes wrong.
*/
- for (i = 0; i < MAX_ALLOWEDIPS_BITS; ++i) {
- part = cpu_to_be64(~(1LLU << (i % 64)));
- memset(&ip, 0xff, 16);
- memcpy((u8 *)&ip + (i < 64) * 8, &part, 8);
+ for (i = 0; i < 64; ++i) {
+ part = cpu_to_be64(~0LLU << i);
+ memset(&ip, 0xff, 8);
+ memcpy((u8 *)&ip + 8, &part, 8);
+ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
+ memcpy(&ip, &part, 8);
+ memset((u8 *)&ip + 8, 0, 8);
wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
}
-
+ memset(&ip, 0, 16);
+ wg_allowedips_insert_v6(&t, &ip, 128, a, &mutex);
wg_allowedips_free(&t, &mutex);
wg_allowedips_init(&t);
diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index 1cebba7889d7..139da578831a 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -376,7 +376,6 @@ static void ath11k_ahb_ext_irq_enable(struct ath11k_base *ab)
struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i];
if (!irq_grp->napi_enabled) {
- dev_set_threaded(&irq_grp->napi_ndev, true);
napi_enable(&irq_grp->napi);
irq_grp->napi_enabled = true;
}
diff --git a/drivers/net/wireless/ath/ath11k/pcic.c b/drivers/net/wireless/ath/ath11k/pcic.c
index c899616fbee4..c63083633b37 100644
--- a/drivers/net/wireless/ath/ath11k/pcic.c
+++ b/drivers/net/wireless/ath/ath11k/pcic.c
@@ -466,7 +466,6 @@ void ath11k_pcic_ext_irq_enable(struct ath11k_base *ab)
struct ath11k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i];
if (!irq_grp->napi_enabled) {
- dev_set_threaded(&irq_grp->napi_ndev, true);
napi_enable(&irq_grp->napi);
irq_grp->napi_enabled = true;
}
diff --git a/drivers/net/wireless/ath/ath12k/wmi.c b/drivers/net/wireless/ath/ath12k/wmi.c
index 6512267ae4ca..4928e4e91660 100644
--- a/drivers/net/wireless/ath/ath12k/wmi.c
+++ b/drivers/net/wireless/ath/ath12k/wmi.c
@@ -2144,8 +2144,7 @@ int ath12k_wmi_send_scan_start_cmd(struct ath12k *ar,
struct wmi_tlv *tlv;
void *ptr;
int i, ret, len;
- u32 *tmp_ptr;
- u8 extraie_len_with_pad = 0;
+ u32 *tmp_ptr, extraie_len_with_pad = 0;
struct ath12k_wmi_hint_short_ssid_arg *s_ssid = NULL;
struct ath12k_wmi_hint_bssid_arg *hint_bssid = NULL;
diff --git a/drivers/net/wireless/ath/ath6kl/Makefile b/drivers/net/wireless/ath/ath6kl/Makefile
index a75bfa9fd1cf..dc2b3b46781e 100644
--- a/drivers/net/wireless/ath/ath6kl/Makefile
+++ b/drivers/net/wireless/ath/ath6kl/Makefile
@@ -36,11 +36,6 @@ ath6kl_core-y += wmi.o
ath6kl_core-y += core.o
ath6kl_core-y += recovery.o
-# FIXME: temporarily silence -Wdangling-pointer on non W=1+ builds
-ifndef KBUILD_EXTRA_WARN
-CFLAGS_htc_mbox.o += $(call cc-disable-warning, dangling-pointer)
-endif
-
ath6kl_core-$(CONFIG_NL80211_TESTMODE) += testmode.o
ath6kl_core-$(CONFIG_ATH6KL_TRACING) += trace.o
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index de8a2e27f49c..2a90bb24ba77 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -1456,6 +1456,10 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
params_size -= BRCMF_SCAN_PARAMS_V2_FIXED_SIZE;
params_size += BRCMF_SCAN_PARAMS_FIXED_SIZE;
params_v1 = kzalloc(params_size, GFP_KERNEL);
+ if (!params_v1) {
+ err = -ENOMEM;
+ goto exit_params;
+ }
params_v1->version = cpu_to_le32(BRCMF_ESCAN_REQ_VERSION);
brcmf_scan_params_v2_to_v1(&params->params_v2_le, &params_v1->params_le);
kfree(params);
@@ -1473,6 +1477,7 @@ brcmf_run_escan(struct brcmf_cfg80211_info *cfg, struct brcmf_if *ifp,
bphy_err(drvr, "error (%d)\n", err);
}
+exit_params:
kfree(params);
exit:
return err;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
index 792adaf880b4..bece26741d3a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwil_types.h
@@ -398,7 +398,12 @@ struct brcmf_scan_params_le {
* fixed parameter portion is assumed, otherwise
* ssid in the fixed portion is ignored
*/
- __le16 channel_list[1]; /* list of chanspecs */
+ union {
+ __le16 padding; /* Reserve space for at least 1 entry for abort
+ * which uses an on stack brcmf_scan_params_le
+ */
+ DECLARE_FLEX_ARRAY(__le16, channel_list); /* chanspecs */
+ };
};
struct brcmf_scan_params_v2_le {
diff --git a/drivers/net/wireless/intel/iwlwifi/Kconfig b/drivers/net/wireless/intel/iwlwifi/Kconfig
index b20409f8c13a..20971304fdef 100644
--- a/drivers/net/wireless/intel/iwlwifi/Kconfig
+++ b/drivers/net/wireless/intel/iwlwifi/Kconfig
@@ -66,6 +66,7 @@ config IWLMVM
tristate "Intel Wireless WiFi MVM Firmware support"
select WANT_DEV_COREDUMP
depends on MAC80211
+ depends on PTP_1588_CLOCK_OPTIONAL
help
This is the driver that supports the MVM firmware. The list
of the devices that use this firmware is available here:
diff --git a/drivers/net/wireless/legacy/rayctl.h b/drivers/net/wireless/legacy/rayctl.h
index 2b0f332043d7..1f3bde8ac73d 100644
--- a/drivers/net/wireless/legacy/rayctl.h
+++ b/drivers/net/wireless/legacy/rayctl.h
@@ -577,7 +577,7 @@ struct tx_msg {
struct tib_structure tib;
struct phy_header phy;
struct mac_header mac;
- UCHAR var[1];
+ UCHAR var[];
};
/****** ECF Receive Control Structure (RCS) Area at Shared RAM offset 0x0800 */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 68e88224b8b1..ccedea7e8a50 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -128,12 +128,12 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev)
case MT_EE_5GHZ:
dev->mphy.cap.has_5ghz = true;
break;
- case MT_EE_2GHZ:
- dev->mphy.cap.has_2ghz = true;
- break;
case MT_EE_DBDC:
dev->dbdc_support = true;
fallthrough;
+ case MT_EE_2GHZ:
+ dev->mphy.cap.has_2ghz = true;
+ break;
default:
dev->mphy.cap.has_2ghz = true;
dev->mphy.cap.has_5ghz = true;
diff --git a/drivers/net/wireless/realtek/rtw89/mac.c b/drivers/net/wireless/realtek/rtw89/mac.c
index b114babec698..c93e6250cb8b 100644
--- a/drivers/net/wireless/realtek/rtw89/mac.c
+++ b/drivers/net/wireless/realtek/rtw89/mac.c
@@ -2524,7 +2524,7 @@ static int cmac_dma_init(struct rtw89_dev *rtwdev, u8 mac_idx)
u32 reg;
int ret;
- if (chip_id != RTL8852A && chip_id != RTL8852B)
+ if (chip_id != RTL8852B)
return 0;
ret = rtw89_mac_check_mac_en(rtwdev, mac_idx, RTW89_CMAC_SEL);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index c8d20cddf658..88f760a7cbc3 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -396,7 +396,7 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
struct gnttab_map_grant_ref *gop = queue->tx_map_ops + *map_ops;
struct xen_netif_tx_request *txp = first;
- nr_slots = shinfo->nr_frags + 1;
+ nr_slots = shinfo->nr_frags + frag_overflow + 1;
copy_count(skb) = 0;
XENVIF_TX_CB(skb)->split_mask = 0;
@@ -462,8 +462,8 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
}
}
- for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
- shinfo->nr_frags++, gop++) {
+ for (shinfo->nr_frags = 0; nr_slots > 0 && shinfo->nr_frags < MAX_SKB_FRAGS;
+ shinfo->nr_frags++, gop++, nr_slots--) {
index = pending_index(queue->pending_cons++);
pending_idx = queue->pending_ring[index];
xenvif_tx_create_map_op(queue, pending_idx, txp,
@@ -476,12 +476,12 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
txp++;
}
- if (frag_overflow) {
+ if (nr_slots > 0) {
shinfo = skb_shinfo(nskb);
frags = shinfo->frags;
- for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
+ for (shinfo->nr_frags = 0; shinfo->nr_frags < nr_slots;
shinfo->nr_frags++, txp++, gop++) {
index = pending_index(queue->pending_cons++);
pending_idx = queue->pending_ring[index];
@@ -492,6 +492,11 @@ static void xenvif_get_requests(struct xenvif_queue *queue,
}
skb_shinfo(skb)->frag_list = nskb;
+ } else if (nskb) {
+ /* A frag_list skb was allocated but it is no longer needed
+ * because enough slots were converted to copy ops above.
+ */
+ kfree_skb(nskb);
}
(*copy_ops) = cop - queue->tx_copy_ops;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 37b6fa746662..f3a01b79148c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3933,6 +3933,12 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
*/
nvme_mpath_clear_ctrl_paths(ctrl);
+ /*
+ * Unquiesce io queues so any pending IO won't hang, especially
+ * those submitted from scan work
+ */
+ nvme_unquiesce_io_queues(ctrl);
+
/* prevent racing with ns scanning */
flush_work(&ctrl->scan_work);
@@ -3942,10 +3948,8 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
* removing the namespaces' disks; fail all the queues now to avoid
* potentially having to clean up the failed sync later.
*/
- if (ctrl->state == NVME_CTRL_DEAD) {
+ if (ctrl->state == NVME_CTRL_DEAD)
nvme_mark_namespaces_dead(ctrl);
- nvme_unquiesce_io_queues(ctrl);
- }
/* this is a no-op when called from the controller reset handler */
nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 5c3250f36ce7..d39f3219358b 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -786,11 +786,9 @@ int nvme_ns_chr_uring_cmd_iopoll(struct io_uring_cmd *ioucmd,
if (!(ioucmd->flags & IORING_URING_CMD_POLLED))
return 0;
- rcu_read_lock();
req = READ_ONCE(ioucmd->cookie);
if (req && blk_rq_is_poll(req))
ret = blk_rq_poll(req, iob, poll_flags);
- rcu_read_unlock();
return ret;
}
#ifdef CONFIG_NVME_MULTIPATH
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index baf69af7ea78..2f57da12d983 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3402,7 +3402,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x144d, 0xa80b), /* Samsung PM9B1 256G and 512G */
- .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES |
+ NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x144d, 0xa802), /* Samsung SM953 */
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index d433b2ec07a6..337a624a537c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -883,6 +883,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
goto out_cleanup_tagset;
if (!new) {
+ nvme_start_freeze(&ctrl->ctrl);
nvme_unquiesce_io_queues(&ctrl->ctrl);
if (!nvme_wait_freeze_timeout(&ctrl->ctrl, NVME_IO_TIMEOUT)) {
/*
@@ -891,6 +892,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
* to be safe.
*/
ret = -ENODEV;
+ nvme_unfreeze(&ctrl->ctrl);
goto out_wait_freeze_timed_out;
}
blk_mq_update_nr_hw_queues(ctrl->ctrl.tagset,
@@ -940,7 +942,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
bool remove)
{
if (ctrl->ctrl.queue_count > 1) {
- nvme_start_freeze(&ctrl->ctrl);
nvme_quiesce_io_queues(&ctrl->ctrl);
nvme_sync_io_queues(&ctrl->ctrl);
nvme_rdma_stop_io_queues(ctrl);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 9ce417cd32a7..5b332d9f87fc 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1868,6 +1868,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
goto out_cleanup_connect_q;
if (!new) {
+ nvme_start_freeze(ctrl);
nvme_unquiesce_io_queues(ctrl);
if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
/*
@@ -1876,6 +1877,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
* to be safe.
*/
ret = -ENODEV;
+ nvme_unfreeze(ctrl);
goto out_wait_freeze_timed_out;
}
blk_mq_update_nr_hw_queues(ctrl->tagset,
@@ -1980,7 +1982,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
if (ctrl->queue_count <= 1)
return;
nvme_quiesce_admin_queue(ctrl);
- nvme_start_freeze(ctrl);
nvme_quiesce_io_queues(ctrl);
nvme_sync_io_queues(ctrl);
nvme_tcp_stop_io_queues(ctrl);
diff --git a/drivers/of/Kconfig b/drivers/of/Kconfig
index e40f10bf2ba4..da9826accb1b 100644
--- a/drivers/of/Kconfig
+++ b/drivers/of/Kconfig
@@ -55,7 +55,7 @@ config OF_FLATTREE
config OF_EARLY_FLATTREE
bool
- select DMA_DECLARE_COHERENT if HAS_DMA
+ select DMA_DECLARE_COHERENT if HAS_DMA && HAS_IOMEM
select OF_FLATTREE
config OF_PROMTREE
diff --git a/drivers/of/dynamic.c b/drivers/of/dynamic.c
index e311d406b170..4999636eaa92 100644
--- a/drivers/of/dynamic.c
+++ b/drivers/of/dynamic.c
@@ -63,15 +63,14 @@ int of_reconfig_notifier_unregister(struct notifier_block *nb)
}
EXPORT_SYMBOL_GPL(of_reconfig_notifier_unregister);
-#ifdef DEBUG
-const char *action_names[] = {
+static const char *action_names[] = {
+ [0] = "INVALID",
[OF_RECONFIG_ATTACH_NODE] = "ATTACH_NODE",
[OF_RECONFIG_DETACH_NODE] = "DETACH_NODE",
[OF_RECONFIG_ADD_PROPERTY] = "ADD_PROPERTY",
[OF_RECONFIG_REMOVE_PROPERTY] = "REMOVE_PROPERTY",
[OF_RECONFIG_UPDATE_PROPERTY] = "UPDATE_PROPERTY",
};
-#endif
int of_reconfig_notify(unsigned long action, struct of_reconfig_data *p)
{
@@ -620,21 +619,9 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
}
ret = __of_add_property(ce->np, ce->prop);
- if (ret) {
- pr_err("changeset: add_property failed @%pOF/%s\n",
- ce->np,
- ce->prop->name);
- break;
- }
break;
case OF_RECONFIG_REMOVE_PROPERTY:
ret = __of_remove_property(ce->np, ce->prop);
- if (ret) {
- pr_err("changeset: remove_property failed @%pOF/%s\n",
- ce->np,
- ce->prop->name);
- break;
- }
break;
case OF_RECONFIG_UPDATE_PROPERTY:
@@ -648,20 +635,17 @@ static int __of_changeset_entry_apply(struct of_changeset_entry *ce)
}
ret = __of_update_property(ce->np, ce->prop, &old_prop);
- if (ret) {
- pr_err("changeset: update_property failed @%pOF/%s\n",
- ce->np,
- ce->prop->name);
- break;
- }
break;
default:
ret = -EINVAL;
}
raw_spin_unlock_irqrestore(&devtree_lock, flags);
- if (ret)
+ if (ret) {
+ pr_err("changeset: apply failed: %-15s %pOF:%s\n",
+ action_names[ce->action], ce->np, ce->prop->name);
return ret;
+ }
switch (ce->action) {
case OF_RECONFIG_ATTACH_NODE:
@@ -947,6 +931,9 @@ int of_changeset_action(struct of_changeset *ocs, unsigned long action,
if (!ce)
return -ENOMEM;
+ if (WARN_ON(action >= ARRAY_SIZE(action_names)))
+ return -EINVAL;
+
/* get a reference to the node */
ce->action = action;
ce->np = of_node_get(np);
diff --git a/drivers/of/kexec.c b/drivers/of/kexec.c
index f26d2ba8a371..68278340cecf 100644
--- a/drivers/of/kexec.c
+++ b/drivers/of/kexec.c
@@ -184,7 +184,8 @@ int __init ima_free_kexec_buffer(void)
if (ret)
return ret;
- return memblock_phys_free(addr, size);
+ memblock_free_late(addr, size);
+ return 0;
}
#endif
diff --git a/drivers/of/platform.c b/drivers/of/platform.c
index 051e29b7ad2b..6a557eb866d0 100644
--- a/drivers/of/platform.c
+++ b/drivers/of/platform.c
@@ -141,7 +141,7 @@ struct platform_device *of_device_alloc(struct device_node *np,
}
/* setup generic device info */
- device_set_node(&dev->dev, of_fwnode_handle(np));
+ device_set_node(&dev->dev, of_fwnode_handle(of_node_get(np)));
dev->dev.parent = parent ? : &platform_bus;
if (bus_id)
@@ -239,7 +239,7 @@ static struct amba_device *of_amba_device_create(struct device_node *node,
dev->dev.dma_mask = &dev->dev.coherent_dma_mask;
/* setup generic device info */
- device_set_node(&dev->dev, of_fwnode_handle(node));
+ device_set_node(&dev->dev, of_fwnode_handle(of_node_get(node)));
dev->dev.parent = parent ? : &platform_bus;
dev->dev.platform_data = platform_data;
if (bus_id)
@@ -552,7 +552,7 @@ static int __init of_platform_default_populate_init(void)
if (!of_get_property(node, "linux,opened", NULL) ||
!of_get_property(node, "linux,boot-display", NULL))
continue;
- dev = of_platform_device_create(node, "of-display.0", NULL);
+ dev = of_platform_device_create(node, "of-display", NULL);
of_node_put(node);
if (WARN_ON(!dev))
return -ENOMEM;
diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c
index a406a12eb208..b545fcb22536 100644
--- a/drivers/of/unittest.c
+++ b/drivers/of/unittest.c
@@ -664,12 +664,12 @@ static void __init of_unittest_parse_phandle_with_args_map(void)
memset(&args, 0, sizeof(args));
EXPECT_BEGIN(KERN_INFO,
- "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle");
+ "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678");
rc = of_parse_phandle_with_args_map(np, "phandle-list-bad-phandle",
"phandle", 0, &args);
EXPECT_END(KERN_INFO,
- "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle");
+ "OF: /testcase-data/phandle-tests/consumer-b: could not find phandle 12345678");
unittest(rc == -EINVAL, "expected:%i got:%i\n", -EINVAL, rc);
diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c
index bf3405f4289e..8b1dcd537020 100644
--- a/drivers/parisc/sba_iommu.c
+++ b/drivers/parisc/sba_iommu.c
@@ -121,6 +121,8 @@ module_param(sba_reserve_agpgart, int, 0444);
MODULE_PARM_DESC(sba_reserve_agpgart, "Reserve half of IO pdir as AGPGART");
#endif
+struct proc_dir_entry *proc_runway_root __ro_after_init;
+struct proc_dir_entry *proc_mckinley_root __ro_after_init;
/************************************
** SBA register read and write support
@@ -1968,11 +1970,15 @@ static int __init sba_driver_callback(struct parisc_device *dev)
#ifdef CONFIG_PROC_FS
switch (dev->id.hversion) {
case PLUTO_MCKINLEY_PORT:
+ if (!proc_mckinley_root)
+ proc_mckinley_root = proc_mkdir("bus/mckinley", NULL);
root = proc_mckinley_root;
break;
case ASTRO_RUNWAY_PORT:
case IKE_MERCED_PORT:
default:
+ if (!proc_runway_root)
+ proc_runway_root = proc_mkdir("bus/runway", NULL);
root = proc_runway_root;
break;
}
diff --git a/drivers/parport/parport_gsc.c b/drivers/parport/parport_gsc.c
index 0dcc497b0449..5e4475254bd0 100644
--- a/drivers/parport/parport_gsc.c
+++ b/drivers/parport/parport_gsc.c
@@ -28,7 +28,6 @@
#include <linux/sysctl.h>
#include <asm/io.h>
-#include <asm/dma.h>
#include <linux/uaccess.h>
#include <asm/superio.h>
@@ -226,9 +225,9 @@ static int parport_PS2_supported(struct parport *pb)
/* --- Initialisation code -------------------------------- */
-struct parport *parport_gsc_probe_port(unsigned long base,
+static struct parport *parport_gsc_probe_port(unsigned long base,
unsigned long base_hi, int irq,
- int dma, struct parisc_device *padev)
+ struct parisc_device *padev)
{
struct parport_gsc_private *priv;
struct parport_operations *ops;
@@ -250,12 +249,9 @@ struct parport *parport_gsc_probe_port(unsigned long base,
}
priv->ctr = 0xc;
priv->ctr_writable = 0xff;
- priv->dma_buf = NULL;
- priv->dma_handle = 0;
p->base = base;
p->base_hi = base_hi;
p->irq = irq;
- p->dma = dma;
p->modes = PARPORT_MODE_PCSPP | PARPORT_MODE_SAFEININT;
p->ops = ops;
p->private_data = priv;
@@ -286,17 +282,9 @@ struct parport *parport_gsc_probe_port(unsigned long base,
if (p->irq == PARPORT_IRQ_AUTO) {
p->irq = PARPORT_IRQ_NONE;
}
- if (p->irq != PARPORT_IRQ_NONE) {
+ if (p->irq != PARPORT_IRQ_NONE)
pr_cont(", irq %d", p->irq);
- if (p->dma == PARPORT_DMA_AUTO) {
- p->dma = PARPORT_DMA_NONE;
- }
- }
- if (p->dma == PARPORT_DMA_AUTO) /* To use DMA, giving the irq
- is mandatory (see above) */
- p->dma = PARPORT_DMA_NONE;
-
pr_cont(" [");
#define printmode(x) \
do { \
@@ -321,7 +309,6 @@ do { \
pr_warn("%s: irq %d in use, resorting to polled operation\n",
p->name, p->irq);
p->irq = PARPORT_IRQ_NONE;
- p->dma = PARPORT_DMA_NONE;
}
}
@@ -369,8 +356,7 @@ static int __init parport_init_chip(struct parisc_device *dev)
pr_info("%s: enhanced parport-modes not supported\n", __func__);
}
- p = parport_gsc_probe_port(port, 0, dev->irq,
- /* PARPORT_IRQ_NONE */ PARPORT_DMA_NONE, dev);
+ p = parport_gsc_probe_port(port, 0, dev->irq, dev);
if (p)
parport_count++;
dev_set_drvdata(&dev->dev, p);
@@ -382,16 +368,10 @@ static void __exit parport_remove_chip(struct parisc_device *dev)
{
struct parport *p = dev_get_drvdata(&dev->dev);
if (p) {
- struct parport_gsc_private *priv = p->private_data;
struct parport_operations *ops = p->ops;
parport_remove_port(p);
- if (p->dma != PARPORT_DMA_NONE)
- free_dma(p->dma);
if (p->irq != PARPORT_IRQ_NONE)
free_irq(p->irq, p);
- if (priv->dma_buf)
- dma_free_coherent(&priv->dev->dev, PAGE_SIZE,
- priv->dma_buf, priv->dma_handle);
kfree (p->private_data);
parport_put_port(p);
kfree (ops); /* hope no-one cached it */
diff --git a/drivers/parport/parport_gsc.h b/drivers/parport/parport_gsc.h
index 9301217edf12..d447a568c257 100644
--- a/drivers/parport/parport_gsc.h
+++ b/drivers/parport/parport_gsc.h
@@ -63,8 +63,6 @@ struct parport_gsc_private {
int writeIntrThreshold;
/* buffer suitable for DMA, if DMA enabled */
- char *dma_buf;
- dma_addr_t dma_handle;
struct pci_dev *dev;
};
@@ -199,9 +197,4 @@ extern void parport_gsc_inc_use_count(void);
extern void parport_gsc_dec_use_count(void);
-extern struct parport *parport_gsc_probe_port(unsigned long base,
- unsigned long base_hi,
- int irq, int dma,
- struct parisc_device *padev);
-
#endif /* __DRIVERS_PARPORT_PARPORT_GSC_H */
diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 5bc81cc0a2de..46b252bbe500 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -11,6 +11,7 @@
#include <linux/pci.h>
#include <linux/errno.h>
#include <linux/ioport.h>
+#include <linux/of.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
@@ -332,6 +333,7 @@ void __weak pcibios_bus_add_device(struct pci_dev *pdev) { }
*/
void pci_bus_add_device(struct pci_dev *dev)
{
+ struct device_node *dn = dev->dev.of_node;
int retval;
/*
@@ -344,7 +346,7 @@ void pci_bus_add_device(struct pci_dev *dev)
pci_proc_attach_device(dev);
pci_bridge_d3_update(dev);
- dev->match_driver = true;
+ dev->match_driver = !dn || of_device_is_available(dn);
retval = device_attach(&dev->dev);
if (retval < 0 && retval != -EPROBE_DEFER)
pci_warn(dev, "device attach failed (%d)\n", retval);
diff --git a/drivers/pci/controller/Kconfig b/drivers/pci/controller/Kconfig
index 8d49bad7f847..0859be86e718 100644
--- a/drivers/pci/controller/Kconfig
+++ b/drivers/pci/controller/Kconfig
@@ -179,7 +179,6 @@ config PCI_MVEBU
depends on MVEBU_MBUS
depends on ARM
depends on OF
- depends on BROKEN
select PCI_BRIDGE_EMUL
help
Add support for Marvell EBU PCIe controller. This PCIe controller
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index cf61733bf78d..9952057c8819 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -485,20 +485,15 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
if (ret)
goto err_remove_edma;
- if (dw_pcie_link_up(pci)) {
- dw_pcie_print_link_status(pci);
- } else {
+ if (!dw_pcie_link_up(pci)) {
ret = dw_pcie_start_link(pci);
if (ret)
goto err_remove_edma;
-
- if (pci->ops && pci->ops->start_link) {
- ret = dw_pcie_wait_for_link(pci);
- if (ret)
- goto err_stop_link;
- }
}
+ /* Ignore errors, the link may come up later */
+ dw_pcie_wait_for_link(pci);
+
bridge->sysdata = pp;
ret = pci_host_probe(bridge);
diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index c87848cd8686..1f2ee71da4da 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -644,20 +644,9 @@ void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index)
dw_pcie_writel_atu(pci, dir, index, PCIE_ATU_REGION_CTRL2, 0);
}
-void dw_pcie_print_link_status(struct dw_pcie *pci)
-{
- u32 offset, val;
-
- offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
- val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
-
- dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
- FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
- FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
-}
-
int dw_pcie_wait_for_link(struct dw_pcie *pci)
{
+ u32 offset, val;
int retries;
/* Check if the link is up or not */
@@ -673,7 +662,12 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci)
return -ETIMEDOUT;
}
- dw_pcie_print_link_status(pci);
+ offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+ val = dw_pcie_readw_dbi(pci, offset + PCI_EXP_LNKSTA);
+
+ dev_info(pci->dev, "PCIe Gen.%u x%u link up\n",
+ FIELD_GET(PCI_EXP_LNKSTA_CLS, val),
+ FIELD_GET(PCI_EXP_LNKSTA_NLW, val));
return 0;
}
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 615660640801..79713ce075cc 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -429,7 +429,6 @@ void dw_pcie_setup(struct dw_pcie *pci);
void dw_pcie_iatu_detect(struct dw_pcie *pci);
int dw_pcie_edma_detect(struct dw_pcie *pci);
void dw_pcie_edma_remove(struct dw_pcie *pci);
-void dw_pcie_print_link_status(struct dw_pcie *pci);
static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val)
{
diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 328d1e416014..601129772b2d 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -498,6 +498,7 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
acpiphp_native_scan_bridge(dev);
}
} else {
+ LIST_HEAD(add_list);
int max, pass;
acpiphp_rescan_slot(slot);
@@ -511,10 +512,15 @@ static void enable_slot(struct acpiphp_slot *slot, bool bridge)
if (pass && dev->subordinate) {
check_hotplug_bridge(slot, dev);
pcibios_resource_survey_bus(dev->subordinate);
+ if (pci_is_root_bus(bus))
+ __pci_bus_size_bridges(dev->subordinate, &add_list);
}
}
}
- pci_assign_unassigned_bridge_resources(bus->self);
+ if (pci_is_root_bus(bus))
+ __pci_bus_assign_resources(bus, &add_list, NULL);
+ else
+ pci_assign_unassigned_bridge_resources(bus->self);
}
acpiphp_sanitize_bus(bus);
diff --git a/drivers/pci/of.c b/drivers/pci/of.c
index e51219f9f523..3c158b17dcb5 100644
--- a/drivers/pci/of.c
+++ b/drivers/pci/of.c
@@ -34,11 +34,6 @@ int pci_set_of_node(struct pci_dev *dev)
if (!node)
return 0;
- if (!of_device_is_available(node)) {
- of_node_put(node);
- return -ENODEV;
- }
-
device_set_node(&dev->dev, of_fwnode_handle(node));
return 0;
}
diff --git a/drivers/phy/hisilicon/phy-hisi-inno-usb2.c b/drivers/phy/hisilicon/phy-hisi-inno-usb2.c
index 15dafe359552..6ae6d509dfdd 100644
--- a/drivers/phy/hisilicon/phy-hisi-inno-usb2.c
+++ b/drivers/phy/hisilicon/phy-hisi-inno-usb2.c
@@ -184,7 +184,7 @@ static int hisi_inno_phy_probe(struct platform_device *pdev)
phy_set_drvdata(phy, &priv->ports[i]);
i++;
- if (i > INNO_PHY_PORT_NUM) {
+ if (i >= INNO_PHY_PORT_NUM) {
dev_warn(dev, "Support %d ports in maximum\n", i);
of_node_put(child);
break;
diff --git a/drivers/phy/mediatek/phy-mtk-dp.c b/drivers/phy/mediatek/phy-mtk-dp.c
index 232fd3f1ff1b..d7024a144335 100644
--- a/drivers/phy/mediatek/phy-mtk-dp.c
+++ b/drivers/phy/mediatek/phy-mtk-dp.c
@@ -169,7 +169,7 @@ static int mtk_dp_phy_probe(struct platform_device *pdev)
regs = *(struct regmap **)dev->platform_data;
if (!regs)
- return dev_err_probe(dev, EINVAL,
+ return dev_err_probe(dev, -EINVAL,
"No data passed, requires struct regmap**\n");
dp_phy = devm_kzalloc(dev, sizeof(*dp_phy), GFP_KERNEL);
diff --git a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
index 8aa7251de4a9..bbfe11d6a69d 100644
--- a/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
+++ b/drivers/phy/mediatek/phy-mtk-hdmi-mt8195.c
@@ -253,7 +253,7 @@ static int mtk_hdmi_pll_calc(struct mtk_hdmi_phy *hdmi_phy, struct clk_hw *hw,
for (i = 0; i < ARRAY_SIZE(txpredivs); i++) {
ns_hdmipll_ck = 5 * tmds_clk * txposdiv * txpredivs[i];
if (ns_hdmipll_ck >= 5 * GIGA &&
- ns_hdmipll_ck <= 1 * GIGA)
+ ns_hdmipll_ck <= 12 * GIGA)
break;
}
if (i == (ARRAY_SIZE(txpredivs) - 1) &&
diff --git a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
index 6c237f3cc66d..d0319bee01c0 100644
--- a/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
+++ b/drivers/phy/qualcomm/phy-qcom-snps-femto-v2.c
@@ -110,11 +110,13 @@ struct phy_override_seq {
/**
* struct qcom_snps_hsphy - snps hs phy attributes
*
+ * @dev: device structure
+ *
* @phy: generic phy
* @base: iomapped memory space for snps hs phy
*
- * @cfg_ahb_clk: AHB2PHY interface clock
- * @ref_clk: phy reference clock
+ * @num_clks: number of clocks
+ * @clks: array of clocks
* @phy_reset: phy reset control
* @vregs: regulator supplies bulk data
* @phy_initialized: if PHY has been initialized correctly
@@ -122,11 +124,13 @@ struct phy_override_seq {
* @update_seq_cfg: tuning parameters for phy init
*/
struct qcom_snps_hsphy {
+ struct device *dev;
+
struct phy *phy;
void __iomem *base;
- struct clk *cfg_ahb_clk;
- struct clk *ref_clk;
+ int num_clks;
+ struct clk_bulk_data *clks;
struct reset_control *phy_reset;
struct regulator_bulk_data vregs[SNPS_HS_NUM_VREGS];
@@ -135,6 +139,34 @@ struct qcom_snps_hsphy {
struct phy_override_seq update_seq_cfg[NUM_HSPHY_TUNING_PARAMS];
};
+static int qcom_snps_hsphy_clk_init(struct qcom_snps_hsphy *hsphy)
+{
+ struct device *dev = hsphy->dev;
+
+ hsphy->num_clks = 2;
+ hsphy->clks = devm_kcalloc(dev, hsphy->num_clks, sizeof(*hsphy->clks), GFP_KERNEL);
+ if (!hsphy->clks)
+ return -ENOMEM;
+
+ /*
+ * TODO: Currently no device tree instantiation of the PHY is using the clock.
+ * This needs to be fixed in order for this code to be able to use devm_clk_bulk_get().
+ */
+ hsphy->clks[0].id = "cfg_ahb";
+ hsphy->clks[0].clk = devm_clk_get_optional(dev, "cfg_ahb");
+ if (IS_ERR(hsphy->clks[0].clk))
+ return dev_err_probe(dev, PTR_ERR(hsphy->clks[0].clk),
+ "failed to get cfg_ahb clk\n");
+
+ hsphy->clks[1].id = "ref";
+ hsphy->clks[1].clk = devm_clk_get(dev, "ref");
+ if (IS_ERR(hsphy->clks[1].clk))
+ return dev_err_probe(dev, PTR_ERR(hsphy->clks[1].clk),
+ "failed to get ref clk\n");
+
+ return 0;
+}
+
static inline void qcom_snps_hsphy_write_mask(void __iomem *base, u32 offset,
u32 mask, u32 val)
{
@@ -165,22 +197,13 @@ static int qcom_snps_hsphy_suspend(struct qcom_snps_hsphy *hsphy)
0, USB2_AUTO_RESUME);
}
- clk_disable_unprepare(hsphy->cfg_ahb_clk);
return 0;
}
static int qcom_snps_hsphy_resume(struct qcom_snps_hsphy *hsphy)
{
- int ret;
-
dev_dbg(&hsphy->phy->dev, "Resume QCOM SNPS PHY, mode\n");
- ret = clk_prepare_enable(hsphy->cfg_ahb_clk);
- if (ret) {
- dev_err(&hsphy->phy->dev, "failed to enable cfg ahb clock\n");
- return ret;
- }
-
return 0;
}
@@ -191,8 +214,7 @@ static int __maybe_unused qcom_snps_hsphy_runtime_suspend(struct device *dev)
if (!hsphy->phy_initialized)
return 0;
- qcom_snps_hsphy_suspend(hsphy);
- return 0;
+ return qcom_snps_hsphy_suspend(hsphy);
}
static int __maybe_unused qcom_snps_hsphy_runtime_resume(struct device *dev)
@@ -202,8 +224,7 @@ static int __maybe_unused qcom_snps_hsphy_runtime_resume(struct device *dev)
if (!hsphy->phy_initialized)
return 0;
- qcom_snps_hsphy_resume(hsphy);
- return 0;
+ return qcom_snps_hsphy_resume(hsphy);
}
static int qcom_snps_hsphy_set_mode(struct phy *phy, enum phy_mode mode,
@@ -374,16 +395,16 @@ static int qcom_snps_hsphy_init(struct phy *phy)
if (ret)
return ret;
- ret = clk_prepare_enable(hsphy->cfg_ahb_clk);
+ ret = clk_bulk_prepare_enable(hsphy->num_clks, hsphy->clks);
if (ret) {
- dev_err(&phy->dev, "failed to enable cfg ahb clock, %d\n", ret);
+ dev_err(&phy->dev, "failed to enable clocks, %d\n", ret);
goto poweroff_phy;
}
ret = reset_control_assert(hsphy->phy_reset);
if (ret) {
dev_err(&phy->dev, "failed to assert phy_reset, %d\n", ret);
- goto disable_ahb_clk;
+ goto disable_clks;
}
usleep_range(100, 150);
@@ -391,7 +412,7 @@ static int qcom_snps_hsphy_init(struct phy *phy)
ret = reset_control_deassert(hsphy->phy_reset);
if (ret) {
dev_err(&phy->dev, "failed to de-assert phy_reset, %d\n", ret);
- goto disable_ahb_clk;
+ goto disable_clks;
}
qcom_snps_hsphy_write_mask(hsphy->base, USB2_PHY_USB_PHY_CFG0,
@@ -448,8 +469,8 @@ static int qcom_snps_hsphy_init(struct phy *phy)
return 0;
-disable_ahb_clk:
- clk_disable_unprepare(hsphy->cfg_ahb_clk);
+disable_clks:
+ clk_bulk_disable_unprepare(hsphy->num_clks, hsphy->clks);
poweroff_phy:
regulator_bulk_disable(ARRAY_SIZE(hsphy->vregs), hsphy->vregs);
@@ -461,7 +482,7 @@ static int qcom_snps_hsphy_exit(struct phy *phy)
struct qcom_snps_hsphy *hsphy = phy_get_drvdata(phy);
reset_control_assert(hsphy->phy_reset);
- clk_disable_unprepare(hsphy->cfg_ahb_clk);
+ clk_bulk_disable_unprepare(hsphy->num_clks, hsphy->clks);
regulator_bulk_disable(ARRAY_SIZE(hsphy->vregs), hsphy->vregs);
hsphy->phy_initialized = false;
@@ -554,14 +575,15 @@ static int qcom_snps_hsphy_probe(struct platform_device *pdev)
if (!hsphy)
return -ENOMEM;
+ hsphy->dev = dev;
+
hsphy->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(hsphy->base))
return PTR_ERR(hsphy->base);
- hsphy->ref_clk = devm_clk_get(dev, "ref");
- if (IS_ERR(hsphy->ref_clk))
- return dev_err_probe(dev, PTR_ERR(hsphy->ref_clk),
- "failed to get ref clk\n");
+ ret = qcom_snps_hsphy_clk_init(hsphy);
+ if (ret)
+ return dev_err_probe(dev, ret, "failed to initialize clocks\n");
hsphy->phy_reset = devm_reset_control_get_exclusive(&pdev->dev, NULL);
if (IS_ERR(hsphy->phy_reset)) {
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index 4a8c1b57a90d..4dff656af3ad 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -862,6 +862,33 @@ static const struct pinconf_ops amd_pinconf_ops = {
.pin_config_group_set = amd_pinconf_group_set,
};
+static void amd_gpio_irq_init(struct amd_gpio *gpio_dev)
+{
+ struct pinctrl_desc *desc = gpio_dev->pctrl->desc;
+ unsigned long flags;
+ u32 pin_reg, mask;
+ int i;
+
+ mask = BIT(WAKE_CNTRL_OFF_S0I3) | BIT(WAKE_CNTRL_OFF_S3) |
+ BIT(WAKE_CNTRL_OFF_S4);
+
+ for (i = 0; i < desc->npins; i++) {
+ int pin = desc->pins[i].number;
+ const struct pin_desc *pd = pin_desc_get(gpio_dev->pctrl, pin);
+
+ if (!pd)
+ continue;
+
+ raw_spin_lock_irqsave(&gpio_dev->lock, flags);
+
+ pin_reg = readl(gpio_dev->base + pin * 4);
+ pin_reg &= ~mask;
+ writel(pin_reg, gpio_dev->base + pin * 4);
+
+ raw_spin_unlock_irqrestore(&gpio_dev->lock, flags);
+ }
+}
+
#ifdef CONFIG_PM_SLEEP
static bool amd_gpio_should_save(struct amd_gpio *gpio_dev, unsigned int pin)
{
@@ -1099,6 +1126,9 @@ static int amd_gpio_probe(struct platform_device *pdev)
return PTR_ERR(gpio_dev->pctrl);
}
+ /* Disable and mask interrupts */
+ amd_gpio_irq_init(gpio_dev);
+
girq = &gpio_dev->gc.irq;
gpio_irq_chip_set_chip(girq, &amd_gpio_irqchip);
/* This will let us handle the parent IRQ in the driver */
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c
index 2585ef2b2793..115b83e2d8e6 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.c
+++ b/drivers/pinctrl/qcom/pinctrl-msm.c
@@ -1038,6 +1038,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct msm_pinctrl *pctrl = gpiochip_get_data(gc);
const struct msm_pingroup *g;
+ u32 intr_target_mask = GENMASK(2, 0);
unsigned long flags;
bool was_enabled;
u32 val;
@@ -1074,13 +1075,15 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
* With intr_target_use_scm interrupts are routed to
* application cpu using scm calls.
*/
+ if (g->intr_target_width)
+ intr_target_mask = GENMASK(g->intr_target_width - 1, 0);
+
if (pctrl->intr_target_use_scm) {
u32 addr = pctrl->phys_base[0] + g->intr_target_reg;
int ret;
qcom_scm_io_readl(addr, &val);
-
- val &= ~(7 << g->intr_target_bit);
+ val &= ~(intr_target_mask << g->intr_target_bit);
val |= g->intr_target_kpss_val << g->intr_target_bit;
ret = qcom_scm_io_writel(addr, val);
@@ -1090,7 +1093,7 @@ static int msm_gpio_irq_set_type(struct irq_data *d, unsigned int type)
d->hwirq);
} else {
val = msm_readl_intr_target(pctrl, g);
- val &= ~(7 << g->intr_target_bit);
+ val &= ~(intr_target_mask << g->intr_target_bit);
val |= g->intr_target_kpss_val << g->intr_target_bit;
msm_writel_intr_target(val, pctrl, g);
}
diff --git a/drivers/pinctrl/qcom/pinctrl-msm.h b/drivers/pinctrl/qcom/pinctrl-msm.h
index 5e4410bed823..1d2f2e904da1 100644
--- a/drivers/pinctrl/qcom/pinctrl-msm.h
+++ b/drivers/pinctrl/qcom/pinctrl-msm.h
@@ -59,6 +59,7 @@ struct pinctrl_pin_desc;
* @intr_status_bit: Offset in @intr_status_reg for reading and acking the interrupt
* status.
* @intr_target_bit: Offset in @intr_target_reg for configuring the interrupt routing.
+ * @intr_target_width: Number of bits used for specifying interrupt routing target.
* @intr_target_kpss_val: Value in @intr_target_bit for specifying that the interrupt from
* this gpio should get routed to the KPSS processor.
* @intr_raw_status_bit: Offset in @intr_cfg_reg for the raw status bit.
@@ -100,6 +101,7 @@ struct msm_pingroup {
unsigned intr_ack_high:1;
unsigned intr_target_bit:5;
+ unsigned intr_target_width:5;
unsigned intr_target_kpss_val:5;
unsigned intr_raw_status_bit:5;
unsigned intr_polarity_bit:5;
diff --git a/drivers/pinctrl/qcom/pinctrl-sa8775p.c b/drivers/pinctrl/qcom/pinctrl-sa8775p.c
index 8a5cd15512b9..8fdea25d8d67 100644
--- a/drivers/pinctrl/qcom/pinctrl-sa8775p.c
+++ b/drivers/pinctrl/qcom/pinctrl-sa8775p.c
@@ -46,6 +46,7 @@
.intr_enable_bit = 0, \
.intr_status_bit = 0, \
.intr_target_bit = 5, \
+ .intr_target_width = 4, \
.intr_target_kpss_val = 3, \
.intr_raw_status_bit = 4, \
.intr_polarity_bit = 1, \
diff --git a/drivers/pinctrl/renesas/pinctrl-rza2.c b/drivers/pinctrl/renesas/pinctrl-rza2.c
index 40b1326a1077..5591ddf16fdf 100644
--- a/drivers/pinctrl/renesas/pinctrl-rza2.c
+++ b/drivers/pinctrl/renesas/pinctrl-rza2.c
@@ -14,6 +14,7 @@
#include <linux/gpio/driver.h>
#include <linux/io.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/of_device.h>
#include <linux/pinctrl/pinmux.h>
@@ -46,6 +47,7 @@ struct rza2_pinctrl_priv {
struct pinctrl_dev *pctl;
struct pinctrl_gpio_range gpio_range;
int npins;
+ struct mutex mutex; /* serialize adding groups and functions */
};
#define RZA2_PDR(port) (0x0000 + (port) * 2) /* Direction 16-bit */
@@ -358,10 +360,14 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
psel_val[i] = MUX_FUNC(value);
}
+ mutex_lock(&priv->mutex);
+
/* Register a single pin group listing all the pins we read from DT */
gsel = pinctrl_generic_add_group(pctldev, np->name, pins, npins, NULL);
- if (gsel < 0)
- return gsel;
+ if (gsel < 0) {
+ ret = gsel;
+ goto unlock;
+ }
/*
* Register a single group function where the 'data' is an array PSEL
@@ -390,6 +396,8 @@ static int rza2_dt_node_to_map(struct pinctrl_dev *pctldev,
(*map)->data.mux.function = np->name;
*num_maps = 1;
+ mutex_unlock(&priv->mutex);
+
return 0;
remove_function:
@@ -398,6 +406,9 @@ remove_function:
remove_group:
pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+ mutex_unlock(&priv->mutex);
+
dev_err(priv->dev, "Unable to parse DT node %s\n", np->name);
return ret;
@@ -473,6 +484,8 @@ static int rza2_pinctrl_probe(struct platform_device *pdev)
if (IS_ERR(priv->base))
return PTR_ERR(priv->base);
+ mutex_init(&priv->mutex);
+
platform_set_drvdata(pdev, priv);
priv->npins = (int)(uintptr_t)of_device_get_match_data(&pdev->dev) *
diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
index b53d26167da5..6e8a76556e23 100644
--- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
@@ -11,6 +11,7 @@
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/seq_file.h>
@@ -149,10 +150,11 @@ struct rzg2l_pinctrl {
struct gpio_chip gpio_chip;
struct pinctrl_gpio_range gpio_range;
DECLARE_BITMAP(tint_slot, RZG2L_TINT_MAX_INTERRUPT);
- spinlock_t bitmap_lock;
+ spinlock_t bitmap_lock; /* protect tint_slot bitmap */
unsigned int hwirq[RZG2L_TINT_MAX_INTERRUPT];
- spinlock_t lock;
+ spinlock_t lock; /* lock read/write registers */
+ struct mutex mutex; /* serialize adding groups and functions */
};
static const unsigned int iolh_groupa_mA[] = { 2, 4, 8, 12 };
@@ -362,11 +364,13 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
name = np->name;
}
+ mutex_lock(&pctrl->mutex);
+
/* Register a single pin group listing all the pins we read from DT */
gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
if (gsel < 0) {
ret = gsel;
- goto done;
+ goto unlock;
}
/*
@@ -380,6 +384,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
goto remove_group;
}
+ mutex_unlock(&pctrl->mutex);
+
maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
maps[idx].data.mux.group = name;
maps[idx].data.mux.function = name;
@@ -391,6 +397,8 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
remove_group:
pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+ mutex_unlock(&pctrl->mutex);
done:
*index = idx;
kfree(configs);
@@ -1509,6 +1517,7 @@ static int rzg2l_pinctrl_probe(struct platform_device *pdev)
spin_lock_init(&pctrl->lock);
spin_lock_init(&pctrl->bitmap_lock);
+ mutex_init(&pctrl->mutex);
platform_set_drvdata(pdev, pctrl);
diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c
index 35b23c1a5684..9146101ea9e2 100644
--- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c
+++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c
@@ -14,6 +14,7 @@
#include <linux/gpio/driver.h>
#include <linux/io.h>
#include <linux/module.h>
+#include <linux/mutex.h>
#include <linux/of_device.h>
#include <linux/spinlock.h>
@@ -123,7 +124,8 @@ struct rzv2m_pinctrl {
struct gpio_chip gpio_chip;
struct pinctrl_gpio_range gpio_range;
- spinlock_t lock;
+ spinlock_t lock; /* lock read/write registers */
+ struct mutex mutex; /* serialize adding groups and functions */
};
static const unsigned int drv_1_8V_group2_uA[] = { 1800, 3800, 7800, 11000 };
@@ -322,11 +324,13 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
name = np->name;
}
+ mutex_lock(&pctrl->mutex);
+
/* Register a single pin group listing all the pins we read from DT */
gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
if (gsel < 0) {
ret = gsel;
- goto done;
+ goto unlock;
}
/*
@@ -340,6 +344,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
goto remove_group;
}
+ mutex_unlock(&pctrl->mutex);
+
maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
maps[idx].data.mux.group = name;
maps[idx].data.mux.function = name;
@@ -351,6 +357,8 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
remove_group:
pinctrl_generic_remove_group(pctldev, gsel);
+unlock:
+ mutex_unlock(&pctrl->mutex);
done:
*index = idx;
kfree(configs);
@@ -1071,6 +1079,7 @@ static int rzv2m_pinctrl_probe(struct platform_device *pdev)
}
spin_lock_init(&pctrl->lock);
+ mutex_init(&pctrl->mutex);
platform_set_drvdata(pdev, pctrl);
diff --git a/drivers/platform/mellanox/mlxbf-tmfifo.c b/drivers/platform/mellanox/mlxbf-tmfifo.c
index a79318e90a13..b600b77d91ef 100644
--- a/drivers/platform/mellanox/mlxbf-tmfifo.c
+++ b/drivers/platform/mellanox/mlxbf-tmfifo.c
@@ -887,6 +887,7 @@ static bool mlxbf_tmfifo_virtio_notify(struct virtqueue *vq)
tm_vdev = fifo->vdev[VIRTIO_ID_CONSOLE];
mlxbf_tmfifo_console_output(tm_vdev, vring);
spin_unlock_irqrestore(&fifo->spin_lock[0], flags);
+ set_bit(MLXBF_TM_TX_LWM_IRQ, &fifo->pend_events);
} else if (test_and_set_bit(MLXBF_TM_TX_LWM_IRQ,
&fifo->pend_events)) {
return true;
diff --git a/drivers/platform/x86/amd/pmc-quirks.c b/drivers/platform/x86/amd/pmc-quirks.c
index 362e7c0097d7..ad702463a65d 100644
--- a/drivers/platform/x86/amd/pmc-quirks.c
+++ b/drivers/platform/x86/amd/pmc-quirks.c
@@ -11,7 +11,6 @@
#include <linux/dmi.h>
#include <linux/io.h>
#include <linux/ioport.h>
-#include <linux/slab.h>
#include "pmc.h"
@@ -135,12 +134,10 @@ static const struct dmi_system_id fwbug_list[] = {
*/
static void amd_pmc_skip_nvme_smi_handler(u32 s2idle_bug_mmio)
{
- struct resource *res;
void __iomem *addr;
u8 val;
- res = request_mem_region_muxed(s2idle_bug_mmio, 1, "amd_pmc_pm80");
- if (!res)
+ if (!request_mem_region_muxed(s2idle_bug_mmio, 1, "amd_pmc_pm80"))
return;
addr = ioremap(s2idle_bug_mmio, 1);
@@ -152,8 +149,7 @@ static void amd_pmc_skip_nvme_smi_handler(u32 s2idle_bug_mmio)
iounmap(addr);
cleanup_resource:
- release_resource(res);
- kfree(res);
+ release_mem_region(s2idle_bug_mmio, 1);
}
void amd_pmc_process_restore_quirks(struct amd_pmc_dev *dev)
diff --git a/drivers/platform/x86/amd/pmf/acpi.c b/drivers/platform/x86/amd/pmf/acpi.c
index 081e84e116e7..3fc5e4547d9f 100644
--- a/drivers/platform/x86/amd/pmf/acpi.c
+++ b/drivers/platform/x86/amd/pmf/acpi.c
@@ -106,6 +106,27 @@ int apmf_get_static_slider_granular(struct amd_pmf_dev *pdev,
data, sizeof(*data));
}
+int apmf_os_power_slider_update(struct amd_pmf_dev *pdev, u8 event)
+{
+ struct os_power_slider args;
+ struct acpi_buffer params;
+ union acpi_object *info;
+ int err = 0;
+
+ args.size = sizeof(args);
+ args.slider_event = event;
+
+ params.length = sizeof(args);
+ params.pointer = (void *)&args;
+
+ info = apmf_if_call(pdev, APMF_FUNC_OS_POWER_SLIDER_UPDATE, &params);
+ if (!info)
+ err = -EIO;
+
+ kfree(info);
+ return err;
+}
+
static void apmf_sbios_heartbeat_notify(struct work_struct *work)
{
struct amd_pmf_dev *dev = container_of(work, struct amd_pmf_dev, heart_beat.work);
@@ -289,7 +310,7 @@ int apmf_acpi_init(struct amd_pmf_dev *pmf_dev)
ret = apmf_get_system_params(pmf_dev);
if (ret) {
- dev_err(pmf_dev->dev, "APMF apmf_get_system_params failed :%d\n", ret);
+ dev_dbg(pmf_dev->dev, "APMF apmf_get_system_params failed :%d\n", ret);
goto out;
}
diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
index d8732557f9db..57bf1a9f0e76 100644
--- a/drivers/platform/x86/amd/pmf/core.c
+++ b/drivers/platform/x86/amd/pmf/core.c
@@ -72,7 +72,11 @@ static int amd_pmf_pwr_src_notify_call(struct notifier_block *nb, unsigned long
return NOTIFY_DONE;
}
- amd_pmf_set_sps_power_limits(pmf);
+ if (is_apmf_func_supported(pmf, APMF_FUNC_STATIC_SLIDER_GRANULAR))
+ amd_pmf_set_sps_power_limits(pmf);
+
+ if (is_apmf_func_supported(pmf, APMF_FUNC_OS_POWER_SLIDER_UPDATE))
+ amd_pmf_power_slider_update_event(pmf);
return NOTIFY_OK;
}
@@ -297,7 +301,8 @@ static void amd_pmf_init_features(struct amd_pmf_dev *dev)
int ret;
/* Enable Static Slider */
- if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
+ if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR) ||
+ is_apmf_func_supported(dev, APMF_FUNC_OS_POWER_SLIDER_UPDATE)) {
amd_pmf_init_sps(dev);
dev->pwr_src_notifier.notifier_call = amd_pmf_pwr_src_notify_call;
power_supply_reg_notifier(&dev->pwr_src_notifier);
diff --git a/drivers/platform/x86/amd/pmf/pmf.h b/drivers/platform/x86/amd/pmf/pmf.h
index 06c30cdc0573..deba88e6e4c8 100644
--- a/drivers/platform/x86/amd/pmf/pmf.h
+++ b/drivers/platform/x86/amd/pmf/pmf.h
@@ -21,6 +21,7 @@
#define APMF_FUNC_SBIOS_HEARTBEAT 4
#define APMF_FUNC_AUTO_MODE 5
#define APMF_FUNC_SET_FAN_IDX 7
+#define APMF_FUNC_OS_POWER_SLIDER_UPDATE 8
#define APMF_FUNC_STATIC_SLIDER_GRANULAR 9
#define APMF_FUNC_DYN_SLIDER_AC 11
#define APMF_FUNC_DYN_SLIDER_DC 12
@@ -44,6 +45,14 @@
#define GET_STT_LIMIT_APU 0x20
#define GET_STT_LIMIT_HS2 0x21
+/* OS slider update notification */
+#define DC_BEST_PERF 0
+#define DC_BETTER_PERF 1
+#define DC_BATTERY_SAVER 3
+#define AC_BEST_PERF 4
+#define AC_BETTER_PERF 5
+#define AC_BETTER_BATTERY 6
+
/* Fan Index for Auto Mode */
#define FAN_INDEX_AUTO 0xFFFFFFFF
@@ -193,6 +202,11 @@ struct amd_pmf_static_slider_granular {
struct apmf_sps_prop_granular prop[POWER_SOURCE_MAX][POWER_MODE_MAX];
};
+struct os_power_slider {
+ u16 size;
+ u8 slider_event;
+} __packed;
+
struct fan_table_control {
bool manual;
unsigned long fan_id;
@@ -383,6 +397,7 @@ int amd_pmf_send_cmd(struct amd_pmf_dev *dev, u8 message, bool get, u32 arg, u32
int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev);
int amd_pmf_get_power_source(void);
int apmf_install_handler(struct amd_pmf_dev *pmf_dev);
+int apmf_os_power_slider_update(struct amd_pmf_dev *dev, u8 flag);
/* SPS Layer */
int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf);
@@ -393,6 +408,7 @@ void amd_pmf_deinit_sps(struct amd_pmf_dev *dev);
int apmf_get_static_slider_granular(struct amd_pmf_dev *pdev,
struct apmf_static_slider_granular_output *output);
bool is_pprof_balanced(struct amd_pmf_dev *pmf);
+int amd_pmf_power_slider_update_event(struct amd_pmf_dev *dev);
int apmf_update_fan_idx(struct amd_pmf_dev *pdev, bool manual, u32 idx);
diff --git a/drivers/platform/x86/amd/pmf/sps.c b/drivers/platform/x86/amd/pmf/sps.c
index 445ff053b4df..a70e67749be3 100644
--- a/drivers/platform/x86/amd/pmf/sps.c
+++ b/drivers/platform/x86/amd/pmf/sps.c
@@ -174,14 +174,78 @@ int amd_pmf_get_pprof_modes(struct amd_pmf_dev *pmf)
return mode;
}
+int amd_pmf_power_slider_update_event(struct amd_pmf_dev *dev)
+{
+ u8 flag = 0;
+ int mode;
+ int src;
+
+ mode = amd_pmf_get_pprof_modes(dev);
+ if (mode < 0)
+ return mode;
+
+ src = amd_pmf_get_power_source();
+
+ if (src == POWER_SOURCE_AC) {
+ switch (mode) {
+ case POWER_MODE_PERFORMANCE:
+ flag |= BIT(AC_BEST_PERF);
+ break;
+ case POWER_MODE_BALANCED_POWER:
+ flag |= BIT(AC_BETTER_PERF);
+ break;
+ case POWER_MODE_POWER_SAVER:
+ flag |= BIT(AC_BETTER_BATTERY);
+ break;
+ default:
+ dev_err(dev->dev, "unsupported platform profile\n");
+ return -EOPNOTSUPP;
+ }
+
+ } else if (src == POWER_SOURCE_DC) {
+ switch (mode) {
+ case POWER_MODE_PERFORMANCE:
+ flag |= BIT(DC_BEST_PERF);
+ break;
+ case POWER_MODE_BALANCED_POWER:
+ flag |= BIT(DC_BETTER_PERF);
+ break;
+ case POWER_MODE_POWER_SAVER:
+ flag |= BIT(DC_BATTERY_SAVER);
+ break;
+ default:
+ dev_err(dev->dev, "unsupported platform profile\n");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ apmf_os_power_slider_update(dev, flag);
+
+ return 0;
+}
+
static int amd_pmf_profile_set(struct platform_profile_handler *pprof,
enum platform_profile_option profile)
{
struct amd_pmf_dev *pmf = container_of(pprof, struct amd_pmf_dev, pprof);
+ int ret = 0;
pmf->current_profile = profile;
- return amd_pmf_set_sps_power_limits(pmf);
+ /* Notify EC about the slider position change */
+ if (is_apmf_func_supported(pmf, APMF_FUNC_OS_POWER_SLIDER_UPDATE)) {
+ ret = amd_pmf_power_slider_update_event(pmf);
+ if (ret)
+ return ret;
+ }
+
+ if (is_apmf_func_supported(pmf, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
+ ret = amd_pmf_set_sps_power_limits(pmf);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
int amd_pmf_init_sps(struct amd_pmf_dev *dev)
@@ -189,10 +253,13 @@ int amd_pmf_init_sps(struct amd_pmf_dev *dev)
int err;
dev->current_profile = PLATFORM_PROFILE_BALANCED;
- amd_pmf_load_defaults_sps(dev);
- /* update SPS balanced power mode thermals */
- amd_pmf_set_sps_power_limits(dev);
+ if (is_apmf_func_supported(dev, APMF_FUNC_STATIC_SLIDER_GRANULAR)) {
+ amd_pmf_load_defaults_sps(dev);
+
+ /* update SPS balanced power mode thermals */
+ amd_pmf_set_sps_power_limits(dev);
+ }
dev->pprof.profile_get = amd_pmf_profile_get;
dev->pprof.profile_set = amd_pmf_profile_set;
diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 1038dfdcdd32..8bef66a2f0ce 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -738,13 +738,23 @@ static ssize_t kbd_rgb_mode_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
- u32 cmd, mode, r, g, b, speed;
+ u32 cmd, mode, r, g, b, speed;
int err;
if (sscanf(buf, "%d %d %d %d %d %d", &cmd, &mode, &r, &g, &b, &speed) != 6)
return -EINVAL;
- cmd = !!cmd;
+ /* B3 is set and B4 is save to BIOS */
+ switch (cmd) {
+ case 0:
+ cmd = 0xb3;
+ break;
+ case 1:
+ cmd = 0xb4;
+ break;
+ default:
+ return -EINVAL;
+ }
/* These are the known usable modes across all TUF/ROG */
if (mode >= 12 || mode == 9)
diff --git a/drivers/platform/x86/huawei-wmi.c b/drivers/platform/x86/huawei-wmi.c
index 70e5c4c0574d..0ef1c46b617b 100644
--- a/drivers/platform/x86/huawei-wmi.c
+++ b/drivers/platform/x86/huawei-wmi.c
@@ -85,6 +85,8 @@ static const struct key_entry huawei_wmi_keymap[] = {
{ KE_IGNORE, 0x293, { KEY_KBDILLUMTOGGLE } },
{ KE_IGNORE, 0x294, { KEY_KBDILLUMUP } },
{ KE_IGNORE, 0x295, { KEY_KBDILLUMUP } },
+ // Ignore Ambient Light Sensoring
+ { KE_KEY, 0x2c1, { KEY_RESERVED } },
{ KE_END, 0 }
};
diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index d2fee9a3e239..6d9297c1d96c 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c
@@ -1049,6 +1049,11 @@ static const struct key_entry ideapad_keymap[] = {
{ KE_IGNORE, 0x03 | IDEAPAD_WMI_KEY },
/* Customizable Lenovo Hotkey ("star" with 'S' inside) */
{ KE_KEY, 0x01 | IDEAPAD_WMI_KEY, { KEY_FAVORITES } },
+ { KE_KEY, 0x04 | IDEAPAD_WMI_KEY, { KEY_SELECTIVE_SCREENSHOT } },
+ /* Lenovo Support */
+ { KE_KEY, 0x07 | IDEAPAD_WMI_KEY, { KEY_HELP } },
+ { KE_KEY, 0x0e | IDEAPAD_WMI_KEY, { KEY_PICKUP_PHONE } },
+ { KE_KEY, 0x0f | IDEAPAD_WMI_KEY, { KEY_HANGUP_PHONE } },
/* Dark mode toggle */
{ KE_KEY, 0x13 | IDEAPAD_WMI_KEY, { KEY_PROG1 } },
/* Sound profile switch */
diff --git a/drivers/platform/x86/intel/hid.c b/drivers/platform/x86/intel/hid.c
index 5632bd3c534a..7457ca2b27a6 100644
--- a/drivers/platform/x86/intel/hid.c
+++ b/drivers/platform/x86/intel/hid.c
@@ -150,6 +150,12 @@ static const struct dmi_system_id dmi_vgbs_allow_list[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go"),
},
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "HP"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "HP Elite Dragonfly G2 Notebook PC"),
+ },
+ },
{ }
};
@@ -620,7 +626,7 @@ static bool button_array_present(struct platform_device *device)
static int intel_hid_probe(struct platform_device *device)
{
acpi_handle handle = ACPI_HANDLE(&device->dev);
- unsigned long long mode;
+ unsigned long long mode, dummy;
struct intel_hid_priv *priv;
acpi_status status;
int err;
@@ -692,18 +698,15 @@ static int intel_hid_probe(struct platform_device *device)
if (err)
goto err_remove_notify;
- if (priv->array) {
- unsigned long long dummy;
+ intel_button_array_enable(&device->dev, true);
- intel_button_array_enable(&device->dev, true);
-
- /* Call button load method to enable HID power button */
- if (!intel_hid_evaluate_method(handle, INTEL_HID_DSM_BTNL_FN,
- &dummy)) {
- dev_warn(&device->dev,
- "failed to enable HID power button\n");
- }
- }
+ /*
+ * Call button load method to enable HID power button
+ * Always do this since it activates events on some devices without
+ * a button array too.
+ */
+ if (!intel_hid_evaluate_method(handle, INTEL_HID_DSM_BTNL_FN, &dummy))
+ dev_warn(&device->dev, "failed to enable HID power button\n");
device_init_wakeup(&device->dev, true);
/*
diff --git a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
index 1f59ac55c5f7..a95004e3d80b 100644
--- a/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
+++ b/drivers/platform/x86/intel/speed_select_if/isst_if_common.c
@@ -335,8 +335,8 @@ static struct pci_dev *_isst_if_get_pci_dev(int cpu, int bus_no, int dev, int fn
node = dev_to_node(&_pci_dev->dev);
if (node == NUMA_NO_NODE) {
- pr_info("Fail to get numa node for CPU:%d bus:%d dev:%d fn:%d\n",
- cpu, bus_no, dev, fn);
+ pr_info_once("Fail to get numa node for CPU:%d bus:%d dev:%d fn:%d\n",
+ cpu, bus_no, dev, fn);
continue;
}
diff --git a/drivers/platform/x86/lenovo-ymc.c b/drivers/platform/x86/lenovo-ymc.c
index 41676188b373..e1fbc35504d4 100644
--- a/drivers/platform/x86/lenovo-ymc.c
+++ b/drivers/platform/x86/lenovo-ymc.c
@@ -24,6 +24,10 @@ static bool ec_trigger __read_mostly;
module_param(ec_trigger, bool, 0444);
MODULE_PARM_DESC(ec_trigger, "Enable EC triggering work-around to force emitting tablet mode events");
+static bool force;
+module_param(force, bool, 0444);
+MODULE_PARM_DESC(force, "Force loading on boards without a convertible DMI chassis-type");
+
static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = {
{
/* Lenovo Yoga 7 14ARB7 */
@@ -32,6 +36,27 @@ static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = {
DMI_MATCH(DMI_PRODUCT_NAME, "82QF"),
},
},
+ {
+ /* Lenovo Yoga 7 14ACN6 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "82N7"),
+ },
+ },
+ { }
+};
+
+static const struct dmi_system_id allowed_chasis_types_dmi_table[] = {
+ {
+ .matches = {
+ DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "31" /* Convertible */),
+ },
+ },
+ {
+ .matches = {
+ DMI_EXACT_MATCH(DMI_CHASSIS_TYPE, "32" /* Detachable */),
+ },
+ },
{ }
};
@@ -111,6 +136,13 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx)
struct input_dev *input_dev;
int err;
+ if (!dmi_check_system(allowed_chasis_types_dmi_table)) {
+ if (force)
+ dev_info(&wdev->dev, "Force loading Lenovo YMC support\n");
+ else
+ return -ENODEV;
+ }
+
ec_trigger |= dmi_check_system(ec_trigger_quirk_dmi_table);
priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL);
diff --git a/drivers/platform/x86/mlx-platform.c b/drivers/platform/x86/mlx-platform.c
index 67367f010139..7d33977d9c60 100644
--- a/drivers/platform/x86/mlx-platform.c
+++ b/drivers/platform/x86/mlx-platform.c
@@ -62,10 +62,6 @@
#define MLXPLAT_CPLD_LPC_REG_PWM_CONTROL_OFFSET 0x37
#define MLXPLAT_CPLD_LPC_REG_AGGR_OFFSET 0x3a
#define MLXPLAT_CPLD_LPC_REG_AGGR_MASK_OFFSET 0x3b
-#define MLXPLAT_CPLD_LPC_REG_DBG1_OFFSET 0x3c
-#define MLXPLAT_CPLD_LPC_REG_DBG2_OFFSET 0x3d
-#define MLXPLAT_CPLD_LPC_REG_DBG3_OFFSET 0x3e
-#define MLXPLAT_CPLD_LPC_REG_DBG4_OFFSET 0x3f
#define MLXPLAT_CPLD_LPC_REG_AGGRLO_OFFSET 0x40
#define MLXPLAT_CPLD_LPC_REG_AGGRLO_MASK_OFFSET 0x41
#define MLXPLAT_CPLD_LPC_REG_AGGRCO_OFFSET 0x42
@@ -126,6 +122,10 @@
#define MLXPLAT_CPLD_LPC_REG_LC_SD_EVENT_OFFSET 0xaa
#define MLXPLAT_CPLD_LPC_REG_LC_SD_MASK_OFFSET 0xab
#define MLXPLAT_CPLD_LPC_REG_LC_PWR_ON 0xb2
+#define MLXPLAT_CPLD_LPC_REG_DBG1_OFFSET 0xb6
+#define MLXPLAT_CPLD_LPC_REG_DBG2_OFFSET 0xb7
+#define MLXPLAT_CPLD_LPC_REG_DBG3_OFFSET 0xb8
+#define MLXPLAT_CPLD_LPC_REG_DBG4_OFFSET 0xb9
#define MLXPLAT_CPLD_LPC_REG_GP4_RO_OFFSET 0xc2
#define MLXPLAT_CPLD_LPC_REG_SPI_CHNL_SELECT 0xc3
#define MLXPLAT_CPLD_LPC_REG_WD_CLEAR_OFFSET 0xc7
@@ -222,7 +222,7 @@
MLXPLAT_CPLD_AGGR_MASK_LC_SDWN)
#define MLXPLAT_CPLD_LOW_AGGR_MASK_LOW 0xc1
#define MLXPLAT_CPLD_LOW_AGGR_MASK_ASIC2 BIT(2)
-#define MLXPLAT_CPLD_LOW_AGGR_MASK_PWR_BUT BIT(4)
+#define MLXPLAT_CPLD_LOW_AGGR_MASK_PWR_BUT GENMASK(5, 4)
#define MLXPLAT_CPLD_LOW_AGGR_MASK_I2C BIT(6)
#define MLXPLAT_CPLD_PSU_MASK GENMASK(1, 0)
#define MLXPLAT_CPLD_PWR_MASK GENMASK(1, 0)
@@ -237,7 +237,7 @@
#define MLXPLAT_CPLD_GWP_MASK GENMASK(0, 0)
#define MLXPLAT_CPLD_EROT_MASK GENMASK(1, 0)
#define MLXPLAT_CPLD_PWR_BUTTON_MASK BIT(0)
-#define MLXPLAT_CPLD_LATCH_RST_MASK BIT(5)
+#define MLXPLAT_CPLD_LATCH_RST_MASK BIT(6)
#define MLXPLAT_CPLD_THERMAL1_PDB_MASK BIT(3)
#define MLXPLAT_CPLD_THERMAL2_PDB_MASK BIT(4)
#define MLXPLAT_CPLD_INTRUSION_MASK BIT(6)
@@ -2356,7 +2356,7 @@ mlxplat_mlxcpld_l1_switch_pwr_events_handler(void *handle, enum mlxreg_hotplug_k
u8 action)
{
dev_info(&mlxplat_dev->dev, "System shutdown due to short press of power button");
- kernel_halt();
+ kernel_power_off();
return 0;
}
@@ -2475,7 +2475,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_l1_switch_events_items[] = {
.reg = MLXPLAT_CPLD_LPC_REG_PWRB_OFFSET,
.mask = MLXPLAT_CPLD_PWR_BUTTON_MASK,
.count = ARRAY_SIZE(mlxplat_mlxcpld_l1_switch_pwr_events_items_data),
- .inversed = 0,
+ .inversed = 1,
.health = false,
},
{
@@ -2484,7 +2484,7 @@ static struct mlxreg_core_item mlxplat_mlxcpld_l1_switch_events_items[] = {
.reg = MLXPLAT_CPLD_LPC_REG_BRD_OFFSET,
.mask = MLXPLAT_CPLD_L1_CHA_HEALTH_MASK,
.count = ARRAY_SIZE(mlxplat_mlxcpld_l1_switch_health_events_items_data),
- .inversed = 0,
+ .inversed = 1,
.health = false,
.ind = 8,
},
@@ -3677,7 +3677,7 @@ static struct mlxreg_core_data mlxplat_mlxcpld_default_ng_regs_io_data[] = {
{
.label = "latch_reset",
.reg = MLXPLAT_CPLD_LPC_REG_GP1_OFFSET,
- .mask = GENMASK(7, 0) & ~BIT(5),
+ .mask = GENMASK(7, 0) & ~BIT(6),
.mode = 0200,
},
{
@@ -6238,8 +6238,6 @@ static void mlxplat_i2c_mux_topolgy_exit(struct mlxplat_priv *priv)
if (priv->pdev_mux[i])
platform_device_unregister(priv->pdev_mux[i]);
}
-
- mlxplat_post_exit();
}
static int mlxplat_i2c_main_complition_notify(void *handle, int id)
@@ -6369,6 +6367,7 @@ static void __exit mlxplat_exit(void)
pm_power_off = NULL;
mlxplat_pre_exit(priv);
mlxplat_i2c_main_exit(priv);
+ mlxplat_post_exit();
}
module_exit(mlxplat_exit);
diff --git a/drivers/platform/x86/msi-ec.c b/drivers/platform/x86/msi-ec.c
index ff93986e3d35..f26a3121092f 100644
--- a/drivers/platform/x86/msi-ec.c
+++ b/drivers/platform/x86/msi-ec.c
@@ -27,15 +27,15 @@
#include <linux/seq_file.h>
#include <linux/string.h>
-static const char *const SM_ECO_NAME = "eco";
-static const char *const SM_COMFORT_NAME = "comfort";
-static const char *const SM_SPORT_NAME = "sport";
-static const char *const SM_TURBO_NAME = "turbo";
-
-static const char *const FM_AUTO_NAME = "auto";
-static const char *const FM_SILENT_NAME = "silent";
-static const char *const FM_BASIC_NAME = "basic";
-static const char *const FM_ADVANCED_NAME = "advanced";
+#define SM_ECO_NAME "eco"
+#define SM_COMFORT_NAME "comfort"
+#define SM_SPORT_NAME "sport"
+#define SM_TURBO_NAME "turbo"
+
+#define FM_AUTO_NAME "auto"
+#define FM_SILENT_NAME "silent"
+#define FM_BASIC_NAME "basic"
+#define FM_ADVANCED_NAME "advanced"
static const char * const ALLOWED_FW_0[] __initconst = {
"14C1EMS1.012",
diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index 6b18ec543ac3..f4c6c36e05a5 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c
@@ -208,7 +208,7 @@ static ssize_t set_device_state(const char *buf, size_t count, u8 mask)
return -EINVAL;
if (quirks->ec_read_only)
- return -EOPNOTSUPP;
+ return 0;
/* read current device state */
result = ec_read(MSI_STANDARD_EC_COMMAND_ADDRESS, &rdata);
@@ -838,15 +838,15 @@ static bool msi_laptop_i8042_filter(unsigned char data, unsigned char str,
static void msi_init_rfkill(struct work_struct *ignored)
{
if (rfk_wlan) {
- rfkill_set_sw_state(rfk_wlan, !wlan_s);
+ msi_rfkill_set_state(rfk_wlan, !wlan_s);
rfkill_wlan_set(NULL, !wlan_s);
}
if (rfk_bluetooth) {
- rfkill_set_sw_state(rfk_bluetooth, !bluetooth_s);
+ msi_rfkill_set_state(rfk_bluetooth, !bluetooth_s);
rfkill_bluetooth_set(NULL, !bluetooth_s);
}
if (rfk_threeg) {
- rfkill_set_sw_state(rfk_threeg, !threeg_s);
+ msi_rfkill_set_state(rfk_threeg, !threeg_s);
rfkill_threeg_set(NULL, !threeg_s);
}
}
diff --git a/drivers/platform/x86/serial-multi-instantiate.c b/drivers/platform/x86/serial-multi-instantiate.c
index f3dcbdd72fec..8158e3cf5d6d 100644
--- a/drivers/platform/x86/serial-multi-instantiate.c
+++ b/drivers/platform/x86/serial-multi-instantiate.c
@@ -21,6 +21,7 @@
#define IRQ_RESOURCE_NONE 0
#define IRQ_RESOURCE_GPIO 1
#define IRQ_RESOURCE_APIC 2
+#define IRQ_RESOURCE_AUTO 3
enum smi_bus_type {
SMI_I2C,
@@ -52,6 +53,18 @@ static int smi_get_irq(struct platform_device *pdev, struct acpi_device *adev,
int ret;
switch (inst->flags & IRQ_RESOURCE_TYPE) {
+ case IRQ_RESOURCE_AUTO:
+ ret = acpi_dev_gpio_irq_get(adev, inst->irq_idx);
+ if (ret > 0) {
+ dev_dbg(&pdev->dev, "Using gpio irq\n");
+ break;
+ }
+ ret = platform_get_irq(pdev, inst->irq_idx);
+ if (ret > 0) {
+ dev_dbg(&pdev->dev, "Using platform irq\n");
+ break;
+ }
+ break;
case IRQ_RESOURCE_GPIO:
ret = acpi_dev_gpio_irq_get(adev, inst->irq_idx);
break;
@@ -307,10 +320,23 @@ static const struct smi_node int3515_data = {
static const struct smi_node cs35l41_hda = {
.instances = {
- { "cs35l41-hda", IRQ_RESOURCE_GPIO, 0 },
- { "cs35l41-hda", IRQ_RESOURCE_GPIO, 0 },
- { "cs35l41-hda", IRQ_RESOURCE_GPIO, 0 },
- { "cs35l41-hda", IRQ_RESOURCE_GPIO, 0 },
+ { "cs35l41-hda", IRQ_RESOURCE_AUTO, 0 },
+ { "cs35l41-hda", IRQ_RESOURCE_AUTO, 0 },
+ { "cs35l41-hda", IRQ_RESOURCE_AUTO, 0 },
+ { "cs35l41-hda", IRQ_RESOURCE_AUTO, 0 },
+ {}
+ },
+ .bus_type = SMI_AUTO_DETECT,
+};
+
+static const struct smi_node cs35l56_hda = {
+ .instances = {
+ { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+ { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+ { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+ { "cs35l56-hda", IRQ_RESOURCE_AUTO, 0 },
+ /* a 5th entry is an alias address, not a real device */
+ { "cs35l56-hda_dummy_dev" },
{}
},
.bus_type = SMI_AUTO_DETECT,
@@ -324,6 +350,7 @@ static const struct acpi_device_id smi_acpi_ids[] = {
{ "BSG1160", (unsigned long)&bsg1160_data },
{ "BSG2150", (unsigned long)&bsg2150_data },
{ "CSC3551", (unsigned long)&cs35l41_hda },
+ { "CSC3556", (unsigned long)&cs35l56_hda },
{ "INT3515", (unsigned long)&int3515_data },
/* Non-conforming _HID for Cirrus Logic already released */
{ "CLSA0100", (unsigned long)&cs35l41_hda },
diff --git a/drivers/platform/x86/think-lmi.c b/drivers/platform/x86/think-lmi.c
index 52d1ce8dfe44..79346881cadb 100644
--- a/drivers/platform/x86/think-lmi.c
+++ b/drivers/platform/x86/think-lmi.c
@@ -719,12 +719,12 @@ static ssize_t cert_to_password_store(struct kobject *kobj,
/* Format: 'Password,Signature' */
auth_str = kasprintf(GFP_KERNEL, "%s,%s", passwd, setting->signature);
if (!auth_str) {
- kfree(passwd);
+ kfree_sensitive(passwd);
return -ENOMEM;
}
ret = tlmi_simple_call(LENOVO_CERT_TO_PASSWORD_GUID, auth_str);
kfree(auth_str);
- kfree(passwd);
+ kfree_sensitive(passwd);
return ret ?: count;
}
diff --git a/drivers/platform/x86/touchscreen_dmi.c b/drivers/platform/x86/touchscreen_dmi.c
index a5b687eed8f3..f9301a9382e7 100644
--- a/drivers/platform/x86/touchscreen_dmi.c
+++ b/drivers/platform/x86/touchscreen_dmi.c
@@ -27,11 +27,12 @@ struct ts_dmi_data {
/* NOTE: Please keep all entries sorted alphabetically */
static const struct property_entry archos_101_cesium_educ_props[] = {
- PROPERTY_ENTRY_U32("touchscreen-size-x", 1280),
- PROPERTY_ENTRY_U32("touchscreen-size-y", 1850),
- PROPERTY_ENTRY_BOOL("touchscreen-inverted-x"),
+ PROPERTY_ENTRY_U32("touchscreen-size-x", 1850),
+ PROPERTY_ENTRY_U32("touchscreen-size-y", 1280),
+ PROPERTY_ENTRY_BOOL("touchscreen-inverted-y"),
PROPERTY_ENTRY_BOOL("touchscreen-swapped-x-y"),
PROPERTY_ENTRY_U32("silead,max-fingers", 10),
+ PROPERTY_ENTRY_BOOL("silead,home-button"),
PROPERTY_ENTRY_STRING("firmware-name", "gsl1680-archos-101-cesium-educ.fw"),
{ }
};
diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c
index 4e646e5e48f6..8fac57b28f8a 100644
--- a/drivers/powercap/intel_rapl_common.c
+++ b/drivers/powercap/intel_rapl_common.c
@@ -818,7 +818,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
return -EINVAL;
ra.reg = rd->regs[rpi->id];
- if (!ra.reg)
+ if (!ra.reg.val)
return -EINVAL;
/* non-hardware data are collected by the polling thread */
@@ -830,7 +830,7 @@ static int rapl_read_data_raw(struct rapl_domain *rd,
ra.mask = rpi->mask;
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
- pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg, rd->rp->name, rd->name);
+ pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
return -EIO;
}
@@ -920,7 +920,7 @@ static int rapl_check_unit_core(struct rapl_domain *rd)
ra.mask = ~0;
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
- ra.reg, rd->rp->name, rd->name);
+ ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;
}
@@ -948,7 +948,7 @@ static int rapl_check_unit_atom(struct rapl_domain *rd)
ra.mask = ~0;
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
- ra.reg, rd->rp->name, rd->name);
+ ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;
}
@@ -1135,7 +1135,7 @@ static int rapl_check_unit_tpmi(struct rapl_domain *rd)
ra.mask = ~0;
if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra)) {
pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
- ra.reg, rd->rp->name, rd->name);
+ ra.reg.val, rd->rp->name, rd->name);
return -ENODEV;
}
@@ -1411,8 +1411,8 @@ static int rapl_get_domain_unit(struct rapl_domain *rd)
struct rapl_defaults *defaults = get_defaults(rd->rp);
int ret;
- if (!rd->regs[RAPL_DOMAIN_REG_UNIT]) {
- if (!rd->rp->priv->reg_unit) {
+ if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) {
+ if (!rd->rp->priv->reg_unit.val) {
pr_err("No valid Unit register found\n");
return -ENODEV;
}
diff --git a/drivers/powercap/intel_rapl_msr.c b/drivers/powercap/intel_rapl_msr.c
index 569e25eab1e1..dd471021f237 100644
--- a/drivers/powercap/intel_rapl_msr.c
+++ b/drivers/powercap/intel_rapl_msr.c
@@ -34,28 +34,32 @@ static struct rapl_if_priv *rapl_msr_priv;
static struct rapl_if_priv rapl_msr_priv_intel = {
.type = RAPL_IF_MSR,
- .reg_unit = MSR_RAPL_POWER_UNIT,
- .regs[RAPL_DOMAIN_PACKAGE] = {
- MSR_PKG_POWER_LIMIT, MSR_PKG_ENERGY_STATUS, MSR_PKG_PERF_STATUS, 0, MSR_PKG_POWER_INFO },
- .regs[RAPL_DOMAIN_PP0] = {
- MSR_PP0_POWER_LIMIT, MSR_PP0_ENERGY_STATUS, 0, MSR_PP0_POLICY, 0 },
- .regs[RAPL_DOMAIN_PP1] = {
- MSR_PP1_POWER_LIMIT, MSR_PP1_ENERGY_STATUS, 0, MSR_PP1_POLICY, 0 },
- .regs[RAPL_DOMAIN_DRAM] = {
- MSR_DRAM_POWER_LIMIT, MSR_DRAM_ENERGY_STATUS, MSR_DRAM_PERF_STATUS, 0, MSR_DRAM_POWER_INFO },
- .regs[RAPL_DOMAIN_PLATFORM] = {
- MSR_PLATFORM_POWER_LIMIT, MSR_PLATFORM_ENERGY_STATUS, 0, 0, 0},
+ .reg_unit.msr = MSR_RAPL_POWER_UNIT,
+ .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_LIMIT].msr = MSR_PKG_POWER_LIMIT,
+ .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_STATUS].msr = MSR_PKG_ENERGY_STATUS,
+ .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PERF].msr = MSR_PKG_PERF_STATUS,
+ .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_INFO].msr = MSR_PKG_POWER_INFO,
+ .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_LIMIT].msr = MSR_PP0_POWER_LIMIT,
+ .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_STATUS].msr = MSR_PP0_ENERGY_STATUS,
+ .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_POLICY].msr = MSR_PP0_POLICY,
+ .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_LIMIT].msr = MSR_PP1_POWER_LIMIT,
+ .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_STATUS].msr = MSR_PP1_ENERGY_STATUS,
+ .regs[RAPL_DOMAIN_PP1][RAPL_DOMAIN_REG_POLICY].msr = MSR_PP1_POLICY,
+ .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_LIMIT].msr = MSR_DRAM_POWER_LIMIT,
+ .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_STATUS].msr = MSR_DRAM_ENERGY_STATUS,
+ .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_PERF].msr = MSR_DRAM_PERF_STATUS,
+ .regs[RAPL_DOMAIN_DRAM][RAPL_DOMAIN_REG_INFO].msr = MSR_DRAM_POWER_INFO,
+ .regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_LIMIT].msr = MSR_PLATFORM_POWER_LIMIT,
+ .regs[RAPL_DOMAIN_PLATFORM][RAPL_DOMAIN_REG_STATUS].msr = MSR_PLATFORM_ENERGY_STATUS,
.limits[RAPL_DOMAIN_PACKAGE] = BIT(POWER_LIMIT2),
.limits[RAPL_DOMAIN_PLATFORM] = BIT(POWER_LIMIT2),
};
static struct rapl_if_priv rapl_msr_priv_amd = {
.type = RAPL_IF_MSR,
- .reg_unit = MSR_AMD_RAPL_POWER_UNIT,
- .regs[RAPL_DOMAIN_PACKAGE] = {
- 0, MSR_AMD_PKG_ENERGY_STATUS, 0, 0, 0 },
- .regs[RAPL_DOMAIN_PP0] = {
- 0, MSR_AMD_CORE_ENERGY_STATUS, 0, 0, 0 },
+ .reg_unit.msr = MSR_AMD_RAPL_POWER_UNIT,
+ .regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_STATUS].msr = MSR_AMD_PKG_ENERGY_STATUS,
+ .regs[RAPL_DOMAIN_PP0][RAPL_DOMAIN_REG_STATUS].msr = MSR_AMD_CORE_ENERGY_STATUS,
};
/* Handles CPU hotplug on multi-socket systems.
@@ -99,10 +103,8 @@ static int rapl_cpu_down_prep(unsigned int cpu)
static int rapl_msr_read_raw(int cpu, struct reg_action *ra)
{
- u32 msr = (u32)ra->reg;
-
- if (rdmsrl_safe_on_cpu(cpu, msr, &ra->value)) {
- pr_debug("failed to read msr 0x%x on cpu %d\n", msr, cpu);
+ if (rdmsrl_safe_on_cpu(cpu, ra->reg.msr, &ra->value)) {
+ pr_debug("failed to read msr 0x%x on cpu %d\n", ra->reg.msr, cpu);
return -EIO;
}
ra->value &= ra->mask;
@@ -112,17 +114,16 @@ static int rapl_msr_read_raw(int cpu, struct reg_action *ra)
static void rapl_msr_update_func(void *info)
{
struct reg_action *ra = info;
- u32 msr = (u32)ra->reg;
u64 val;
- ra->err = rdmsrl_safe(msr, &val);
+ ra->err = rdmsrl_safe(ra->reg.msr, &val);
if (ra->err)
return;
val &= ~ra->mask;
val |= ra->value;
- ra->err = wrmsrl_safe(msr, val);
+ ra->err = wrmsrl_safe(ra->reg.msr, val);
}
static int rapl_msr_write_raw(int cpu, struct reg_action *ra)
@@ -171,7 +172,7 @@ static int rapl_msr_probe(struct platform_device *pdev)
if (id) {
rapl_msr_priv->limits[RAPL_DOMAIN_PACKAGE] |= BIT(POWER_LIMIT4);
- rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4] =
+ rapl_msr_priv->regs[RAPL_DOMAIN_PACKAGE][RAPL_DOMAIN_REG_PL4].msr =
MSR_VR_CURRENT_CONFIG;
pr_info("PL4 support detected.\n");
}
diff --git a/drivers/powercap/intel_rapl_tpmi.c b/drivers/powercap/intel_rapl_tpmi.c
index 4f4f13ded225..891c90fefd8b 100644
--- a/drivers/powercap/intel_rapl_tpmi.c
+++ b/drivers/powercap/intel_rapl_tpmi.c
@@ -59,10 +59,10 @@ static struct powercap_control_type *tpmi_control_type;
static int tpmi_rapl_read_raw(int id, struct reg_action *ra)
{
- if (!ra->reg)
+ if (!ra->reg.mmio)
return -EINVAL;
- ra->value = readq((void __iomem *)ra->reg);
+ ra->value = readq(ra->reg.mmio);
ra->value &= ra->mask;
return 0;
@@ -72,15 +72,15 @@ static int tpmi_rapl_write_raw(int id, struct reg_action *ra)
{
u64 val;
- if (!ra->reg)
+ if (!ra->reg.mmio)
return -EINVAL;
- val = readq((void __iomem *)ra->reg);
+ val = readq(ra->reg.mmio);
val &= ~ra->mask;
val |= ra->value;
- writeq(val, (void __iomem *)ra->reg);
+ writeq(val, ra->reg.mmio);
return 0;
}
@@ -138,8 +138,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
enum tpmi_rapl_register reg_index;
enum rapl_domain_reg_id reg_id;
int tpmi_domain_size, tpmi_domain_flags;
- u64 *tpmi_rapl_regs = trp->base + offset;
- u64 tpmi_domain_header = readq((void __iomem *)tpmi_rapl_regs);
+ u64 tpmi_domain_header = readq(trp->base + offset);
/* Domain Parent bits are ignored for now */
tpmi_domain_version = tpmi_domain_header & 0xff;
@@ -180,7 +179,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
return -EINVAL;
}
- if (trp->priv.regs[domain_type][RAPL_DOMAIN_REG_UNIT]) {
+ if (trp->priv.regs[domain_type][RAPL_DOMAIN_REG_UNIT].mmio) {
pr_warn(FW_BUG "Duplicate Domain type %d\n", tpmi_domain_type);
return -EINVAL;
}
@@ -218,7 +217,7 @@ static int parse_one_domain(struct tpmi_rapl_package *trp, u32 offset)
default:
continue;
}
- trp->priv.regs[domain_type][reg_id] = (u64)&tpmi_rapl_regs[reg_index];
+ trp->priv.regs[domain_type][reg_id].mmio = trp->base + offset + reg_index * 8;
}
return 0;
diff --git a/drivers/regulator/da9063-regulator.c b/drivers/regulator/da9063-regulator.c
index c5dd77be558b..a0621665a6d2 100644
--- a/drivers/regulator/da9063-regulator.c
+++ b/drivers/regulator/da9063-regulator.c
@@ -1028,9 +1028,12 @@ static int da9063_regulator_probe(struct platform_device *pdev)
config.of_node = da9063_reg_matches[id].of_node;
config.regmap = da9063->regmap;
- ret = da9063_check_xvp_constraints(&config);
- if (ret)
- return ret;
+ /* Checking constraints requires init_data from DT. */
+ if (config.init_data) {
+ ret = da9063_check_xvp_constraints(&config);
+ if (ret)
+ return ret;
+ }
regl->rdev = devm_regulator_register(&pdev->dev, &regl->desc,
&config);
diff --git a/drivers/regulator/mt6358-regulator.c b/drivers/regulator/mt6358-regulator.c
index 31a16fb28ecd..b9cda2210c33 100644
--- a/drivers/regulator/mt6358-regulator.c
+++ b/drivers/regulator/mt6358-regulator.c
@@ -661,7 +661,7 @@ static int mt6358_sync_vcn33_setting(struct device *dev)
/* Disable VCN33_WIFI */
ret = regmap_update_bits(mt6397->regmap, MT6358_LDO_VCN33_CON0_1, BIT(0), 0);
if (ret) {
- dev_err(dev, "Failed to disable VCN33_BT\n");
+ dev_err(dev, "Failed to disable VCN33_WIFI\n");
return ret;
}
@@ -676,10 +676,6 @@ static int mt6358_regulator_probe(struct platform_device *pdev)
const struct mt6358_regulator_info *mt6358_info;
int i, max_regulator, ret;
- ret = mt6358_sync_vcn33_setting(&pdev->dev);
- if (ret)
- return ret;
-
if (mt6397->chip_id == MT6366_CHIP_ID) {
max_regulator = MT6366_MAX_REGULATOR;
mt6358_info = mt6366_regulators;
@@ -688,6 +684,10 @@ static int mt6358_regulator_probe(struct platform_device *pdev)
mt6358_info = mt6358_regulators;
}
+ ret = mt6358_sync_vcn33_setting(&pdev->dev);
+ if (ret)
+ return ret;
+
for (i = 0; i < max_regulator; i++) {
config.dev = &pdev->dev;
config.regmap = mt6397->regmap;
diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c
index f3b280af0773..cd077b7c4aff 100644
--- a/drivers/regulator/qcom-rpmh-regulator.c
+++ b/drivers/regulator/qcom-rpmh-regulator.c
@@ -1068,7 +1068,7 @@ static const struct rpmh_vreg_init_data pm8550_vreg_data[] = {
RPMH_VREG("ldo9", "ldo%s9", &pmic5_pldo, "vdd-l8-l9"),
RPMH_VREG("ldo10", "ldo%s10", &pmic5_nldo515, "vdd-l1-l4-l10"),
RPMH_VREG("ldo11", "ldo%s11", &pmic5_nldo515, "vdd-l11"),
- RPMH_VREG("ldo12", "ldo%s12", &pmic5_pldo, "vdd-l12"),
+ RPMH_VREG("ldo12", "ldo%s12", &pmic5_nldo515, "vdd-l12"),
RPMH_VREG("ldo13", "ldo%s13", &pmic5_pldo, "vdd-l2-l13-l14"),
RPMH_VREG("ldo14", "ldo%s14", &pmic5_pldo, "vdd-l2-l13-l14"),
RPMH_VREG("ldo15", "ldo%s15", &pmic5_nldo515, "vdd-l15"),
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index edcbf77852c3..215597f73be4 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -2943,41 +2943,32 @@ static void _dasd_wake_block_flush_cb(struct dasd_ccw_req *cqr, void *data)
* Requeue a request back to the block request queue
* only works for block requests
*/
-static int _dasd_requeue_request(struct dasd_ccw_req *cqr)
+static void _dasd_requeue_request(struct dasd_ccw_req *cqr)
{
- struct dasd_block *block = cqr->block;
struct request *req;
- if (!block)
- return -EINVAL;
/*
* If the request is an ERP request there is nothing to requeue.
* This will be done with the remaining original request.
*/
if (cqr->refers)
- return 0;
+ return;
spin_lock_irq(&cqr->dq->lock);
req = (struct request *) cqr->callback_data;
blk_mq_requeue_request(req, true);
spin_unlock_irq(&cqr->dq->lock);
- return 0;
+ return;
}
-/*
- * Go through all request on the dasd_block request queue, cancel them
- * on the respective dasd_device, and return them to the generic
- * block layer.
- */
-static int dasd_flush_block_queue(struct dasd_block *block)
+static int _dasd_requests_to_flushqueue(struct dasd_block *block,
+ struct list_head *flush_queue)
{
struct dasd_ccw_req *cqr, *n;
- int rc, i;
- struct list_head flush_queue;
unsigned long flags;
+ int rc, i;
- INIT_LIST_HEAD(&flush_queue);
- spin_lock_bh(&block->queue_lock);
+ spin_lock_irqsave(&block->queue_lock, flags);
rc = 0;
restart:
list_for_each_entry_safe(cqr, n, &block->ccw_queue, blocklist) {
@@ -2992,13 +2983,32 @@ restart:
* is returned from the dasd_device layer.
*/
cqr->callback = _dasd_wake_block_flush_cb;
- for (i = 0; cqr != NULL; cqr = cqr->refers, i++)
- list_move_tail(&cqr->blocklist, &flush_queue);
+ for (i = 0; cqr; cqr = cqr->refers, i++)
+ list_move_tail(&cqr->blocklist, flush_queue);
if (i > 1)
/* moved more than one request - need to restart */
goto restart;
}
- spin_unlock_bh(&block->queue_lock);
+ spin_unlock_irqrestore(&block->queue_lock, flags);
+
+ return rc;
+}
+
+/*
+ * Go through all request on the dasd_block request queue, cancel them
+ * on the respective dasd_device, and return them to the generic
+ * block layer.
+ */
+static int dasd_flush_block_queue(struct dasd_block *block)
+{
+ struct dasd_ccw_req *cqr, *n;
+ struct list_head flush_queue;
+ unsigned long flags;
+ int rc;
+
+ INIT_LIST_HEAD(&flush_queue);
+ rc = _dasd_requests_to_flushqueue(block, &flush_queue);
+
/* Now call the callback function of flushed requests */
restart_cb:
list_for_each_entry_safe(cqr, n, &flush_queue, blocklist) {
@@ -3626,11 +3636,8 @@ int dasd_generic_set_offline(struct ccw_device *cdev)
* so sync bdev first and then wait for our queues to become
* empty
*/
- if (device->block) {
- rc = fsync_bdev(device->block->bdev);
- if (rc != 0)
- goto interrupted;
- }
+ if (device->block)
+ bdev_mark_dead(device->block->bdev, false);
dasd_schedule_device_bh(device);
rc = wait_event_interruptible(shutdown_waitq,
_wait_for_empty_queues(device));
@@ -3881,75 +3888,36 @@ EXPORT_SYMBOL_GPL(dasd_generic_space_avail);
*/
int dasd_generic_requeue_all_requests(struct dasd_device *device)
{
+ struct dasd_block *block = device->block;
struct list_head requeue_queue;
struct dasd_ccw_req *cqr, *n;
- struct dasd_ccw_req *refers;
int rc;
- INIT_LIST_HEAD(&requeue_queue);
- spin_lock_irq(get_ccwdev_lock(device->cdev));
- rc = 0;
- list_for_each_entry_safe(cqr, n, &device->ccw_queue, devlist) {
- /* Check status and move request to flush_queue */
- if (cqr->status == DASD_CQR_IN_IO) {
- rc = device->discipline->term_IO(cqr);
- if (rc) {
- /* unable to terminate requeust */
- dev_err(&device->cdev->dev,
- "Unable to terminate request %p "
- "on suspend\n", cqr);
- spin_unlock_irq(get_ccwdev_lock(device->cdev));
- dasd_put_device(device);
- return rc;
- }
- }
- list_move_tail(&cqr->devlist, &requeue_queue);
- }
- spin_unlock_irq(get_ccwdev_lock(device->cdev));
-
- list_for_each_entry_safe(cqr, n, &requeue_queue, devlist) {
- wait_event(dasd_flush_wq,
- (cqr->status != DASD_CQR_CLEAR_PENDING));
+ if (!block)
+ return 0;
- /*
- * requeue requests to blocklayer will only work
- * for block device requests
- */
- if (_dasd_requeue_request(cqr))
- continue;
+ INIT_LIST_HEAD(&requeue_queue);
+ rc = _dasd_requests_to_flushqueue(block, &requeue_queue);
- /* remove requests from device and block queue */
- list_del_init(&cqr->devlist);
- while (cqr->refers != NULL) {
- refers = cqr->refers;
- /* remove the request from the block queue */
- list_del(&cqr->blocklist);
- /* free the finished erp request */
- dasd_free_erp_request(cqr, cqr->memdev);
- cqr = refers;
+ /* Now call the callback function of flushed requests */
+restart_cb:
+ list_for_each_entry_safe(cqr, n, &requeue_queue, blocklist) {
+ wait_event(dasd_flush_wq, (cqr->status < DASD_CQR_QUEUED));
+ /* Process finished ERP request. */
+ if (cqr->refers) {
+ spin_lock_bh(&block->queue_lock);
+ __dasd_process_erp(block->base, cqr);
+ spin_unlock_bh(&block->queue_lock);
+ /* restart list_for_xx loop since dasd_process_erp
+ * might remove multiple elements
+ */
+ goto restart_cb;
}
-
- /*
- * _dasd_requeue_request already checked for a valid
- * blockdevice, no need to check again
- * all erp requests (cqr->refers) have a cqr->block
- * pointer copy from the original cqr
- */
+ _dasd_requeue_request(cqr);
list_del_init(&cqr->blocklist);
cqr->block->base->discipline->free_cp(
cqr, (struct request *) cqr->callback_data);
}
-
- /*
- * if requests remain then they are internal request
- * and go back to the device queue
- */
- if (!list_empty(&requeue_queue)) {
- /* move freeze_queue to start of the ccw_queue */
- spin_lock_irq(get_ccwdev_lock(device->cdev));
- list_splice_tail(&requeue_queue, &device->ccw_queue);
- spin_unlock_irq(get_ccwdev_lock(device->cdev));
- }
dasd_schedule_device_bh(device);
return rc;
}
diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c
index 9fd36c468706..89957bb7244d 100644
--- a/drivers/s390/block/dasd_3990_erp.c
+++ b/drivers/s390/block/dasd_3990_erp.c
@@ -1050,7 +1050,7 @@ dasd_3990_erp_com_rej(struct dasd_ccw_req * erp, char *sense)
dev_err(&device->cdev->dev, "An I/O request was rejected"
" because writing is inhibited\n");
erp = dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED);
- } else if (sense[7] & SNS7_INVALID_ON_SEC) {
+ } else if (sense[7] == SNS7_INVALID_ON_SEC) {
dev_err(&device->cdev->dev, "An I/O request was rejected on a copy pair secondary device\n");
/* suppress dump of sense data for this error */
set_bit(DASD_CQR_SUPPRESS_CR, &erp->refers->flags);
@@ -2441,7 +2441,7 @@ static struct dasd_ccw_req *dasd_3990_erp_add_erp(struct dasd_ccw_req *cqr)
erp->block = cqr->block;
erp->magic = cqr->magic;
erp->expires = cqr->expires;
- erp->retries = 256;
+ erp->retries = device->default_retries;
erp->buildclk = get_tod_clock();
erp->status = DASD_CQR_FILLED;
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 513a7e6eee63..d55862605b82 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -131,6 +131,7 @@ static int dasd_ioctl_resume(struct dasd_block *block)
spin_unlock_irqrestore(get_ccwdev_lock(base->cdev), flags);
dasd_schedule_block_bh(block);
+ dasd_schedule_device_bh(base);
return 0;
}
diff --git a/drivers/s390/crypto/zcrypt_msgtype6.c b/drivers/s390/crypto/zcrypt_msgtype6.c
index 67fd2ec9c5a1..e668ff5eb384 100644
--- a/drivers/s390/crypto/zcrypt_msgtype6.c
+++ b/drivers/s390/crypto/zcrypt_msgtype6.c
@@ -1101,23 +1101,36 @@ static long zcrypt_msgtype6_send_cprb(bool userspace, struct zcrypt_queue *zq,
struct ica_xcRB *xcrb,
struct ap_message *ap_msg)
{
- int rc;
struct response_type *rtype = ap_msg->private;
struct {
struct type6_hdr hdr;
struct CPRBX cprbx;
/* ... more data blocks ... */
} __packed * msg = ap_msg->msg;
-
- /*
- * Set the queue's reply buffer length minus 128 byte padding
- * as reply limit for the card firmware.
- */
- msg->hdr.fromcardlen1 = min_t(unsigned int, msg->hdr.fromcardlen1,
- zq->reply.bufsize - 128);
- if (msg->hdr.fromcardlen2)
- msg->hdr.fromcardlen2 =
- zq->reply.bufsize - msg->hdr.fromcardlen1 - 128;
+ unsigned int max_payload_size;
+ int rc, delta;
+
+ /* calculate maximum payload for this card and msg type */
+ max_payload_size = zq->reply.bufsize - sizeof(struct type86_fmt2_msg);
+
+ /* limit each of the two from fields to the maximum payload size */
+ msg->hdr.fromcardlen1 = min(msg->hdr.fromcardlen1, max_payload_size);
+ msg->hdr.fromcardlen2 = min(msg->hdr.fromcardlen2, max_payload_size);
+
+ /* calculate delta if the sum of both exceeds max payload size */
+ delta = msg->hdr.fromcardlen1 + msg->hdr.fromcardlen2
+ - max_payload_size;
+ if (delta > 0) {
+ /*
+ * Sum exceeds maximum payload size, prune fromcardlen1
+ * (always trust fromcardlen2)
+ */
+ if (delta > msg->hdr.fromcardlen1) {
+ rc = -EINVAL;
+ goto out;
+ }
+ msg->hdr.fromcardlen1 -= delta;
+ }
init_completion(&rtype->work);
rc = ap_queue_message(zq->queue, ap_msg);
diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 1d195429753d..613eab729704 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -716,7 +716,6 @@ struct qeth_card_info {
u16 chid;
u8 ids_valid:1; /* cssid,iid,chid */
u8 dev_addr_is_registered:1;
- u8 open_when_online:1;
u8 promisc_mode:1;
u8 use_v1_blkt:1;
u8 is_vm_nic:1;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 1d5b207c2b9e..cd783290bde5 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -5373,8 +5373,6 @@ int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
qeth_clear_ipacmd_list(card);
rtnl_lock();
- card->info.open_when_online = card->dev->flags & IFF_UP;
- dev_close(card->dev);
netif_device_detach(card->dev);
netif_carrier_off(card->dev);
rtnl_unlock();
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 9f13ed170a43..75910c0bcc2b 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -2388,9 +2388,12 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
qeth_enable_hw_features(dev);
qeth_l2_enable_brport_features(card);
- if (card->info.open_when_online) {
- card->info.open_when_online = 0;
- dev_open(dev, NULL);
+ if (netif_running(dev)) {
+ local_bh_disable();
+ napi_schedule(&card->napi);
+ /* kick-start the NAPI softirq: */
+ local_bh_enable();
+ qeth_l2_set_rx_mode(dev);
}
rtnl_unlock();
}
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index af4e60d2917e..b92a32b4b114 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -2018,9 +2018,11 @@ static int qeth_l3_set_online(struct qeth_card *card, bool carrier_ok)
netif_device_attach(dev);
qeth_enable_hw_features(dev);
- if (card->info.open_when_online) {
- card->info.open_when_online = 0;
- dev_open(dev, NULL);
+ if (netif_running(dev)) {
+ local_bh_disable();
+ napi_schedule(&card->napi);
+ /* kick-start the NAPI softirq: */
+ local_bh_enable();
}
rtnl_unlock();
}
diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c
index f21307537829..4f0d0e55f0d4 100644
--- a/drivers/s390/scsi/zfcp_fc.c
+++ b/drivers/s390/scsi/zfcp_fc.c
@@ -534,8 +534,7 @@ static void zfcp_fc_adisc_handler(void *data)
/* re-init to undo drop from zfcp_fc_adisc() */
port->d_id = ntoh24(adisc_resp->adisc_port_id);
- /* port is good, unblock rport without going through erp */
- zfcp_scsi_schedule_rport_register(port);
+ /* port is still good, nothing to do */
out:
atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
put_device(&port->dev);
@@ -595,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_struct *work)
int retval;
set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */
- get_device(&port->dev);
- port->rport_task = RPORT_DEL;
- zfcp_scsi_rport_work(&port->rport_work);
/* only issue one test command at one time per port */
if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)
diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index e1e4f9d10887..857be0f3ae5b 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c
@@ -1598,7 +1598,7 @@ NCR_700_intr(int irq, void *dev_id)
printk("scsi%d (%d:%d) PHASE MISMATCH IN SEND MESSAGE %d remain, return %p[%04x], phase %s\n", host->host_no, pun, lun, count, (void *)temp, temp - hostdata->pScript, sbcl_to_string(NCR_700_readb(host, SBCL_REG)));
#endif
resume_offset = hostdata->pScript + Ent_SendMessagePhaseMismatch;
- } else if(dsp >= to32bit(&slot->pSG[0].ins) &&
+ } else if (slot && dsp >= to32bit(&slot->pSG[0].ins) &&
dsp <= to32bit(&slot->pSG[NCR_700_SG_SEGMENTS].ins)) {
int data_transfer = NCR_700_readl(host, DBC_REG) & 0xffffff;
int SGcount = (dsp - to32bit(&slot->pSG[0].ins))/sizeof(struct NCR_700_SG_List);
diff --git a/drivers/scsi/fnic/fnic.h b/drivers/scsi/fnic/fnic.h
index d82de34f6fd7..e51e92f932fa 100644
--- a/drivers/scsi/fnic/fnic.h
+++ b/drivers/scsi/fnic/fnic.h
@@ -27,7 +27,7 @@
#define DRV_NAME "fnic"
#define DRV_DESCRIPTION "Cisco FCoE HBA Driver"
-#define DRV_VERSION "1.6.0.54"
+#define DRV_VERSION "1.6.0.55"
#define PFX DRV_NAME ": "
#define DFX DRV_NAME "%d: "
diff --git a/drivers/scsi/fnic/fnic_scsi.c b/drivers/scsi/fnic/fnic_scsi.c
index 26dbd347156e..be89ce96df46 100644
--- a/drivers/scsi/fnic/fnic_scsi.c
+++ b/drivers/scsi/fnic/fnic_scsi.c
@@ -2139,7 +2139,7 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
bool new_sc)
{
- int ret = SUCCESS;
+ int ret = 0;
struct fnic_pending_aborts_iter_data iter_data = {
.fnic = fnic,
.lun_dev = lr_sc->device,
@@ -2159,9 +2159,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
/* walk again to check, if IOs are still pending in fw */
if (fnic_is_abts_pending(fnic, lr_sc))
- ret = FAILED;
+ ret = 1;
clean_pending_aborts_end:
+ FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+ "%s: exit status: %d\n", __func__, ret);
return ret;
}
diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index a62e091894f6..d26941b131fd 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c
@@ -109,8 +109,6 @@ lpfc_sli4_set_rsp_sgl_last(struct lpfc_hba *phba,
}
}
-#define LPFC_INVALID_REFTAG ((u32)-1)
-
/**
* lpfc_rampdown_queue_depth - Post RAMP_DOWN_QUEUE event to worker thread
* @phba: The Hba for which this call is being executed.
@@ -978,8 +976,6 @@ lpfc_bg_err_inject(struct lpfc_hba *phba, struct scsi_cmnd *sc,
sgpe = scsi_prot_sglist(sc);
lba = scsi_prot_ref_tag(sc);
- if (lba == LPFC_INVALID_REFTAG)
- return 0;
/* First check if we need to match the LBA */
if (phba->lpfc_injerr_lba != LPFC_INJERR_LBA_OFF) {
@@ -1560,8 +1556,6 @@ lpfc_bg_setup_bpl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
/* extract some info from the scsi command for pde*/
reftag = scsi_prot_ref_tag(sc);
- if (reftag == LPFC_INVALID_REFTAG)
- goto out;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -1723,8 +1717,6 @@ lpfc_bg_setup_bpl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
/* extract some info from the scsi command */
blksize = scsi_prot_interval(sc);
reftag = scsi_prot_ref_tag(sc);
- if (reftag == LPFC_INVALID_REFTAG)
- goto out;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -1953,8 +1945,6 @@ lpfc_bg_setup_sgl(struct lpfc_hba *phba, struct scsi_cmnd *sc,
/* extract some info from the scsi command for pde*/
reftag = scsi_prot_ref_tag(sc);
- if (reftag == LPFC_INVALID_REFTAG)
- goto out;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -2154,8 +2144,6 @@ lpfc_bg_setup_sgl_prot(struct lpfc_hba *phba, struct scsi_cmnd *sc,
/* extract some info from the scsi command */
blksize = scsi_prot_interval(sc);
reftag = scsi_prot_ref_tag(sc);
- if (reftag == LPFC_INVALID_REFTAG)
- goto out;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
rc = lpfc_bg_err_inject(phba, sc, &reftag, NULL, 1);
@@ -2746,8 +2734,6 @@ lpfc_calc_bg_err(struct lpfc_hba *phba, struct lpfc_io_buf *lpfc_cmd)
src = (struct scsi_dif_tuple *)sg_virt(sgpe);
start_ref_tag = scsi_prot_ref_tag(cmd);
- if (start_ref_tag == LPFC_INVALID_REFTAG)
- goto out;
start_app_tag = src->app_tag;
len = sgpe->length;
while (src && protsegcnt) {
@@ -3493,11 +3479,11 @@ err:
scsi_cmnd->sc_data_direction);
lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
- "9084 Cannot setup S/G List for HBA"
- "IO segs %d/%d SGL %d SCSI %d: %d %d\n",
+ "9084 Cannot setup S/G List for HBA "
+ "IO segs %d/%d SGL %d SCSI %d: %d %d %d\n",
lpfc_cmd->seg_cnt, lpfc_cmd->prot_seg_cnt,
phba->cfg_total_seg_cnt, phba->cfg_sg_seg_cnt,
- prot_group_type, num_sge);
+ prot_group_type, num_sge, ret);
lpfc_cmd->seg_cnt = 0;
lpfc_cmd->prot_seg_cnt = 0;
diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 2e886c1d867d..4995e1ef4e0e 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -1181,7 +1181,8 @@ static int pm8001_pci_probe(struct pci_dev *pdev,
pm80xx_set_thermal_config(pm8001_ha);
}
- if (pm8001_init_sas_add(pm8001_ha))
+ rc = pm8001_init_sas_add(pm8001_ha);
+ if (rc)
goto err_out_shost;
/* phy setting support for motherboard controller */
rc = pm8001_configure_phy_settings(pm8001_ha);
diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index 2a31ddc99dde..7825765c936c 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -31,6 +31,7 @@ static void qedf_remove(struct pci_dev *pdev);
static void qedf_shutdown(struct pci_dev *pdev);
static void qedf_schedule_recovery_handler(void *dev);
static void qedf_recovery_handler(struct work_struct *work);
+static int qedf_suspend(struct pci_dev *pdev, pm_message_t state);
/*
* Driver module parameters.
@@ -3271,6 +3272,7 @@ static struct pci_driver qedf_pci_driver = {
.probe = qedf_probe,
.remove = qedf_remove,
.shutdown = qedf_shutdown,
+ .suspend = qedf_suspend,
};
static int __qedf_probe(struct pci_dev *pdev, int mode)
@@ -4000,6 +4002,22 @@ static void qedf_shutdown(struct pci_dev *pdev)
__qedf_remove(pdev, QEDF_MODE_NORMAL);
}
+static int qedf_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct qedf_ctx *qedf;
+
+ if (!pdev) {
+ QEDF_ERR(NULL, "pdev is NULL.\n");
+ return -ENODEV;
+ }
+
+ qedf = pci_get_drvdata(pdev);
+
+ QEDF_ERR(&qedf->dbg_ctx, "%s: Device does not support suspend operation\n", __func__);
+
+ return -EPERM;
+}
+
/*
* Recovery handler code
*/
diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c
index 450522b204d6..cd0180b1f5b9 100644
--- a/drivers/scsi/qedi/qedi_main.c
+++ b/drivers/scsi/qedi/qedi_main.c
@@ -69,6 +69,7 @@ static struct nvm_iscsi_block *qedi_get_nvram_block(struct qedi_ctx *qedi);
static void qedi_recovery_handler(struct work_struct *work);
static void qedi_schedule_hw_err_handler(void *dev,
enum qed_hw_err_type err_type);
+static int qedi_suspend(struct pci_dev *pdev, pm_message_t state);
static int qedi_iscsi_event_cb(void *context, u8 fw_event_code, void *fw_handle)
{
@@ -1976,8 +1977,9 @@ static int qedi_cpu_offline(unsigned int cpu)
struct qedi_percpu_s *p = this_cpu_ptr(&qedi_percpu);
struct qedi_work *work, *tmp;
struct task_struct *thread;
+ unsigned long flags;
- spin_lock_bh(&p->p_work_lock);
+ spin_lock_irqsave(&p->p_work_lock, flags);
thread = p->iothread;
p->iothread = NULL;
@@ -1988,7 +1990,7 @@ static int qedi_cpu_offline(unsigned int cpu)
kfree(work);
}
- spin_unlock_bh(&p->p_work_lock);
+ spin_unlock_irqrestore(&p->p_work_lock, flags);
if (thread)
kthread_stop(thread);
return 0;
@@ -2510,6 +2512,22 @@ static void qedi_shutdown(struct pci_dev *pdev)
__qedi_remove(pdev, QEDI_MODE_SHUTDOWN);
}
+static int qedi_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ struct qedi_ctx *qedi;
+
+ if (!pdev) {
+ QEDI_ERR(NULL, "pdev is NULL.\n");
+ return -ENODEV;
+ }
+
+ qedi = pci_get_drvdata(pdev);
+
+ QEDI_ERR(&qedi->dbg_ctx, "%s: Device does not support suspend operation\n", __func__);
+
+ return -EPERM;
+}
+
static int __qedi_probe(struct pci_dev *pdev, int mode)
{
struct qedi_ctx *qedi;
@@ -2868,6 +2886,7 @@ static struct pci_driver qedi_pci_driver = {
.remove = qedi_remove,
.shutdown = qedi_shutdown,
.err_handler = &qedi_err_handler,
+ .suspend = qedi_suspend,
};
static int __init qedi_init(void)
diff --git a/drivers/scsi/raid_class.c b/drivers/scsi/raid_class.c
index 898a0bdf8df6..95a86e0dfd77 100644
--- a/drivers/scsi/raid_class.c
+++ b/drivers/scsi/raid_class.c
@@ -209,53 +209,6 @@ raid_attr_ro_state(level);
raid_attr_ro_fn(resync);
raid_attr_ro_state_fn(state);
-static void raid_component_release(struct device *dev)
-{
- struct raid_component *rc =
- container_of(dev, struct raid_component, dev);
- dev_printk(KERN_ERR, rc->dev.parent, "COMPONENT RELEASE\n");
- put_device(rc->dev.parent);
- kfree(rc);
-}
-
-int raid_component_add(struct raid_template *r,struct device *raid_dev,
- struct device *component_dev)
-{
- struct device *cdev =
- attribute_container_find_class_device(&r->raid_attrs.ac,
- raid_dev);
- struct raid_component *rc;
- struct raid_data *rd = dev_get_drvdata(cdev);
- int err;
-
- rc = kzalloc(sizeof(*rc), GFP_KERNEL);
- if (!rc)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&rc->node);
- device_initialize(&rc->dev);
- rc->dev.release = raid_component_release;
- rc->dev.parent = get_device(component_dev);
- rc->num = rd->component_count++;
-
- dev_set_name(&rc->dev, "component-%d", rc->num);
- list_add_tail(&rc->node, &rd->component_list);
- rc->dev.class = &raid_class.class;
- err = device_add(&rc->dev);
- if (err)
- goto err_out;
-
- return 0;
-
-err_out:
- list_del(&rc->node);
- rd->component_count--;
- put_device(component_dev);
- kfree(rc);
- return err;
-}
-EXPORT_SYMBOL(raid_component_add);
-
struct raid_template *
raid_class_attach(struct raid_function_template *ft)
{
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 4a6eb1741be0..41f23cd0bfb4 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -406,7 +406,7 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
size_t length, loff_t *ppos)
{
int host, channel, id, lun;
- char *buffer, *p;
+ char *buffer, *end, *p;
int err;
if (!buf || length > PAGE_SIZE)
@@ -421,10 +421,14 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
goto out;
err = -EINVAL;
- if (length < PAGE_SIZE)
- buffer[length] = '\0';
- else if (buffer[PAGE_SIZE-1])
- goto out;
+ if (length < PAGE_SIZE) {
+ end = buffer + length;
+ *end = '\0';
+ } else {
+ end = buffer + PAGE_SIZE - 1;
+ if (*end)
+ goto out;
+ }
/*
* Usage: echo "scsi add-single-device 0 1 2 3" >/proc/scsi/scsi
@@ -433,10 +437,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
if (!strncmp("scsi add-single-device", buffer, 22)) {
p = buffer + 23;
- host = simple_strtoul(p, &p, 0);
- channel = simple_strtoul(p + 1, &p, 0);
- id = simple_strtoul(p + 1, &p, 0);
- lun = simple_strtoul(p + 1, &p, 0);
+ host = (p < end) ? simple_strtoul(p, &p, 0) : 0;
+ channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+ id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+ lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
err = scsi_add_single_device(host, channel, id, lun);
@@ -447,10 +451,10 @@ static ssize_t proc_scsi_write(struct file *file, const char __user *buf,
} else if (!strncmp("scsi remove-single-device", buffer, 25)) {
p = buffer + 26;
- host = simple_strtoul(p, &p, 0);
- channel = simple_strtoul(p + 1, &p, 0);
- id = simple_strtoul(p + 1, &p, 0);
- lun = simple_strtoul(p + 1, &p, 0);
+ host = (p < end) ? simple_strtoul(p, &p, 0) : 0;
+ channel = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+ id = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
+ lun = (p + 1 < end) ? simple_strtoul(p + 1, &p, 0) : 0;
err = scsi_remove_single_device(host, channel, id, lun);
}
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 68b12afa0721..3c668cfb146d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -3876,7 +3876,7 @@ static int sd_suspend_runtime(struct device *dev)
static int sd_resume(struct device *dev)
{
struct scsi_disk *sdkp = dev_get_drvdata(dev);
- int ret;
+ int ret = 0;
if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */
return 0;
@@ -3884,8 +3884,11 @@ static int sd_resume(struct device *dev)
if (!sdkp->device->manage_start_stop)
return 0;
- sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
- ret = sd_start_stop_device(sdkp, 1);
+ if (!sdkp->device->no_start_on_resume) {
+ sd_printk(KERN_NOTICE, sdkp, "Starting disk\n");
+ ret = sd_start_stop_device(sdkp, 1);
+ }
+
if (!ret)
opal_unlock_from_suspend(sdkp->opal_dev);
return ret;
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 89fa046c7158..0d8afffd1683 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1497,9 +1497,10 @@ sg_add_device(struct device *cl_dev)
int error;
unsigned long iflags;
- error = blk_get_queue(scsidp->request_queue);
- if (error)
- return error;
+ if (!blk_get_queue(scsidp->request_queue)) {
+ pr_warn("%s: get scsi_device queue failed\n", __func__);
+ return -ENODEV;
+ }
error = -ENOMEM;
cdev = cdev_alloc();
diff --git a/drivers/scsi/snic/snic_disc.c b/drivers/scsi/snic/snic_disc.c
index 3e2e5783924d..4db3ba62fcd3 100644
--- a/drivers/scsi/snic/snic_disc.c
+++ b/drivers/scsi/snic/snic_disc.c
@@ -307,7 +307,7 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid)
spin_lock_irqsave(snic->shost->host_lock, flags);
list_del(&tgt->list);
spin_unlock_irqrestore(snic->shost->host_lock, flags);
- kfree(tgt);
+ put_device(&tgt->dev);
tgt = NULL;
return tgt;
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 7f12d931fe7c..047ffaf7d42a 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -366,6 +366,7 @@ static void storvsc_on_channel_callback(void *context);
#define STORVSC_FC_MAX_LUNS_PER_TARGET 255
#define STORVSC_FC_MAX_TARGETS 128
#define STORVSC_FC_MAX_CHANNELS 8
+#define STORVSC_FC_MAX_XFER_SIZE ((u32)(512 * 1024))
#define STORVSC_IDE_MAX_LUNS_PER_TARGET 64
#define STORVSC_IDE_MAX_TARGETS 1
@@ -1673,10 +1674,6 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
*/
static enum scsi_timeout_action storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
{
-#if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
- if (scmnd->device->host->transportt == fc_transport_template)
- return fc_eh_timed_out(scmnd);
-#endif
return SCSI_EH_RESET_TIMER;
}
@@ -2006,6 +2003,9 @@ static int storvsc_probe(struct hv_device *device,
* protecting it from any weird value.
*/
max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE);
+ if (is_fc)
+ max_xfer_bytes = min(max_xfer_bytes, STORVSC_FC_MAX_XFER_SIZE);
+
/* max_hw_sectors_kb */
host->max_sectors = max_xfer_bytes >> 9;
/*
diff --git a/drivers/soc/aspeed/aspeed-socinfo.c b/drivers/soc/aspeed/aspeed-socinfo.c
index 1ca140356a08..3f759121dc00 100644
--- a/drivers/soc/aspeed/aspeed-socinfo.c
+++ b/drivers/soc/aspeed/aspeed-socinfo.c
@@ -137,6 +137,7 @@ static int __init aspeed_socinfo_init(void)
soc_dev = soc_device_register(attrs);
if (IS_ERR(soc_dev)) {
+ kfree(attrs->machine);
kfree(attrs->soc_id);
kfree(attrs->serial_number);
kfree(attrs);
diff --git a/drivers/soc/aspeed/aspeed-uart-routing.c b/drivers/soc/aspeed/aspeed-uart-routing.c
index ef8b24fd1851..59123e1f27ac 100644
--- a/drivers/soc/aspeed/aspeed-uart-routing.c
+++ b/drivers/soc/aspeed/aspeed-uart-routing.c
@@ -524,7 +524,7 @@ static ssize_t aspeed_uart_routing_store(struct device *dev,
struct aspeed_uart_routing_selector *sel = to_routing_selector(attr);
int val;
- val = match_string(sel->options, -1, buf);
+ val = __sysfs_match_string(sel->options, -1, buf);
if (val < 0) {
dev_err(dev, "invalid value \"%s\"\n", buf);
return -EINVAL;
diff --git a/drivers/soc/imx/imx8mp-blk-ctrl.c b/drivers/soc/imx/imx8mp-blk-ctrl.c
index 870aecc0202a..1c1fcab4979a 100644
--- a/drivers/soc/imx/imx8mp-blk-ctrl.c
+++ b/drivers/soc/imx/imx8mp-blk-ctrl.c
@@ -164,7 +164,7 @@ static int imx8mp_hsio_blk_ctrl_probe(struct imx8mp_blk_ctrl *bc)
clk_hsio_pll->hw.init = &init;
hw = &clk_hsio_pll->hw;
- ret = devm_clk_hw_register(bc->dev, hw);
+ ret = devm_clk_hw_register(bc->bus_power_dev, hw);
if (ret)
return ret;
diff --git a/drivers/soundwire/amd_manager.c b/drivers/soundwire/amd_manager.c
index 08aeb7ed00e1..3a99f6dcdfaf 100644
--- a/drivers/soundwire/amd_manager.c
+++ b/drivers/soundwire/amd_manager.c
@@ -910,9 +910,9 @@ static int amd_sdw_manager_probe(struct platform_device *pdev)
return -ENOMEM;
amd_manager->acp_mmio = devm_ioremap(dev, res->start, resource_size(res));
- if (IS_ERR(amd_manager->mmio)) {
+ if (!amd_manager->acp_mmio) {
dev_err(dev, "mmio not found\n");
- return PTR_ERR(amd_manager->mmio);
+ return -ENOMEM;
}
amd_manager->instance = pdata->instance;
amd_manager->mmio = amd_manager->acp_mmio +
diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c
index dba920ec88f6..cf78839b3f74 100644
--- a/drivers/soundwire/bus.c
+++ b/drivers/soundwire/bus.c
@@ -922,8 +922,8 @@ static void sdw_modify_slave_status(struct sdw_slave *slave,
"initializing enumeration and init completion for Slave %d\n",
slave->dev_num);
- init_completion(&slave->enumeration_complete);
- init_completion(&slave->initialization_complete);
+ reinit_completion(&slave->enumeration_complete);
+ reinit_completion(&slave->initialization_complete);
} else if ((status == SDW_SLAVE_ATTACHED) &&
(slave->status == SDW_SLAVE_UNATTACHED)) {
@@ -931,7 +931,7 @@ static void sdw_modify_slave_status(struct sdw_slave *slave,
"signaling enumeration completion for Slave %d\n",
slave->dev_num);
- complete(&slave->enumeration_complete);
+ complete_all(&slave->enumeration_complete);
}
slave->status = status;
mutex_unlock(&bus->bus_lock);
@@ -1951,7 +1951,7 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
"signaling initialization completion for Slave %d\n",
slave->dev_num);
- complete(&slave->initialization_complete);
+ complete_all(&slave->initialization_complete);
/*
* If the manager became pm_runtime active, the peripherals will be
diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index 7970fdb27ba0..c029e4d53573 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -540,7 +540,7 @@ static int qcom_swrm_get_alert_slave_dev_num(struct qcom_swrm_ctrl *ctrl)
status = (val >> (dev_num * SWRM_MCP_SLV_STATUS_SZ));
if ((status & SWRM_MCP_SLV_STATUS_MASK) == SDW_SLAVE_ALERT) {
- ctrl->status[dev_num] = status;
+ ctrl->status[dev_num] = status & SWRM_MCP_SLV_STATUS_MASK;
return dev_num;
}
}
diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c
index de8fe3c5becb..9b021390263e 100644
--- a/drivers/spi/spi-cadence.c
+++ b/drivers/spi/spi-cadence.c
@@ -317,12 +317,6 @@ static void cdns_spi_process_fifo(struct cdns_spi *xspi, int ntx, int nrx)
xspi->rx_bytes -= nrx;
while (ntx || nrx) {
- /* When xspi in busy condition, bytes may send failed,
- * then spi control did't work thoroughly, add one byte delay
- */
- if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
- udelay(10);
-
if (ntx) {
if (xspi->txbuf)
cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++);
@@ -392,6 +386,11 @@ static irqreturn_t cdns_spi_irq(int irq, void *dev_id)
if (xspi->tx_bytes) {
cdns_spi_process_fifo(xspi, trans_cnt, trans_cnt);
} else {
+ /* Fixed delay due to controller limitation with
+ * RX_NEMPTY incorrect status
+ * Xilinx AR:65885 contains more details
+ */
+ udelay(10);
cdns_spi_process_fifo(xspi, 0, trans_cnt);
cdns_spi_write(xspi, CDNS_SPI_IDR,
CDNS_SPI_IXR_DEFAULT);
@@ -439,12 +438,18 @@ static int cdns_transfer_one(struct spi_controller *ctlr,
cdns_spi_setup_transfer(spi, transfer);
} else {
/* Set TX empty threshold to half of FIFO depth
- * only if TX bytes are more than half FIFO depth.
+ * only if TX bytes are more than FIFO depth.
*/
if (xspi->tx_bytes > xspi->tx_fifo_depth)
cdns_spi_write(xspi, CDNS_SPI_THLD, xspi->tx_fifo_depth >> 1);
}
+ /* When xspi in busy condition, bytes may send failed,
+ * then spi control didn't work thoroughly, add one byte delay
+ */
+ if (cdns_spi_read(xspi, CDNS_SPI_ISR) & CDNS_SPI_IXR_TXFULL)
+ udelay(10);
+
cdns_spi_process_fifo(xspi, xspi->tx_fifo_depth, 0);
spi_transfer_delay_exec(transfer);
diff --git a/drivers/spi/spi-qcom-qspi.c b/drivers/spi/spi-qcom-qspi.c
index a8a683d6145c..1954c39b3d08 100644
--- a/drivers/spi/spi-qcom-qspi.c
+++ b/drivers/spi/spi-qcom-qspi.c
@@ -69,7 +69,7 @@
WR_FIFO_OVERRUN)
#define QSPI_ALL_IRQS (QSPI_ERR_IRQS | RESP_FIFO_RDY | \
WR_FIFO_EMPTY | WR_FIFO_FULL | \
- TRANSACTION_DONE)
+ TRANSACTION_DONE | DMA_CHAIN_DONE)
#define PIO_XFER_CTRL 0x0014
#define REQUEST_COUNT_MSK 0xffff
@@ -308,9 +308,11 @@ static int qcom_qspi_alloc_desc(struct qcom_qspi *ctrl, dma_addr_t dma_ptr,
dma_addr_t dma_cmd_desc;
/* allocate for dma cmd descriptor */
- virt_cmd_desc = dma_pool_alloc(ctrl->dma_cmd_pool, GFP_KERNEL | __GFP_ZERO, &dma_cmd_desc);
- if (!virt_cmd_desc)
- return -ENOMEM;
+ virt_cmd_desc = dma_pool_alloc(ctrl->dma_cmd_pool, GFP_ATOMIC | __GFP_ZERO, &dma_cmd_desc);
+ if (!virt_cmd_desc) {
+ dev_warn_once(ctrl->dev, "Couldn't find memory for descriptor\n");
+ return -EAGAIN;
+ }
ctrl->virt_cmd_desc[ctrl->n_cmd_desc] = virt_cmd_desc;
ctrl->dma_cmd_desc[ctrl->n_cmd_desc] = dma_cmd_desc;
@@ -355,10 +357,22 @@ static int qcom_qspi_setup_dma_desc(struct qcom_qspi *ctrl,
for (i = 0; i < sgt->nents; i++) {
dma_ptr_sg = sg_dma_address(sgt->sgl + i);
+ dma_len_sg = sg_dma_len(sgt->sgl + i);
if (!IS_ALIGNED(dma_ptr_sg, QSPI_ALIGN_REQ)) {
dev_warn_once(ctrl->dev, "dma_address not aligned to %d\n", QSPI_ALIGN_REQ);
return -EAGAIN;
}
+ /*
+ * When reading with DMA the controller writes to memory 1 word
+ * at a time. If the length isn't a multiple of 4 bytes then
+ * the controller can clobber the things later in memory.
+ * Fallback to PIO to be safe.
+ */
+ if (ctrl->xfer.dir == QSPI_READ && (dma_len_sg & 0x03)) {
+ dev_warn_once(ctrl->dev, "fallback to PIO for read of size %#010x\n",
+ dma_len_sg);
+ return -EAGAIN;
+ }
}
for (i = 0; i < sgt->nents; i++) {
@@ -441,8 +455,10 @@ static int qcom_qspi_transfer_one(struct spi_master *master,
ret = qcom_qspi_setup_dma_desc(ctrl, xfer);
if (ret != -EAGAIN) {
- if (!ret)
+ if (!ret) {
+ dma_wmb();
qcom_qspi_dma_xfer(ctrl);
+ }
goto exit;
}
dev_warn_once(ctrl->dev, "DMA failure, falling back to PIO\n");
@@ -603,6 +619,9 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
int_status = readl(ctrl->base + MSTR_INT_STATUS);
writel(int_status, ctrl->base + MSTR_INT_STATUS);
+ /* Ignore disabled interrupts */
+ int_status &= readl(ctrl->base + MSTR_INT_EN);
+
/* PIO mode handling */
if (ctrl->xfer.dir == QSPI_WRITE) {
if (int_status & WR_FIFO_EMPTY)
@@ -647,6 +666,30 @@ static irqreturn_t qcom_qspi_irq(int irq, void *dev_id)
return ret;
}
+static int qcom_qspi_adjust_op_size(struct spi_mem *mem, struct spi_mem_op *op)
+{
+ /*
+ * If qcom_qspi_can_dma() is going to return false we don't need to
+ * adjust anything.
+ */
+ if (op->data.nbytes <= QSPI_MAX_BYTES_FIFO)
+ return 0;
+
+ /*
+ * When reading, the transfer needs to be a multiple of 4 bytes so
+ * shrink the transfer if that's not true. The caller will then do a
+ * second transfer to finish things up.
+ */
+ if (op->data.dir == SPI_MEM_DATA_IN && (op->data.nbytes & 0x3))
+ op->data.nbytes &= ~0x3;
+
+ return 0;
+}
+
+static const struct spi_controller_mem_ops qcom_qspi_mem_ops = {
+ .adjust_op_size = qcom_qspi_adjust_op_size,
+};
+
static int qcom_qspi_probe(struct platform_device *pdev)
{
int ret;
@@ -731,6 +774,7 @@ static int qcom_qspi_probe(struct platform_device *pdev)
if (of_property_read_bool(pdev->dev.of_node, "iommus"))
master->can_dma = qcom_qspi_can_dma;
master->auto_runtime_pm = true;
+ master->mem_ops = &qcom_qspi_mem_ops;
ret = devm_pm_opp_set_clkname(&pdev->dev, "core");
if (ret)
diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c
index 6d10fa4ab783..7ddf9db776b0 100644
--- a/drivers/spi/spi-stm32.c
+++ b/drivers/spi/spi-stm32.c
@@ -1001,9 +1001,9 @@ static int stm32_spi_prepare_msg(struct spi_controller *ctrl,
if (spi->cfg->set_number_of_data) {
int ret;
- ret = spi_split_transfers_maxsize(ctrl, msg,
- STM32H7_SPI_TSIZE_MAX,
- GFP_KERNEL | GFP_DMA);
+ ret = spi_split_transfers_maxwords(ctrl, msg,
+ STM32H7_SPI_TSIZE_MAX,
+ GFP_KERNEL | GFP_DMA);
if (ret)
return ret;
}
diff --git a/drivers/staging/fbtft/fb_ili9341.c b/drivers/staging/fbtft/fb_ili9341.c
index 9ccd0823c3ab..47e72b87d76d 100644
--- a/drivers/staging/fbtft/fb_ili9341.c
+++ b/drivers/staging/fbtft/fb_ili9341.c
@@ -145,7 +145,7 @@ static struct fbtft_display display = {
},
};
-FBTFT_REGISTER_DRIVER(DRVNAME, "ilitek,ili9341", &display);
+FBTFT_REGISTER_SPI_DRIVER(DRVNAME, "ilitek", "ili9341", &display);
MODULE_ALIAS("spi:" DRVNAME);
MODULE_ALIAS("platform:" DRVNAME);
diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c
index e03c87f0bfe7..0fb97a79ad0b 100644
--- a/drivers/staging/ks7010/ks_wlan_net.c
+++ b/drivers/staging/ks7010/ks_wlan_net.c
@@ -1583,8 +1583,10 @@ static int ks_wlan_set_encode_ext(struct net_device *dev,
commit |= SME_WEP_FLAG;
}
if (enc->key_len) {
- memcpy(&key->key_val[0], &enc->key[0], enc->key_len);
- key->key_len = enc->key_len;
+ int key_len = clamp_val(enc->key_len, 0, IW_ENCODING_TOKEN_MAX);
+
+ memcpy(&key->key_val[0], &enc->key[0], key_len);
+ key->key_len = key_len;
commit |= (SME_WEP_VAL1 << index);
}
break;
diff --git a/drivers/staging/media/atomisp/Kconfig b/drivers/staging/media/atomisp/Kconfig
index c9bff98e5309..e9b168ba97bf 100644
--- a/drivers/staging/media/atomisp/Kconfig
+++ b/drivers/staging/media/atomisp/Kconfig
@@ -13,6 +13,7 @@ config VIDEO_ATOMISP
tristate "Intel Atom Image Signal Processor Driver"
depends on VIDEO_DEV && INTEL_ATOMISP
depends on PMIC_OPREGION
+ select V4L2_FWNODE
select IOSF_MBI
select VIDEOBUF2_VMALLOC
select VIDEO_V4L2_SUBDEV_API
diff --git a/drivers/staging/rtl8712/rtl871x_xmit.c b/drivers/staging/rtl8712/rtl871x_xmit.c
index 090345bad223..6353dbe554d3 100644
--- a/drivers/staging/rtl8712/rtl871x_xmit.c
+++ b/drivers/staging/rtl8712/rtl871x_xmit.c
@@ -21,6 +21,7 @@
#include "osdep_intf.h"
#include "usb_ops.h"
+#include <linux/usb.h>
#include <linux/ieee80211.h>
static const u8 P802_1H_OUI[P80211_OUI_LEN] = {0x00, 0x00, 0xf8};
@@ -55,6 +56,7 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
sint i;
struct xmit_buf *pxmitbuf;
struct xmit_frame *pxframe;
+ int j;
memset((unsigned char *)pxmitpriv, 0, sizeof(struct xmit_priv));
spin_lock_init(&pxmitpriv->lock);
@@ -117,11 +119,8 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
_init_queue(&pxmitpriv->pending_xmitbuf_queue);
pxmitpriv->pallocated_xmitbuf =
kmalloc(NR_XMITBUFF * sizeof(struct xmit_buf) + 4, GFP_ATOMIC);
- if (!pxmitpriv->pallocated_xmitbuf) {
- kfree(pxmitpriv->pallocated_frame_buf);
- pxmitpriv->pallocated_frame_buf = NULL;
- return -ENOMEM;
- }
+ if (!pxmitpriv->pallocated_xmitbuf)
+ goto clean_up_frame_buf;
pxmitpriv->pxmitbuf = pxmitpriv->pallocated_xmitbuf + 4 -
((addr_t)(pxmitpriv->pallocated_xmitbuf) & 3);
pxmitbuf = (struct xmit_buf *)pxmitpriv->pxmitbuf;
@@ -129,13 +128,17 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
INIT_LIST_HEAD(&pxmitbuf->list);
pxmitbuf->pallocated_buf =
kmalloc(MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ, GFP_ATOMIC);
- if (!pxmitbuf->pallocated_buf)
- return -ENOMEM;
+ if (!pxmitbuf->pallocated_buf) {
+ j = 0;
+ goto clean_up_alloc_buf;
+ }
pxmitbuf->pbuf = pxmitbuf->pallocated_buf + XMITBUF_ALIGN_SZ -
((addr_t) (pxmitbuf->pallocated_buf) &
(XMITBUF_ALIGN_SZ - 1));
- if (r8712_xmit_resource_alloc(padapter, pxmitbuf))
- return -ENOMEM;
+ if (r8712_xmit_resource_alloc(padapter, pxmitbuf)) {
+ j = 1;
+ goto clean_up_alloc_buf;
+ }
list_add_tail(&pxmitbuf->list,
&(pxmitpriv->free_xmitbuf_queue.queue));
pxmitbuf++;
@@ -146,6 +149,28 @@ int _r8712_init_xmit_priv(struct xmit_priv *pxmitpriv,
init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry);
tasklet_setup(&pxmitpriv->xmit_tasklet, r8712_xmit_bh);
return 0;
+
+clean_up_alloc_buf:
+ if (j) {
+ /* failure happened in r8712_xmit_resource_alloc()
+ * delete extra pxmitbuf->pallocated_buf
+ */
+ kfree(pxmitbuf->pallocated_buf);
+ }
+ for (j = 0; j < i; j++) {
+ int k;
+
+ pxmitbuf--; /* reset pointer */
+ kfree(pxmitbuf->pallocated_buf);
+ for (k = 0; k < 8; k++) /* delete xmit urb's */
+ usb_free_urb(pxmitbuf->pxmit_urb[k]);
+ }
+ kfree(pxmitpriv->pallocated_xmitbuf);
+ pxmitpriv->pallocated_xmitbuf = NULL;
+clean_up_frame_buf:
+ kfree(pxmitpriv->pallocated_frame_buf);
+ pxmitpriv->pallocated_frame_buf = NULL;
+ return -ENOMEM;
}
void _free_xmit_priv(struct xmit_priv *pxmitpriv)
diff --git a/drivers/staging/rtl8712/xmit_linux.c b/drivers/staging/rtl8712/xmit_linux.c
index 132afbf49dde..ceb6b590b310 100644
--- a/drivers/staging/rtl8712/xmit_linux.c
+++ b/drivers/staging/rtl8712/xmit_linux.c
@@ -112,6 +112,12 @@ int r8712_xmit_resource_alloc(struct _adapter *padapter,
for (i = 0; i < 8; i++) {
pxmitbuf->pxmit_urb[i] = usb_alloc_urb(0, GFP_KERNEL);
if (!pxmitbuf->pxmit_urb[i]) {
+ int k;
+
+ for (k = i - 1; k >= 0; k--) {
+ /* handle allocation errors part way through loop */
+ usb_free_urb(pxmitbuf->pxmit_urb[k]);
+ }
netdev_err(padapter->pnetdev, "pxmitbuf->pxmit_urb[i] == NULL\n");
return -ENOMEM;
}
diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
index 013f1633f082..2f00fc3bf274 100644
--- a/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
+++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_rapl.c
@@ -57,10 +57,10 @@ static int rapl_mmio_cpu_down_prep(unsigned int cpu)
static int rapl_mmio_read_raw(int cpu, struct reg_action *ra)
{
- if (!ra->reg)
+ if (!ra->reg.mmio)
return -EINVAL;
- ra->value = readq((void __iomem *)ra->reg);
+ ra->value = readq(ra->reg.mmio);
ra->value &= ra->mask;
return 0;
}
@@ -69,13 +69,13 @@ static int rapl_mmio_write_raw(int cpu, struct reg_action *ra)
{
u64 val;
- if (!ra->reg)
+ if (!ra->reg.mmio)
return -EINVAL;
- val = readq((void __iomem *)ra->reg);
+ val = readq(ra->reg.mmio);
val &= ~ra->mask;
val |= ra->value;
- writeq(val, (void __iomem *)ra->reg);
+ writeq(val, ra->reg.mmio);
return 0;
}
@@ -92,13 +92,13 @@ int proc_thermal_rapl_add(struct pci_dev *pdev, struct proc_thermal_device *proc
for (domain = RAPL_DOMAIN_PACKAGE; domain < RAPL_DOMAIN_MAX; domain++) {
for (reg = RAPL_DOMAIN_REG_LIMIT; reg < RAPL_DOMAIN_REG_MAX; reg++)
if (rapl_regs->regs[domain][reg])
- rapl_mmio_priv.regs[domain][reg] =
- (u64)proc_priv->mmio_base +
+ rapl_mmio_priv.regs[domain][reg].mmio =
+ proc_priv->mmio_base +
rapl_regs->regs[domain][reg];
rapl_mmio_priv.limits[domain] = rapl_regs->limits[domain];
}
rapl_mmio_priv.type = RAPL_IF_MMIO;
- rapl_mmio_priv.reg_unit = (u64)proc_priv->mmio_base + rapl_regs->reg_unit;
+ rapl_mmio_priv.reg_unit.mmio = proc_priv->mmio_base + rapl_regs->reg_unit;
rapl_mmio_priv.read_raw = rapl_mmio_read_raw;
rapl_mmio_priv.write_raw = rapl_mmio_write_raw;
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 842f678c1c3e..cc2b5e81c620 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1203,7 +1203,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_get_crit_temp);
struct thermal_zone_device *
thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *trips, int num_trips, int mask,
void *devdata, struct thermal_zone_device_ops *ops,
- struct thermal_zone_params *tzp, int passive_delay,
+ const struct thermal_zone_params *tzp, int passive_delay,
int polling_delay)
{
struct thermal_zone_device *tz;
@@ -1371,7 +1371,7 @@ EXPORT_SYMBOL_GPL(thermal_zone_device_register_with_trips);
struct thermal_zone_device *thermal_zone_device_register(const char *type, int ntrips, int mask,
void *devdata, struct thermal_zone_device_ops *ops,
- struct thermal_zone_params *tzp, int passive_delay,
+ const struct thermal_zone_params *tzp, int passive_delay,
int polling_delay)
{
return thermal_zone_device_register_with_trips(type, NULL, ntrips, mask,
diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index 6fb14e521197..bc07ae1c284c 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -238,17 +238,13 @@ static int thermal_of_monitor_init(struct device_node *np, int *delay, int *pdel
return 0;
}
-static struct thermal_zone_params *thermal_of_parameters_init(struct device_node *np)
+static void thermal_of_parameters_init(struct device_node *np,
+ struct thermal_zone_params *tzp)
{
- struct thermal_zone_params *tzp;
int coef[2];
int ncoef = ARRAY_SIZE(coef);
int prop, ret;
- tzp = kzalloc(sizeof(*tzp), GFP_KERNEL);
- if (!tzp)
- return ERR_PTR(-ENOMEM);
-
tzp->no_hwmon = true;
if (!of_property_read_u32(np, "sustainable-power", &prop))
@@ -267,8 +263,6 @@ static struct thermal_zone_params *thermal_of_parameters_init(struct device_node
tzp->slope = coef[0];
tzp->offset = coef[1];
-
- return tzp;
}
static struct device_node *thermal_of_zone_get_by_name(struct thermal_zone_device *tz)
@@ -442,13 +436,11 @@ static int thermal_of_unbind(struct thermal_zone_device *tz,
static void thermal_of_zone_unregister(struct thermal_zone_device *tz)
{
struct thermal_trip *trips = tz->trips;
- struct thermal_zone_params *tzp = tz->tzp;
struct thermal_zone_device_ops *ops = tz->ops;
thermal_zone_device_disable(tz);
thermal_zone_device_unregister(tz);
kfree(trips);
- kfree(tzp);
kfree(ops);
}
@@ -477,7 +469,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
{
struct thermal_zone_device *tz;
struct thermal_trip *trips;
- struct thermal_zone_params *tzp;
+ struct thermal_zone_params tzp = {};
struct thermal_zone_device_ops *of_ops;
struct device_node *np;
int delay, pdelay;
@@ -509,12 +501,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
goto out_kfree_trips;
}
- tzp = thermal_of_parameters_init(np);
- if (IS_ERR(tzp)) {
- ret = PTR_ERR(tzp);
- pr_err("Failed to initialize parameter from %pOFn: %d\n", np, ret);
- goto out_kfree_trips;
- }
+ thermal_of_parameters_init(np, &tzp);
of_ops->bind = thermal_of_bind;
of_ops->unbind = thermal_of_unbind;
@@ -522,12 +509,12 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
mask = GENMASK_ULL((ntrips) - 1, 0);
tz = thermal_zone_device_register_with_trips(np->name, trips, ntrips,
- mask, data, of_ops, tzp,
+ mask, data, of_ops, &tzp,
pdelay, delay);
if (IS_ERR(tz)) {
ret = PTR_ERR(tz);
pr_err("Failed to register thermal zone %pOFn: %d\n", np, ret);
- goto out_kfree_tzp;
+ goto out_kfree_trips;
}
ret = thermal_zone_device_enable(tz);
@@ -540,8 +527,6 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node *
return tz;
-out_kfree_tzp:
- kfree(tzp);
out_kfree_trips:
kfree(trips);
out_kfree_of_ops:
diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c
index 62b26b7998fd..3fb4553a6442 100644
--- a/drivers/thunderbolt/tb.c
+++ b/drivers/thunderbolt/tb.c
@@ -1964,6 +1964,8 @@ unlock:
pm_runtime_mark_last_busy(&tb->dev);
pm_runtime_put_autosuspend(&tb->dev);
+
+ kfree(ev);
}
static void tb_queue_dp_bandwidth_request(struct tb *tb, u64 route, u8 port)
diff --git a/drivers/thunderbolt/tmu.c b/drivers/thunderbolt/tmu.c
index 1269f417515b..0dfd1e083994 100644
--- a/drivers/thunderbolt/tmu.c
+++ b/drivers/thunderbolt/tmu.c
@@ -579,7 +579,9 @@ int tb_switch_tmu_disable(struct tb_switch *sw)
* uni-directional mode and we don't want to change it's TMU
* mode.
*/
- tb_switch_tmu_rate_write(sw, tmu_rates[TB_SWITCH_TMU_MODE_OFF]);
+ ret = tb_switch_tmu_rate_write(sw, tmu_rates[TB_SWITCH_TMU_MODE_OFF]);
+ if (ret)
+ return ret;
tb_port_tmu_time_sync_disable(up);
ret = tb_port_tmu_time_sync_disable(down);
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 341abaed4ce2..069de553127c 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -164,6 +164,9 @@ config LEGACY_TIOCSTI
userspace depends on this functionality to continue operating
normally.
+ Processes which run with CAP_SYS_ADMIN, such as BRLTTY, can
+ use TIOCSTI even when this is set to N.
+
This functionality can be changed at runtime with the
dev.tty.legacy_tiocsti sysctl. This configuration option sets
the default value of the sysctl.
diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index b411a26cc092..739f522cb893 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -3042,12 +3042,13 @@ static void gsm_error(struct gsm_mux *gsm)
static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
{
int i;
- struct gsm_dlci *dlci = gsm->dlci[0];
+ struct gsm_dlci *dlci;
struct gsm_msg *txq, *ntxq;
gsm->dead = true;
mutex_lock(&gsm->mutex);
+ dlci = gsm->dlci[0];
if (dlci) {
if (disc && dlci->state != DLCI_CLOSED) {
gsm_dlci_begin_close(dlci);
@@ -3070,8 +3071,10 @@ static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
gsm->has_devices = false;
}
for (i = NUM_DLCI - 1; i >= 0; i--)
- if (gsm->dlci[i])
+ if (gsm->dlci[i]) {
gsm_dlci_release(gsm->dlci[i]);
+ gsm->dlci[i] = NULL;
+ }
mutex_unlock(&gsm->mutex);
/* Now wipe the queues */
tty_ldisc_flush(gsm->tty);
diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 914e0e6251bf..3449f8790e46 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -497,6 +497,7 @@ static struct uart_8250_port *serial8250_setup_port(int index)
up = &serial8250_ports[index];
up->port.line = index;
+ up->port.port_id = index;
serial8250_init_port(up);
if (!base_ops)
@@ -1040,6 +1041,7 @@ int serial8250_register_8250_port(const struct uart_8250_port *up)
uart_remove_one_port(&serial8250_reg, &uart->port);
uart->port.ctrl_id = up->port.ctrl_id;
+ uart->port.port_id = up->port.port_id;
uart->port.iobase = up->port.iobase;
uart->port.membase = up->port.membase;
uart->port.irq = up->port.irq;
@@ -1202,6 +1204,7 @@ void serial8250_unregister_port(int line)
uart->port.flags &= ~UPF_BOOT_AUTOCONF;
uart->port.type = PORT_UNKNOWN;
uart->port.dev = &serial8250_isa_devs->dev;
+ uart->port.port_id = line;
uart->capabilities = 0;
serial8250_init_port(uart);
serial8250_apply_quirks(uart);
diff --git a/drivers/tty/serial/8250/8250_dwlib.c b/drivers/tty/serial/8250/8250_dwlib.c
index 75f32f054ebb..84843e204a5e 100644
--- a/drivers/tty/serial/8250/8250_dwlib.c
+++ b/drivers/tty/serial/8250/8250_dwlib.c
@@ -244,7 +244,7 @@ void dw8250_setup_port(struct uart_port *p)
struct dw8250_port_data *pd = p->private_data;
struct dw8250_data *data = to_dw8250_data(pd);
struct uart_8250_port *up = up_to_u8250p(p);
- u32 reg;
+ u32 reg, old_dlf;
pd->hw_rs485_support = dw8250_detect_rs485_hw(p);
if (pd->hw_rs485_support) {
@@ -270,9 +270,11 @@ void dw8250_setup_port(struct uart_port *p)
dev_dbg(p->dev, "Designware UART version %c.%c%c\n",
(reg >> 24) & 0xff, (reg >> 16) & 0xff, (reg >> 8) & 0xff);
+ /* Preserve value written by firmware or bootloader */
+ old_dlf = dw8250_readl_ext(p, DW_UART_DLF);
dw8250_writel_ext(p, DW_UART_DLF, ~0U);
reg = dw8250_readl_ext(p, DW_UART_DLF);
- dw8250_writel_ext(p, DW_UART_DLF, 0);
+ dw8250_writel_ext(p, DW_UART_DLF, old_dlf);
if (reg) {
pd->dlf_size = fls(reg);
diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index 16aeb1420137..483bb552cdc4 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -703,9 +703,6 @@ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep)
static void serial8250_clear_IER(struct uart_8250_port *up)
{
- /* Port locked to synchronize UART_IER access against the console. */
- lockdep_assert_held_once(&up->port.lock);
-
if (up->capabilities & UART_CAP_UUE)
serial_out(up, UART_IER, UART_IER_UUE);
else
@@ -3278,6 +3275,7 @@ void serial8250_init_port(struct uart_8250_port *up)
spin_lock_init(&port->lock);
port->ctrl_id = 0;
+ port->pm = NULL;
port->ops = &serial8250_pops;
port->has_sysrq = IS_ENABLED(CONFIG_SERIAL_8250_CONSOLE);
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 4d80fae20177..c569a08b5b19 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -1139,8 +1139,8 @@ static void lpuart_copy_rx_to_tty(struct lpuart_port *sport)
unsigned long sr = lpuart32_read(&sport->port, UARTSTAT);
if (sr & (UARTSTAT_PE | UARTSTAT_FE)) {
- /* Read DR to clear the error flags */
- lpuart32_read(&sport->port, UARTDATA);
+ /* Clear the error flags */
+ lpuart32_write(&sport->port, sr, UARTSTAT);
if (sr & UARTSTAT_PE)
sport->port.icount.parity++;
diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 444c74eeab7d..daaf2a64e7f1 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -1681,13 +1681,6 @@ static int qcom_geni_serial_probe(struct platform_device *pdev)
if (ret)
return ret;
- /*
- * Set pm_runtime status as ACTIVE so that wakeup_irq gets
- * enabled/disabled from dev_pm_arm_wake_irq during system
- * suspend/resume respectively.
- */
- pm_runtime_set_active(&pdev->dev);
-
if (port->wakeup_irq > 0) {
device_init_wakeup(&pdev->dev, true);
ret = dev_pm_set_dedicated_wake_irq(&pdev->dev,
diff --git a/drivers/tty/serial/serial_base.h b/drivers/tty/serial/serial_base.h
index 9faac0ff6b89..c74c548f0db6 100644
--- a/drivers/tty/serial/serial_base.h
+++ b/drivers/tty/serial/serial_base.h
@@ -16,6 +16,7 @@ struct device;
struct serial_ctrl_device {
struct device dev;
+ struct ida port_ida;
};
struct serial_port_device {
diff --git a/drivers/tty/serial/serial_base_bus.c b/drivers/tty/serial/serial_base_bus.c
index 6ff59c89d867..3dfcf20c4eb6 100644
--- a/drivers/tty/serial/serial_base_bus.c
+++ b/drivers/tty/serial/serial_base_bus.c
@@ -10,6 +10,7 @@
#include <linux/container_of.h>
#include <linux/device.h>
+#include <linux/idr.h>
#include <linux/module.h>
#include <linux/serial_core.h>
#include <linux/slab.h>
@@ -19,11 +20,25 @@
static bool serial_base_initialized;
+static const struct device_type serial_ctrl_type = {
+ .name = "ctrl",
+};
+
+static const struct device_type serial_port_type = {
+ .name = "port",
+};
+
static int serial_base_match(struct device *dev, struct device_driver *drv)
{
- int len = strlen(drv->name);
+ if (dev->type == &serial_ctrl_type &&
+ str_has_prefix(drv->name, serial_ctrl_type.name))
+ return 1;
- return !strncmp(dev_name(dev), drv->name, len);
+ if (dev->type == &serial_port_type &&
+ str_has_prefix(drv->name, serial_port_type.name))
+ return 1;
+
+ return 0;
}
static struct bus_type serial_base_bus_type = {
@@ -48,7 +63,8 @@ static int serial_base_device_init(struct uart_port *port,
struct device *parent_dev,
const struct device_type *type,
void (*release)(struct device *dev),
- int id)
+ unsigned int ctrl_id,
+ unsigned int port_id)
{
device_initialize(dev);
dev->type = type;
@@ -61,12 +77,15 @@ static int serial_base_device_init(struct uart_port *port,
return -EPROBE_DEFER;
}
- return dev_set_name(dev, "%s.%s.%d", type->name, dev_name(port->dev), id);
-}
+ if (type == &serial_ctrl_type)
+ return dev_set_name(dev, "%s:%d", dev_name(port->dev), ctrl_id);
-static const struct device_type serial_ctrl_type = {
- .name = "ctrl",
-};
+ if (type == &serial_port_type)
+ return dev_set_name(dev, "%s:%d.%d", dev_name(port->dev),
+ ctrl_id, port_id);
+
+ return -EINVAL;
+}
static void serial_base_ctrl_release(struct device *dev)
{
@@ -81,6 +100,7 @@ void serial_base_ctrl_device_remove(struct serial_ctrl_device *ctrl_dev)
return;
device_del(&ctrl_dev->dev);
+ put_device(&ctrl_dev->dev);
}
struct serial_ctrl_device *serial_base_ctrl_add(struct uart_port *port,
@@ -93,10 +113,12 @@ struct serial_ctrl_device *serial_base_ctrl_add(struct uart_port *port,
if (!ctrl_dev)
return ERR_PTR(-ENOMEM);
+ ida_init(&ctrl_dev->port_ida);
+
err = serial_base_device_init(port, &ctrl_dev->dev,
parent, &serial_ctrl_type,
serial_base_ctrl_release,
- port->ctrl_id);
+ port->ctrl_id, 0);
if (err)
goto err_put_device;
@@ -112,10 +134,6 @@ err_put_device:
return ERR_PTR(err);
}
-static const struct device_type serial_port_type = {
- .name = "port",
-};
-
static void serial_base_port_release(struct device *dev)
{
struct serial_port_device *port_dev = to_serial_base_port_device(dev);
@@ -127,16 +145,31 @@ struct serial_port_device *serial_base_port_add(struct uart_port *port,
struct serial_ctrl_device *ctrl_dev)
{
struct serial_port_device *port_dev;
+ int min = 0, max = -1; /* Use -1 for max to apply IDA defaults */
int err;
port_dev = kzalloc(sizeof(*port_dev), GFP_KERNEL);
if (!port_dev)
return ERR_PTR(-ENOMEM);
+ /* Device driver specified port_id vs automatic assignment? */
+ if (port->port_id) {
+ min = port->port_id;
+ max = port->port_id;
+ }
+
+ err = ida_alloc_range(&ctrl_dev->port_ida, min, max, GFP_KERNEL);
+ if (err < 0) {
+ kfree(port_dev);
+ return ERR_PTR(err);
+ }
+
+ port->port_id = err;
+
err = serial_base_device_init(port, &port_dev->dev,
&ctrl_dev->dev, &serial_port_type,
serial_base_port_release,
- port->line);
+ port->ctrl_id, port->port_id);
if (err)
goto err_put_device;
@@ -150,16 +183,25 @@ struct serial_port_device *serial_base_port_add(struct uart_port *port,
err_put_device:
put_device(&port_dev->dev);
+ ida_free(&ctrl_dev->port_ida, port->port_id);
return ERR_PTR(err);
}
void serial_base_port_device_remove(struct serial_port_device *port_dev)
{
+ struct serial_ctrl_device *ctrl_dev;
+ struct device *parent;
+
if (!port_dev)
return;
+ parent = port_dev->dev.parent;
+ ctrl_dev = to_serial_base_ctrl_device(parent);
+
device_del(&port_dev->dev);
+ ida_free(&ctrl_dev->port_ida, port_dev->port->port_id);
+ put_device(&port_dev->dev);
}
static int serial_base_init(void)
diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 7c9457962a3d..8b7a42e05d6d 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -590,7 +590,7 @@ static void sci_start_tx(struct uart_port *port)
dma_submit_error(s->cookie_tx)) {
if (s->cfg->regtype == SCIx_RZ_SCIFA_REGTYPE)
/* Switch irq from SCIF to DMA */
- disable_irq(s->irqs[SCIx_TXI_IRQ]);
+ disable_irq_nosync(s->irqs[SCIx_TXI_IRQ]);
s->cookie_tx = 0;
schedule_work(&s->work_tx);
diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c
index 1f565a216e74..a19db49327e2 100644
--- a/drivers/tty/serial/sifive.c
+++ b/drivers/tty/serial/sifive.c
@@ -811,7 +811,7 @@ static void sifive_serial_console_write(struct console *co, const char *s,
local_irq_restore(flags);
}
-static int __init sifive_serial_console_setup(struct console *co, char *options)
+static int sifive_serial_console_setup(struct console *co, char *options)
{
struct sifive_serial_port *ssp;
int baud = SIFIVE_DEFAULT_BAUD_RATE;
diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index 404230c1ebb2..0a370b9ea70b 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -59,7 +59,7 @@ static int firmware_loaded;
/* #define LOOPBACK */
/* The major and minor device numbers are defined in
- * http://www.lanana.org/docs/device-list/devices-2.6+.txt. For the QE
+ * Documentation/admin-guide/devices.txt. For the QE
* UART, we have major number 204 and minor numbers 46 - 49, which are the
* same as for the CPM2. This decision was made because no Freescale part
* has both a CPM and a QE.
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 3959efc717aa..63db04b9113a 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2285,7 +2285,7 @@ static int tiocsti(struct tty_struct *tty, char __user *p)
char ch, mbz = 0;
struct tty_ldisc *ld;
- if (!tty_legacy_tiocsti)
+ if (!tty_legacy_tiocsti && !capable(CAP_SYS_ADMIN))
return -EIO;
if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
diff --git a/drivers/ufs/core/ufs-mcq.c b/drivers/ufs/core/ufs-mcq.c
index 6fb0e007af63..386674ead7f0 100644
--- a/drivers/ufs/core/ufs-mcq.c
+++ b/drivers/ufs/core/ufs-mcq.c
@@ -580,7 +580,6 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
{
struct ufshcd_lrb *lrbp = &hba->lrb[task_tag];
struct utp_transfer_req_desc *utrd;
- u32 mask = hwq->max_entries - 1;
__le64 cmd_desc_base_addr;
bool ret = false;
u64 addr, match;
@@ -608,7 +607,10 @@ static bool ufshcd_mcq_sqe_search(struct ufs_hba *hba,
ret = true;
goto out;
}
- sq_head_slot = (sq_head_slot + 1) & mask;
+
+ sq_head_slot++;
+ if (sq_head_slot == hwq->max_entries)
+ sq_head_slot = 0;
}
out:
diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c
index 8d6fd4c3324f..c1557d21b027 100644
--- a/drivers/ufs/host/ufs-qcom.c
+++ b/drivers/ufs/host/ufs-qcom.c
@@ -321,7 +321,7 @@ static void ufs_qcom_select_unipro_mode(struct ufs_qcom_host *host)
ufs_qcom_cap_qunipro(host) ? QUNIPRO_SEL : 0,
REG_UFS_CFG1);
- if (host->hw_ver.major == 0x05)
+ if (host->hw_ver.major >= 0x05)
ufshcd_rmwl(host->hba, QUNIPRO_G4_SEL, 0, REG_UFS_CFG0);
/* make sure above configuration is applied before we return */
diff --git a/drivers/ufs/host/ufs-renesas.c b/drivers/ufs/host/ufs-renesas.c
index f8a5e79ed3b4..ab0652d8705a 100644
--- a/drivers/ufs/host/ufs-renesas.c
+++ b/drivers/ufs/host/ufs-renesas.c
@@ -359,7 +359,7 @@ static int ufs_renesas_init(struct ufs_hba *hba)
{
struct ufs_renesas_priv *priv;
- priv = devm_kmalloc(hba->dev, sizeof(*priv), GFP_KERNEL);
+ priv = devm_kzalloc(hba->dev, sizeof(*priv), GFP_KERNEL);
if (!priv)
return -ENOMEM;
ufshcd_set_variant(hba, priv);
diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-gadget.c
index ea19253fd2d0..aa0111b365bb 100644
--- a/drivers/usb/cdns3/cdns3-gadget.c
+++ b/drivers/usb/cdns3/cdns3-gadget.c
@@ -3015,12 +3015,14 @@ static int cdns3_gadget_udc_stop(struct usb_gadget *gadget)
static int cdns3_gadget_check_config(struct usb_gadget *gadget)
{
struct cdns3_device *priv_dev = gadget_to_cdns3_device(gadget);
+ struct cdns3_endpoint *priv_ep;
struct usb_ep *ep;
int n_in = 0;
int total;
list_for_each_entry(ep, &gadget->ep_list, ep_list) {
- if (ep->claimed && (ep->address & USB_DIR_IN))
+ priv_ep = ep_to_cdns3_ep(ep);
+ if ((priv_ep->flags & EP_CLAIMED) && (ep->address & USB_DIR_IN))
n_in++;
}
diff --git a/drivers/usb/common/usb-conn-gpio.c b/drivers/usb/common/usb-conn-gpio.c
index 766005d20bae..501e8bc9738e 100644
--- a/drivers/usb/common/usb-conn-gpio.c
+++ b/drivers/usb/common/usb-conn-gpio.c
@@ -42,6 +42,7 @@ struct usb_conn_info {
struct power_supply_desc desc;
struct power_supply *charger;
+ bool initial_detection;
};
/*
@@ -86,11 +87,13 @@ static void usb_conn_detect_cable(struct work_struct *work)
dev_dbg(info->dev, "role %s -> %s, gpios: id %d, vbus %d\n",
usb_role_string(info->last_role), usb_role_string(role), id, vbus);
- if (info->last_role == role) {
+ if (!info->initial_detection && info->last_role == role) {
dev_warn(info->dev, "repeated role: %s\n", usb_role_string(role));
return;
}
+ info->initial_detection = false;
+
if (info->last_role == USB_ROLE_HOST && info->vbus)
regulator_disable(info->vbus);
@@ -258,6 +261,7 @@ static int usb_conn_probe(struct platform_device *pdev)
device_set_wakeup_capable(&pdev->dev, true);
/* Perform initial detection */
+ info->initial_detection = true;
usb_conn_queue_dwork(info, 0);
return 0;
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 1a16a8bdea60..4f68f6ef3cc1 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -2642,21 +2642,21 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
snoop(&dev->dev, "%s: CONTROL\n", __func__);
ret = proc_control(ps, p);
if (ret >= 0)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
break;
case USBDEVFS_BULK:
snoop(&dev->dev, "%s: BULK\n", __func__);
ret = proc_bulk(ps, p);
if (ret >= 0)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
break;
case USBDEVFS_RESETEP:
snoop(&dev->dev, "%s: RESETEP\n", __func__);
ret = proc_resetep(ps, p);
if (ret >= 0)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
break;
case USBDEVFS_RESET:
@@ -2668,7 +2668,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
snoop(&dev->dev, "%s: CLEAR_HALT\n", __func__);
ret = proc_clearhalt(ps, p);
if (ret >= 0)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
break;
case USBDEVFS_GETDRIVER:
@@ -2695,7 +2695,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
snoop(&dev->dev, "%s: SUBMITURB\n", __func__);
ret = proc_submiturb(ps, p);
if (ret >= 0)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
break;
#ifdef CONFIG_COMPAT
@@ -2703,14 +2703,14 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
snoop(&dev->dev, "%s: CONTROL32\n", __func__);
ret = proc_control_compat(ps, p);
if (ret >= 0)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
break;
case USBDEVFS_BULK32:
snoop(&dev->dev, "%s: BULK32\n", __func__);
ret = proc_bulk_compat(ps, p);
if (ret >= 0)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
break;
case USBDEVFS_DISCSIGNAL32:
@@ -2722,7 +2722,7 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd,
snoop(&dev->dev, "%s: SUBMITURB32\n", __func__);
ret = proc_submiturb_compat(ps, p);
if (ret >= 0)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
break;
case USBDEVFS_IOCTL32:
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index 934b3d997702..15e9bd180a1d 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -436,6 +436,10 @@ static const struct usb_device_id usb_quirk_list[] = {
/* novation SoundControl XL */
{ USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME },
+ /* Focusrite Scarlett Solo USB */
+ { USB_DEVICE(0x1235, 0x8211), .driver_info =
+ USB_QUIRK_DISCONNECT_SUSPEND },
+
/* Huawei 4G LTE module */
{ USB_DEVICE(0x12d1, 0x15bb), .driver_info =
USB_QUIRK_DISCONNECT_SUSPEND },
diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index f6689b731718..9c6bf054f15d 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -277,9 +277,9 @@ int dwc3_core_soft_reset(struct dwc3 *dwc)
/*
* We're resetting only the device side because, if we're in host mode,
* XHCI driver will reset the host block. If dwc3 was configured for
- * host-only mode, then we can return early.
+ * host-only mode or current role is host, then we can return early.
*/
- if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
+ if (dwc->dr_mode == USB_DR_MODE_HOST || dwc->current_dr_role == DWC3_GCTL_PRTCAP_HOST)
return 0;
reg = dwc3_readl(dwc->regs, DWC3_DCTL);
@@ -1209,22 +1209,6 @@ static int dwc3_core_init(struct dwc3 *dwc)
dwc3_writel(dwc->regs, DWC3_GUCTL1, reg);
}
- if (dwc->dr_mode == USB_DR_MODE_HOST ||
- dwc->dr_mode == USB_DR_MODE_OTG) {
- reg = dwc3_readl(dwc->regs, DWC3_GUCTL);
-
- /*
- * Enable Auto retry Feature to make the controller operating in
- * Host mode on seeing transaction errors(CRC errors or internal
- * overrun scenerios) on IN transfers to reply to the device
- * with a non-terminating retry ACK (i.e, an ACK transcation
- * packet with Retry=1 & Nump != 0)
- */
- reg |= DWC3_GUCTL_HSTINAUTORETRY;
-
- dwc3_writel(dwc->regs, DWC3_GUCTL, reg);
- }
-
/*
* Must config both number of packets and max burst settings to enable
* RX and/or TX threshold.
diff --git a/drivers/usb/dwc3/core.h b/drivers/usb/dwc3/core.h
index 8b1295e4dcdd..a69ac67d89fe 100644
--- a/drivers/usb/dwc3/core.h
+++ b/drivers/usb/dwc3/core.h
@@ -256,9 +256,6 @@
#define DWC3_GCTL_GBLHIBERNATIONEN BIT(1)
#define DWC3_GCTL_DSBLCLKGTNG BIT(0)
-/* Global User Control Register */
-#define DWC3_GUCTL_HSTINAUTORETRY BIT(14)
-
/* Global User Control 1 Register */
#define DWC3_GUCTL1_DEV_DECOUPLE_L1L2_EVT BIT(31)
#define DWC3_GUCTL1_TX_IPGAP_LINECHECK_DIS BIT(28)
diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index 44a04c9b2073..6604845c397c 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -233,10 +233,12 @@ static int dwc3_pci_quirks(struct dwc3_pci *dwc,
/*
* A lot of BYT devices lack ACPI resource entries for
- * the GPIOs, add a fallback mapping to the reference
+ * the GPIOs. If the ACPI entry for the GPIO controller
+ * is present add a fallback mapping to the reference
* design GPIOs which all boards seem to use.
*/
- gpiod_add_lookup_table(&platform_bytcr_gpios);
+ if (acpi_dev_present("INT33FC", NULL, -1))
+ gpiod_add_lookup_table(&platform_bytcr_gpios);
/*
* These GPIOs will turn on the USB2 PHY. Note that we have to
diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 5fd067151fbf..858fe4c299b7 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -4455,9 +4455,14 @@ static irqreturn_t dwc3_check_event_buf(struct dwc3_event_buffer *evt)
u32 count;
if (pm_runtime_suspended(dwc->dev)) {
+ dwc->pending_events = true;
+ /*
+ * Trigger runtime resume. The get() function will be balanced
+ * after processing the pending events in dwc3_process_pending
+ * events().
+ */
pm_runtime_get(dwc->dev);
disable_irq_nosync(dwc->irq_gadget);
- dwc->pending_events = true;
return IRQ_HANDLED;
}
@@ -4718,6 +4723,8 @@ void dwc3_gadget_process_pending_events(struct dwc3 *dwc)
{
if (dwc->pending_events) {
dwc3_interrupt(dwc->irq_gadget, dwc->ev_buf);
+ dwc3_thread_interrupt(dwc->irq_gadget, dwc->ev_buf);
+ pm_runtime_put(dwc->dev);
dwc->pending_events = false;
enable_irq(dwc->irq_gadget);
}
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 1b3489149e5e..dd9b90481b4c 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -1125,6 +1125,10 @@ int usb_add_config(struct usb_composite_dev *cdev,
goto done;
status = bind(config);
+
+ if (status == 0)
+ status = usb_gadget_check_config(cdev->gadget);
+
if (status < 0) {
while (!list_empty(&config->functions)) {
struct usb_function *f;
diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c
index f41a385a5c42..6e9ef35a43a7 100644
--- a/drivers/usb/gadget/function/f_fs.c
+++ b/drivers/usb/gadget/function/f_fs.c
@@ -1377,7 +1377,7 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
inode = new_inode(sb);
if (inode) {
- struct timespec64 ts = current_time(inode);
+ struct timespec64 ts = inode_set_ctime_current(inode);
inode->i_ino = get_next_ino();
inode->i_mode = perms->mode;
@@ -1385,7 +1385,6 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
inode->i_gid = perms->gid;
inode->i_atime = ts;
inode->i_mtime = ts;
- inode->i_ctime = ts;
inode->i_private = data;
if (fops)
inode->i_fop = fops;
diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c
index 28249d0bf062..ce9e31f3d26b 100644
--- a/drivers/usb/gadget/legacy/inode.c
+++ b/drivers/usb/gadget/legacy/inode.c
@@ -1969,8 +1969,7 @@ gadgetfs_make_inode (struct super_block *sb,
inode->i_mode = mode;
inode->i_uid = make_kuid(&init_user_ns, default_uid);
inode->i_gid = make_kgid(&init_user_ns, default_gid);
- inode->i_atime = inode->i_mtime = inode->i_ctime
- = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_private = data;
inode->i_fop = fops;
}
diff --git a/drivers/usb/gadget/legacy/raw_gadget.c b/drivers/usb/gadget/legacy/raw_gadget.c
index 2acece16b890..e549022642e5 100644
--- a/drivers/usb/gadget/legacy/raw_gadget.c
+++ b/drivers/usb/gadget/legacy/raw_gadget.c
@@ -310,13 +310,15 @@ static int gadget_bind(struct usb_gadget *gadget,
dev->eps_num = i;
spin_unlock_irqrestore(&dev->lock, flags);
- /* Matches kref_put() in gadget_unbind(). */
- kref_get(&dev->count);
-
ret = raw_queue_event(dev, USB_RAW_EVENT_CONNECT, 0, NULL);
- if (ret < 0)
+ if (ret < 0) {
dev_err(&gadget->dev, "failed to queue event\n");
+ set_gadget_data(gadget, NULL);
+ return ret;
+ }
+ /* Matches kref_put() in gadget_unbind(). */
+ kref_get(&dev->count);
return ret;
}
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index 59188ea065e0..7d49d8a0b00c 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -822,6 +822,9 @@ EXPORT_SYMBOL_GPL(usb_gadget_disconnect);
* usb_gadget_activate() is called. For example, user mode components may
* need to be activated before the system can talk to hosts.
*
+ * This routine may sleep; it must not be called in interrupt context
+ * (such as from within a gadget driver's disconnect() callback).
+ *
* Returns zero on success, else negative errno.
*/
int usb_gadget_deactivate(struct usb_gadget *gadget)
@@ -860,6 +863,8 @@ EXPORT_SYMBOL_GPL(usb_gadget_deactivate);
* This routine activates gadget which was previously deactivated with
* usb_gadget_deactivate() call. It calls usb_gadget_connect() if needed.
*
+ * This routine may sleep; it must not be called in interrupt context.
+ *
* Returns zero on success, else negative errno.
*/
int usb_gadget_activate(struct usb_gadget *gadget)
@@ -878,7 +883,6 @@ int usb_gadget_activate(struct usb_gadget *gadget)
*/
if (gadget->connected)
ret = usb_gadget_connect_locked(gadget);
- mutex_unlock(&gadget->udc->connect_lock);
unlock:
mutex_unlock(&gadget->udc->connect_lock);
@@ -1639,7 +1643,11 @@ static void gadget_unbind_driver(struct device *dev)
usb_gadget_disable_async_callbacks(udc);
if (gadget->irq)
synchronize_irq(gadget->irq);
+ mutex_unlock(&udc->connect_lock);
+
udc->driver->unbind(gadget);
+
+ mutex_lock(&udc->connect_lock);
usb_gadget_udc_stop_locked(udc);
mutex_unlock(&udc->connect_lock);
diff --git a/drivers/usb/gadget/udc/tegra-xudc.c b/drivers/usb/gadget/udc/tegra-xudc.c
index 83eaa65ddde3..df6028f7b273 100644
--- a/drivers/usb/gadget/udc/tegra-xudc.c
+++ b/drivers/usb/gadget/udc/tegra-xudc.c
@@ -3718,15 +3718,15 @@ static int tegra_xudc_powerdomain_init(struct tegra_xudc *xudc)
int err;
xudc->genpd_dev_device = dev_pm_domain_attach_by_name(dev, "dev");
- if (IS_ERR_OR_NULL(xudc->genpd_dev_device)) {
- err = PTR_ERR(xudc->genpd_dev_device) ? : -ENODATA;
+ if (IS_ERR(xudc->genpd_dev_device)) {
+ err = PTR_ERR(xudc->genpd_dev_device);
dev_err(dev, "failed to get device power domain: %d\n", err);
return err;
}
xudc->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "ss");
- if (IS_ERR_OR_NULL(xudc->genpd_dev_ss)) {
- err = PTR_ERR(xudc->genpd_dev_ss) ? : -ENODATA;
+ if (IS_ERR(xudc->genpd_dev_ss)) {
+ err = PTR_ERR(xudc->genpd_dev_ss);
dev_err(dev, "failed to get SuperSpeed power domain: %d\n", err);
return err;
}
diff --git a/drivers/usb/host/ohci-at91.c b/drivers/usb/host/ohci-at91.c
index b9ce8d80f20b..b805c4b52ac3 100644
--- a/drivers/usb/host/ohci-at91.c
+++ b/drivers/usb/host/ohci-at91.c
@@ -672,7 +672,13 @@ ohci_hcd_at91_drv_resume(struct device *dev)
else
at91_start_clock(ohci_at91);
- ohci_resume(hcd, false);
+ /*
+ * According to the comment in ohci_hcd_at91_drv_suspend()
+ * we need to do a reset if the 48Mhz clock was stopped,
+ * that is, if ohci_at91->wakeup is clear. Tell ohci_resume()
+ * to reset in this case by setting its "hibernated" flag.
+ */
+ ohci_resume(hcd, !ohci_at91->wakeup);
return 0;
}
diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index 51d9d4d4f6a5..bbdf1b0b7be1 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -586,6 +586,7 @@ static int xhci_mtk_probe(struct platform_device *pdev)
}
device_init_wakeup(dev, true);
+ dma_set_max_seg_size(dev, UINT_MAX);
xhci = hcd_to_xhci(hcd);
xhci->main_hcd = hcd;
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index c6742bae41c0..b9ae5c2a2527 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -479,10 +479,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
pdev->device == 0x3432)
xhci->quirks |= XHCI_BROKEN_STREAMS;
- if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483) {
+ if (pdev->vendor == PCI_VENDOR_ID_VIA && pdev->device == 0x3483)
xhci->quirks |= XHCI_LPM_SUPPORT;
- xhci->quirks |= XHCI_EP_CTX_BROKEN_DCS;
- }
if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) {
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 646ff125def5..1dde53f6eb31 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -626,11 +626,8 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci,
struct xhci_ring *ep_ring;
struct xhci_command *cmd;
struct xhci_segment *new_seg;
- struct xhci_segment *halted_seg = NULL;
union xhci_trb *new_deq;
int new_cycle;
- union xhci_trb *halted_trb;
- int index = 0;
dma_addr_t addr;
u64 hw_dequeue;
bool cycle_found = false;
@@ -668,27 +665,7 @@ static int xhci_move_dequeue_past_td(struct xhci_hcd *xhci,
hw_dequeue = xhci_get_hw_deq(xhci, dev, ep_index, stream_id);
new_seg = ep_ring->deq_seg;
new_deq = ep_ring->dequeue;
-
- /*
- * Quirk: xHC write-back of the DCS field in the hardware dequeue
- * pointer is wrong - use the cycle state of the TRB pointed to by
- * the dequeue pointer.
- */
- if (xhci->quirks & XHCI_EP_CTX_BROKEN_DCS &&
- !(ep->ep_state & EP_HAS_STREAMS))
- halted_seg = trb_in_td(xhci, td->start_seg,
- td->first_trb, td->last_trb,
- hw_dequeue & ~0xf, false);
- if (halted_seg) {
- index = ((dma_addr_t)(hw_dequeue & ~0xf) - halted_seg->dma) /
- sizeof(*halted_trb);
- halted_trb = &halted_seg->trbs[index];
- new_cycle = halted_trb->generic.field[3] & 0x1;
- xhci_dbg(xhci, "Endpoint DCS = %d TRB index = %d cycle = %d\n",
- (u8)(hw_dequeue & 0x1), index, new_cycle);
- } else {
- new_cycle = hw_dequeue & 0x1;
- }
+ new_cycle = hw_dequeue & 0x1;
/*
* We want to find the pointer, segment and cycle state of the new trb
diff --git a/drivers/usb/host/xhci-tegra.c b/drivers/usb/host/xhci-tegra.c
index 6ca8a37e53e1..4693d83351c6 100644
--- a/drivers/usb/host/xhci-tegra.c
+++ b/drivers/usb/host/xhci-tegra.c
@@ -1145,15 +1145,15 @@ static int tegra_xusb_powerdomain_init(struct device *dev,
int err;
tegra->genpd_dev_host = dev_pm_domain_attach_by_name(dev, "xusb_host");
- if (IS_ERR_OR_NULL(tegra->genpd_dev_host)) {
- err = PTR_ERR(tegra->genpd_dev_host) ? : -ENODATA;
+ if (IS_ERR(tegra->genpd_dev_host)) {
+ err = PTR_ERR(tegra->genpd_dev_host);
dev_err(dev, "failed to get host pm-domain: %d\n", err);
return err;
}
tegra->genpd_dev_ss = dev_pm_domain_attach_by_name(dev, "xusb_ss");
- if (IS_ERR_OR_NULL(tegra->genpd_dev_ss)) {
- err = PTR_ERR(tegra->genpd_dev_ss) ? : -ENODATA;
+ if (IS_ERR(tegra->genpd_dev_ss)) {
+ err = PTR_ERR(tegra->genpd_dev_ss);
dev_err(dev, "failed to get superspeed pm-domain: %d\n", err);
return err;
}
diff --git a/drivers/usb/misc/ehset.c b/drivers/usb/misc/ehset.c
index 986d6589f053..36b6e9fa7ffb 100644
--- a/drivers/usb/misc/ehset.c
+++ b/drivers/usb/misc/ehset.c
@@ -77,7 +77,7 @@ static int ehset_probe(struct usb_interface *intf,
switch (test_pid) {
case TEST_SE0_NAK_PID:
ret = ehset_prepare_port_for_testing(hub_udev, portnum);
- if (!ret)
+ if (ret < 0)
break;
ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE,
USB_RT_PORT, USB_PORT_FEAT_TEST,
@@ -86,7 +86,7 @@ static int ehset_probe(struct usb_interface *intf,
break;
case TEST_J_PID:
ret = ehset_prepare_port_for_testing(hub_udev, portnum);
- if (!ret)
+ if (ret < 0)
break;
ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE,
USB_RT_PORT, USB_PORT_FEAT_TEST,
@@ -95,7 +95,7 @@ static int ehset_probe(struct usb_interface *intf,
break;
case TEST_K_PID:
ret = ehset_prepare_port_for_testing(hub_udev, portnum);
- if (!ret)
+ if (ret < 0)
break;
ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE,
USB_RT_PORT, USB_PORT_FEAT_TEST,
@@ -104,7 +104,7 @@ static int ehset_probe(struct usb_interface *intf,
break;
case TEST_PACKET_PID:
ret = ehset_prepare_port_for_testing(hub_udev, portnum);
- if (!ret)
+ if (ret < 0)
break;
ret = usb_control_msg_send(hub_udev, 0, USB_REQ_SET_FEATURE,
USB_RT_PORT, USB_PORT_FEAT_TEST,
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 288a96a74266..8ac98e60fff5 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -251,6 +251,7 @@ static void option_instat_callback(struct urb *urb);
#define QUECTEL_PRODUCT_EM061K_LTA 0x0123
#define QUECTEL_PRODUCT_EM061K_LMS 0x0124
#define QUECTEL_PRODUCT_EC25 0x0125
+#define QUECTEL_PRODUCT_EM060K_128 0x0128
#define QUECTEL_PRODUCT_EG91 0x0191
#define QUECTEL_PRODUCT_EG95 0x0195
#define QUECTEL_PRODUCT_BG96 0x0296
@@ -268,6 +269,7 @@ static void option_instat_callback(struct urb *urb);
#define QUECTEL_PRODUCT_RM520N 0x0801
#define QUECTEL_PRODUCT_EC200U 0x0901
#define QUECTEL_PRODUCT_EC200S_CN 0x6002
+#define QUECTEL_PRODUCT_EC200A 0x6005
#define QUECTEL_PRODUCT_EM061K_LWW 0x6008
#define QUECTEL_PRODUCT_EM061K_LCN 0x6009
#define QUECTEL_PRODUCT_EC200T 0x6026
@@ -1197,6 +1199,9 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x30) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0x00, 0x40) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K_128, 0xff, 0xff, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x30) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0x00, 0x40) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM061K_LCN, 0xff, 0xff, 0x40) },
@@ -1225,6 +1230,7 @@ static const struct usb_device_id option_ids[] = {
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM520N, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, 0x0900, 0xff, 0, 0), /* RM500U-CN */
.driver_info = ZLP },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200A, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200U, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) },
diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c
index 4c6747889a19..24b8772a345e 100644
--- a/drivers/usb/serial/usb-serial-simple.c
+++ b/drivers/usb/serial/usb-serial-simple.c
@@ -38,16 +38,6 @@ static struct usb_serial_driver vendor##_device = { \
{ USB_DEVICE(0x0a21, 0x8001) } /* MMT-7305WW */
DEVICE(carelink, CARELINK_IDS);
-/* ZIO Motherboard USB driver */
-#define ZIO_IDS() \
- { USB_DEVICE(0x1CBE, 0x0103) }
-DEVICE(zio, ZIO_IDS);
-
-/* Funsoft Serial USB driver */
-#define FUNSOFT_IDS() \
- { USB_DEVICE(0x1404, 0xcddc) }
-DEVICE(funsoft, FUNSOFT_IDS);
-
/* Infineon Flashloader driver */
#define FLASHLOADER_IDS() \
{ USB_DEVICE_INTERFACE_CLASS(0x058b, 0x0041, USB_CLASS_CDC_DATA) }, \
@@ -55,6 +45,11 @@ DEVICE(funsoft, FUNSOFT_IDS);
{ USB_DEVICE(0x8087, 0x0801) }
DEVICE(flashloader, FLASHLOADER_IDS);
+/* Funsoft Serial USB driver */
+#define FUNSOFT_IDS() \
+ { USB_DEVICE(0x1404, 0xcddc) }
+DEVICE(funsoft, FUNSOFT_IDS);
+
/* Google Serial USB SubClass */
#define GOOGLE_IDS() \
{ USB_VENDOR_AND_INTERFACE_INFO(0x18d1, \
@@ -63,16 +58,21 @@ DEVICE(flashloader, FLASHLOADER_IDS);
0x01) }
DEVICE(google, GOOGLE_IDS);
+/* HP4x (48/49) Generic Serial driver */
+#define HP4X_IDS() \
+ { USB_DEVICE(0x03f0, 0x0121) }
+DEVICE(hp4x, HP4X_IDS);
+
+/* KAUFMANN RKS+CAN VCP */
+#define KAUFMANN_IDS() \
+ { USB_DEVICE(0x16d0, 0x0870) }
+DEVICE(kaufmann, KAUFMANN_IDS);
+
/* Libtransistor USB console */
#define LIBTRANSISTOR_IDS() \
{ USB_DEVICE(0x1209, 0x8b00) }
DEVICE(libtransistor, LIBTRANSISTOR_IDS);
-/* ViVOpay USB Serial Driver */
-#define VIVOPAY_IDS() \
- { USB_DEVICE(0x1d5f, 0x1004) } /* ViVOpay 8800 */
-DEVICE(vivopay, VIVOPAY_IDS);
-
/* Motorola USB Phone driver */
#define MOTO_IDS() \
{ USB_DEVICE(0x05c6, 0x3197) }, /* unknown Motorola phone */ \
@@ -101,10 +101,10 @@ DEVICE(nokia, NOKIA_IDS);
{ USB_DEVICE(0x09d7, 0x0100) } /* NovAtel FlexPack GPS */
DEVICE_N(novatel_gps, NOVATEL_IDS, 3);
-/* HP4x (48/49) Generic Serial driver */
-#define HP4X_IDS() \
- { USB_DEVICE(0x03f0, 0x0121) }
-DEVICE(hp4x, HP4X_IDS);
+/* Siemens USB/MPI adapter */
+#define SIEMENS_IDS() \
+ { USB_DEVICE(0x908, 0x0004) }
+DEVICE(siemens_mpi, SIEMENS_IDS);
/* Suunto ANT+ USB Driver */
#define SUUNTO_IDS() \
@@ -112,45 +112,52 @@ DEVICE(hp4x, HP4X_IDS);
{ USB_DEVICE(0x0fcf, 0x1009) } /* Dynastream ANT USB-m Stick */
DEVICE(suunto, SUUNTO_IDS);
-/* Siemens USB/MPI adapter */
-#define SIEMENS_IDS() \
- { USB_DEVICE(0x908, 0x0004) }
-DEVICE(siemens_mpi, SIEMENS_IDS);
+/* ViVOpay USB Serial Driver */
+#define VIVOPAY_IDS() \
+ { USB_DEVICE(0x1d5f, 0x1004) } /* ViVOpay 8800 */
+DEVICE(vivopay, VIVOPAY_IDS);
+
+/* ZIO Motherboard USB driver */
+#define ZIO_IDS() \
+ { USB_DEVICE(0x1CBE, 0x0103) }
+DEVICE(zio, ZIO_IDS);
/* All of the above structures mushed into two lists */
static struct usb_serial_driver * const serial_drivers[] = {
&carelink_device,
- &zio_device,
- &funsoft_device,
&flashloader_device,
+ &funsoft_device,
&google_device,
+ &hp4x_device,
+ &kaufmann_device,
&libtransistor_device,
- &vivopay_device,
&moto_modem_device,
&motorola_tetra_device,
&nokia_device,
&novatel_gps_device,
- &hp4x_device,
- &suunto_device,
&siemens_mpi_device,
+ &suunto_device,
+ &vivopay_device,
+ &zio_device,
NULL
};
static const struct usb_device_id id_table[] = {
CARELINK_IDS(),
- ZIO_IDS(),
- FUNSOFT_IDS(),
FLASHLOADER_IDS(),
+ FUNSOFT_IDS(),
GOOGLE_IDS(),
+ HP4X_IDS(),
+ KAUFMANN_IDS(),
LIBTRANSISTOR_IDS(),
- VIVOPAY_IDS(),
MOTO_IDS(),
MOTOROLA_TETRA_IDS(),
NOKIA_IDS(),
NOVATEL_IDS(),
- HP4X_IDS(),
- SUUNTO_IDS(),
SIEMENS_IDS(),
+ SUUNTO_IDS(),
+ VIVOPAY_IDS(),
+ ZIO_IDS(),
{ },
};
MODULE_DEVICE_TABLE(usb, id_table);
diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c
index 5e912dd29b4c..115f05a6201a 100644
--- a/drivers/usb/storage/alauda.c
+++ b/drivers/usb/storage/alauda.c
@@ -318,7 +318,8 @@ static int alauda_get_media_status(struct us_data *us, unsigned char *data)
rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe,
command, 0xc0, 0, 1, data, 2);
- usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
+ if (rc == USB_STOR_XFER_GOOD)
+ usb_stor_dbg(us, "Media status %02X %02X\n", data[0], data[1]);
return rc;
}
@@ -454,9 +455,14 @@ static int alauda_init_media(struct us_data *us)
static int alauda_check_media(struct us_data *us)
{
struct alauda_info *info = (struct alauda_info *) us->extra;
- unsigned char status[2];
+ unsigned char *status = us->iobuf;
+ int rc;
- alauda_get_media_status(us, status);
+ rc = alauda_get_media_status(us, status);
+ if (rc != USB_STOR_XFER_GOOD) {
+ status[0] = 0xF0; /* Pretend there's no media */
+ status[1] = 0;
+ }
/* Check for no media or door open */
if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10)
diff --git a/drivers/usb/typec/altmodes/displayport.c b/drivers/usb/typec/altmodes/displayport.c
index 66de880b28d0..cdf8261e22db 100644
--- a/drivers/usb/typec/altmodes/displayport.c
+++ b/drivers/usb/typec/altmodes/displayport.c
@@ -60,6 +60,7 @@ struct dp_altmode {
enum dp_state state;
bool hpd;
+ bool pending_hpd;
struct mutex lock; /* device lock */
struct work_struct work;
@@ -144,8 +145,13 @@ static int dp_altmode_status_update(struct dp_altmode *dp)
dp->state = DP_STATE_EXIT;
} else if (!(con & DP_CONF_CURRENTLY(dp->data.conf))) {
ret = dp_altmode_configure(dp, con);
- if (!ret)
+ if (!ret) {
dp->state = DP_STATE_CONFIGURE;
+ if (dp->hpd != hpd) {
+ dp->hpd = hpd;
+ dp->pending_hpd = true;
+ }
+ }
} else {
if (dp->hpd != hpd) {
drm_connector_oob_hotplug_event(dp->connector_fwnode);
@@ -161,6 +167,16 @@ static int dp_altmode_configured(struct dp_altmode *dp)
{
sysfs_notify(&dp->alt->dev.kobj, "displayport", "configuration");
sysfs_notify(&dp->alt->dev.kobj, "displayport", "pin_assignment");
+ /*
+ * If the DFP_D/UFP_D sends a change in HPD when first notifying the
+ * DisplayPort driver that it is connected, then we wait until
+ * configuration is complete to signal HPD.
+ */
+ if (dp->pending_hpd) {
+ drm_connector_oob_hotplug_event(dp->connector_fwnode);
+ sysfs_notify(&dp->alt->dev.kobj, "displayport", "hpd");
+ dp->pending_hpd = false;
+ }
return dp_altmode_notify(dp);
}
diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index faa184ae3dac..9c1dbf3c00e0 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -1277,8 +1277,7 @@ static ssize_t select_usb_power_delivery_show(struct device *dev,
{
struct typec_port *port = to_typec_port(dev);
struct usb_power_delivery **pds;
- struct usb_power_delivery *pd;
- int ret = 0;
+ int i, ret = 0;
if (!port->ops || !port->ops->pd_get)
return -EOPNOTSUPP;
@@ -1287,11 +1286,11 @@ static ssize_t select_usb_power_delivery_show(struct device *dev,
if (!pds)
return 0;
- for (pd = pds[0]; pd; pd++) {
- if (pd == port->pd)
- ret += sysfs_emit(buf + ret, "[%s] ", dev_name(&pd->dev));
+ for (i = 0; pds[i]; i++) {
+ if (pds[i] == port->pd)
+ ret += sysfs_emit_at(buf, ret, "[%s] ", dev_name(&pds[i]->dev));
else
- ret += sysfs_emit(buf + ret, "%s ", dev_name(&pd->dev));
+ ret += sysfs_emit_at(buf, ret, "%s ", dev_name(&pds[i]->dev));
}
buf[ret - 1] = '\n';
@@ -2288,6 +2287,8 @@ struct typec_port *typec_register_port(struct device *parent,
return ERR_PTR(ret);
}
+ port->pd = cap->pd;
+
ret = device_add(&port->dev);
if (ret) {
dev_err(parent, "failed to register port (%d)\n", ret);
@@ -2295,7 +2296,7 @@ struct typec_port *typec_register_port(struct device *parent,
return ERR_PTR(ret);
}
- ret = typec_port_set_usb_power_delivery(port, cap->pd);
+ ret = usb_power_delivery_link_device(port->pd, &port->dev);
if (ret) {
dev_err(&port->dev, "failed to link pd\n");
device_unregister(&port->dev);
diff --git a/drivers/usb/typec/mux/Kconfig b/drivers/usb/typec/mux/Kconfig
index 784b9d8107e9..65da61150ba7 100644
--- a/drivers/usb/typec/mux/Kconfig
+++ b/drivers/usb/typec/mux/Kconfig
@@ -29,6 +29,7 @@ config TYPEC_MUX_INTEL_PMC
tristate "Intel PMC mux control"
depends on ACPI
depends on INTEL_SCU_IPC
+ select USB_COMMON
select USB_ROLE_SWITCH
help
Driver for USB muxes controlled by Intel PMC FW. Intel PMC FW can
diff --git a/drivers/usb/typec/mux/nb7vpq904m.c b/drivers/usb/typec/mux/nb7vpq904m.c
index 80e580d50129..4d1122d95013 100644
--- a/drivers/usb/typec/mux/nb7vpq904m.c
+++ b/drivers/usb/typec/mux/nb7vpq904m.c
@@ -463,16 +463,18 @@ static int nb7vpq904m_probe(struct i2c_client *client)
ret = nb7vpq904m_register_bridge(nb7);
if (ret)
- return ret;
+ goto err_disable_gpio;
sw_desc.drvdata = nb7;
sw_desc.fwnode = dev->fwnode;
sw_desc.set = nb7vpq904m_sw_set;
nb7->sw = typec_switch_register(dev, &sw_desc);
- if (IS_ERR(nb7->sw))
- return dev_err_probe(dev, PTR_ERR(nb7->sw),
- "Error registering typec switch\n");
+ if (IS_ERR(nb7->sw)) {
+ ret = dev_err_probe(dev, PTR_ERR(nb7->sw),
+ "Error registering typec switch\n");
+ goto err_disable_gpio;
+ }
retimer_desc.drvdata = nb7;
retimer_desc.fwnode = dev->fwnode;
@@ -480,12 +482,21 @@ static int nb7vpq904m_probe(struct i2c_client *client)
nb7->retimer = typec_retimer_register(dev, &retimer_desc);
if (IS_ERR(nb7->retimer)) {
- typec_switch_unregister(nb7->sw);
- return dev_err_probe(dev, PTR_ERR(nb7->retimer),
- "Error registering typec retimer\n");
+ ret = dev_err_probe(dev, PTR_ERR(nb7->retimer),
+ "Error registering typec retimer\n");
+ goto err_switch_unregister;
}
return 0;
+
+err_switch_unregister:
+ typec_switch_unregister(nb7->sw);
+
+err_disable_gpio:
+ gpiod_set_value(nb7->enable_gpio, 0);
+ regulator_disable(nb7->vcc_supply);
+
+ return ret;
}
static void nb7vpq904m_remove(struct i2c_client *client)
diff --git a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
index a905160dd860..9b467a346114 100644
--- a/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
+++ b/drivers/usb/typec/tcpm/qcom/qcom_pmic_typec.c
@@ -209,8 +209,8 @@ static int qcom_pmic_typec_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, tcpm);
tcpm->tcpc.fwnode = device_get_named_child_node(tcpm->dev, "connector");
- if (IS_ERR(tcpm->tcpc.fwnode))
- return PTR_ERR(tcpm->tcpc.fwnode);
+ if (!tcpm->tcpc.fwnode)
+ return -EINVAL;
tcpm->tcpm_port = tcpm_register_port(tcpm->dev, &tcpm->tcpc);
if (IS_ERR(tcpm->tcpm_port)) {
diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c
index 829d75ebab42..cc1d83926497 100644
--- a/drivers/usb/typec/tcpm/tcpm.c
+++ b/drivers/usb/typec/tcpm/tcpm.c
@@ -5349,6 +5349,10 @@ static void _tcpm_pd_vbus_off(struct tcpm_port *port)
/* Do nothing, vbus drop expected */
break;
+ case SNK_HARD_RESET_WAIT_VBUS:
+ /* Do nothing, its OK to receive vbus off events */
+ break;
+
default:
if (port->pwr_role == TYPEC_SINK && port->attached)
tcpm_set_state(port, SNK_UNATTACHED, tcpm_wait_for_discharge(port));
@@ -5395,6 +5399,9 @@ static void _tcpm_pd_vbus_vsafe0v(struct tcpm_port *port)
case SNK_DEBOUNCED:
/*Do nothing, still waiting for VSAFE5V for connect */
break;
+ case SNK_HARD_RESET_WAIT_VBUS:
+ /* Do nothing, its OK to receive vbus off events */
+ break;
default:
if (port->pwr_role == TYPEC_SINK && port->auto_vbus_discharge_enabled)
tcpm_set_state(port, SNK_UNATTACHED, 0);
diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index 9b6d6b14431f..f6901319639d 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -785,6 +785,8 @@ static void ucsi_unregister_partner(struct ucsi_connector *con)
if (!con->partner)
return;
+ typec_set_mode(con->port, TYPEC_STATE_SAFE);
+
ucsi_unregister_partner_pdos(con);
ucsi_unregister_altmodes(con, UCSI_RECIPIENT_SOP);
typec_unregister_partner(con->partner);
@@ -825,8 +827,6 @@ static void ucsi_partner_change(struct ucsi_connector *con)
UCSI_CONSTAT_PARTNER_FLAG_USB)
typec_set_mode(con->port, TYPEC_STATE_USB);
}
- } else {
- typec_set_mode(con->port, TYPEC_STATE_SAFE);
}
/* Only notify USB controller if partner supports USB data */
diff --git a/drivers/vdpa/mlx5/core/mlx5_vdpa.h b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
index 25fc4120b618..b53420e874ac 100644
--- a/drivers/vdpa/mlx5/core/mlx5_vdpa.h
+++ b/drivers/vdpa/mlx5/core/mlx5_vdpa.h
@@ -31,6 +31,7 @@ struct mlx5_vdpa_mr {
struct list_head head;
unsigned long num_directs;
unsigned long num_klms;
+ /* state of dvq mr */
bool initialized;
/* serialize mkey creation and destruction */
@@ -121,6 +122,7 @@ int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *io
int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
unsigned int asid);
void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev);
+void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid);
#define mlx5_vdpa_warn(__dev, format, ...) \
dev_warn((__dev)->mdev->device, "%s:%d:(pid %d) warning: " format, __func__, __LINE__, \
diff --git a/drivers/vdpa/mlx5/core/mr.c b/drivers/vdpa/mlx5/core/mr.c
index 03e543229791..5a1971fcd87b 100644
--- a/drivers/vdpa/mlx5/core/mr.c
+++ b/drivers/vdpa/mlx5/core/mr.c
@@ -489,60 +489,103 @@ static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr
}
}
-void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+static void _mlx5_vdpa_destroy_cvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+ if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+ return;
+
+ prune_iotlb(mvdev);
+}
+
+static void _mlx5_vdpa_destroy_dvq_mr(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
- mutex_lock(&mr->mkey_mtx);
+ if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
+ return;
+
if (!mr->initialized)
- goto out;
+ return;
- prune_iotlb(mvdev);
if (mr->user_mr)
destroy_user_mr(mvdev, mr);
else
destroy_dma_mr(mvdev, mr);
mr->initialized = false;
-out:
+}
+
+void mlx5_vdpa_destroy_mr_asid(struct mlx5_vdpa_dev *mvdev, unsigned int asid)
+{
+ struct mlx5_vdpa_mr *mr = &mvdev->mr;
+
+ mutex_lock(&mr->mkey_mtx);
+
+ _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
+ _mlx5_vdpa_destroy_cvq_mr(mvdev, asid);
+
mutex_unlock(&mr->mkey_mtx);
}
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
- struct vhost_iotlb *iotlb, unsigned int asid)
+void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
+{
+ mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_CVQ_GROUP]);
+ mlx5_vdpa_destroy_mr_asid(mvdev, mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP]);
+}
+
+static int _mlx5_vdpa_create_cvq_mr(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb,
+ unsigned int asid)
+{
+ if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] != asid)
+ return 0;
+
+ return dup_iotlb(mvdev, iotlb);
+}
+
+static int _mlx5_vdpa_create_dvq_mr(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb,
+ unsigned int asid)
{
struct mlx5_vdpa_mr *mr = &mvdev->mr;
int err;
- if (mr->initialized)
+ if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] != asid)
return 0;
- if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
- if (iotlb)
- err = create_user_mr(mvdev, iotlb);
- else
- err = create_dma_mr(mvdev, mr);
+ if (mr->initialized)
+ return 0;
- if (err)
- return err;
- }
+ if (iotlb)
+ err = create_user_mr(mvdev, iotlb);
+ else
+ err = create_dma_mr(mvdev, mr);
- if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
- err = dup_iotlb(mvdev, iotlb);
- if (err)
- goto out_err;
- }
+ if (err)
+ return err;
mr->initialized = true;
+
+ return 0;
+}
+
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
+ struct vhost_iotlb *iotlb, unsigned int asid)
+{
+ int err;
+
+ err = _mlx5_vdpa_create_dvq_mr(mvdev, iotlb, asid);
+ if (err)
+ return err;
+
+ err = _mlx5_vdpa_create_cvq_mr(mvdev, iotlb, asid);
+ if (err)
+ goto out_err;
+
return 0;
out_err:
- if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
- if (iotlb)
- destroy_user_mr(mvdev, mr);
- else
- destroy_dma_mr(mvdev, mr);
- }
+ _mlx5_vdpa_destroy_dvq_mr(mvdev, asid);
return err;
}
diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c
index 9138ef2fb2c8..37be945a0230 100644
--- a/drivers/vdpa/mlx5/net/mlx5_vnet.c
+++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c
@@ -2517,7 +2517,15 @@ static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
else
ndev->rqt_size = 1;
- ndev->cur_num_vqs = 2 * ndev->rqt_size;
+ /* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
+ * 5.1.6.5.5 "Device operation in multiqueue mode":
+ *
+ * Multiqueue is disabled by default.
+ * The driver enables multiqueue by sending a command using class
+ * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
+ * operation, as follows: ...
+ */
+ ndev->cur_num_vqs = 2;
update_cvq_info(mvdev);
return err;
@@ -2636,7 +2644,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
goto err_mr;
teardown_driver(ndev);
- mlx5_vdpa_destroy_mr(mvdev);
+ mlx5_vdpa_destroy_mr_asid(mvdev, asid);
err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
if (err)
goto err_mr;
@@ -2652,7 +2660,7 @@ static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
return 0;
err_setup:
- mlx5_vdpa_destroy_mr(mvdev);
+ mlx5_vdpa_destroy_mr_asid(mvdev, asid);
err_mr:
return err;
}
@@ -3548,17 +3556,6 @@ static void mlx5v_remove(struct auxiliary_device *adev)
kfree(mgtdev);
}
-static void mlx5v_shutdown(struct auxiliary_device *auxdev)
-{
- struct mlx5_vdpa_mgmtdev *mgtdev;
- struct mlx5_vdpa_net *ndev;
-
- mgtdev = auxiliary_get_drvdata(auxdev);
- ndev = mgtdev->ndev;
-
- free_irqs(ndev);
-}
-
static const struct auxiliary_device_id mlx5v_id_table[] = {
{ .name = MLX5_ADEV_NAME ".vnet", },
{},
@@ -3570,7 +3567,6 @@ static struct auxiliary_driver mlx5v_driver = {
.name = "vnet",
.probe = mlx5v_probe,
.remove = mlx5v_remove,
- .shutdown = mlx5v_shutdown,
.id_table = mlx5v_id_table,
};
diff --git a/drivers/vdpa/pds/Makefile b/drivers/vdpa/pds/Makefile
index 2e22418e3ab3..c2d314d4614d 100644
--- a/drivers/vdpa/pds/Makefile
+++ b/drivers/vdpa/pds/Makefile
@@ -5,6 +5,5 @@ obj-$(CONFIG_PDS_VDPA) := pds_vdpa.o
pds_vdpa-y := aux_drv.o \
cmds.o \
+ debugfs.o \
vdpa_dev.o
-
-pds_vdpa-$(CONFIG_DEBUG_FS) += debugfs.o
diff --git a/drivers/vdpa/pds/debugfs.c b/drivers/vdpa/pds/debugfs.c
index 21a0dc0cb607..9b04aad6ec35 100644
--- a/drivers/vdpa/pds/debugfs.c
+++ b/drivers/vdpa/pds/debugfs.c
@@ -176,6 +176,7 @@ static int identity_show(struct seq_file *seq, void *v)
{
struct pds_vdpa_aux *vdpa_aux = seq->private;
struct vdpa_mgmt_dev *mgmt;
+ u64 hw_features;
seq_printf(seq, "aux_dev: %s\n",
dev_name(&vdpa_aux->padev->aux_dev.dev));
@@ -183,8 +184,9 @@ static int identity_show(struct seq_file *seq, void *v)
mgmt = &vdpa_aux->vdpa_mdev;
seq_printf(seq, "max_vqs: %d\n", mgmt->max_supported_vqs);
seq_printf(seq, "config_attr_mask: %#llx\n", mgmt->config_attr_mask);
- seq_printf(seq, "supported_features: %#llx\n", mgmt->supported_features);
- print_feature_bits_all(seq, mgmt->supported_features);
+ hw_features = le64_to_cpu(vdpa_aux->ident.hw_features);
+ seq_printf(seq, "hw_features: %#llx\n", hw_features);
+ print_feature_bits_all(seq, hw_features);
return 0;
}
@@ -200,7 +202,6 @@ static int config_show(struct seq_file *seq, void *v)
{
struct pds_vdpa_device *pdsv = seq->private;
struct virtio_net_config vc;
- u64 driver_features;
u8 status;
memcpy_fromio(&vc, pdsv->vdpa_aux->vd_mdev.device,
@@ -223,12 +224,8 @@ static int config_show(struct seq_file *seq, void *v)
status = vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev);
seq_printf(seq, "dev_status: %#x\n", status);
print_status_bits(seq, status);
-
- seq_printf(seq, "req_features: %#llx\n", pdsv->req_features);
- print_feature_bits_all(seq, pdsv->req_features);
- driver_features = vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev);
- seq_printf(seq, "driver_features: %#llx\n", driver_features);
- print_feature_bits_all(seq, driver_features);
+ seq_printf(seq, "negotiated_features: %#llx\n", pdsv->negotiated_features);
+ print_feature_bits_all(seq, pdsv->negotiated_features);
seq_printf(seq, "vdpa_index: %d\n", pdsv->vdpa_index);
seq_printf(seq, "num_vqs: %d\n", pdsv->num_vqs);
diff --git a/drivers/vdpa/pds/vdpa_dev.c b/drivers/vdpa/pds/vdpa_dev.c
index 5071a4d58f8d..52b2449182ad 100644
--- a/drivers/vdpa/pds/vdpa_dev.c
+++ b/drivers/vdpa/pds/vdpa_dev.c
@@ -126,11 +126,9 @@ static void pds_vdpa_release_irq(struct pds_vdpa_device *pdsv, int qid)
static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool ready)
{
struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
- struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
struct device *dev = &pdsv->vdpa_dev.dev;
u64 driver_features;
u16 invert_idx = 0;
- int irq;
int err;
dev_dbg(dev, "%s: qid %d ready %d => %d\n",
@@ -143,19 +141,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
invert_idx = PDS_VDPA_PACKED_INVERT_IDX;
if (ready) {
- irq = pci_irq_vector(pdev, qid);
- snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name),
- "vdpa-%s-%d", dev_name(dev), qid);
-
- err = request_irq(irq, pds_vdpa_isr, 0,
- pdsv->vqs[qid].irq_name, &pdsv->vqs[qid]);
- if (err) {
- dev_err(dev, "%s: no irq for qid %d: %pe\n",
- __func__, qid, ERR_PTR(err));
- return;
- }
- pdsv->vqs[qid].irq = irq;
-
/* Pass vq setup info to DSC using adminq to gather up and
* send all info at once so FW can do its full set up in
* one easy operation
@@ -164,7 +149,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
if (err) {
dev_err(dev, "Failed to init vq %d: %pe\n",
qid, ERR_PTR(err));
- pds_vdpa_release_irq(pdsv, qid);
ready = false;
}
} else {
@@ -172,7 +156,6 @@ static void pds_vdpa_set_vq_ready(struct vdpa_device *vdpa_dev, u16 qid, bool re
if (err)
dev_err(dev, "%s: reset_vq failed qid %d: %pe\n",
__func__, qid, ERR_PTR(err));
- pds_vdpa_release_irq(pdsv, qid);
}
pdsv->vqs[qid].ready = ready;
@@ -318,6 +301,7 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur
struct device *dev = &pdsv->vdpa_dev.dev;
u64 driver_features;
u64 nego_features;
+ u64 hw_features;
u64 missing;
if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
@@ -325,21 +309,26 @@ static int pds_vdpa_set_driver_features(struct vdpa_device *vdpa_dev, u64 featur
return -EOPNOTSUPP;
}
- pdsv->req_features = features;
-
/* Check for valid feature bits */
- nego_features = features & le64_to_cpu(pdsv->vdpa_aux->ident.hw_features);
- missing = pdsv->req_features & ~nego_features;
+ nego_features = features & pdsv->supported_features;
+ missing = features & ~nego_features;
if (missing) {
dev_err(dev, "Can't support all requested features in %#llx, missing %#llx features\n",
- pdsv->req_features, missing);
+ features, missing);
return -EOPNOTSUPP;
}
+ pdsv->negotiated_features = nego_features;
+
driver_features = pds_vdpa_get_driver_features(vdpa_dev);
dev_dbg(dev, "%s: %#llx => %#llx\n",
__func__, driver_features, nego_features);
+ /* if we're faking the F_MAC, strip it before writing to device */
+ hw_features = le64_to_cpu(pdsv->vdpa_aux->ident.hw_features);
+ if (!(hw_features & BIT_ULL(VIRTIO_NET_F_MAC)))
+ nego_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
+
if (driver_features == nego_features)
return 0;
@@ -352,7 +341,7 @@ static u64 pds_vdpa_get_driver_features(struct vdpa_device *vdpa_dev)
{
struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
- return vp_modern_get_driver_features(&pdsv->vdpa_aux->vd_mdev);
+ return pdsv->negotiated_features;
}
static void pds_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
@@ -389,6 +378,72 @@ static u8 pds_vdpa_get_status(struct vdpa_device *vdpa_dev)
return vp_modern_get_status(&pdsv->vdpa_aux->vd_mdev);
}
+static int pds_vdpa_request_irqs(struct pds_vdpa_device *pdsv)
+{
+ struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
+ struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux;
+ struct device *dev = &pdsv->vdpa_dev.dev;
+ int max_vq, nintrs, qid, err;
+
+ max_vq = vdpa_aux->vdpa_mdev.max_supported_vqs;
+
+ nintrs = pci_alloc_irq_vectors(pdev, max_vq, max_vq, PCI_IRQ_MSIX);
+ if (nintrs < 0) {
+ dev_err(dev, "Couldn't get %d msix vectors: %pe\n",
+ max_vq, ERR_PTR(nintrs));
+ return nintrs;
+ }
+
+ for (qid = 0; qid < pdsv->num_vqs; ++qid) {
+ int irq = pci_irq_vector(pdev, qid);
+
+ snprintf(pdsv->vqs[qid].irq_name, sizeof(pdsv->vqs[qid].irq_name),
+ "vdpa-%s-%d", dev_name(dev), qid);
+
+ err = request_irq(irq, pds_vdpa_isr, 0,
+ pdsv->vqs[qid].irq_name,
+ &pdsv->vqs[qid]);
+ if (err) {
+ dev_err(dev, "%s: no irq for qid %d: %pe\n",
+ __func__, qid, ERR_PTR(err));
+ goto err_release;
+ }
+
+ pdsv->vqs[qid].irq = irq;
+ }
+
+ vdpa_aux->nintrs = nintrs;
+
+ return 0;
+
+err_release:
+ while (qid--)
+ pds_vdpa_release_irq(pdsv, qid);
+
+ pci_free_irq_vectors(pdev);
+
+ vdpa_aux->nintrs = 0;
+
+ return err;
+}
+
+static void pds_vdpa_release_irqs(struct pds_vdpa_device *pdsv)
+{
+ struct pci_dev *pdev = pdsv->vdpa_aux->padev->vf_pdev;
+ struct pds_vdpa_aux *vdpa_aux = pdsv->vdpa_aux;
+ int qid;
+
+ if (!vdpa_aux->nintrs)
+ return;
+
+ for (qid = 0; qid < pdsv->num_vqs; qid++)
+ pds_vdpa_release_irq(pdsv, qid);
+
+ pci_free_irq_vectors(pdev);
+
+ vdpa_aux->nintrs = 0;
+}
+
static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
{
struct pds_vdpa_device *pdsv = vdpa_to_pdsv(vdpa_dev);
@@ -399,6 +454,11 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
old_status = pds_vdpa_get_status(vdpa_dev);
dev_dbg(dev, "%s: old %#x new %#x\n", __func__, old_status, status);
+ if (status & ~old_status & VIRTIO_CONFIG_S_DRIVER_OK) {
+ if (pds_vdpa_request_irqs(pdsv))
+ status = old_status | VIRTIO_CONFIG_S_FAILED;
+ }
+
pds_vdpa_cmd_set_status(pdsv, status);
/* Note: still working with FW on the need for this reset cmd */
@@ -409,6 +469,8 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
pdsv->vqs[i].avail_idx = 0;
pdsv->vqs[i].used_idx = 0;
}
+
+ pds_vdpa_cmd_set_mac(pdsv, pdsv->mac);
}
if (status & ~old_status & VIRTIO_CONFIG_S_FEATURES_OK) {
@@ -418,6 +480,20 @@ static void pds_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
i, &pdsv->vqs[i].notify_pa);
}
}
+
+ if (old_status & ~status & VIRTIO_CONFIG_S_DRIVER_OK)
+ pds_vdpa_release_irqs(pdsv);
+}
+
+static void pds_vdpa_init_vqs_entry(struct pds_vdpa_device *pdsv, int qid,
+ void __iomem *notify)
+{
+ memset(&pdsv->vqs[qid], 0, sizeof(pdsv->vqs[0]));
+ pdsv->vqs[qid].qid = qid;
+ pdsv->vqs[qid].pdsv = pdsv;
+ pdsv->vqs[qid].ready = false;
+ pdsv->vqs[qid].irq = VIRTIO_MSI_NO_VECTOR;
+ pdsv->vqs[qid].notify = notify;
}
static int pds_vdpa_reset(struct vdpa_device *vdpa_dev)
@@ -441,14 +517,17 @@ static int pds_vdpa_reset(struct vdpa_device *vdpa_dev)
if (err)
dev_err(dev, "%s: reset_vq failed qid %d: %pe\n",
__func__, i, ERR_PTR(err));
- pds_vdpa_release_irq(pdsv, i);
- memset(&pdsv->vqs[i], 0, sizeof(pdsv->vqs[0]));
- pdsv->vqs[i].ready = false;
}
}
pds_vdpa_set_status(vdpa_dev, 0);
+ if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+ /* Reset the vq info */
+ for (i = 0; i < pdsv->num_vqs && !err; i++)
+ pds_vdpa_init_vqs_entry(pdsv, i, pdsv->vqs[i].notify);
+ }
+
return 0;
}
@@ -532,7 +611,6 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
struct device *dma_dev;
struct pci_dev *pdev;
struct device *dev;
- u8 mac[ETH_ALEN];
int err;
int i;
@@ -563,7 +641,7 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
u64 unsupp_features =
- add_config->device_features & ~mgmt->supported_features;
+ add_config->device_features & ~pdsv->supported_features;
if (unsupp_features) {
dev_err(dev, "Unsupported features: %#llx\n", unsupp_features);
@@ -614,29 +692,30 @@ static int pds_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
}
/* Set a mac, either from the user config if provided
- * or set a random mac if default is 00:..:00
+ * or use the device's mac if not 00:..:00
+ * or set a random mac
*/
if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
- ether_addr_copy(mac, add_config->net.mac);
- pds_vdpa_cmd_set_mac(pdsv, mac);
+ ether_addr_copy(pdsv->mac, add_config->net.mac);
} else {
struct virtio_net_config __iomem *vc;
vc = pdsv->vdpa_aux->vd_mdev.device;
- memcpy_fromio(mac, vc->mac, sizeof(mac));
- if (is_zero_ether_addr(mac)) {
- eth_random_addr(mac);
- dev_info(dev, "setting random mac %pM\n", mac);
- pds_vdpa_cmd_set_mac(pdsv, mac);
+ memcpy_fromio(pdsv->mac, vc->mac, sizeof(pdsv->mac));
+ if (is_zero_ether_addr(pdsv->mac) &&
+ (pdsv->supported_features & BIT_ULL(VIRTIO_NET_F_MAC))) {
+ eth_random_addr(pdsv->mac);
+ dev_info(dev, "setting random mac %pM\n", pdsv->mac);
}
}
+ pds_vdpa_cmd_set_mac(pdsv, pdsv->mac);
for (i = 0; i < pdsv->num_vqs; i++) {
- pdsv->vqs[i].qid = i;
- pdsv->vqs[i].pdsv = pdsv;
- pdsv->vqs[i].irq = VIRTIO_MSI_NO_VECTOR;
- pdsv->vqs[i].notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev,
- i, &pdsv->vqs[i].notify_pa);
+ void __iomem *notify;
+
+ notify = vp_modern_map_vq_notify(&pdsv->vdpa_aux->vd_mdev,
+ i, &pdsv->vqs[i].notify_pa);
+ pds_vdpa_init_vqs_entry(pdsv, i, notify);
}
pdsv->vdpa_dev.mdev = &vdpa_aux->vdpa_mdev;
@@ -746,24 +825,19 @@ int pds_vdpa_get_mgmt_info(struct pds_vdpa_aux *vdpa_aux)
max_vqs = min_t(u16, dev_intrs, max_vqs);
mgmt->max_supported_vqs = min_t(u16, PDS_VDPA_MAX_QUEUES, max_vqs);
- vdpa_aux->nintrs = mgmt->max_supported_vqs;
+ vdpa_aux->nintrs = 0;
mgmt->ops = &pds_vdpa_mgmt_dev_ops;
mgmt->id_table = pds_vdpa_id_table;
mgmt->device = dev;
mgmt->supported_features = le64_to_cpu(vdpa_aux->ident.hw_features);
+
+ /* advertise F_MAC even if the device doesn't */
+ mgmt->supported_features |= BIT_ULL(VIRTIO_NET_F_MAC);
+
mgmt->config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR);
mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP);
mgmt->config_attr_mask |= BIT_ULL(VDPA_ATTR_DEV_FEATURES);
- err = pci_alloc_irq_vectors(pdev, vdpa_aux->nintrs, vdpa_aux->nintrs,
- PCI_IRQ_MSIX);
- if (err < 0) {
- dev_err(dev, "Couldn't get %d msix vectors: %pe\n",
- vdpa_aux->nintrs, ERR_PTR(err));
- return err;
- }
- vdpa_aux->nintrs = err;
-
return 0;
}
diff --git a/drivers/vdpa/pds/vdpa_dev.h b/drivers/vdpa/pds/vdpa_dev.h
index a1bc37de9537..d984ba24a7da 100644
--- a/drivers/vdpa/pds/vdpa_dev.h
+++ b/drivers/vdpa/pds/vdpa_dev.h
@@ -35,10 +35,11 @@ struct pds_vdpa_device {
struct pds_vdpa_aux *vdpa_aux;
struct pds_vdpa_vq_info vqs[PDS_VDPA_MAX_QUEUES];
- u64 supported_features; /* specified device features */
- u64 req_features; /* features requested by vdpa */
+ u64 supported_features; /* supported device features */
+ u64 negotiated_features; /* negotiated features */
u8 vdpa_index; /* rsvd for future subdevice use */
u8 num_vqs; /* num vqs in use */
+ u8 mac[ETH_ALEN]; /* mac selected when the device was added */
struct vdpa_callback config_cb;
struct notifier_block nb;
};
diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index 965e32529eb8..a7612e0783b3 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -1247,44 +1247,41 @@ static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
[VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
[VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
[VDPA_ATTR_DEV_NET_CFG_MACADDR] = NLA_POLICY_ETH_ADDR,
+ [VDPA_ATTR_DEV_NET_CFG_MAX_VQP] = { .type = NLA_U16 },
/* virtio spec 1.1 section 5.1.4.1 for valid MTU range */
[VDPA_ATTR_DEV_NET_CFG_MTU] = NLA_POLICY_MIN(NLA_U16, 68),
+ [VDPA_ATTR_DEV_QUEUE_INDEX] = { .type = NLA_U32 },
+ [VDPA_ATTR_DEV_FEATURES] = { .type = NLA_U64 },
};
static const struct genl_ops vdpa_nl_ops[] = {
{
.cmd = VDPA_CMD_MGMTDEV_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_mgmtdev_get_doit,
.dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit,
},
{
.cmd = VDPA_CMD_DEV_NEW,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_add_set_doit,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = VDPA_CMD_DEV_DEL,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_del_set_doit,
.flags = GENL_ADMIN_PERM,
},
{
.cmd = VDPA_CMD_DEV_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_get_doit,
.dumpit = vdpa_nl_cmd_dev_get_dumpit,
},
{
.cmd = VDPA_CMD_DEV_CONFIG_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_config_get_doit,
.dumpit = vdpa_nl_cmd_dev_config_get_dumpit,
},
{
.cmd = VDPA_CMD_DEV_VSTATS_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = vdpa_nl_cmd_dev_stats_get_doit,
.flags = GENL_ADMIN_PERM,
},
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index dc38ed21319d..df7869537ef1 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -935,10 +935,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
{
struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
- spin_lock_irq(&dev->irq_lock);
+ spin_lock_bh(&dev->irq_lock);
if (dev->config_cb.callback)
dev->config_cb.callback(dev->config_cb.private);
- spin_unlock_irq(&dev->irq_lock);
+ spin_unlock_bh(&dev->irq_lock);
}
static void vduse_vq_irq_inject(struct work_struct *work)
@@ -946,10 +946,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
struct vduse_virtqueue *vq = container_of(work,
struct vduse_virtqueue, inject);
- spin_lock_irq(&vq->irq_lock);
+ spin_lock_bh(&vq->irq_lock);
if (vq->ready && vq->cb.callback)
vq->cb.callback(vq->cb.private);
- spin_unlock_irq(&vq->irq_lock);
+ spin_unlock_bh(&vq->irq_lock);
}
static bool vduse_vq_signal_irqfd(struct vduse_virtqueue *vq)
diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c
index c83f7f043470..abef0619c790 100644
--- a/drivers/vhost/scsi.c
+++ b/drivers/vhost/scsi.c
@@ -25,6 +25,8 @@
#include <linux/fs.h>
#include <linux/vmalloc.h>
#include <linux/miscdevice.h>
+#include <linux/blk_types.h>
+#include <linux/bio.h>
#include <asm/unaligned.h>
#include <scsi/scsi_common.h>
#include <scsi/scsi_proto.h>
@@ -75,6 +77,9 @@ struct vhost_scsi_cmd {
u32 tvc_prot_sgl_count;
/* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */
u32 tvc_lun;
+ u32 copied_iov:1;
+ const void *saved_iter_addr;
+ struct iov_iter saved_iter;
/* Pointer to the SGL formatted memory from virtio-scsi */
struct scatterlist *tvc_sgl;
struct scatterlist *tvc_prot_sgl;
@@ -328,8 +333,13 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd)
int i;
if (tv_cmd->tvc_sgl_count) {
- for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
- put_page(sg_page(&tv_cmd->tvc_sgl[i]));
+ for (i = 0; i < tv_cmd->tvc_sgl_count; i++) {
+ if (tv_cmd->copied_iov)
+ __free_page(sg_page(&tv_cmd->tvc_sgl[i]));
+ else
+ put_page(sg_page(&tv_cmd->tvc_sgl[i]));
+ }
+ kfree(tv_cmd->saved_iter_addr);
}
if (tv_cmd->tvc_prot_sgl_count) {
for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++)
@@ -504,6 +514,28 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
mutex_unlock(&vq->mutex);
}
+static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd)
+{
+ struct iov_iter *iter = &cmd->saved_iter;
+ struct scatterlist *sg = cmd->tvc_sgl;
+ struct page *page;
+ size_t len;
+ int i;
+
+ for (i = 0; i < cmd->tvc_sgl_count; i++) {
+ page = sg_page(&sg[i]);
+ len = sg[i].length;
+
+ if (copy_page_to_iter(page, 0, len, iter) != len) {
+ pr_err("Could not copy data while handling misaligned cmd. Error %zu\n",
+ len);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
/* Fill in status and signal that we are done processing this command
*
* This is scheduled in the vhost work queue so we are called with the owner
@@ -527,15 +559,20 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
cmd, se_cmd->residual_count, se_cmd->scsi_status);
-
memset(&v_rsp, 0, sizeof(v_rsp));
- v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count);
- /* TODO is status_qualifier field needed? */
- v_rsp.status = se_cmd->scsi_status;
- v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
- se_cmd->scsi_sense_length);
- memcpy(v_rsp.sense, cmd->tvc_sense_buf,
- se_cmd->scsi_sense_length);
+
+ if (cmd->saved_iter_addr && vhost_scsi_copy_sgl_to_iov(cmd)) {
+ v_rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
+ } else {
+ v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq,
+ se_cmd->residual_count);
+ /* TODO is status_qualifier field needed? */
+ v_rsp.status = se_cmd->scsi_status;
+ v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
+ se_cmd->scsi_sense_length);
+ memcpy(v_rsp.sense, cmd->tvc_sense_buf,
+ se_cmd->scsi_sense_length);
+ }
iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov,
cmd->tvc_in_iovs, sizeof(v_rsp));
@@ -613,12 +650,12 @@ static int
vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
struct iov_iter *iter,
struct scatterlist *sgl,
- bool write)
+ bool is_prot)
{
struct page **pages = cmd->tvc_upages;
struct scatterlist *sg = sgl;
- ssize_t bytes;
- size_t offset;
+ ssize_t bytes, mapped_bytes;
+ size_t offset, mapped_offset;
unsigned int npages = 0;
bytes = iov_iter_get_pages2(iter, pages, LONG_MAX,
@@ -627,13 +664,53 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
if (bytes <= 0)
return bytes < 0 ? bytes : -EFAULT;
+ mapped_bytes = bytes;
+ mapped_offset = offset;
+
while (bytes) {
unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes);
+ /*
+ * The block layer requires bios/requests to be a multiple of
+ * 512 bytes, but Windows can send us vecs that are misaligned.
+ * This can result in bios and later requests with misaligned
+ * sizes if we have to break up a cmd/scatterlist into multiple
+ * bios.
+ *
+ * We currently only break up a command into multiple bios if
+ * we hit the vec/seg limit, so check if our sgl_count is
+ * greater than the max and if a vec in the cmd has a
+ * misaligned offset/size.
+ */
+ if (!is_prot &&
+ (offset & (SECTOR_SIZE - 1) || n & (SECTOR_SIZE - 1)) &&
+ cmd->tvc_sgl_count > BIO_MAX_VECS) {
+ WARN_ONCE(true,
+ "vhost-scsi detected misaligned IO. Performance may be degraded.");
+ goto revert_iter_get_pages;
+ }
+
sg_set_page(sg++, pages[npages++], n, offset);
bytes -= n;
offset = 0;
}
+
return npages;
+
+revert_iter_get_pages:
+ iov_iter_revert(iter, mapped_bytes);
+
+ npages = 0;
+ while (mapped_bytes) {
+ unsigned int n = min_t(unsigned int, PAGE_SIZE - mapped_offset,
+ mapped_bytes);
+
+ put_page(pages[npages++]);
+
+ mapped_bytes -= n;
+ mapped_offset = 0;
+ }
+
+ return -EINVAL;
}
static int
@@ -657,25 +734,80 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls)
}
static int
-vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write,
- struct iov_iter *iter,
- struct scatterlist *sg, int sg_count)
+vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter,
+ struct scatterlist *sg, int sg_count)
+{
+ size_t len = iov_iter_count(iter);
+ unsigned int nbytes = 0;
+ struct page *page;
+ int i;
+
+ if (cmd->tvc_data_direction == DMA_FROM_DEVICE) {
+ cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter,
+ GFP_KERNEL);
+ if (!cmd->saved_iter_addr)
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < sg_count; i++) {
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ i--;
+ goto err;
+ }
+
+ nbytes = min_t(unsigned int, PAGE_SIZE, len);
+ sg_set_page(&sg[i], page, nbytes, 0);
+
+ if (cmd->tvc_data_direction == DMA_TO_DEVICE &&
+ copy_page_from_iter(page, 0, nbytes, iter) != nbytes)
+ goto err;
+
+ len -= nbytes;
+ }
+
+ cmd->copied_iov = 1;
+ return 0;
+
+err:
+ pr_err("Could not read %u bytes while handling misaligned cmd\n",
+ nbytes);
+
+ for (; i >= 0; i--)
+ __free_page(sg_page(&sg[i]));
+ kfree(cmd->saved_iter_addr);
+ return -ENOMEM;
+}
+
+static int
+vhost_scsi_map_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter,
+ struct scatterlist *sg, int sg_count, bool is_prot)
{
struct scatterlist *p = sg;
+ size_t revert_bytes;
int ret;
while (iov_iter_count(iter)) {
- ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write);
+ ret = vhost_scsi_map_to_sgl(cmd, iter, sg, is_prot);
if (ret < 0) {
+ revert_bytes = 0;
+
while (p < sg) {
- struct page *page = sg_page(p++);
- if (page)
+ struct page *page = sg_page(p);
+
+ if (page) {
put_page(page);
+ revert_bytes += p->length;
+ }
+ p++;
}
+
+ iov_iter_revert(iter, revert_bytes);
return ret;
}
sg += ret;
}
+
return 0;
}
@@ -685,7 +817,6 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
size_t data_bytes, struct iov_iter *data_iter)
{
int sgl_count, ret;
- bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE);
if (prot_bytes) {
sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes,
@@ -698,9 +829,9 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count);
- ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter,
- cmd->tvc_prot_sgl,
- cmd->tvc_prot_sgl_count);
+ ret = vhost_scsi_map_iov_to_sgl(cmd, prot_iter,
+ cmd->tvc_prot_sgl,
+ cmd->tvc_prot_sgl_count, true);
if (ret < 0) {
cmd->tvc_prot_sgl_count = 0;
return ret;
@@ -716,8 +847,14 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
pr_debug("%s data_sg %p data_sgl_count %u\n", __func__,
cmd->tvc_sgl, cmd->tvc_sgl_count);
- ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter,
- cmd->tvc_sgl, cmd->tvc_sgl_count);
+ ret = vhost_scsi_map_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl,
+ cmd->tvc_sgl_count, false);
+ if (ret == -EINVAL) {
+ sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count);
+ ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl,
+ cmd->tvc_sgl_count);
+ }
+
if (ret < 0) {
cmd->tvc_sgl_count = 0;
return ret;
diff --git a/drivers/video/console/sticon.c b/drivers/video/console/sticon.c
index d11cfd2d68b5..992a4fa431aa 100644
--- a/drivers/video/console/sticon.c
+++ b/drivers/video/console/sticon.c
@@ -156,7 +156,7 @@ static bool sticon_scroll(struct vc_data *conp, unsigned int t,
return false;
}
-static int sticon_set_def_font(int unit, struct console_font *op)
+static void sticon_set_def_font(int unit)
{
if (font_data[unit] != STI_DEF_FONT) {
if (--FNTREFCOUNT(font_data[unit]) == 0) {
@@ -165,8 +165,6 @@ static int sticon_set_def_font(int unit, struct console_font *op)
}
font_data[unit] = STI_DEF_FONT;
}
-
- return 0;
}
static int sticon_set_font(struct vc_data *vc, struct console_font *op,
@@ -246,7 +244,7 @@ static int sticon_set_font(struct vc_data *vc, struct console_font *op,
vc->vc_video_erase_char, font_data[vc->vc_num]);
/* delete old font in case it is a user font */
- sticon_set_def_font(unit, NULL);
+ sticon_set_def_font(unit);
FNTREFCOUNT(cooked_font)++;
font_data[unit] = cooked_font;
@@ -264,7 +262,9 @@ static int sticon_set_font(struct vc_data *vc, struct console_font *op,
static int sticon_font_default(struct vc_data *vc, struct console_font *op, char *name)
{
- return sticon_set_def_font(vc->vc_num, op);
+ sticon_set_def_font(vc->vc_num);
+
+ return 0;
}
static int sticon_font_set(struct vc_data *vc, struct console_font *font,
@@ -297,7 +297,7 @@ static void sticon_deinit(struct vc_data *c)
/* free memory used by user font */
for (i = 0; i < MAX_NR_CONSOLES; i++)
- sticon_set_def_font(i, NULL);
+ sticon_set_def_font(i);
}
static void sticon_clear(struct vc_data *conp, int sy, int sx, int height,
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c
index e25ba523892e..7ad047bcae17 100644
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -65,16 +65,8 @@ static struct vgastate vgastate;
* Interface used by the world
*/
-static const char *vgacon_startup(void);
-static void vgacon_init(struct vc_data *c, int init);
-static void vgacon_deinit(struct vc_data *c);
-static void vgacon_cursor(struct vc_data *c, int mode);
-static int vgacon_switch(struct vc_data *c);
-static int vgacon_blank(struct vc_data *c, int blank, int mode_switch);
-static void vgacon_scrolldelta(struct vc_data *c, int lines);
static int vgacon_set_origin(struct vc_data *c);
-static void vgacon_save_screen(struct vc_data *c);
-static void vgacon_invert_region(struct vc_data *c, u16 * p, int count);
+
static struct uni_pagedict *vgacon_uni_pagedir;
static int vgacon_refcount;
@@ -142,12 +134,6 @@ static inline void vga_set_mem_top(struct vc_data *c)
write_vga(12, (c->vc_visible_origin - vga_vram_base) / 2);
}
-static void vgacon_restore_screen(struct vc_data *c)
-{
- if (c->vc_origin != c->vc_visible_origin)
- vgacon_scrolldelta(c, 0);
-}
-
static void vgacon_scrolldelta(struct vc_data *c, int lines)
{
vc_scrolldelta_helper(c, lines, vga_rolled_over, (void *)vga_vram_base,
@@ -155,6 +141,12 @@ static void vgacon_scrolldelta(struct vc_data *c, int lines)
vga_set_mem_top(c);
}
+static void vgacon_restore_screen(struct vc_data *c)
+{
+ if (c->vc_origin != c->vc_visible_origin)
+ vgacon_scrolldelta(c, 0);
+}
+
static const char *vgacon_startup(void)
{
const char *display_desc = NULL;
@@ -445,7 +437,7 @@ static void vgacon_invert_region(struct vc_data *c, u16 * p, int count)
}
}
-static void vgacon_set_cursor_size(int xpos, int from, int to)
+static void vgacon_set_cursor_size(int from, int to)
{
unsigned long flags;
int curs, cure;
@@ -478,18 +470,22 @@ static void vgacon_set_cursor_size(int xpos, int from, int to)
static void vgacon_cursor(struct vc_data *c, int mode)
{
+ unsigned int c_height;
+
if (c->vc_mode != KD_TEXT)
return;
vgacon_restore_screen(c);
+ c_height = c->vc_cell_height;
+
switch (mode) {
case CM_ERASE:
write_vga(14, (c->vc_pos - vga_vram_base) / 2);
if (vga_video_type >= VIDEO_TYPE_VGAC)
- vgacon_set_cursor_size(c->state.x, 31, 30);
+ vgacon_set_cursor_size(31, 30);
else
- vgacon_set_cursor_size(c->state.x, 31, 31);
+ vgacon_set_cursor_size(31, 31);
break;
case CM_MOVE:
@@ -497,51 +493,38 @@ static void vgacon_cursor(struct vc_data *c, int mode)
write_vga(14, (c->vc_pos - vga_vram_base) / 2);
switch (CUR_SIZE(c->vc_cursor_type)) {
case CUR_UNDERLINE:
- vgacon_set_cursor_size(c->state.x,
- c->vc_cell_height -
- (c->vc_cell_height <
- 10 ? 2 : 3),
- c->vc_cell_height -
- (c->vc_cell_height <
- 10 ? 1 : 2));
+ vgacon_set_cursor_size(c_height -
+ (c_height < 10 ? 2 : 3),
+ c_height -
+ (c_height < 10 ? 1 : 2));
break;
case CUR_TWO_THIRDS:
- vgacon_set_cursor_size(c->state.x,
- c->vc_cell_height / 3,
- c->vc_cell_height -
- (c->vc_cell_height <
- 10 ? 1 : 2));
+ vgacon_set_cursor_size(c_height / 3, c_height -
+ (c_height < 10 ? 1 : 2));
break;
case CUR_LOWER_THIRD:
- vgacon_set_cursor_size(c->state.x,
- (c->vc_cell_height * 2) / 3,
- c->vc_cell_height -
- (c->vc_cell_height <
- 10 ? 1 : 2));
+ vgacon_set_cursor_size(c_height * 2 / 3, c_height -
+ (c_height < 10 ? 1 : 2));
break;
case CUR_LOWER_HALF:
- vgacon_set_cursor_size(c->state.x,
- c->vc_cell_height / 2,
- c->vc_cell_height -
- (c->vc_cell_height <
- 10 ? 1 : 2));
+ vgacon_set_cursor_size(c_height / 2, c_height -
+ (c_height < 10 ? 1 : 2));
break;
case CUR_NONE:
if (vga_video_type >= VIDEO_TYPE_VGAC)
- vgacon_set_cursor_size(c->state.x, 31, 30);
+ vgacon_set_cursor_size(31, 30);
else
- vgacon_set_cursor_size(c->state.x, 31, 31);
+ vgacon_set_cursor_size(31, 31);
break;
default:
- vgacon_set_cursor_size(c->state.x, 1,
- c->vc_cell_height);
+ vgacon_set_cursor_size(1, c_height);
break;
}
break;
}
}
-static int vgacon_doresize(struct vc_data *c,
+static void vgacon_doresize(struct vc_data *c,
unsigned int width, unsigned int height)
{
unsigned long flags;
@@ -600,7 +583,6 @@ static int vgacon_doresize(struct vc_data *c,
}
raw_spin_unlock_irqrestore(&vga_lock, flags);
- return 0;
}
static int vgacon_switch(struct vc_data *c)
diff --git a/drivers/video/fbdev/amifb.c b/drivers/video/fbdev/amifb.c
index d88265dbebf4..f216b2c702a1 100644
--- a/drivers/video/fbdev/amifb.c
+++ b/drivers/video/fbdev/amifb.c
@@ -687,7 +687,7 @@ struct fb_var_cursorinfo {
__u16 height;
__u16 xspot;
__u16 yspot;
- __u8 data[1]; /* field with [height][width] */
+ DECLARE_FLEX_ARRAY(__u8, data); /* field with [height][width] */
};
struct fb_cursorstate {
diff --git a/drivers/video/fbdev/atmel_lcdfb.c b/drivers/video/fbdev/atmel_lcdfb.c
index 987c5f5f0241..f245da138e68 100644
--- a/drivers/video/fbdev/atmel_lcdfb.c
+++ b/drivers/video/fbdev/atmel_lcdfb.c
@@ -1308,7 +1308,7 @@ static struct platform_driver atmel_lcdfb_driver = {
.resume = atmel_lcdfb_resume,
.driver = {
.name = "atmel_lcdfb",
- .of_match_table = of_match_ptr(atmel_lcdfb_dt_ids),
+ .of_match_table = atmel_lcdfb_dt_ids,
},
};
diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
index 5c232eb13724..c137d6afe484 100644
--- a/drivers/video/fbdev/au1200fb.c
+++ b/drivers/video/fbdev/au1200fb.c
@@ -1732,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev)
/* Now hook interrupt too */
irq = platform_get_irq(dev, 0);
+ if (irq < 0)
+ return irq;
+
ret = request_irq(irq, au1200fb_handle_irq,
IRQF_SHARED, "lcd", (void *)dev);
if (ret) {
diff --git a/drivers/video/fbdev/bw2.c b/drivers/video/fbdev/bw2.c
index 025d663dc6fd..39f438de0d6b 100644
--- a/drivers/video/fbdev/bw2.c
+++ b/drivers/video/fbdev/bw2.c
@@ -17,7 +17,8 @@
#include <linux/init.h>
#include <linux/fb.h>
#include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/fbio.h>
diff --git a/drivers/video/fbdev/cg14.c b/drivers/video/fbdev/cg14.c
index 832a82f45c80..90fdc9d9bf5a 100644
--- a/drivers/video/fbdev/cg14.c
+++ b/drivers/video/fbdev/cg14.c
@@ -17,7 +17,8 @@
#include <linux/fb.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/fbio.h>
diff --git a/drivers/video/fbdev/cg3.c b/drivers/video/fbdev/cg3.c
index 6335cd364c74..98c60f72046a 100644
--- a/drivers/video/fbdev/cg3.c
+++ b/drivers/video/fbdev/cg3.c
@@ -17,7 +17,8 @@
#include <linux/init.h>
#include <linux/fb.h>
#include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/fbio.h>
diff --git a/drivers/video/fbdev/cg6.c b/drivers/video/fbdev/cg6.c
index 6884572efea1..6427b85f1a94 100644
--- a/drivers/video/fbdev/cg6.c
+++ b/drivers/video/fbdev/cg6.c
@@ -17,7 +17,8 @@
#include <linux/init.h>
#include <linux/fb.h>
#include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/fbio.h>
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index c6c9d040bdec..887fad44e7ec 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -1612,8 +1612,7 @@ static void fbcon_redraw_blit(struct vc_data *vc, struct fb_info *info,
}
}
-static void fbcon_redraw(struct vc_data *vc, struct fbcon_display *p,
- int line, int count, int offset)
+static void fbcon_redraw(struct vc_data *vc, int line, int count, int offset)
{
unsigned short *d = (unsigned short *)
(vc->vc_origin + vc->vc_size_row * line);
@@ -1827,7 +1826,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
case SCROLL_REDRAW:
redraw_up:
- fbcon_redraw(vc, p, t, b - t - count,
+ fbcon_redraw(vc, t, b - t - count,
count * vc->vc_cols);
fbcon_clear(vc, b - count, 0, count, vc->vc_cols);
scr_memsetw((unsigned short *) (vc->vc_origin +
@@ -1913,7 +1912,7 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
case SCROLL_REDRAW:
redraw_down:
- fbcon_redraw(vc, p, b - 1, b - t - count,
+ fbcon_redraw(vc, b - 1, b - t - count,
-count * vc->vc_cols);
fbcon_clear(vc, t, 0, count, vc->vc_cols);
scr_memsetw((unsigned short *) (vc->vc_origin +
diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c
index 94fe52928be2..22158d9ca8dd 100644
--- a/drivers/video/fbdev/ep93xx-fb.c
+++ b/drivers/video/fbdev/ep93xx-fb.c
@@ -548,7 +548,9 @@ static int ep93xxfb_probe(struct platform_device *pdev)
}
ep93xxfb_set_par(info);
- clk_prepare_enable(fbi->clk);
+ err = clk_prepare_enable(fbi->clk);
+ if (err)
+ goto failed_check;
err = register_framebuffer(info);
if (err)
diff --git a/drivers/video/fbdev/ffb.c b/drivers/video/fbdev/ffb.c
index c6d3111dcbb0..c473841eb6ff 100644
--- a/drivers/video/fbdev/ffb.c
+++ b/drivers/video/fbdev/ffb.c
@@ -16,7 +16,8 @@
#include <linux/fb.h>
#include <linux/mm.h>
#include <linux/timer.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/upa.h>
diff --git a/drivers/video/fbdev/goldfishfb.c b/drivers/video/fbdev/goldfishfb.c
index 6fa2108fd912..e41c9fef4a3b 100644
--- a/drivers/video/fbdev/goldfishfb.c
+++ b/drivers/video/fbdev/goldfishfb.c
@@ -203,8 +203,8 @@ static int goldfish_fb_probe(struct platform_device *pdev)
}
fb->irq = platform_get_irq(pdev, 0);
- if (fb->irq <= 0) {
- ret = -ENODEV;
+ if (fb->irq < 0) {
+ ret = fb->irq;
goto err_no_irq;
}
diff --git a/drivers/video/fbdev/grvga.c b/drivers/video/fbdev/grvga.c
index 9aa15be29ea9..d4a9a58b3691 100644
--- a/drivers/video/fbdev/grvga.c
+++ b/drivers/video/fbdev/grvga.c
@@ -12,8 +12,7 @@
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
-#include <linux/of_platform.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/string.h>
diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
index adf36690c342..77dedd2c05fd 100644
--- a/drivers/video/fbdev/imxfb.c
+++ b/drivers/video/fbdev/imxfb.c
@@ -613,10 +613,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
if (var->hsync_len < 1 || var->hsync_len > 64)
printk(KERN_ERR "%s: invalid hsync_len %d\n",
info->fix.id, var->hsync_len);
- if (var->left_margin > 255)
+ if (var->left_margin < 3 || var->left_margin > 255)
printk(KERN_ERR "%s: invalid left_margin %d\n",
info->fix.id, var->left_margin);
- if (var->right_margin > 255)
+ if (var->right_margin < 1 || var->right_margin > 255)
printk(KERN_ERR "%s: invalid right_margin %d\n",
info->fix.id, var->right_margin);
if (var->yres < 1 || var->yres > ymax_mask)
@@ -673,7 +673,8 @@ static int imxfb_init_fbinfo(struct platform_device *pdev)
pr_debug("%s\n",__func__);
- info->pseudo_palette = kmalloc_array(16, sizeof(u32), GFP_KERNEL);
+ info->pseudo_palette = devm_kmalloc_array(&pdev->dev, 16,
+ sizeof(u32), GFP_KERNEL);
if (!info->pseudo_palette)
return -ENOMEM;
@@ -868,7 +869,6 @@ static int imxfb_probe(struct platform_device *pdev)
struct imxfb_info *fbi;
struct lcd_device *lcd;
struct fb_info *info;
- struct resource *res;
struct imx_fb_videomode *m;
const struct of_device_id *of_id;
struct device_node *display_np;
@@ -885,10 +885,6 @@ static int imxfb_probe(struct platform_device *pdev)
if (of_id)
pdev->id_entry = of_id->data;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!res)
- return -ENODEV;
-
info = framebuffer_alloc(sizeof(struct imxfb_info), &pdev->dev);
if (!info)
return -ENOMEM;
@@ -907,7 +903,7 @@ static int imxfb_probe(struct platform_device *pdev)
if (!display_np) {
dev_err(&pdev->dev, "No display defined in devicetree\n");
ret = -EINVAL;
- goto failed_of_parse;
+ goto failed_init;
}
/*
@@ -921,13 +917,13 @@ static int imxfb_probe(struct platform_device *pdev)
if (!fbi->mode) {
ret = -ENOMEM;
of_node_put(display_np);
- goto failed_of_parse;
+ goto failed_init;
}
ret = imxfb_of_read_mode(&pdev->dev, display_np, fbi->mode);
of_node_put(display_np);
if (ret)
- goto failed_of_parse;
+ goto failed_init;
/* Calculate maximum bytes used per pixel. In most cases this should
* be the same as m->bpp/8 */
@@ -940,7 +936,7 @@ static int imxfb_probe(struct platform_device *pdev)
fbi->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(fbi->clk_ipg)) {
ret = PTR_ERR(fbi->clk_ipg);
- goto failed_getclock;
+ goto failed_init;
}
/*
@@ -955,25 +951,25 @@ static int imxfb_probe(struct platform_device *pdev)
*/
ret = clk_prepare_enable(fbi->clk_ipg);
if (ret)
- goto failed_getclock;
+ goto failed_init;
clk_disable_unprepare(fbi->clk_ipg);
fbi->clk_ahb = devm_clk_get(&pdev->dev, "ahb");
if (IS_ERR(fbi->clk_ahb)) {
ret = PTR_ERR(fbi->clk_ahb);
- goto failed_getclock;
+ goto failed_init;
}
fbi->clk_per = devm_clk_get(&pdev->dev, "per");
if (IS_ERR(fbi->clk_per)) {
ret = PTR_ERR(fbi->clk_per);
- goto failed_getclock;
+ goto failed_init;
}
- fbi->regs = devm_ioremap_resource(&pdev->dev, res);
+ fbi->regs = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(fbi->regs)) {
ret = PTR_ERR(fbi->regs);
- goto failed_ioremap;
+ goto failed_init;
}
fbi->map_size = PAGE_ALIGN(info->fix.smem_len);
@@ -982,7 +978,7 @@ static int imxfb_probe(struct platform_device *pdev)
if (!info->screen_buffer) {
dev_err(&pdev->dev, "Failed to allocate video RAM\n");
ret = -ENOMEM;
- goto failed_map;
+ goto failed_init;
}
info->fix.smem_start = fbi->map_dma;
@@ -1034,18 +1030,11 @@ static int imxfb_probe(struct platform_device *pdev)
failed_lcd:
unregister_framebuffer(info);
-
failed_register:
fb_dealloc_cmap(&info->cmap);
failed_cmap:
dma_free_wc(&pdev->dev, fbi->map_size, info->screen_buffer,
fbi->map_dma);
-failed_map:
-failed_ioremap:
-failed_getclock:
- release_mem_region(res->start, resource_size(res));
-failed_of_parse:
- kfree(info->pseudo_palette);
failed_init:
framebuffer_release(info);
return ret;
@@ -1062,11 +1051,10 @@ static void imxfb_remove(struct platform_device *pdev)
fb_dealloc_cmap(&info->cmap);
dma_free_wc(&pdev->dev, fbi->map_size, info->screen_buffer,
fbi->map_dma);
- kfree(info->pseudo_palette);
framebuffer_release(info);
}
-static int __maybe_unused imxfb_suspend(struct device *dev)
+static int imxfb_suspend(struct device *dev)
{
struct fb_info *info = dev_get_drvdata(dev);
struct imxfb_info *fbi = info->par;
@@ -1076,7 +1064,7 @@ static int __maybe_unused imxfb_suspend(struct device *dev)
return 0;
}
-static int __maybe_unused imxfb_resume(struct device *dev)
+static int imxfb_resume(struct device *dev)
{
struct fb_info *info = dev_get_drvdata(dev);
struct imxfb_info *fbi = info->par;
@@ -1086,13 +1074,13 @@ static int __maybe_unused imxfb_resume(struct device *dev)
return 0;
}
-static SIMPLE_DEV_PM_OPS(imxfb_pm_ops, imxfb_suspend, imxfb_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(imxfb_pm_ops, imxfb_suspend, imxfb_resume);
static struct platform_driver imxfb_driver = {
.driver = {
.name = DRIVER_NAME,
.of_match_table = imxfb_of_dev_id,
- .pm = &imxfb_pm_ops,
+ .pm = pm_sleep_ptr(&imxfb_pm_ops),
},
.probe = imxfb_probe,
.remove_new = imxfb_remove,
diff --git a/drivers/video/fbdev/kyro/STG4000InitDevice.c b/drivers/video/fbdev/kyro/STG4000InitDevice.c
index edfa0a04854d..79886a246638 100644
--- a/drivers/video/fbdev/kyro/STG4000InitDevice.c
+++ b/drivers/video/fbdev/kyro/STG4000InitDevice.c
@@ -83,11 +83,11 @@ volatile u32 i,count=0; \
static u32 InitSDRAMRegisters(volatile STG4000REG __iomem *pSTGReg,
u32 dwSubSysID, u32 dwRevID)
{
- u32 adwSDRAMArgCfg0[] = { 0xa0, 0x80, 0xa0, 0xa0, 0xa0 };
- u32 adwSDRAMCfg1[] = { 0x8732, 0x8732, 0xa732, 0xa732, 0x8732 };
- u32 adwSDRAMCfg2[] = { 0x87d2, 0x87d2, 0xa7d2, 0x87d2, 0xa7d2 };
- u32 adwSDRAMRsh[] = { 36, 39, 40 };
- u32 adwChipSpeed[] = { 110, 120, 125 };
+ static const u8 adwSDRAMArgCfg0[] = { 0xa0, 0x80, 0xa0, 0xa0, 0xa0 };
+ static const u16 adwSDRAMCfg1[] = { 0x8732, 0x8732, 0xa732, 0xa732, 0x8732 };
+ static const u16 adwSDRAMCfg2[] = { 0x87d2, 0x87d2, 0xa7d2, 0x87d2, 0xa7d2 };
+ static const u8 adwSDRAMRsh[] = { 36, 39, 40 };
+ static const u8 adwChipSpeed[] = { 110, 120, 125 };
u32 dwMemTypeIdx;
u32 dwChipSpeedIdx;
diff --git a/drivers/video/fbdev/leo.c b/drivers/video/fbdev/leo.c
index 3ffc0a725f89..89ca48235dbe 100644
--- a/drivers/video/fbdev/leo.c
+++ b/drivers/video/fbdev/leo.c
@@ -16,8 +16,9 @@
#include <linux/init.h>
#include <linux/fb.h>
#include <linux/mm.h>
-#include <linux/of_device.h>
#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/fbio.h>
diff --git a/drivers/video/fbdev/mb862xx/mb862xxfb_accel.c b/drivers/video/fbdev/mb862xx/mb862xxfb_accel.c
index 61aed7fc0b8d..c35a7479fbf2 100644
--- a/drivers/video/fbdev/mb862xx/mb862xxfb_accel.c
+++ b/drivers/video/fbdev/mb862xx/mb862xxfb_accel.c
@@ -15,9 +15,7 @@
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
-#if defined(CONFIG_OF)
-#include <linux/of_platform.h>
-#endif
+
#include "mb862xxfb.h"
#include "mb862xx_reg.h"
#include "mb862xxfb_accel.h"
diff --git a/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c b/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c
index b5c8fcab9940..9dc347d163cf 100644
--- a/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c
+++ b/drivers/video/fbdev/mb862xx/mb862xxfbdrv.c
@@ -18,11 +18,11 @@
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
-#if defined(CONFIG_OF)
+#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
-#include <linux/of_platform.h>
-#endif
+#include <linux/platform_device.h>
+
#include "mb862xxfb.h"
#include "mb862xx_reg.h"
diff --git a/drivers/video/fbdev/mmp/hw/mmp_ctrl.c b/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
index 51fbf02a0343..76b50b6c98ad 100644
--- a/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
+++ b/drivers/video/fbdev/mmp/hw/mmp_ctrl.c
@@ -519,7 +519,9 @@ static int mmphw_probe(struct platform_device *pdev)
"unable to get clk %s\n", mi->clk_name);
goto failed;
}
- clk_prepare_enable(ctrl->clk);
+ ret = clk_prepare_enable(ctrl->clk);
+ if (ret)
+ goto failed;
/* init global regs */
ctrl_set_default(ctrl);
diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
index ba94a0a7bd4f..77fce1223a64 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
@@ -15,12 +15,12 @@
#include <linux/gpio/consumer.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>
+#include <linux/mod_devicetable.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
-#include <linux/of_device.h>
#include <video/omapfb_dss.h>
#include <video/mipi_display.h>
diff --git a/drivers/video/fbdev/p9100.c b/drivers/video/fbdev/p9100.c
index 0876962c52eb..e2e747cae9b1 100644
--- a/drivers/video/fbdev/p9100.c
+++ b/drivers/video/fbdev/p9100.c
@@ -15,7 +15,8 @@
#include <linux/init.h>
#include <linux/fb.h>
#include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/fbio.h>
diff --git a/drivers/video/fbdev/platinumfb.c b/drivers/video/fbdev/platinumfb.c
index f8283fcd5edb..b27f43b3616e 100644
--- a/drivers/video/fbdev/platinumfb.c
+++ b/drivers/video/fbdev/platinumfb.c
@@ -30,9 +30,9 @@
#include <linux/fb.h>
#include <linux/init.h>
#include <linux/nvram.h>
+#include <linux/of.h>
#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include "macmodes.h"
#include "platinumfb.h"
diff --git a/drivers/video/fbdev/sbuslib.c b/drivers/video/fbdev/sbuslib.c
index 7f79db827b07..21e9fd8e69e2 100644
--- a/drivers/video/fbdev/sbuslib.c
+++ b/drivers/video/fbdev/sbuslib.c
@@ -11,7 +11,7 @@
#include <linux/fb.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <asm/fbio.h>
diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c
index 11c373798279..46881a691549 100644
--- a/drivers/video/fbdev/ssd1307fb.c
+++ b/drivers/video/fbdev/ssd1307fb.c
@@ -399,8 +399,8 @@ static int ssd1307fb_init(struct ssd1307fb_par *par)
/* Enable the PWM */
pwm_enable(par->pwm);
- dev_dbg(&par->client->dev, "Using PWM%d with a %lluns period.\n",
- par->pwm->pwm, pwm_get_period(par->pwm));
+ dev_dbg(&par->client->dev, "Using PWM %s with a %lluns period.\n",
+ par->pwm->label, pwm_get_period(par->pwm));
}
/* Set initial contrast */
diff --git a/drivers/video/fbdev/sunxvr1000.c b/drivers/video/fbdev/sunxvr1000.c
index 490bd9a14763..17d61e1d11a6 100644
--- a/drivers/video/fbdev/sunxvr1000.c
+++ b/drivers/video/fbdev/sunxvr1000.c
@@ -8,7 +8,8 @@
#include <linux/kernel.h>
#include <linux/fb.h>
#include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
struct gfb_info {
struct fb_info *info;
diff --git a/drivers/video/fbdev/sunxvr2500.c b/drivers/video/fbdev/sunxvr2500.c
index 2cab4b9be68a..e64ec7d0caf9 100644
--- a/drivers/video/fbdev/sunxvr2500.c
+++ b/drivers/video/fbdev/sunxvr2500.c
@@ -10,7 +10,7 @@
#include <linux/fb.h>
#include <linux/pci.h>
#include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <asm/io.h>
diff --git a/drivers/video/fbdev/sunxvr500.c b/drivers/video/fbdev/sunxvr500.c
index 6ec358af1256..c4e01e871483 100644
--- a/drivers/video/fbdev/sunxvr500.c
+++ b/drivers/video/fbdev/sunxvr500.c
@@ -10,7 +10,7 @@
#include <linux/fb.h>
#include <linux/pci.h>
#include <linux/init.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
#include <asm/io.h>
diff --git a/drivers/video/fbdev/tcx.c b/drivers/video/fbdev/tcx.c
index fc3ac2301b45..255eb57aefa2 100644
--- a/drivers/video/fbdev/tcx.c
+++ b/drivers/video/fbdev/tcx.c
@@ -17,7 +17,8 @@
#include <linux/init.h>
#include <linux/fb.h>
#include <linux/mm.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
#include <asm/io.h>
#include <asm/fbio.h>
diff --git a/drivers/video/fbdev/xilinxfb.c b/drivers/video/fbdev/xilinxfb.c
index 2aa3a528277f..542baddd54ad 100644
--- a/drivers/video/fbdev/xilinxfb.c
+++ b/drivers/video/fbdev/xilinxfb.c
@@ -24,14 +24,13 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
+#include <linux/platform_device.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/fb.h>
#include <linux/init.h>
#include <linux/dma-mapping.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/of_address.h>
+#include <linux/of.h>
#include <linux/io.h>
#include <linux/slab.h>
diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c
index 835f6cc2fb66..fa5226c198cc 100644
--- a/drivers/virtio/virtio_mem.c
+++ b/drivers/virtio/virtio_mem.c
@@ -38,11 +38,6 @@ module_param(bbm_block_size, ulong, 0444);
MODULE_PARM_DESC(bbm_block_size,
"Big Block size in bytes. Default is 0 (auto-detection).");
-static bool bbm_safe_unplug = true;
-module_param(bbm_safe_unplug, bool, 0444);
-MODULE_PARM_DESC(bbm_safe_unplug,
- "Use a safe unplug mechanism in BBM, avoiding long/endless loops");
-
/*
* virtio-mem currently supports the following modes of operation:
*
@@ -173,6 +168,13 @@ struct virtio_mem {
/* The number of subblocks per Linux memory block. */
uint32_t sbs_per_mb;
+ /*
+ * Some of the Linux memory blocks tracked as "partially
+ * plugged" are completely unplugged and can be offlined
+ * and removed -- which previously failed.
+ */
+ bool have_unplugged_mb;
+
/* Summary of all memory block states. */
unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT];
@@ -746,11 +748,15 @@ static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm,
* immediately instead of waiting.
*/
virtio_mem_retry(vm);
- } else {
- dev_dbg(&vm->vdev->dev,
- "offlining and removing memory failed: %d\n", rc);
+ return 0;
}
- return rc;
+ dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc);
+ /*
+ * We don't really expect this to fail, because we fake-offlined all
+ * memory already. But it could fail in corner cases.
+ */
+ WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY);
+ return rc == -ENOMEM ? -ENOMEM : -EBUSY;
}
/*
@@ -767,6 +773,34 @@ static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
}
/*
+ * Try (offlining and) removing memory from Linux in case all subblocks are
+ * unplugged. Can be called on online and offline memory blocks.
+ *
+ * May modify the state of memory blocks in virtio-mem.
+ */
+static int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm,
+ unsigned long mb_id)
+{
+ int rc;
+
+ /*
+ * Once all subblocks of a memory block were unplugged, offline and
+ * remove it.
+ */
+ if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
+ return 0;
+
+ /* offline_and_remove_memory() works for online and offline memory. */
+ mutex_unlock(&vm->hotplug_mutex);
+ rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
+ mutex_lock(&vm->hotplug_mutex);
+ if (!rc)
+ virtio_mem_sbm_set_mb_state(vm, mb_id,
+ VIRTIO_MEM_SBM_MB_UNUSED);
+ return rc;
+}
+
+/*
* See virtio_mem_offline_and_remove_memory(): Try to offline and remove a
* all Linux memory blocks covered by the big block.
*/
@@ -1155,7 +1189,8 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
* Try to allocate a range, marking pages fake-offline, effectively
* fake-offlining them.
*/
-static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
+static int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn,
+ unsigned long nr_pages)
{
const bool is_movable = is_zone_movable_page(pfn_to_page(pfn));
int rc, retry_count;
@@ -1168,6 +1203,14 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages)
* some guarantees.
*/
for (retry_count = 0; retry_count < 5; retry_count++) {
+ /*
+ * If the config changed, stop immediately and go back to the
+ * main loop: avoid trying to keep unplugging if the device
+ * might have decided to not remove any more memory.
+ */
+ if (atomic_read(&vm->config_changed))
+ return -EAGAIN;
+
rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE,
GFP_KERNEL);
if (rc == -ENOMEM)
@@ -1917,7 +1960,7 @@ static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
sb_id * vm->sbm.sb_size);
- rc = virtio_mem_fake_offline(start_pfn, nr_pages);
+ rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages);
if (rc)
return rc;
@@ -1989,20 +2032,10 @@ static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm,
}
unplugged:
- /*
- * Once all subblocks of a memory block were unplugged, offline and
- * remove it. This will usually not fail, as no memory is in use
- * anymore - however some other notifiers might NACK the request.
- */
- if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
- mutex_unlock(&vm->hotplug_mutex);
- rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
- mutex_lock(&vm->hotplug_mutex);
- if (!rc)
- virtio_mem_sbm_set_mb_state(vm, mb_id,
- VIRTIO_MEM_SBM_MB_UNUSED);
- }
-
+ rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id);
+ if (rc)
+ vm->sbm.have_unplugged_mb = 1;
+ /* Ignore errors, this is not critical. We'll retry later. */
return 0;
}
@@ -2111,38 +2144,32 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
VIRTIO_MEM_BBM_BB_ADDED))
return -EINVAL;
- if (bbm_safe_unplug) {
- /*
- * Start by fake-offlining all memory. Once we marked the device
- * block as fake-offline, all newly onlined memory will
- * automatically be kept fake-offline. Protect from concurrent
- * onlining/offlining until we have a consistent state.
- */
- mutex_lock(&vm->hotplug_mutex);
- virtio_mem_bbm_set_bb_state(vm, bb_id,
- VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
+ /*
+ * Start by fake-offlining all memory. Once we marked the device
+ * block as fake-offline, all newly onlined memory will
+ * automatically be kept fake-offline. Protect from concurrent
+ * onlining/offlining until we have a consistent state.
+ */
+ mutex_lock(&vm->hotplug_mutex);
+ virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
- for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
- page = pfn_to_online_page(pfn);
- if (!page)
- continue;
+ for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+ page = pfn_to_online_page(pfn);
+ if (!page)
+ continue;
- rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION);
- if (rc) {
- end_pfn = pfn;
- goto rollback_safe_unplug;
- }
+ rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION);
+ if (rc) {
+ end_pfn = pfn;
+ goto rollback;
}
- mutex_unlock(&vm->hotplug_mutex);
}
+ mutex_unlock(&vm->hotplug_mutex);
rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
if (rc) {
- if (bbm_safe_unplug) {
- mutex_lock(&vm->hotplug_mutex);
- goto rollback_safe_unplug;
- }
- return rc;
+ mutex_lock(&vm->hotplug_mutex);
+ goto rollback;
}
rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
@@ -2154,7 +2181,7 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
VIRTIO_MEM_BBM_BB_UNUSED);
return rc;
-rollback_safe_unplug:
+rollback:
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
page = pfn_to_online_page(pfn);
if (!page)
@@ -2260,12 +2287,13 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
/*
* Try to unplug all blocks that couldn't be unplugged before, for example,
- * because the hypervisor was busy.
+ * because the hypervisor was busy. Further, offline and remove any memory
+ * blocks where we previously failed.
*/
-static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
+static int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
{
unsigned long id;
- int rc;
+ int rc = 0;
if (!vm->in_sbm) {
virtio_mem_bbm_for_each_bb(vm, id,
@@ -2287,6 +2315,27 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
VIRTIO_MEM_SBM_MB_UNUSED);
}
+ if (!vm->sbm.have_unplugged_mb)
+ return 0;
+
+ /*
+ * Let's retry (offlining and) removing completely unplugged Linux
+ * memory blocks.
+ */
+ vm->sbm.have_unplugged_mb = false;
+
+ mutex_lock(&vm->hotplug_mutex);
+ virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL)
+ rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+ virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL)
+ rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+ virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
+ rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
+ mutex_unlock(&vm->hotplug_mutex);
+
+ if (rc)
+ vm->sbm.have_unplugged_mb = true;
+ /* Ignore errors, this is not critical. We'll retry later. */
return 0;
}
@@ -2368,9 +2417,9 @@ retry:
virtio_mem_refresh_config(vm);
}
- /* Unplug any leftovers from previous runs */
+ /* Cleanup any leftovers from previous runs */
if (!rc)
- rc = virtio_mem_unplug_pending_mb(vm);
+ rc = virtio_mem_cleanup_pending_mb(vm);
if (!rc && vm->requested_size != vm->plugged_size) {
if (vm->requested_size > vm->plugged_size) {
@@ -2382,6 +2431,13 @@ retry:
}
}
+ /*
+ * Keep retrying to offline and remove completely unplugged Linux
+ * memory blocks.
+ */
+ if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb)
+ rc = -EBUSY;
+
switch (rc) {
case 0:
vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index a46a4a29e929..97760f611295 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -607,9 +607,8 @@ static void virtio_mmio_release_dev(struct device *_d)
struct virtio_device *vdev =
container_of(_d, struct virtio_device, dev);
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
- struct platform_device *pdev = vm_dev->pdev;
- devm_kfree(&pdev->dev, vm_dev);
+ kfree(vm_dev);
}
/* Platform device */
@@ -620,7 +619,7 @@ static int virtio_mmio_probe(struct platform_device *pdev)
unsigned long magic;
int rc;
- vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
+ vm_dev = kzalloc(sizeof(*vm_dev), GFP_KERNEL);
if (!vm_dev)
return -ENOMEM;
diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index a6c86f916dbd..c2524a7207cf 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c
@@ -557,8 +557,6 @@ static int virtio_pci_probe(struct pci_dev *pci_dev,
pci_set_master(pci_dev);
- vp_dev->is_legacy = vp_dev->ldev.ioaddr ? true : false;
-
rc = register_virtio_device(&vp_dev->vdev);
reg_dev = vp_dev;
if (rc)
diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 2257f1b3d8ae..d9cbb02b35a1 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c
@@ -223,6 +223,7 @@ int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
vp_dev->config_vector = vp_config_vector;
vp_dev->setup_vq = setup_vq;
vp_dev->del_vq = del_vq;
+ vp_dev->is_legacy = true;
return 0;
}
diff --git a/drivers/virtio/virtio_vdpa.c b/drivers/virtio/virtio_vdpa.c
index 989e2d7184ce..961161da5900 100644
--- a/drivers/virtio/virtio_vdpa.c
+++ b/drivers/virtio/virtio_vdpa.c
@@ -393,11 +393,13 @@ static int virtio_vdpa_find_vqs(struct virtio_device *vdev, unsigned int nvqs,
cb.callback = virtio_vdpa_config_cb;
cb.private = vd_dev;
ops->set_config_cb(vdpa, &cb);
+ kfree(masks);
return 0;
err_setup_vq:
virtio_vdpa_del_vqs(vdev);
+ kfree(masks);
return err;
}
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index c7715f8bd452..3bdd5b59661d 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -112,6 +112,7 @@ struct irq_info {
unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
u64 eoi_time; /* Time in jiffies when to EOI. */
raw_spinlock_t lock;
+ bool is_static; /* Is event channel static */
union {
unsigned short virq;
@@ -815,15 +816,6 @@ static void xen_free_irq(unsigned irq)
irq_free_desc(irq);
}
-static void xen_evtchn_close(evtchn_port_t port)
-{
- struct evtchn_close close;
-
- close.port = port;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
-}
-
/* Not called for lateeoi events. */
static void event_handler_exit(struct irq_info *info)
{
@@ -982,7 +974,8 @@ static void __unbind_from_irq(unsigned int irq)
unsigned int cpu = cpu_from_irq(irq);
struct xenbus_device *dev;
- xen_evtchn_close(evtchn);
+ if (!info->is_static)
+ xen_evtchn_close(evtchn);
switch (type_from_irq(irq)) {
case IRQT_VIRQ:
@@ -1574,7 +1567,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority)
}
EXPORT_SYMBOL_GPL(xen_set_irq_priority);
-int evtchn_make_refcounted(evtchn_port_t evtchn)
+int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static)
{
int irq = get_evtchn_to_irq(evtchn);
struct irq_info *info;
@@ -1590,6 +1583,7 @@ int evtchn_make_refcounted(evtchn_port_t evtchn)
WARN_ON(info->refcnt != -1);
info->refcnt = 1;
+ info->is_static = is_static;
return 0;
}
diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index c99415a70051..9139a7364df5 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -366,10 +366,10 @@ static int evtchn_resize_ring(struct per_user_data *u)
return 0;
}
-static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
+static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port,
+ bool is_static)
{
struct user_evtchn *evtchn;
- struct evtchn_close close;
int rc = 0;
/*
@@ -402,14 +402,14 @@ static int evtchn_bind_to_user(struct per_user_data *u, evtchn_port_t port)
if (rc < 0)
goto err;
- rc = evtchn_make_refcounted(port);
+ rc = evtchn_make_refcounted(port, is_static);
return rc;
err:
/* bind failed, should close the port now */
- close.port = port;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
- BUG();
+ if (!is_static)
+ xen_evtchn_close(port);
+
del_evtchn(u, evtchn);
return rc;
}
@@ -456,7 +456,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
- rc = evtchn_bind_to_user(u, bind_virq.port);
+ rc = evtchn_bind_to_user(u, bind_virq.port, false);
if (rc == 0)
rc = bind_virq.port;
break;
@@ -482,7 +482,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
- rc = evtchn_bind_to_user(u, bind_interdomain.local_port);
+ rc = evtchn_bind_to_user(u, bind_interdomain.local_port, false);
if (rc == 0)
rc = bind_interdomain.local_port;
break;
@@ -507,7 +507,7 @@ static long evtchn_ioctl(struct file *file,
if (rc != 0)
break;
- rc = evtchn_bind_to_user(u, alloc_unbound.port);
+ rc = evtchn_bind_to_user(u, alloc_unbound.port, false);
if (rc == 0)
rc = alloc_unbound.port;
break;
@@ -536,6 +536,23 @@ static long evtchn_ioctl(struct file *file,
break;
}
+ case IOCTL_EVTCHN_BIND_STATIC: {
+ struct ioctl_evtchn_bind bind;
+ struct user_evtchn *evtchn;
+
+ rc = -EFAULT;
+ if (copy_from_user(&bind, uarg, sizeof(bind)))
+ break;
+
+ rc = -EISCONN;
+ evtchn = find_evtchn(u, bind.port);
+ if (evtchn)
+ break;
+
+ rc = evtchn_bind_to_user(u, bind.port, true);
+ break;
+ }
+
case IOCTL_EVTCHN_NOTIFY: {
struct ioctl_evtchn_notify notify;
struct user_evtchn *evtchn;
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index e1ec725c2819..f13c3b76ad1e 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -498,14 +498,21 @@ static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);
+static atomic64_t deferred_count;
+static atomic64_t leaked_count;
+static unsigned int free_per_iteration = 10;
+module_param(free_per_iteration, uint, 0600);
+
static void gnttab_handle_deferred(struct timer_list *unused)
{
- unsigned int nr = 10;
+ unsigned int nr = READ_ONCE(free_per_iteration);
+ const bool ignore_limit = nr == 0;
struct deferred_entry *first = NULL;
unsigned long flags;
+ size_t freed = 0;
spin_lock_irqsave(&gnttab_list_lock, flags);
- while (nr--) {
+ while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
struct deferred_entry *entry
= list_first_entry(&deferred_list,
struct deferred_entry, list);
@@ -515,10 +522,14 @@ static void gnttab_handle_deferred(struct timer_list *unused)
list_del(&entry->list);
spin_unlock_irqrestore(&gnttab_list_lock, flags);
if (_gnttab_end_foreign_access_ref(entry->ref)) {
+ uint64_t ret = atomic64_dec_return(&deferred_count);
+
put_free_entry(entry->ref);
- pr_debug("freeing g.e. %#x (pfn %#lx)\n",
- entry->ref, page_to_pfn(entry->page));
+ pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
+ entry->ref, page_to_pfn(entry->page),
+ (unsigned long long)ret);
put_page(entry->page);
+ freed++;
kfree(entry);
entry = NULL;
} else {
@@ -530,21 +541,22 @@ static void gnttab_handle_deferred(struct timer_list *unused)
spin_lock_irqsave(&gnttab_list_lock, flags);
if (entry)
list_add_tail(&entry->list, &deferred_list);
- else if (list_empty(&deferred_list))
- break;
}
- if (!list_empty(&deferred_list) && !timer_pending(&deferred_timer)) {
+ if (list_empty(&deferred_list))
+ WARN_ON(atomic64_read(&deferred_count));
+ else if (!timer_pending(&deferred_timer)) {
deferred_timer.expires = jiffies + HZ;
add_timer(&deferred_timer);
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
+ pr_debug("Freed %zu references", freed);
}
static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
struct deferred_entry *entry;
gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
- const char *what = KERN_WARNING "leaking";
+ uint64_t leaked, deferred;
entry = kmalloc(sizeof(*entry), gfp);
if (!page) {
@@ -567,10 +579,16 @@ static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
add_timer(&deferred_timer);
}
spin_unlock_irqrestore(&gnttab_list_lock, flags);
- what = KERN_DEBUG "deferring";
+ deferred = atomic64_inc_return(&deferred_count);
+ leaked = atomic64_read(&leaked_count);
+ pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
+ ref, page ? page_to_pfn(page) : -1, deferred, leaked);
+ } else {
+ deferred = atomic64_read(&deferred_count);
+ leaked = atomic64_inc_return(&leaked_count);
+ pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
+ ref, page ? page_to_pfn(page) : -1, deferred, leaked);
}
- printk("%s g.e. %#x (pfn %#lx)\n",
- what, ref, page ? page_to_pfn(page) : -1);
}
int gnttab_try_end_foreign_access(grant_ref_t ref)
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index 58b732dcbfb8..639bf628389b 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -811,6 +811,9 @@ static int xenbus_probe_thread(void *unused)
static int __init xenbus_probe_initcall(void)
{
+ if (!xen_domain())
+ return -ENODEV;
+
/*
* Probe XenBus here in the XS_PV case, and also XS_HVM unless we
* need to wait for the platform PCI device to come up or
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 0c51889a60b3..29281b7c3887 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -46,8 +46,8 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
* NOTE: these are set after open so only reflect 9p client not
* underlying file system on server.
*/
-static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags,
- int s_cache, unsigned int f_flags)
+static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags,
+ unsigned int s_cache, unsigned int f_flags)
{
if (fid->qid.type != P9_QTFILE)
return;
@@ -57,7 +57,7 @@ static inline void v9fs_fid_add_modes(struct p9_fid *fid, int s_flags,
(s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) {
fid->mode |= P9L_DIRECT; /* no read or write cache */
} else if ((!(s_cache & CACHE_WRITEBACK)) ||
- (f_flags & O_DSYNC) | (s_flags & V9FS_SYNC)) {
+ (f_flags & O_DSYNC) || (s_flags & V9FS_SYNC)) {
fid->mode |= P9L_NOWRITECACHE;
}
}
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index c7f774fe398f..d525957594b6 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -545,8 +545,6 @@ void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses)
p9_client_begin_disconnect(v9ses->clnt);
}
-extern int v9fs_error_init(void);
-
static struct kobject *v9fs_kobj;
#ifdef CONFIG_9P_FSCACHE
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 06a2514f0d88..698c43dd5dc8 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -108,7 +108,7 @@ enum p9_cache_bits {
struct v9fs_session_info {
/* options */
- unsigned char flags;
+ unsigned int flags;
unsigned char nodev;
unsigned short debug;
unsigned int afid;
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 45b684b7d8d7..4102759a5cb5 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -208,7 +208,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
struct p9_fid *fid;
__le32 version;
loff_t i_size;
- int retval = 0;
+ int retval = 0, put_err;
fid = filp->private_data;
p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
@@ -221,7 +221,8 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
spin_lock(&inode->i_lock);
hlist_del(&fid->ilist);
spin_unlock(&inode->i_lock);
- retval = p9_fid_put(fid);
+ put_err = p9_fid_put(fid);
+ retval = retval < 0 ? retval : put_err;
}
if ((filp->f_mode & FMODE_WRITE)) {
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 2996fb00387f..11cd8d23f6f2 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -505,10 +505,7 @@ v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
p9_debug(P9_DEBUG_MMAP, "filp :%p\n", filp);
if (!(v9ses->cache & CACHE_WRITEBACK)) {
- p9_debug(P9_DEBUG_CACHE, "(no mmap mode)");
- if (vma->vm_flags & VM_MAYSHARE)
- return -ENODEV;
- invalidate_inode_pages2(filp->f_mapping);
+ p9_debug(P9_DEBUG_CACHE, "(read-only mmap mode)");
return generic_file_readonly_mmap(filp, vma);
}
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 36b466e35887..0d28ecf668d0 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -163,7 +163,6 @@ int v9fs_uflags2omode(int uflags, int extended)
{
int ret;
- ret = 0;
switch (uflags&3) {
default:
case O_RDONLY:
@@ -261,7 +260,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
inode->i_blocks = 0;
inode->i_rdev = rdev;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_mapping->a_ops = &v9fs_addr_operations;
inode->i_private = NULL;
@@ -603,7 +602,6 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
- err = 0;
name = dentry->d_name.name;
dfid = v9fs_parent_fid(dentry);
if (IS_ERR(dfid)) {
@@ -815,8 +813,6 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
if (!(flags & O_CREAT) || d_really_is_positive(dentry))
return finish_no_open(file, res);
- err = 0;
-
v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
p9_omode = v9fs_uflags2omode(flags, v9fs_proto_dotu(v9ses));
@@ -912,7 +908,6 @@ v9fs_vfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
return -EINVAL;
p9_debug(P9_DEBUG_VFS, "\n");
- retval = 0;
old_inode = d_inode(old_dentry);
new_inode = d_inode(new_dentry);
v9ses = v9fs_inode2v9ses(old_inode);
@@ -1016,7 +1011,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
return 0;
} else if (v9ses->cache & CACHE_WRITEBACK) {
if (S_ISREG(inode->i_mode)) {
@@ -1037,7 +1032,7 @@ v9fs_vfs_getattr(struct mnt_idmap *idmap, const struct path *path,
return PTR_ERR(st);
v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
- generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
p9stat_free(st);
kfree(st);
@@ -1066,7 +1061,6 @@ static int v9fs_vfs_setattr(struct mnt_idmap *idmap,
if (retval)
return retval;
- retval = -EPERM;
v9ses = v9fs_dentry2v9ses(dentry);
if (iattr->ia_valid & ATTR_FILE) {
fid = iattr->ia_file->private_data;
@@ -1158,7 +1152,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
inode->i_atime.tv_sec = stat->atime;
inode->i_mtime.tv_sec = stat->mtime;
- inode->i_ctime.tv_sec = stat->mtime;
+ inode_set_ctime(inode, stat->mtime, 0);
inode->i_uid = v9ses->dfltuid;
inode->i_gid = v9ses->dfltgid;
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 5361cd2d7996..1312f68965ac 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -366,7 +366,6 @@ static int v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap,
struct posix_acl *dacl = NULL, *pacl = NULL;
p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
- err = 0;
v9ses = v9fs_inode2v9ses(dir);
omode |= S_IFDIR;
@@ -451,7 +450,7 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
v9ses = v9fs_dentry2v9ses(dentry);
if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) {
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
return 0;
} else if (v9ses->cache) {
if (S_ISREG(inode->i_mode)) {
@@ -476,7 +475,7 @@ v9fs_vfs_getattr_dotl(struct mnt_idmap *idmap,
return PTR_ERR(st);
v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
- generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
/* Change block size to what the server returned */
stat->blksize = st->st_blksize;
@@ -646,8 +645,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
inode->i_atime.tv_nsec = stat->st_atime_nsec;
inode->i_mtime.tv_sec = stat->st_mtime_sec;
inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
- inode->i_ctime.tv_sec = stat->st_ctime_sec;
- inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ inode_set_ctime(inode, stat->st_ctime_sec,
+ stat->st_ctime_nsec);
inode->i_uid = stat->st_uid;
inode->i_gid = stat->st_gid;
set_nlink(inode, stat->st_nlink);
@@ -669,8 +668,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
inode->i_mtime.tv_nsec = stat->st_mtime_nsec;
}
if (stat->st_result_mask & P9_STATS_CTIME) {
- inode->i_ctime.tv_sec = stat->st_ctime_sec;
- inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
+ inode_set_ctime(inode, stat->st_ctime_sec,
+ stat->st_ctime_nsec);
}
if (stat->st_result_mask & P9_STATS_UID)
inode->i_uid = stat->st_uid;
diff --git a/fs/Kconfig b/fs/Kconfig
index 18d034ec7953..7da21f563192 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -205,8 +205,8 @@ config TMPFS_XATTR
Extended attributes are name:value pairs associated with inodes by
the kernel or by users (see the attr(5) manual page for details).
- Currently this enables support for the trusted.* and
- security.* namespaces.
+ This enables support for the trusted.*, security.* and user.*
+ namespaces.
You need this for POSIX ACL support on tmpfs.
@@ -233,6 +233,18 @@ config TMPFS_INODE64
If unsure, say N.
+config TMPFS_QUOTA
+ bool "Tmpfs quota support"
+ depends on TMPFS
+ select QUOTA
+ help
+ Quota support allows to set per user and group limits for tmpfs
+ usage. Say Y to enable quota support. Once enabled you can control
+ user and group quota enforcement with quota, usrquota and grpquota
+ mount options.
+
+ If unsure, say N.
+
config ARCH_SUPPORTS_HUGETLBFS
def_bool n
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index c3ac613d0975..20963002578a 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -270,7 +270,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
inode->i_mode = adfs_atts2mode(sb, inode);
adfs_adfs2unix_time(&inode->i_mtime, inode);
inode->i_atime = inode->i_mtime;
- inode->i_ctime = inode->i_mtime;
+ inode_set_ctime_to_ts(inode, inode->i_mtime);
if (S_ISDIR(inode->i_mode)) {
inode->i_op = &adfs_dir_inode_operations;
@@ -331,7 +331,7 @@ adfs_notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
if (ia_valid & ATTR_ATIME)
inode->i_atime = attr->ia_atime;
if (ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (ia_valid & ATTR_MODE) {
ADFS_I(inode)->attr = adfs_mode2atts(sb, inode, attr->ia_mode);
inode->i_mode = adfs_atts2mode(sb, inode);
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 29f11e10a7c7..7ba93efc1143 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -60,7 +60,7 @@ affs_insert_hash(struct inode *dir, struct buffer_head *bh)
mark_buffer_dirty_inode(dir_bh, dir);
affs_brelse(dir_bh);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
inode_inc_iversion(dir);
mark_inode_dirty(dir);
@@ -114,7 +114,7 @@ affs_remove_hash(struct inode *dir, struct buffer_head *rem_bh)
affs_brelse(bh);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
inode_inc_iversion(dir);
mark_inode_dirty(dir);
@@ -315,7 +315,7 @@ affs_remove_header(struct dentry *dentry)
else
clear_nlink(inode);
affs_unlock_link(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
done:
diff --git a/fs/affs/file.c b/fs/affs/file.c
index e43f2f007ac1..472e2bdd5349 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -15,6 +15,7 @@
#include <linux/uio.h>
#include <linux/blkdev.h>
+#include <linux/mpage.h>
#include "affs.h"
static struct buffer_head *affs_get_extblock_slow(struct inode *inode, u32 ext);
@@ -370,9 +371,10 @@ err_alloc:
return -ENOSPC;
}
-static int affs_writepage(struct page *page, struct writeback_control *wbc)
+static int affs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
{
- return block_write_full_page(page, affs_get_block, wbc);
+ return mpage_writepages(mapping, wbc, affs_get_block);
}
static int affs_read_folio(struct file *file, struct folio *folio)
@@ -456,10 +458,11 @@ const struct address_space_operations affs_aops = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
.read_folio = affs_read_folio,
- .writepage = affs_writepage,
+ .writepages = affs_writepages,
.write_begin = affs_write_begin,
.write_end = affs_write_end,
.direct_IO = affs_direct_IO,
+ .migrate_folio = buffer_migrate_folio,
.bmap = _affs_bmap
};
@@ -835,9 +838,10 @@ const struct address_space_operations affs_aops_ofs = {
.dirty_folio = block_dirty_folio,
.invalidate_folio = block_invalidate_folio,
.read_folio = affs_read_folio_ofs,
- //.writepage = affs_writepage_ofs,
+ //.writepages = affs_writepages_ofs,
.write_begin = affs_write_begin_ofs,
- .write_end = affs_write_end_ofs
+ .write_end = affs_write_end_ofs,
+ .migrate_folio = filemap_migrate_folio,
};
/* Free any preallocated blocks. */
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 27f77a52c5c8..060746c63151 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -149,13 +149,13 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
break;
}
- inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec
- = (be32_to_cpu(tail->change.days) * 86400LL +
- be32_to_cpu(tail->change.mins) * 60 +
- be32_to_cpu(tail->change.ticks) / 50 +
- AFFS_EPOCH_DELTA) +
- sys_tz.tz_minuteswest * 60;
- inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_atime.tv_nsec = 0;
+ inode->i_mtime.tv_sec = inode->i_atime.tv_sec =
+ inode_set_ctime(inode,
+ (be32_to_cpu(tail->change.days) * 86400LL +
+ be32_to_cpu(tail->change.mins) * 60 +
+ be32_to_cpu(tail->change.ticks) / 50 + AFFS_EPOCH_DELTA)
+ + sys_tz.tz_minuteswest * 60, 0).tv_sec;
+ inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = 0;
affs_brelse(bh);
unlock_new_inode(inode);
return inode;
@@ -314,7 +314,7 @@ affs_new_inode(struct inode *dir)
inode->i_gid = current_fsgid();
inode->i_ino = block;
set_nlink(inode, 1);
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
atomic_set(&AFFS_I(inode)->i_opencnt, 0);
AFFS_I(inode)->i_blkcnt = 0;
AFFS_I(inode)->i_lc = NULL;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index d12ccfd2a83d..2fe4a5832fcf 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -43,7 +43,7 @@ affs_get_toupper(struct super_block *sb)
* Note: the dentry argument is the parent dentry.
*/
static inline int
-__affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t toupper, bool notruncate)
+__affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t fn, bool notruncate)
{
const u8 *name = qstr->name;
unsigned long hash;
@@ -57,7 +57,7 @@ __affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr, toupper_t tou
hash = init_name_hash(dentry);
len = min(qstr->len, AFFSNAMEMAX);
for (; len > 0; name++, len--)
- hash = partial_name_hash(toupper(*name), hash);
+ hash = partial_name_hash(fn(*name), hash);
qstr->hash = end_name_hash(hash);
return 0;
@@ -80,7 +80,7 @@ affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr)
}
static inline int __affs_compare_dentry(unsigned int len,
- const char *str, const struct qstr *name, toupper_t toupper,
+ const char *str, const struct qstr *name, toupper_t fn,
bool notruncate)
{
const u8 *aname = str;
@@ -106,7 +106,7 @@ static inline int __affs_compare_dentry(unsigned int len,
return 1;
for (; len > 0; len--)
- if (toupper(*aname++) != toupper(*bname++))
+ if (fn(*aname++) != fn(*bname++))
return 1;
return 0;
@@ -135,7 +135,7 @@ affs_intl_compare_dentry(const struct dentry *dentry,
*/
static inline int
-affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
+affs_match(struct dentry *dentry, const u8 *name2, toupper_t fn)
{
const u8 *name = dentry->d_name.name;
int len = dentry->d_name.len;
@@ -148,7 +148,7 @@ affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
return 0;
for (name2++; len > 0; len--)
- if (toupper(*name++) != toupper(*name2++))
+ if (fn(*name++) != fn(*name2++))
return 0;
return 1;
}
@@ -156,12 +156,12 @@ affs_match(struct dentry *dentry, const u8 *name2, toupper_t toupper)
int
affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len)
{
- toupper_t toupper = affs_get_toupper(sb);
+ toupper_t fn = affs_get_toupper(sb);
u32 hash;
hash = len = min(len, AFFSNAMEMAX);
for (; len > 0; len--)
- hash = (hash * 13 + toupper(*name++)) & 0x7ff;
+ hash = (hash * 13 + fn(*name++)) & 0x7ff;
return hash % AFFS_SB(sb)->s_hashsize;
}
@@ -171,7 +171,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
{
struct super_block *sb = dir->i_sb;
struct buffer_head *bh;
- toupper_t toupper = affs_get_toupper(sb);
+ toupper_t fn = affs_get_toupper(sb);
u32 key;
pr_debug("%s(\"%pd\")\n", __func__, dentry);
@@ -189,7 +189,7 @@ affs_find_entry(struct inode *dir, struct dentry *dentry)
bh = affs_bread(sb, key);
if (!bh)
return ERR_PTR(-EIO);
- if (affs_match(dentry, AFFS_TAIL(sb, bh)->name, toupper))
+ if (affs_match(dentry, AFFS_TAIL(sb, bh)->name, fn))
return bh;
key = be32_to_cpu(AFFS_TAIL(sb, bh)->hash_chain);
}
diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c
index d7d9402ff718..95bcbd7654d1 100644
--- a/fs/afs/dynroot.c
+++ b/fs/afs/dynroot.c
@@ -88,7 +88,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root)
set_nlink(inode, 2);
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
- inode->i_ctime = inode->i_atime = inode->i_mtime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_blocks = 0;
inode->i_generation = 0;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 866bab860a88..1c794a1896aa 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -90,7 +90,7 @@ static int afs_inode_init_from_status(struct afs_operation *op,
vnode->status = *status;
t = status->mtime_client;
- inode->i_ctime = t;
+ inode_set_ctime_to_ts(inode, t);
inode->i_mtime = t;
inode->i_atime = t;
inode->i_flags |= S_NOATIME;
@@ -206,7 +206,7 @@ static void afs_apply_status(struct afs_operation *op,
t = status->mtime_client;
inode->i_mtime = t;
if (vp->update_ctime)
- inode->i_ctime = op->ctime;
+ inode_set_ctime_to_ts(inode, op->ctime);
if (vnode->status.data_version != status->data_version)
data_changed = true;
@@ -252,7 +252,7 @@ static void afs_apply_status(struct afs_operation *op,
vnode->netfs.remote_i_size = status->size;
if (change_size) {
afs_set_i_size(vnode, status->size);
- inode->i_ctime = t;
+ inode_set_ctime_to_ts(inode, t);
inode->i_atime = t;
}
}
@@ -773,7 +773,7 @@ int afs_getattr(struct mnt_idmap *idmap, const struct path *path,
do {
read_seqbegin_or_lock(&vnode->cb_lock, &seq);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
stat->nlink > 0)
stat->nlink -= 1;
diff --git a/fs/aio.c b/fs/aio.c
index 77e33619de40..b3174da80ff6 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1447,13 +1447,8 @@ static void aio_complete_rw(struct kiocb *kiocb, long res)
if (kiocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(kiocb->ki_filp);
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
if (S_ISREG(inode->i_mode))
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- file_end_write(kiocb->ki_filp);
+ kiocb_end_write(kiocb);
}
iocb->ki_res.res = res;
@@ -1581,17 +1576,8 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
return ret;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
- /*
- * Open-code file_start_write here to grab freeze protection,
- * which will be released by another thread in
- * aio_complete_rw(). Fool lockdep by telling it the lock got
- * released so that it doesn't complain about the held lock when
- * we return to userspace.
- */
- if (S_ISREG(file_inode(file)->i_mode)) {
- sb_start_write(file_inode(file)->i_sb);
- __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
- }
+ if (S_ISREG(file_inode(file)->i_mode))
+ kiocb_start_write(req);
req->ki_flags |= IOCB_WRITE;
aio_rw_done(req, call_write_iter(file, req, &iter));
}
diff --git a/fs/attr.c b/fs/attr.c
index d60dc1edb526..a8ae5f6d9b16 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -312,7 +312,7 @@ void setattr_copy(struct mnt_idmap *idmap, struct inode *inode,
if (ia_valid & ATTR_MTIME)
inode->i_mtime = attr->ia_mtime;
if (ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
if (!in_group_or_capable(idmap, inode,
@@ -394,9 +394,25 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry,
return error;
if ((ia_valid & ATTR_MODE)) {
- umode_t amode = attr->ia_mode;
+ /*
+ * Don't allow changing the mode of symlinks:
+ *
+ * (1) The vfs doesn't take the mode of symlinks into account
+ * during permission checking.
+ * (2) This has never worked correctly. Most major filesystems
+ * did return EOPNOTSUPP due to interactions with POSIX ACLs
+ * but did still updated the mode of the symlink.
+ * This inconsistency led system call wrapper providers such
+ * as libc to block changing the mode of symlinks with
+ * EOPNOTSUPP already.
+ * (3) To even do this in the first place one would have to use
+ * specific file descriptors and quite some effort.
+ */
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
/* Flag setting protected by i_mutex */
- if (is_sxid(amode))
+ if (is_sxid(attr->ia_mode))
inode->i_flags &= ~S_NOSEC;
}
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig
index 3b3a6b1423c6..54c12d9484cb 100644
--- a/fs/autofs/Kconfig
+++ b/fs/autofs/Kconfig
@@ -1,18 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-config AUTOFS4_FS
- tristate "Old Kconfig name for Kernel automounter support"
- select AUTOFS_FS
- help
- This name exists for people to just automatically pick up the
- new name of the autofs Kconfig option. All it does is select
- the new option name.
-
- It will go away in a release or two as people have
- transitioned to just plain AUTOFS_FS.
-
config AUTOFS_FS
tristate "Kernel automounter support (supports v3, v4 and v5)"
- default n
help
The automounter is a tool to automatically mount remote file systems
on demand. This implementation is partially kernel-based to reduce
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index affa70360b1f..2b49662ed237 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -370,7 +370,7 @@ struct inode *autofs_get_inode(struct super_block *sb, umode_t mode)
inode->i_uid = d_inode(sb->s_root)->i_uid;
inode->i_gid = d_inode(sb->s_root)->i_gid;
}
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_ino = get_next_ino();
if (S_ISDIR(mode)) {
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
index 93046c9dc461..512b9a26c63d 100644
--- a/fs/autofs/root.c
+++ b/fs/autofs/root.c
@@ -600,7 +600,7 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap,
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++;
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
return 0;
}
@@ -633,7 +633,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry)
d_inode(dentry)->i_size = 0;
clear_nlink(d_inode(dentry));
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
spin_lock(&sbi->lookup_lock);
__autofs_add_expiring(dentry);
@@ -749,7 +749,7 @@ static int autofs_dir_mkdir(struct mnt_idmap *idmap,
p_ino = autofs_dentry_ino(dentry->d_parent);
p_ino->count++;
inc_nlink(dir);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
return 0;
}
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
index 54c1f8b8b075..33dd4660d82f 100644
--- a/fs/autofs/waitq.c
+++ b/fs/autofs/waitq.c
@@ -32,8 +32,9 @@ void autofs_catatonic_mode(struct autofs_sb_info *sbi)
wq->status = -ENOENT; /* Magic is gone - report failure */
kfree(wq->name.name - wq->offset);
wq->name.name = NULL;
- wq->wait_ctr--;
- wake_up_interruptible(&wq->queue);
+ wake_up(&wq->queue);
+ if (!--wq->wait_ctr)
+ kfree(wq);
wq = nwq;
}
fput(sbi->pipe); /* Close the pipe */
diff --git a/fs/bad_inode.c b/fs/bad_inode.c
index db649487d58c..83f9566c973b 100644
--- a/fs/bad_inode.c
+++ b/fs/bad_inode.c
@@ -133,8 +133,7 @@ static int bad_inode_fiemap(struct inode *inode,
return -EIO;
}
-static int bad_inode_update_time(struct inode *inode, struct timespec64 *time,
- int flags)
+static int bad_inode_update_time(struct inode *inode, int flags)
{
return -EIO;
}
@@ -209,8 +208,7 @@ void make_bad_inode(struct inode *inode)
remove_inode_hash(inode);
inode->i_mode = S_IFREG;
- inode->i_atime = inode->i_mtime = inode->i_ctime =
- current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_op = &bad_inode_ops;
inode->i_opflags &= ~IOP_XATTR;
inode->i_fop = &bad_file_ops;
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index eee9237386e2..9a16a51fbb88 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -363,7 +363,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
inode->i_mtime.tv_sec =
fs64_to_cpu(sb, raw_inode->last_modified_time) >> 16;
inode->i_mtime.tv_nsec = 0; /* lower 16 bits are not a time */
- inode->i_ctime = inode->i_mtime;
+ inode_set_ctime_to_ts(inode, inode->i_mtime);
inode->i_atime = inode->i_mtime;
befs_ino->i_inode_num = fsrun_to_cpu(sb, raw_inode->inode_num);
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index 040d5140e426..12b8af04dcb3 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -97,7 +97,7 @@ static int bfs_create(struct mnt_idmap *idmap, struct inode *dir,
set_bit(ino, info->si_imap);
info->si_freei--;
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_blocks = 0;
inode->i_op = &bfs_file_inops;
inode->i_fop = &bfs_file_operations;
@@ -158,7 +158,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
return err;
}
inc_nlink(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
ihold(inode);
d_instantiate(new, inode);
@@ -187,9 +187,9 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
}
de->ino = 0;
mark_buffer_dirty_inode(bh, dir);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
inode_dec_link_count(inode);
error = 0;
@@ -240,10 +240,10 @@ static int bfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
goto end_rename;
}
old_de->ino = 0;
- old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+ old_dir->i_mtime = inode_set_ctime_current(old_dir);
mark_inode_dirty(old_dir);
if (new_inode) {
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
inode_dec_link_count(new_inode);
}
mark_buffer_dirty_inode(old_bh, old_dir);
@@ -292,9 +292,9 @@ static int bfs_add_entry(struct inode *dir, const struct qstr *child, int ino)
pos = (block - sblock) * BFS_BSIZE + off;
if (pos >= dir->i_size) {
dir->i_size += BFS_DIRENT_SIZE;
- dir->i_ctime = current_time(dir);
+ inode_set_ctime_current(dir);
}
- dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
de->ino = cpu_to_le16((u16)ino);
for (i = 0; i < BFS_NAMELEN; i++)
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 1926bec2c850..e6a76ae9eb44 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -82,10 +82,9 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
inode->i_blocks = BFS_FILEBLOCKS(di);
inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
inode->i_mtime.tv_sec = le32_to_cpu(di->i_mtime);
- inode->i_ctime.tv_sec = le32_to_cpu(di->i_ctime);
+ inode_set_ctime(inode, le32_to_cpu(di->i_ctime), 0);
inode->i_atime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
- inode->i_ctime.tv_nsec = 0;
brelse(bh);
unlock_new_inode(inode);
@@ -143,7 +142,7 @@ static int bfs_write_inode(struct inode *inode, struct writeback_control *wbc)
di->i_nlink = cpu_to_le32(inode->i_nlink);
di->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
di->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
- di->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+ di->i_ctime = cpu_to_le32(inode_get_ctime(inode).tv_sec);
i_sblock = BFS_I(inode)->i_sblock;
di->i_sblock = cpu_to_le32(i_sblock);
di->i_eblock = cpu_to_le32(BFS_I(inode)->i_eblock);
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index bb202ad369d5..e0108d17b085 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -547,8 +547,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
if (inode) {
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime =
- current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
return inode;
}
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 66fa9ab2c046..3282adc84d52 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -49,9 +49,11 @@ config BTRFS_FS_POSIX_ACL
If you don't know what Access Control Lists are, say N
config BTRFS_FS_CHECK_INTEGRITY
- bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
+ bool "Btrfs with integrity check tool compiled in (DEPRECATED)"
depends on BTRFS_FS
help
+ This feature has been deprecated and will be removed in 6.7.
+
Adds code that examines all block write requests (including
writes of the super block). The goal is to verify that the
state of the filesystem on disk is always consistent, i.e.,
diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h
index ceadfc5d6c66..8cfc8214109c 100644
--- a/fs/btrfs/accessors.h
+++ b/fs/btrfs/accessors.h
@@ -3,6 +3,8 @@
#ifndef BTRFS_ACCESSORS_H
#define BTRFS_ACCESSORS_H
+#include <linux/stddef.h>
+
struct btrfs_map_token {
struct extent_buffer *eb;
char *kaddr;
@@ -34,13 +36,13 @@ static inline void put_unaligned_le8(u8 val, void *p)
read_extent_buffer(eb, (char *)(result), \
((unsigned long)(ptr)) + \
offsetof(type, member), \
- sizeof(((type *)0)->member)))
+ sizeof_field(type, member)))
#define write_eb_member(eb, ptr, type, member, result) (\
write_extent_buffer(eb, (char *)(result), \
((unsigned long)(ptr)) + \
offsetof(type, member), \
- sizeof(((type *)0)->member)))
+ sizeof_field(type, member)))
#define DECLARE_BTRFS_SETGET_BITS(bits) \
u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
@@ -62,25 +64,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
const type *s) \
{ \
- static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
+ static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
} \
static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
u##bits val) \
{ \
- static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
+ static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
} \
static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
const type *s) \
{ \
- static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
+ static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
return btrfs_get_token_##bits(token, s, offsetof(type, member));\
} \
static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
type *s, u##bits val) \
{ \
- static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
+ static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
}
@@ -111,17 +113,14 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s)
{
- static_assert(sizeof(u64) ==
- sizeof(((struct btrfs_dev_item *)0))->total_bytes);
- return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
- total_bytes));
+ static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes));
+ return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes));
}
static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
struct btrfs_dev_item *s,
u64 val)
{
- static_assert(sizeof(u64) ==
- sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+ static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes));
WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
}
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 79336fa853db..b7d54efb4728 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -3373,7 +3373,6 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
struct btrfs_key *node_key,
struct btrfs_backref_node *cur)
{
- struct btrfs_fs_info *fs_info = cache->fs_info;
struct btrfs_backref_edge *edge;
struct btrfs_backref_node *exist;
int ret;
@@ -3462,25 +3461,21 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
ret = handle_direct_tree_backref(cache, &key, cur);
if (ret < 0)
goto out;
- continue;
- } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
- ret = -EINVAL;
- btrfs_print_v0_err(fs_info);
- btrfs_handle_fs_error(fs_info, ret, NULL);
- goto out;
- } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
- continue;
+ } else if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
+ /*
+ * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref
+ * offset means the root objectid. We need to search
+ * the tree to get its parent bytenr.
+ */
+ ret = handle_indirect_tree_backref(cache, path, &key, node_key,
+ cur);
+ if (ret < 0)
+ goto out;
}
-
/*
- * key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
- * means the root objectid. We need to search the tree to get
- * its parent bytenr.
+ * Unrecognized tree backref items (if it can pass tree-checker)
+ * would be ignored.
*/
- ret = handle_indirect_tree_backref(cache, path, &key, node_key,
- cur);
- if (ret < 0)
- goto out;
}
ret = 0;
cur->checked = 1;
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 48ae509f2ac2..0cb1dee965a0 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -441,13 +441,23 @@ void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
u64 num_bytes)
{
struct btrfs_caching_control *caching_ctl;
+ int progress;
caching_ctl = btrfs_get_caching_control(cache);
if (!caching_ctl)
return;
+ /*
+ * We've already failed to allocate from this block group, so even if
+ * there's enough space in the block group it isn't contiguous enough to
+ * allow for an allocation, so wait for at least the next wakeup tick,
+ * or for the thing to be done.
+ */
+ progress = atomic_read(&caching_ctl->progress);
+
wait_event(caching_ctl->wait, btrfs_block_group_done(cache) ||
- (cache->free_space_ctl->free_space >= num_bytes));
+ (progress != atomic_read(&caching_ctl->progress) &&
+ (cache->free_space_ctl->free_space >= num_bytes)));
btrfs_put_caching_control(caching_ctl);
}
@@ -494,33 +504,45 @@ static void fragment_free_space(struct btrfs_block_group *block_group)
#endif
/*
- * This is only called by btrfs_cache_block_group, since we could have freed
- * extents we need to check the pinned_extents for any extents that can't be
- * used yet since their free space will be released as soon as the transaction
- * commits.
+ * Add a free space range to the in memory free space cache of a block group.
+ * This checks if the range contains super block locations and any such
+ * locations are not added to the free space cache.
+ *
+ * @block_group: The target block group.
+ * @start: Start offset of the range.
+ * @end: End offset of the range (exclusive).
+ * @total_added_ret: Optional pointer to return the total amount of space
+ * added to the block group's free space cache.
+ *
+ * Returns 0 on success or < 0 on error.
*/
-u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end)
+int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
+ u64 end, u64 *total_added_ret)
{
struct btrfs_fs_info *info = block_group->fs_info;
- u64 extent_start, extent_end, size, total_added = 0;
+ u64 extent_start, extent_end, size;
int ret;
+ if (total_added_ret)
+ *total_added_ret = 0;
+
while (start < end) {
- ret = find_first_extent_bit(&info->excluded_extents, start,
- &extent_start, &extent_end,
- EXTENT_DIRTY | EXTENT_UPTODATE,
- NULL);
- if (ret)
+ if (!find_first_extent_bit(&info->excluded_extents, start,
+ &extent_start, &extent_end,
+ EXTENT_DIRTY | EXTENT_UPTODATE,
+ NULL))
break;
if (extent_start <= start) {
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
- total_added += size;
ret = btrfs_add_free_space_async_trimmed(block_group,
start, size);
- BUG_ON(ret); /* -ENOMEM or logic error */
+ if (ret)
+ return ret;
+ if (total_added_ret)
+ *total_added_ret += size;
start = extent_end + 1;
} else {
break;
@@ -529,13 +551,15 @@ u64 add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
if (start < end) {
size = end - start;
- total_added += size;
ret = btrfs_add_free_space_async_trimmed(block_group, start,
size);
- BUG_ON(ret); /* -ENOMEM or logic error */
+ if (ret)
+ return ret;
+ if (total_added_ret)
+ *total_added_ret += size;
}
- return total_added;
+ return 0;
}
/*
@@ -779,8 +803,13 @@ next:
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
key.type == BTRFS_METADATA_ITEM_KEY) {
- total_found += add_new_free_space(block_group, last,
- key.objectid);
+ u64 space_added;
+
+ ret = btrfs_add_new_free_space(block_group, last,
+ key.objectid, &space_added);
+ if (ret)
+ goto out;
+ total_found += space_added;
if (key.type == BTRFS_METADATA_ITEM_KEY)
last = key.objectid +
fs_info->nodesize;
@@ -789,22 +818,29 @@ next:
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
- if (wakeup)
+ if (wakeup) {
+ atomic_inc(&caching_ctl->progress);
wake_up(&caching_ctl->wait);
+ }
}
}
path->slots[0]++;
}
- ret = 0;
-
- total_found += add_new_free_space(block_group, last,
- block_group->start + block_group->length);
+ ret = btrfs_add_new_free_space(block_group, last,
+ block_group->start + block_group->length,
+ NULL);
out:
btrfs_free_path(path);
return ret;
}
+static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
+{
+ clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
+ bg->start + bg->length - 1, EXTENT_UPTODATE);
+}
+
static noinline void caching_thread(struct btrfs_work *work)
{
struct btrfs_block_group *block_group;
@@ -898,6 +934,7 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait)
init_waitqueue_head(&caching_ctl->wait);
caching_ctl->block_group = cache;
refcount_set(&caching_ctl->count, 2);
+ atomic_set(&caching_ctl->progress, 0);
btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL);
spin_lock(&cache->lock);
@@ -1640,13 +1677,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
- trace_btrfs_add_unused_block_group(bg);
spin_lock(&fs_info->unused_bgs_lock);
if (list_empty(&bg->bg_list)) {
btrfs_get_block_group(bg);
+ trace_btrfs_add_unused_block_group(bg);
list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
- } else {
+ } else if (!test_bit(BLOCK_GROUP_FLAG_NEW, &bg->runtime_flags)) {
/* Pull out the block group from the reclaim_bgs list. */
+ trace_btrfs_add_unused_block_group(bg);
list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
}
spin_unlock(&fs_info->unused_bgs_lock);
@@ -2072,8 +2110,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
cache->bytes_super += stripe_len;
- ret = btrfs_add_excluded_extent(fs_info, cache->start,
- stripe_len);
+ ret = set_extent_bit(&fs_info->excluded_extents, cache->start,
+ cache->start + stripe_len - 1,
+ EXTENT_UPTODATE, NULL);
if (ret)
return ret;
}
@@ -2087,6 +2126,7 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
/* Shouldn't have super stripes in sequential zones */
if (zoned && nr) {
+ kfree(logical);
btrfs_err(fs_info,
"zoned: block group %llu must not contain super block",
cache->start);
@@ -2098,8 +2138,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
cache->start + cache->length - logical[nr]);
cache->bytes_super += len;
- ret = btrfs_add_excluded_extent(fs_info, logical[nr],
- len);
+ ret = set_extent_bit(&fs_info->excluded_extents, logical[nr],
+ logical[nr] + len - 1,
+ EXTENT_UPTODATE, NULL);
if (ret) {
kfree(logical);
return ret;
@@ -2292,9 +2333,11 @@ static int read_one_block_group(struct btrfs_fs_info *info,
btrfs_free_excluded_extents(cache);
} else if (cache->used == 0) {
cache->cached = BTRFS_CACHE_FINISHED;
- add_new_free_space(cache, cache->start,
- cache->start + cache->length);
+ ret = btrfs_add_new_free_space(cache, cache->start,
+ cache->start + cache->length, NULL);
btrfs_free_excluded_extents(cache);
+ if (ret)
+ goto error;
}
ret = btrfs_add_block_group_cache(info, cache);
@@ -2668,6 +2711,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
next:
btrfs_delayed_refs_rsv_release(fs_info, 1);
list_del_init(&block_group->bg_list);
+ clear_bit(BLOCK_GROUP_FLAG_NEW, &block_group->runtime_flags);
}
btrfs_trans_release_chunk_metadata(trans);
}
@@ -2707,6 +2751,13 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
if (!cache)
return ERR_PTR(-ENOMEM);
+ /*
+ * Mark it as new before adding it to the rbtree of block groups or any
+ * list, so that no other task finds it and calls btrfs_mark_bg_unused()
+ * before the new flag is set.
+ */
+ set_bit(BLOCK_GROUP_FLAG_NEW, &cache->runtime_flags);
+
cache->length = size;
set_free_space_tree_thresholds(cache);
cache->flags = type;
@@ -2730,9 +2781,12 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
return ERR_PTR(ret);
}
- add_new_free_space(cache, chunk_offset, chunk_offset + size);
-
+ ret = btrfs_add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
btrfs_free_excluded_extents(cache);
+ if (ret) {
+ btrfs_put_block_group(cache);
+ return ERR_PTR(ret);
+ }
/*
* Ensure the corresponding space_info object is created and
@@ -4035,7 +4089,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
if (IS_ERR(ret_bg)) {
ret = PTR_ERR(ret_bg);
- } else if (from_extent_allocation) {
+ } else if (from_extent_allocation && (flags & BTRFS_BLOCK_GROUP_DATA)) {
/*
* New block group is likely to be used soon. Try to activate
* it now. Failure is OK for now.
@@ -4233,6 +4287,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
struct btrfs_caching_control *caching_ctl;
struct rb_node *n;
+ if (btrfs_is_zoned(info)) {
+ if (info->active_meta_bg) {
+ btrfs_put_block_group(info->active_meta_bg);
+ info->active_meta_bg = NULL;
+ }
+ if (info->active_system_bg) {
+ btrfs_put_block_group(info->active_system_bg);
+ info->active_system_bg = NULL;
+ }
+ }
+
write_lock(&info->block_group_cache_lock);
while (!list_empty(&info->caching_block_groups)) {
caching_ctl = list_entry(info->caching_block_groups.next,
diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h
index f204addc3fe8..2bdbcb834f95 100644
--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -70,6 +70,11 @@ enum btrfs_block_group_flags {
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
/* Indicate that the block group is placed on a sequential zone */
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
+ /*
+ * Indicate that block group is in the list of new block groups of a
+ * transaction.
+ */
+ BLOCK_GROUP_FLAG_NEW,
};
enum btrfs_caching_type {
@@ -85,6 +90,8 @@ struct btrfs_caching_control {
wait_queue_head_t wait;
struct btrfs_work work;
struct btrfs_block_group *block_group;
+ /* Track progress of caching during allocation. */
+ atomic_t progress;
refcount_t count;
};
@@ -284,8 +291,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group *cache);
-u64 add_new_free_space(struct btrfs_block_group *block_group,
- u64 start, u64 end);
+int btrfs_add_new_free_space(struct btrfs_block_group *block_group,
+ u64 start, u64 end, u64 *total_added_ret);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c
index 6279d200cf83..77684c5e0c8b 100644
--- a/fs/btrfs/block-rsv.c
+++ b/fs/btrfs/block-rsv.c
@@ -349,6 +349,11 @@ void btrfs_update_global_block_rsv(struct btrfs_fs_info *fs_info)
}
read_unlock(&fs_info->global_root_lock);
+ if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) {
+ num_bytes += btrfs_root_used(&fs_info->block_group_root->root_item);
+ min_items++;
+ }
+
/*
* But we also want to reserve enough space so we can do the fallback
* global reserve for an unlink, which is an additional
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d47a927b3504..bda1fdbba666 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -498,12 +498,8 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written, struct writeback_control *wbc);
+ u64 start, u64 end, struct writeback_control *wbc);
int btrfs_writepage_cow_fixup(struct page *page);
-void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
- struct page *page, u64 start,
- u64 end, bool uptodate);
int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
int compress_type);
int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index f2d2b313bde5..9419f4e37a58 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -443,6 +443,7 @@ struct btrfs_drop_extents_args {
struct btrfs_file_private {
void *filldir_buf;
+ u64 last_index;
struct extent_state *llseek_cached_state;
};
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 6b457b010cbc..53c1211dd60b 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1632,6 +1632,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
}
bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ u64 last_index,
struct list_head *ins_list,
struct list_head *del_list)
{
@@ -1651,14 +1652,14 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
mutex_lock(&delayed_node->mutex);
item = __btrfs_first_delayed_insertion_item(delayed_node);
- while (item) {
+ while (item && item->index <= last_index) {
refcount_inc(&item->refs);
list_add_tail(&item->readdir_list, ins_list);
item = __btrfs_next_delayed_item(item);
}
item = __btrfs_first_delayed_deletion_item(delayed_node);
- while (item) {
+ while (item && item->index <= last_index) {
refcount_inc(&item->refs);
list_add_tail(&item->readdir_list, del_list);
item = __btrfs_next_delayed_item(item);
@@ -1735,9 +1736,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
int over = 0;
unsigned char d_type;
- if (list_empty(ins_list))
- return 0;
-
/*
* Changing the data of the delayed item is impossible. So
* we needn't lock them. And we have held i_mutex of the
@@ -1808,9 +1806,9 @@ static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
inode->i_mtime.tv_nsec);
btrfs_set_stack_timespec_sec(&inode_item->ctime,
- inode->i_ctime.tv_sec);
+ inode_get_ctime(inode).tv_sec);
btrfs_set_stack_timespec_nsec(&inode_item->ctime,
- inode->i_ctime.tv_nsec);
+ inode_get_ctime(inode).tv_nsec);
btrfs_set_stack_timespec_sec(&inode_item->otime,
BTRFS_I(inode)->i_otime.tv_sec);
@@ -1861,8 +1859,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(&inode_item->mtime);
inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->mtime);
- inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(&inode_item->ctime);
- inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(&inode_item->ctime);
+ inode_set_ctime(inode, btrfs_stack_timespec_sec(&inode_item->ctime),
+ btrfs_stack_timespec_nsec(&inode_item->ctime));
BTRFS_I(inode)->i_otime.tv_sec =
btrfs_stack_timespec_sec(&inode_item->otime);
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 4f21daa3dbc7..dc1085b2a397 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -148,6 +148,7 @@ void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);
/* Used for readdir() */
bool btrfs_readdir_get_delayed_items(struct inode *inode,
+ u64 last_index,
struct list_head *ins_list,
struct list_head *del_list);
void btrfs_readdir_put_delayed_items(struct inode *inode,
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 5f10965fd72b..fff22ed55c42 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -792,9 +792,9 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
lockdep_assert_held(&srcdev->fs_info->chunk_mutex);
- while (!find_first_extent_bit(&srcdev->alloc_state, start,
- &found_start, &found_end,
- CHUNK_ALLOCATED, &cached_state)) {
+ while (find_first_extent_bit(&srcdev->alloc_state, start,
+ &found_start, &found_end,
+ CHUNK_ALLOCATED, &cached_state)) {
ret = set_extent_bit(&tgtdev->alloc_state, found_start,
found_end, CHUNK_ALLOCATED, NULL);
if (ret)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7513388b0567..0a96ea8c1d3a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -313,21 +313,16 @@ static bool check_tree_block_fsid(struct extent_buffer *eb)
struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
u8 fsid[BTRFS_FSID_SIZE];
- u8 *metadata_uuid;
read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
BTRFS_FSID_SIZE);
+
/*
- * Checking the incompat flag is only valid for the current fs. For
- * seed devices it's forbidden to have their uuid changed so reading
- * ->fsid in this case is fine
+ * alloc_fs_devices() copies the fsid into metadata_uuid if the
+ * metadata_uuid is unset in the superblock, including for a seed device.
+ * So, we can use fs_devices->metadata_uuid.
*/
- if (btrfs_fs_incompat(fs_info, METADATA_UUID))
- metadata_uuid = fs_devices->metadata_uuid;
- else
- metadata_uuid = fs_devices->fsid;
-
- if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE))
+ if (memcmp(fsid, fs_info->fs_devices->metadata_uuid, BTRFS_FSID_SIZE) == 0)
return false;
list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
@@ -1103,7 +1098,8 @@ static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
btrfs_drew_lock_init(&root->snapshot_lock);
if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID &&
- !btrfs_is_data_reloc_root(root)) {
+ !btrfs_is_data_reloc_root(root) &&
+ is_fstree(root->root_key.objectid)) {
set_bit(BTRFS_ROOT_SHAREABLE, &root->state);
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1300,6 +1296,16 @@ static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
root = btrfs_get_global_root(fs_info, objectid);
if (root)
return root;
+
+ /*
+ * If we're called for non-subvolume trees, and above function didn't
+ * find one, do not try to read it from disk.
+ *
+ * This is namely for free-space-tree and quota tree, which can change
+ * at runtime and should only be grabbed from fs_info.
+ */
+ if (!is_fstree(objectid) && objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+ return ERR_PTR(-ENOENT);
again:
root = btrfs_lookup_fs_root(fs_info, objectid);
if (root) {
@@ -2373,21 +2379,18 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
- if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
- BTRFS_FSID_SIZE)) {
+ if (memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
"superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
- fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
+ sb->fsid, fs_info->fs_devices->fsid);
ret = -EINVAL;
}
- if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
- memcmp(fs_info->fs_devices->metadata_uuid,
- fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
+ if (memcmp(fs_info->fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(sb),
+ BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
"superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
- fs_info->super_copy->metadata_uuid,
- fs_info->fs_devices->metadata_uuid);
+ btrfs_sb_fsid_ptr(sb), fs_info->fs_devices->metadata_uuid);
ret = -EINVAL;
}
@@ -2858,6 +2861,56 @@ static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
return 0;
}
+static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
+{
+ u64 root_objectid = 0;
+ struct btrfs_root *gang[8];
+ int i = 0;
+ int err = 0;
+ unsigned int ret = 0;
+
+ while (1) {
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, root_objectid,
+ ARRAY_SIZE(gang));
+ if (!ret) {
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ break;
+ }
+ root_objectid = gang[ret - 1]->root_key.objectid + 1;
+
+ for (i = 0; i < ret; i++) {
+ /* Avoid to grab roots in dead_roots. */
+ if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+ gang[i] = NULL;
+ continue;
+ }
+ /* Grab all the search result for later use. */
+ gang[i] = btrfs_grab_root(gang[i]);
+ }
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+
+ for (i = 0; i < ret; i++) {
+ if (!gang[i])
+ continue;
+ root_objectid = gang[i]->root_key.objectid;
+ err = btrfs_orphan_cleanup(gang[i]);
+ if (err)
+ goto out;
+ btrfs_put_root(gang[i]);
+ }
+ root_objectid++;
+ }
+out:
+ /* Release the uncleaned roots due to error. */
+ for (; i < ret; i++) {
+ if (gang[i])
+ btrfs_put_root(gang[i]);
+ }
+ return err;
+}
+
/*
* Some options only have meaning at mount time and shouldn't persist across
* remounts, or be displayed. Clear these at the end of mount and remount
@@ -3211,7 +3264,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
/* check FS state, whether FS is broken. */
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
- set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
+ WRITE_ONCE(fs_info->fs_error, -EUCLEAN);
/*
* In the long term, we'll store the compression type in the super
@@ -3406,6 +3459,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
btrfs_free_zone_cache(fs_info);
+ btrfs_check_active_zone_reservation(fs_info);
+
if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
!btrfs_check_rw_degradable(fs_info, NULL)) {
btrfs_warn(fs_info,
@@ -3438,11 +3493,16 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
* For devices supporting discard turn on discard=async automatically,
* unless it's already set or disabled. This could be turned off by
* nodiscard for the same mount.
+ *
+ * The zoned mode piggy backs on the discard functionality for
+ * resetting a zone. There is no reason to delay the zone reset as it is
+ * fast enough. So, do not enable async discard for zoned mode.
*/
if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) ||
btrfs_test_opt(fs_info, DISCARD_ASYNC) ||
btrfs_test_opt(fs_info, NODISCARD)) &&
- fs_info->fs_devices->discardable) {
+ fs_info->fs_devices->discardable &&
+ !btrfs_is_zoned(fs_info)) {
btrfs_set_and_info(fs_info, DISCARD_ASYNC,
"auto enabling async discard");
}
@@ -4120,56 +4180,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
btrfs_put_root(root);
}
-int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
-{
- u64 root_objectid = 0;
- struct btrfs_root *gang[8];
- int i = 0;
- int err = 0;
- unsigned int ret = 0;
-
- while (1) {
- spin_lock(&fs_info->fs_roots_radix_lock);
- ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
- (void **)gang, root_objectid,
- ARRAY_SIZE(gang));
- if (!ret) {
- spin_unlock(&fs_info->fs_roots_radix_lock);
- break;
- }
- root_objectid = gang[ret - 1]->root_key.objectid + 1;
-
- for (i = 0; i < ret; i++) {
- /* Avoid to grab roots in dead_roots */
- if (btrfs_root_refs(&gang[i]->root_item) == 0) {
- gang[i] = NULL;
- continue;
- }
- /* grab all the search result for later use */
- gang[i] = btrfs_grab_root(gang[i]);
- }
- spin_unlock(&fs_info->fs_roots_radix_lock);
-
- for (i = 0; i < ret; i++) {
- if (!gang[i])
- continue;
- root_objectid = gang[i]->root_key.objectid;
- err = btrfs_orphan_cleanup(gang[i]);
- if (err)
- goto out;
- btrfs_put_root(gang[i]);
- }
- root_objectid++;
- }
-out:
- /* release the uncleaned roots due to error */
- for (; i < ret; i++) {
- if (gang[i])
- btrfs_put_root(gang[i]);
- }
- return err;
-}
-
int btrfs_commit_super(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root = fs_info->tree_root;
@@ -4212,7 +4222,7 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
u64 found_end;
found = true;
- while (!find_first_extent_bit(&trans->dirty_pages, cur,
+ while (find_first_extent_bit(&trans->dirty_pages, cur,
&found_start, &found_end, EXTENT_DIRTY, &cached)) {
dirty_bytes += found_end + 1 - found_start;
cur = found_end + 1;
@@ -4536,9 +4546,7 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
+ LIST_HEAD(splice);
spin_lock(&fs_info->ordered_root_lock);
list_splice_init(&fs_info->ordered_roots, &splice);
@@ -4644,9 +4652,7 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
{
struct btrfs_inode *btrfs_inode;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
+ LIST_HEAD(splice);
spin_lock(&root->delalloc_lock);
list_splice_init(&root->delalloc_inodes, &splice);
@@ -4679,9 +4685,7 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root;
- struct list_head splice;
-
- INIT_LIST_HEAD(&splice);
+ LIST_HEAD(splice);
spin_lock(&fs_info->delalloc_root_lock);
list_splice_init(&fs_info->delalloc_roots, &splice);
@@ -4700,21 +4704,16 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
spin_unlock(&fs_info->delalloc_root_lock);
}
-static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *dirty_pages,
- int mark)
+static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *dirty_pages,
+ int mark)
{
- int ret;
struct extent_buffer *eb;
u64 start = 0;
u64 end;
- while (1) {
- ret = find_first_extent_bit(dirty_pages, start, &start, &end,
- mark, NULL);
- if (ret)
- break;
-
+ while (find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark, NULL)) {
clear_extent_bits(dirty_pages, start, end, mark);
while (start <= end) {
eb = find_extent_buffer(fs_info, start);
@@ -4730,16 +4729,13 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
free_extent_buffer_stale(eb);
}
}
-
- return ret;
}
-static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
- struct extent_io_tree *unpin)
+static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
+ struct extent_io_tree *unpin)
{
u64 start;
u64 end;
- int ret;
while (1) {
struct extent_state *cached_state = NULL;
@@ -4751,9 +4747,8 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
* the same extent range.
*/
mutex_lock(&fs_info->unused_bg_unpin_mutex);
- ret = find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, &cached_state);
- if (ret) {
+ if (!find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY, &cached_state)) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
@@ -4764,8 +4759,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
cond_resched();
}
-
- return 0;
}
static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b03767f4d7ed..02b645744a82 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -77,7 +77,6 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr);
struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info);
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
-int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
diff --git a/fs/btrfs/extent-io-tree.c b/fs/btrfs/extent-io-tree.c
index a2315a4b8b75..ff8e117a1ace 100644
--- a/fs/btrfs/extent-io-tree.c
+++ b/fs/btrfs/extent-io-tree.c
@@ -831,15 +831,15 @@ static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *t
*
* Note: If there are multiple bits set in @bits, any of them will match.
*
- * Return 0 if we find something, and update @start_ret and @end_ret.
- * Return 1 if we found nothing.
+ * Return true if we find something, and update @start_ret and @end_ret.
+ * Return false if we found nothing.
*/
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, u32 bits,
- struct extent_state **cached_state)
+bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, u32 bits,
+ struct extent_state **cached_state)
{
struct extent_state *state;
- int ret = 1;
+ bool ret = false;
spin_lock(&tree->lock);
if (cached_state && *cached_state) {
@@ -863,7 +863,7 @@ got_it:
cache_state_if_flags(state, cached_state, 0);
*start_ret = state->start;
*end_ret = state->end;
- ret = 0;
+ ret = true;
}
out:
spin_unlock(&tree->lock);
diff --git a/fs/btrfs/extent-io-tree.h b/fs/btrfs/extent-io-tree.h
index fbd3b275ab1c..28c23a23d121 100644
--- a/fs/btrfs/extent-io-tree.h
+++ b/fs/btrfs/extent-io-tree.h
@@ -182,9 +182,9 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u32 clear_bits,
struct extent_state **cached_state);
-int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
- u64 *start_ret, u64 *end_ret, u32 bits,
- struct extent_state **cached_state);
+bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
+ u64 *start_ret, u64 *end_ret, u32 bits,
+ struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits);
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 911908ea5f6f..f356f08b55cb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -69,27 +69,6 @@ static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
return (cache->flags & bits) == bits;
}
-int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
- u64 start, u64 num_bytes)
-{
- u64 end = start + num_bytes - 1;
- set_extent_bit(&fs_info->excluded_extents, start, end,
- EXTENT_UPTODATE, NULL);
- return 0;
-}
-
-void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
-{
- struct btrfs_fs_info *fs_info = cache->fs_info;
- u64 start, end;
-
- start = cache->start;
- end = start + cache->length - 1;
-
- clear_extent_bits(&fs_info->excluded_extents, start, end,
- EXTENT_UPTODATE);
-}
-
/* simple helper to search for an existing data extent at a given offset */
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
{
@@ -187,8 +166,10 @@ search_again:
num_refs = btrfs_extent_refs(leaf, ei);
extent_flags = btrfs_extent_flags(leaf, ei);
} else {
- ret = -EINVAL;
- btrfs_print_v0_err(fs_info);
+ ret = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected extent item size, has %u expect >= %zu",
+ item_size, sizeof(*ei));
if (trans)
btrfs_abort_transaction(trans, ret);
else
@@ -402,11 +383,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
}
}
+ WARN_ON(1);
btrfs_print_leaf(eb);
btrfs_err(eb->fs_info,
"eb %llu iref 0x%lx invalid extent inline ref type %d",
eb->start, (unsigned long)iref, type);
- WARN_ON(1);
return BTRFS_REF_TYPE_INVALID;
}
@@ -624,12 +605,12 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
ref2 = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_shared_data_ref);
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
- } else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
- btrfs_print_v0_err(trans->fs_info);
- btrfs_abort_transaction(trans, -EINVAL);
- return -EINVAL;
} else {
- BUG();
+ btrfs_err(trans->fs_info,
+ "unrecognized backref key (%llu %u %llu)",
+ key.objectid, key.type, key.offset);
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ return -EUCLEAN;
}
BUG_ON(num_refs < refs_to_drop);
@@ -660,7 +641,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
leaf = path->nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
- BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
if (iref) {
/*
* If type is invalid, we should have bailed out earlier than
@@ -869,6 +849,11 @@ again:
err = -ENOENT;
goto out;
} else if (WARN_ON(ret)) {
+ btrfs_print_leaf(path->nodes[0]);
+ btrfs_err(fs_info,
+"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
+ bytenr, num_bytes, parent, root_objectid, owner,
+ offset);
err = -EIO;
goto out;
}
@@ -876,8 +861,10 @@ again:
leaf = path->nodes[0];
item_size = btrfs_item_size(leaf, path->slots[0]);
if (unlikely(item_size < sizeof(*ei))) {
- err = -EINVAL;
- btrfs_print_v0_err(fs_info);
+ err = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected extent item size, has %llu expect >= %zu",
+ item_size, sizeof(*ei));
btrfs_abort_transaction(trans, err);
goto out;
}
@@ -1079,13 +1066,13 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
/*
* helper to update/remove inline back ref
*/
-static noinline_for_stack
-void update_inline_extent_backref(struct btrfs_path *path,
+static noinline_for_stack int update_inline_extent_backref(struct btrfs_path *path,
struct btrfs_extent_inline_ref *iref,
int refs_to_mod,
struct btrfs_delayed_extent_op *extent_op)
{
struct extent_buffer *leaf = path->nodes[0];
+ struct btrfs_fs_info *fs_info = leaf->fs_info;
struct btrfs_extent_item *ei;
struct btrfs_extent_data_ref *dref = NULL;
struct btrfs_shared_data_ref *sref = NULL;
@@ -1098,18 +1085,33 @@ void update_inline_extent_backref(struct btrfs_path *path,
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
refs = btrfs_extent_refs(leaf, ei);
- WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
+ if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
+ struct btrfs_key key;
+ u32 extent_size;
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ extent_size = fs_info->nodesize;
+ else
+ extent_size = key.offset;
+ btrfs_print_leaf(leaf);
+ btrfs_err(fs_info,
+ "invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
+ key.objectid, extent_size, refs_to_mod, refs);
+ return -EUCLEAN;
+ }
refs += refs_to_mod;
btrfs_set_extent_refs(leaf, ei, refs);
if (extent_op)
__run_delayed_extent_op(extent_op, leaf, ei);
+ type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
/*
- * If type is invalid, we should have bailed out after
- * lookup_inline_extent_backref().
+ * Function btrfs_get_extent_inline_ref_type() has already printed
+ * error messages.
*/
- type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
- ASSERT(type != BTRFS_REF_TYPE_INVALID);
+ if (unlikely(type == BTRFS_REF_TYPE_INVALID))
+ return -EUCLEAN;
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -1119,10 +1121,43 @@ void update_inline_extent_backref(struct btrfs_path *path,
refs = btrfs_shared_data_ref_count(leaf, sref);
} else {
refs = 1;
- BUG_ON(refs_to_mod != -1);
+ /*
+ * For tree blocks we can only drop one ref for it, and tree
+ * blocks should not have refs > 1.
+ *
+ * Furthermore if we're inserting a new inline backref, we
+ * won't reach this path either. That would be
+ * setup_inline_extent_backref().
+ */
+ if (unlikely(refs_to_mod != -1)) {
+ struct btrfs_key key;
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ btrfs_print_leaf(leaf);
+ btrfs_err(fs_info,
+ "invalid refs_to_mod for tree block %llu, has %d expect -1",
+ key.objectid, refs_to_mod);
+ return -EUCLEAN;
+ }
}
- BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
+ if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
+ struct btrfs_key key;
+ u32 extent_size;
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.type == BTRFS_METADATA_ITEM_KEY)
+ extent_size = fs_info->nodesize;
+ else
+ extent_size = key.offset;
+ btrfs_print_leaf(leaf);
+ btrfs_err(fs_info,
+"invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
+ (unsigned long)iref, key.objectid, extent_size,
+ refs_to_mod, refs);
+ return -EUCLEAN;
+ }
refs += refs_to_mod;
if (refs > 0) {
@@ -1142,6 +1177,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
btrfs_truncate_item(path, item_size, 1);
}
btrfs_mark_buffer_dirty(leaf);
+ return 0;
}
static noinline_for_stack
@@ -1170,7 +1206,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
bytenr, num_bytes, root_objectid, path->slots[0]);
return -EUCLEAN;
}
- update_inline_extent_backref(path, iref, refs_to_add, extent_op);
+ ret = update_inline_extent_backref(path, iref, refs_to_add, extent_op);
} else if (ret == -ENOENT) {
setup_inline_extent_backref(trans->fs_info, path, iref, parent,
root_objectid, owner, offset,
@@ -1190,7 +1226,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
BUG_ON(!is_data && refs_to_drop != 1);
if (iref)
- update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+ ret = update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
else if (is_data)
ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
else
@@ -1629,8 +1665,10 @@ again:
item_size = btrfs_item_size(leaf, path->slots[0]);
if (unlikely(item_size < sizeof(*ei))) {
- err = -EINVAL;
- btrfs_print_v0_err(fs_info);
+ err = -EUCLEAN;
+ btrfs_err(fs_info,
+ "unexpected extent item size, has %u expect >= %zu",
+ item_size, sizeof(*ei));
btrfs_abort_transaction(trans, err);
goto out;
}
@@ -2751,9 +2789,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
struct extent_state *cached_state = NULL;
mutex_lock(&fs_info->unused_bg_unpin_mutex);
- ret = find_first_extent_bit(unpin, 0, &start, &end,
- EXTENT_DIRTY, &cached_state);
- if (ret) {
+ if (!find_first_extent_bit(unpin, 0, &start, &end,
+ EXTENT_DIRTY, &cached_state)) {
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
break;
}
@@ -3059,8 +3096,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
leaf = path->nodes[0];
item_size = btrfs_item_size(leaf, extent_slot);
if (unlikely(item_size < sizeof(*ei))) {
- ret = -EINVAL;
- btrfs_print_v0_err(info);
+ ret = -EUCLEAN;
+ btrfs_err(trans->fs_info,
+ "unexpected extent item size, has %u expect >= %zu",
+ item_size, sizeof(*ei));
btrfs_abort_transaction(trans, ret);
goto out;
}
@@ -3351,11 +3390,38 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
}
enum btrfs_loop_type {
+ /*
+ * Start caching block groups but do not wait for progress or for them
+ * to be done.
+ */
LOOP_CACHING_NOWAIT,
+
+ /*
+ * Wait for the block group free_space >= the space we're waiting for if
+ * the block group isn't cached.
+ */
LOOP_CACHING_WAIT,
+
+ /*
+ * Allow allocations to happen from block groups that do not yet have a
+ * size classification.
+ */
LOOP_UNSET_SIZE_CLASS,
+
+ /*
+ * Allocate a chunk and then retry the allocation.
+ */
LOOP_ALLOC_CHUNK,
+
+ /*
+ * Ignore the size class restrictions for this allocation.
+ */
LOOP_WRONG_SIZE_CLASS,
+
+ /*
+ * Ignore the empty size, only try to allocate the number of bytes
+ * needed for this allocation.
+ */
LOOP_NO_EMPTY_SIZE,
};
@@ -3427,7 +3493,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
* Helper function for find_free_extent().
*
* Return -ENOENT to inform caller that we need fallback to unclustered mode.
- * Return -EAGAIN to inform caller that we need to re-search this block group
* Return >0 to inform caller that we find nothing
* Return 0 means we have found a location and set ffe_ctl->found_offset.
*/
@@ -3508,14 +3573,6 @@ refill_cluster:
trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
return 0;
}
- } else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
- !ffe_ctl->retry_clustered) {
- spin_unlock(&last_ptr->refill_lock);
-
- ffe_ctl->retry_clustered = true;
- btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
- ffe_ctl->empty_cluster + ffe_ctl->empty_size);
- return -EAGAIN;
}
/*
* At this point we either didn't find a cluster or we weren't able to
@@ -3530,7 +3587,6 @@ refill_cluster:
/*
* Return >0 to inform caller that we find nothing
* Return 0 when we found an free extent and set ffe_ctrl->found_offset
- * Return -EAGAIN to inform caller that we need to re-search this block group
*/
static int find_free_extent_unclustered(struct btrfs_block_group *bg,
struct find_free_extent_ctl *ffe_ctl)
@@ -3568,25 +3624,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
ffe_ctl->num_bytes, ffe_ctl->empty_size,
&ffe_ctl->max_extent_size);
-
- /*
- * If we didn't find a chunk, and we haven't failed on this block group
- * before, and this block group is in the middle of caching and we are
- * ok with waiting, then go ahead and wait for progress to be made, and
- * set @retry_unclustered to true.
- *
- * If @retry_unclustered is true then we've already waited on this
- * block group once and should move on to the next block group.
- */
- if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
- ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
- btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
- ffe_ctl->empty_size);
- ffe_ctl->retry_unclustered = true;
- return -EAGAIN;
- } else if (!offset) {
+ if (!offset)
return 1;
- }
ffe_ctl->found_offset = offset;
return 0;
}
@@ -3600,7 +3639,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
/* We want to try and use the cluster allocator, so lets look there */
if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
- if (ret >= 0 || ret == -EAGAIN)
+ if (ret >= 0)
return ret;
/* ret == -ENOENT case falls through */
}
@@ -3685,7 +3724,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
}
spin_unlock(&block_group->lock);
- if (!ret && !btrfs_zone_activate(block_group)) {
+ /* Metadata block group is activated at write time. */
+ if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
+ !btrfs_zone_activate(block_group)) {
ret = 1;
/*
* May need to clear fs_info->{treelog,data_reloc}_bg.
@@ -3709,7 +3750,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
fs_info->data_reloc_bg == 0);
if (block_group->ro ||
- test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+ (!ffe_ctl->for_data_reloc &&
+ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
ret = 1;
goto out;
}
@@ -3752,8 +3794,26 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
fs_info->treelog_bg = block_group->start;
- if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
- fs_info->data_reloc_bg = block_group->start;
+ if (ffe_ctl->for_data_reloc) {
+ if (!fs_info->data_reloc_bg)
+ fs_info->data_reloc_bg = block_group->start;
+ /*
+ * Do not allow allocations from this block group, unless it is
+ * for data relocation. Compared to increasing the ->ro, setting
+ * the ->zoned_data_reloc_ongoing flag still allows nocow
+ * writers to come in. See btrfs_inc_nocow_writers().
+ *
+ * We need to disable an allocation to avoid an allocation of
+ * regular (non-relocation data) extent. With mix of relocation
+ * extents and regular extents, we can dispatch WRITE commands
+ * (for relocation extents) and ZONE APPEND commands (for
+ * regular extents) at the same time to the same zone, which
+ * easily break the write pointer.
+ *
+ * Also, this flag avoids this block group to be zone finished.
+ */
+ set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+ }
ffe_ctl->found_offset = start + block_group->alloc_offset;
block_group->alloc_offset += num_bytes;
@@ -3771,24 +3831,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
out:
if (ret && ffe_ctl->for_treelog)
fs_info->treelog_bg = 0;
- if (ret && ffe_ctl->for_data_reloc &&
- fs_info->data_reloc_bg == block_group->start) {
- /*
- * Do not allow further allocations from this block group.
- * Compared to increasing the ->ro, setting the
- * ->zoned_data_reloc_ongoing flag still allows nocow
- * writers to come in. See btrfs_inc_nocow_writers().
- *
- * We need to disable an allocation to avoid an allocation of
- * regular (non-relocation data) extent. With mix of relocation
- * extents and regular extents, we can dispatch WRITE commands
- * (for relocation extents) and ZONE APPEND commands (for
- * regular extents) at the same time to the same zone, which
- * easily break the write pointer.
- */
- set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
+ if (ret && ffe_ctl->for_data_reloc)
fs_info->data_reloc_bg = 0;
- }
spin_unlock(&fs_info->relocation_bg_lock);
spin_unlock(&fs_info->treelog_bg_lock);
spin_unlock(&block_group->lock);
@@ -3816,8 +3860,7 @@ static void release_block_group(struct btrfs_block_group *block_group,
{
switch (ffe_ctl->policy) {
case BTRFS_EXTENT_ALLOC_CLUSTERED:
- ffe_ctl->retry_clustered = false;
- ffe_ctl->retry_unclustered = false;
+ ffe_ctl->retry_uncached = false;
break;
case BTRFS_EXTENT_ALLOC_ZONED:
/* Nothing to do */
@@ -3861,6 +3904,10 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
struct find_free_extent_ctl *ffe_ctl)
{
+ /* Block group's activeness is not a requirement for METADATA block groups. */
+ if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
+ return 0;
+
/* If we can activate new zone, just allocate a chunk and use it */
if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
return 0;
@@ -3949,15 +3996,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
return 1;
- /*
- * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
- * caching kthreads as we move along
- * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
- * LOOP_UNSET_SIZE_CLASS, allow unset size class
- * LOOP_ALLOC_CHUNK, force a chunk allocation and try again
- * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
- * again
- */
+ /* See the comments for btrfs_loop_type for an explanation of the phases. */
if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
ffe_ctl->index = 0;
/*
@@ -4168,9 +4207,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
ffe_ctl->orig_have_caching_bg = false;
ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
ffe_ctl->loop = 0;
- /* For clustered allocation */
- ffe_ctl->retry_clustered = false;
- ffe_ctl->retry_unclustered = false;
+ ffe_ctl->retry_uncached = false;
ffe_ctl->cached = 0;
ffe_ctl->max_extent_size = 0;
ffe_ctl->total_free_space = 0;
@@ -4310,24 +4347,23 @@ have_block_group:
ret = 0;
}
- if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
+ if (unlikely(block_group->cached == BTRFS_CACHE_ERROR)) {
+ if (!cache_block_group_error)
+ cache_block_group_error = -EIO;
goto loop;
+ }
if (!find_free_extent_check_size_class(ffe_ctl, block_group))
goto loop;
bg_ret = NULL;
ret = do_allocation(block_group, ffe_ctl, &bg_ret);
- if (ret == 0) {
- if (bg_ret && bg_ret != block_group) {
- btrfs_release_block_group(block_group,
- ffe_ctl->delalloc);
- block_group = bg_ret;
- }
- } else if (ret == -EAGAIN) {
- goto have_block_group;
- } else if (ret > 0) {
+ if (ret > 0)
goto loop;
+
+ if (bg_ret && bg_ret != block_group) {
+ btrfs_release_block_group(block_group, ffe_ctl->delalloc);
+ block_group = bg_ret;
}
/* Checks */
@@ -4368,6 +4404,15 @@ have_block_group:
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
break;
loop:
+ if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
+ !ffe_ctl->retry_uncached) {
+ ffe_ctl->retry_uncached = true;
+ btrfs_wait_block_group_cache_progress(block_group,
+ ffe_ctl->num_bytes +
+ ffe_ctl->empty_cluster +
+ ffe_ctl->empty_size);
+ goto have_block_group;
+ }
release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
cond_resched();
}
diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h
index 429d5c570061..88c249c37516 100644
--- a/fs/btrfs/extent-tree.h
+++ b/fs/btrfs/extent-tree.h
@@ -48,16 +48,11 @@ struct find_free_extent_ctl {
int loop;
/*
- * Whether we're refilling a cluster, if true we need to re-search
- * current block group but don't try to refill the cluster again.
+ * Set to true if we're retrying the allocation on this block group
+ * after waiting for caching progress, this is so that we retry only
+ * once before moving on to another block group.
*/
- bool retry_clustered;
-
- /*
- * Whether we're updating free space cache, if true we need to re-search
- * current block group but don't try updating free space cache again.
- */
- bool retry_unclustered;
+ bool retry_uncached;
/* If current block group is cached */
int cached;
@@ -96,9 +91,6 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
enum btrfs_inline_ref_type is_data);
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
-int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
- u64 start, u64 num_bytes);
-void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count);
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
struct btrfs_delayed_ref_root *delayed_refs,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a91d5ad27984..ac3fca5a5e41 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -181,34 +181,9 @@ void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
}
}
-void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
-{
- struct address_space *mapping = inode->i_mapping;
- unsigned long index = start >> PAGE_SHIFT;
- unsigned long end_index = end >> PAGE_SHIFT;
- struct folio *folio;
-
- while (index <= end_index) {
- folio = filemap_get_folio(mapping, index);
- filemap_dirty_folio(mapping, folio);
- folio_account_redirty(folio);
- index += folio_nr_pages(folio);
- folio_put(folio);
- }
-}
-
-/*
- * Process one page for __process_pages_contig().
- *
- * Return >0 if we hit @page == @locked_page.
- * Return 0 if we updated the page status.
- * Return -EGAIN if the we need to try again.
- * (For PAGE_LOCK case but got dirty page or page not belong to mapping)
- */
-static int process_one_page(struct btrfs_fs_info *fs_info,
- struct address_space *mapping,
- struct page *page, struct page *locked_page,
- unsigned long page_ops, u64 start, u64 end)
+static void process_one_page(struct btrfs_fs_info *fs_info,
+ struct page *page, struct page *locked_page,
+ unsigned long page_ops, u64 start, u64 end)
{
u32 len;
@@ -224,94 +199,36 @@ static int process_one_page(struct btrfs_fs_info *fs_info,
if (page_ops & PAGE_END_WRITEBACK)
btrfs_page_clamp_clear_writeback(fs_info, page, start, len);
- if (page == locked_page)
- return 1;
-
- if (page_ops & PAGE_LOCK) {
- int ret;
-
- ret = btrfs_page_start_writer_lock(fs_info, page, start, len);
- if (ret)
- return ret;
- if (!PageDirty(page) || page->mapping != mapping) {
- btrfs_page_end_writer_lock(fs_info, page, start, len);
- return -EAGAIN;
- }
- }
- if (page_ops & PAGE_UNLOCK)
+ if (page != locked_page && (page_ops & PAGE_UNLOCK))
btrfs_page_end_writer_lock(fs_info, page, start, len);
- return 0;
}
-static int __process_pages_contig(struct address_space *mapping,
- struct page *locked_page,
- u64 start, u64 end, unsigned long page_ops,
- u64 *processed_end)
+static void __process_pages_contig(struct address_space *mapping,
+ struct page *locked_page, u64 start, u64 end,
+ unsigned long page_ops)
{
struct btrfs_fs_info *fs_info = btrfs_sb(mapping->host->i_sb);
pgoff_t start_index = start >> PAGE_SHIFT;
pgoff_t end_index = end >> PAGE_SHIFT;
pgoff_t index = start_index;
- unsigned long pages_processed = 0;
struct folio_batch fbatch;
- int err = 0;
int i;
- if (page_ops & PAGE_LOCK) {
- ASSERT(page_ops == PAGE_LOCK);
- ASSERT(processed_end && *processed_end == start);
- }
-
folio_batch_init(&fbatch);
while (index <= end_index) {
int found_folios;
found_folios = filemap_get_folios_contig(mapping, &index,
end_index, &fbatch);
-
- if (found_folios == 0) {
- /*
- * Only if we're going to lock these pages, we can find
- * nothing at @index.
- */
- ASSERT(page_ops & PAGE_LOCK);
- err = -EAGAIN;
- goto out;
- }
-
for (i = 0; i < found_folios; i++) {
- int process_ret;
struct folio *folio = fbatch.folios[i];
- process_ret = process_one_page(fs_info, mapping,
- &folio->page, locked_page, page_ops,
- start, end);
- if (process_ret < 0) {
- err = -EAGAIN;
- folio_batch_release(&fbatch);
- goto out;
- }
- pages_processed += folio_nr_pages(folio);
+
+ process_one_page(fs_info, &folio->page, locked_page,
+ page_ops, start, end);
}
folio_batch_release(&fbatch);
cond_resched();
}
-out:
- if (err && processed_end) {
- /*
- * Update @processed_end. I know this is awful since it has
- * two different return value patterns (inclusive vs exclusive).
- *
- * But the exclusive pattern is necessary if @start is 0, or we
- * underflow and check against processed_end won't work as
- * expected.
- */
- if (pages_processed)
- *processed_end = min(end,
- ((u64)(start_index + pages_processed) << PAGE_SHIFT) - 1);
- else
- *processed_end = start;
- }
- return err;
}
static noinline void __unlock_for_delalloc(struct inode *inode,
@@ -326,29 +243,63 @@ static noinline void __unlock_for_delalloc(struct inode *inode,
return;
__process_pages_contig(inode->i_mapping, locked_page, start, end,
- PAGE_UNLOCK, NULL);
+ PAGE_UNLOCK);
}
static noinline int lock_delalloc_pages(struct inode *inode,
struct page *locked_page,
- u64 delalloc_start,
- u64 delalloc_end)
+ u64 start,
+ u64 end)
{
- unsigned long index = delalloc_start >> PAGE_SHIFT;
- unsigned long end_index = delalloc_end >> PAGE_SHIFT;
- u64 processed_end = delalloc_start;
- int ret;
+ struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct address_space *mapping = inode->i_mapping;
+ pgoff_t start_index = start >> PAGE_SHIFT;
+ pgoff_t end_index = end >> PAGE_SHIFT;
+ pgoff_t index = start_index;
+ u64 processed_end = start;
+ struct folio_batch fbatch;
- ASSERT(locked_page);
if (index == locked_page->index && index == end_index)
return 0;
- ret = __process_pages_contig(inode->i_mapping, locked_page, delalloc_start,
- delalloc_end, PAGE_LOCK, &processed_end);
- if (ret == -EAGAIN && processed_end > delalloc_start)
- __unlock_for_delalloc(inode, locked_page, delalloc_start,
- processed_end);
- return ret;
+ folio_batch_init(&fbatch);
+ while (index <= end_index) {
+ unsigned int found_folios, i;
+
+ found_folios = filemap_get_folios_contig(mapping, &index,
+ end_index, &fbatch);
+ if (found_folios == 0)
+ goto out;
+
+ for (i = 0; i < found_folios; i++) {
+ struct page *page = &fbatch.folios[i]->page;
+ u32 len = end + 1 - start;
+
+ if (page == locked_page)
+ continue;
+
+ if (btrfs_page_start_writer_lock(fs_info, page, start,
+ len))
+ goto out;
+
+ if (!PageDirty(page) || page->mapping != mapping) {
+ btrfs_page_end_writer_lock(fs_info, page, start,
+ len);
+ goto out;
+ }
+
+ processed_end = page_offset(page) + PAGE_SIZE - 1;
+ }
+ folio_batch_release(&fbatch);
+ cond_resched();
+ }
+
+ return 0;
+out:
+ folio_batch_release(&fbatch);
+ if (processed_end > start)
+ __unlock_for_delalloc(inode, locked_page, start, processed_end);
+ return -EAGAIN;
}
/*
@@ -467,7 +418,7 @@ void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
clear_extent_bit(&inode->io_tree, start, end, clear_bits, NULL);
__process_pages_contig(inode->vfs_inode.i_mapping, locked_page,
- start, end, page_ops, NULL);
+ start, end, page_ops);
}
static bool btrfs_verify_page(struct page *page, u64 start)
@@ -497,31 +448,6 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
btrfs_subpage_end_reader(fs_info, page, start, len);
}
-/* lots and lots of room for performance fixes in the end_bio funcs */
-
-void end_extent_writepage(struct page *page, int err, u64 start, u64 end)
-{
- struct btrfs_inode *inode;
- const bool uptodate = (err == 0);
- int ret = 0;
-
- ASSERT(page && page->mapping);
- inode = BTRFS_I(page->mapping->host);
- btrfs_writepage_endio_finish_ordered(inode, page, start, end, uptodate);
-
- if (!uptodate) {
- const struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u32 len;
-
- ASSERT(end + 1 - start <= U32_MAX);
- len = end + 1 - start;
-
- btrfs_page_clear_uptodate(fs_info, page, start, len);
- ret = err < 0 ? err : -EIO;
- mapping_set_error(page->mapping, ret);
- }
-}
-
/*
* after a writepage IO is done, we need to:
* clear the uptodate bits on error
@@ -902,7 +828,30 @@ static void submit_extent_page(struct btrfs_bio_ctrl *bio_ctrl,
size -= len;
pg_offset += len;
disk_bytenr += len;
- bio_ctrl->len_to_oe_boundary -= len;
+
+ /*
+ * len_to_oe_boundary defaults to U32_MAX, which isn't page or
+ * sector aligned. alloc_new_bio() then sets it to the end of
+ * our ordered extent for writes into zoned devices.
+ *
+ * When len_to_oe_boundary is tracking an ordered extent, we
+ * trust the ordered extent code to align things properly, and
+ * the check above to cap our write to the ordered extent
+ * boundary is correct.
+ *
+ * When len_to_oe_boundary is U32_MAX, the cap above would
+ * result in a 4095 byte IO for the last page right before
+ * we hit the bio limit of UINT_MAX. bio_add_page() has all
+ * the checks required to make sure we don't overflow the bio,
+ * and we should just ignore len_to_oe_boundary completely
+ * unless we're using it to track an ordered extent.
+ *
+ * It's pretty hard to make a bio sized U32_MAX, but it can
+ * happen when the page cache is able to feed us contiguous
+ * pages for large extents.
+ */
+ if (bio_ctrl->len_to_oe_boundary != U32_MAX)
+ bio_ctrl->len_to_oe_boundary -= len;
/* Ordered extent boundary: move on to a new bio. */
if (bio_ctrl->len_to_oe_boundary == 0)
@@ -1220,38 +1169,45 @@ static inline void contiguous_readpages(struct page *pages[], int nr_pages,
static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
struct page *page, struct writeback_control *wbc)
{
- const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
- u64 delalloc_start = page_offset(page);
+ const u64 page_start = page_offset(page);
+ const u64 page_end = page_start + PAGE_SIZE - 1;
+ u64 delalloc_start = page_start;
+ u64 delalloc_end = page_end;
u64 delalloc_to_write = 0;
- /* How many pages are started by btrfs_run_delalloc_range() */
- unsigned long nr_written = 0;
- int ret;
- int page_started = 0;
+ int ret = 0;
while (delalloc_start < page_end) {
- u64 delalloc_end = page_end;
- bool found;
-
- found = find_lock_delalloc_range(&inode->vfs_inode, page,
- &delalloc_start,
- &delalloc_end);
- if (!found) {
+ delalloc_end = page_end;
+ if (!find_lock_delalloc_range(&inode->vfs_inode, page,
+ &delalloc_start, &delalloc_end)) {
delalloc_start = delalloc_end + 1;
continue;
}
+
ret = btrfs_run_delalloc_range(inode, page, delalloc_start,
- delalloc_end, &page_started, &nr_written, wbc);
- if (ret)
+ delalloc_end, wbc);
+ if (ret < 0)
return ret;
- /*
- * delalloc_end is already one less than the total length, so
- * we don't subtract one from PAGE_SIZE
- */
- delalloc_to_write += (delalloc_end - delalloc_start +
- PAGE_SIZE) >> PAGE_SHIFT;
delalloc_start = delalloc_end + 1;
}
+
+ /*
+ * delalloc_end is already one less than the total length, so
+ * we don't subtract one from PAGE_SIZE
+ */
+ delalloc_to_write +=
+ DIV_ROUND_UP(delalloc_end + 1 - page_start, PAGE_SIZE);
+
+ /*
+ * If btrfs_run_dealloc_range() already started I/O and unlocked
+ * the pages, we just need to account for them here.
+ */
+ if (ret == 1) {
+ wbc->nr_to_write -= delalloc_to_write;
+ return 1;
+ }
+
if (wbc->nr_to_write < delalloc_to_write) {
int thresh = 8192;
@@ -1261,16 +1217,6 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
thresh);
}
- /* Did btrfs_run_dealloc_range() already unlock and start the IO? */
- if (page_started) {
- /*
- * We've unlocked the page, so we can't update the mapping's
- * writeback index, just update nr_to_write.
- */
- wbc->nr_to_write -= nr_written;
- return 1;
- }
-
return 0;
}
@@ -1359,6 +1305,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
bio_ctrl->end_io_func = end_bio_extent_writepage;
while (cur <= end) {
+ u32 len = end - cur + 1;
u64 disk_bytenr;
u64 em_end;
u64 dirty_range_start = cur;
@@ -1366,8 +1313,8 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
u32 iosize;
if (cur >= i_size) {
- btrfs_writepage_endio_finish_ordered(inode, page, cur,
- end, true);
+ btrfs_mark_ordered_io_finished(inode, page, cur, len,
+ true);
/*
* This range is beyond i_size, thus we don't need to
* bother writing back.
@@ -1376,7 +1323,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
* writeback the sectors with subpage dirty bits,
* causing writeback without ordered extent.
*/
- btrfs_page_clear_dirty(fs_info, page, cur, end + 1 - cur);
+ btrfs_page_clear_dirty(fs_info, page, cur, len);
break;
}
@@ -1387,7 +1334,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
continue;
}
- em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
+ em = btrfs_get_extent(inode, NULL, 0, cur, len);
if (IS_ERR(em)) {
ret = PTR_ERR_OR_ZERO(em);
goto out_error;
@@ -1463,7 +1410,6 @@ static int __extent_writepage(struct page *page, struct btrfs_bio_ctrl *bio_ctrl
struct folio *folio = page_folio(page);
struct inode *inode = page->mapping->host;
const u64 page_start = page_offset(page);
- const u64 page_end = page_start + PAGE_SIZE - 1;
int ret;
int nr = 0;
size_t pg_offset;
@@ -1507,8 +1453,13 @@ done:
set_page_writeback(page);
end_page_writeback(page);
}
- if (ret)
- end_extent_writepage(page, ret, page_start, page_end);
+ if (ret) {
+ btrfs_mark_ordered_io_finished(BTRFS_I(inode), page, page_start,
+ PAGE_SIZE, !ret);
+ btrfs_page_clear_uptodate(btrfs_sb(inode->i_sb), page,
+ page_start, PAGE_SIZE);
+ mapping_set_error(page->mapping, ret);
+ }
unlock_page(page);
ASSERT(ret <= 0);
return ret;
@@ -1854,11 +1805,10 @@ static int submit_eb_subpage(struct page *page, struct writeback_control *wbc)
* previous call.
* Return <0 for fatal error.
*/
-static int submit_eb_page(struct page *page, struct writeback_control *wbc,
- struct extent_buffer **eb_context)
+static int submit_eb_page(struct page *page, struct btrfs_eb_write_context *ctx)
{
+ struct writeback_control *wbc = ctx->wbc;
struct address_space *mapping = page->mapping;
- struct btrfs_block_group *cache = NULL;
struct extent_buffer *eb;
int ret;
@@ -1885,7 +1835,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
return 0;
}
- if (eb == *eb_context) {
+ if (eb == ctx->eb) {
spin_unlock(&mapping->private_lock);
return 0;
}
@@ -1894,34 +1844,25 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
if (!ret)
return 0;
- if (!btrfs_check_meta_write_pointer(eb->fs_info, eb, &cache)) {
- /*
- * If for_sync, this hole will be filled with
- * trasnsaction commit.
- */
- if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
- ret = -EAGAIN;
- else
+ ctx->eb = eb;
+
+ ret = btrfs_check_meta_write_pointer(eb->fs_info, ctx);
+ if (ret) {
+ if (ret == -EBUSY)
ret = 0;
free_extent_buffer(eb);
return ret;
}
- *eb_context = eb;
-
if (!lock_extent_buffer_for_io(eb, wbc)) {
- btrfs_revert_meta_write_pointer(cache, eb);
- if (cache)
- btrfs_put_block_group(cache);
free_extent_buffer(eb);
return 0;
}
- if (cache) {
- /*
- * Implies write in zoned mode. Mark the last eb in a block group.
- */
- btrfs_schedule_zone_finish_bg(cache, eb);
- btrfs_put_block_group(cache);
+ /* Implies write in zoned mode. */
+ if (ctx->zoned_bg) {
+ /* Mark the last eb in the block group. */
+ btrfs_schedule_zone_finish_bg(ctx->zoned_bg, eb);
+ ctx->zoned_bg->meta_write_pointer += eb->len;
}
write_one_eb(eb, wbc);
free_extent_buffer(eb);
@@ -1931,7 +1872,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
int btree_write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc)
{
- struct extent_buffer *eb_context = NULL;
+ struct btrfs_eb_write_context ctx = { .wbc = wbc };
struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
int ret = 0;
int done = 0;
@@ -1973,7 +1914,7 @@ retry:
for (i = 0; i < nr_folios; i++) {
struct folio *folio = fbatch.folios[i];
- ret = submit_eb_page(&folio->page, wbc, &eb_context);
+ ret = submit_eb_page(&folio->page, &ctx);
if (ret == 0)
continue;
if (ret < 0) {
@@ -2034,6 +1975,9 @@ retry:
ret = 0;
if (!ret && BTRFS_FS_ERROR(fs_info))
ret = -EROFS;
+
+ if (ctx.zoned_bg)
+ btrfs_put_block_group(ctx.zoned_bg);
btrfs_zoned_meta_io_unlock(fs_info);
return ret;
}
@@ -2127,7 +2071,7 @@ retry:
for (i = 0; i < nr_folios; i++) {
struct folio *folio = fbatch.folios[i];
- done_index = folio->index + folio_nr_pages(folio);
+ done_index = folio_next_index(folio);
/*
* At this point we hold neither the i_pages lock nor
* the page lock: the page may be truncated or
@@ -2145,6 +2089,12 @@ retry:
continue;
}
+ if (!folio_test_dirty(folio)) {
+ /* Someone wrote it for us. */
+ folio_unlock(folio);
+ continue;
+ }
+
if (wbc->sync_mode != WB_SYNC_NONE) {
if (folio_test_writeback(folio))
submit_write_bio(bio_ctrl, 0);
@@ -2164,11 +2114,12 @@ retry:
}
/*
- * the filesystem may choose to bump up nr_to_write.
+ * The filesystem may choose to bump up nr_to_write.
* We have to make sure to honor the new nr_to_write
- * at any time
+ * at any time.
*/
- nr_to_write_done = wbc->nr_to_write <= 0;
+ nr_to_write_done = (wbc->sync_mode == WB_SYNC_NONE &&
+ wbc->nr_to_write <= 0);
}
folio_batch_release(&fbatch);
cond_resched();
@@ -2203,11 +2154,11 @@ retry:
* already been ran (aka, ordered extent inserted) and all pages are still
* locked.
*/
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
- struct writeback_control *wbc)
+void extent_write_locked_range(struct inode *inode, struct page *locked_page,
+ u64 start, u64 end, struct writeback_control *wbc,
+ bool pages_dirty)
{
bool found_error = false;
- int first_error = 0;
int ret = 0;
struct address_space *mapping = inode->i_mapping;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
@@ -2226,18 +2177,16 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
while (cur <= end) {
u64 cur_end = min(round_down(cur, PAGE_SIZE) + PAGE_SIZE - 1, end);
+ u32 cur_len = cur_end + 1 - cur;
struct page *page;
int nr = 0;
page = find_get_page(mapping, cur >> PAGE_SHIFT);
- /*
- * All pages in the range are locked since
- * btrfs_run_delalloc_range(), thus there is no way to clear
- * the page dirty flag.
- */
ASSERT(PageLocked(page));
- ASSERT(PageDirty(page));
- clear_page_dirty_for_io(page);
+ if (pages_dirty && page != locked_page) {
+ ASSERT(PageDirty(page));
+ clear_page_dirty_for_io(page);
+ }
ret = __extent_writepage_io(BTRFS_I(inode), page, &bio_ctrl,
i_size, &nr);
@@ -2249,23 +2198,21 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
set_page_writeback(page);
end_page_writeback(page);
}
- if (ret)
- end_extent_writepage(page, ret, cur, cur_end);
- btrfs_page_unlock_writer(fs_info, page, cur, cur_end + 1 - cur);
- if (ret < 0) {
- found_error = true;
- first_error = ret;
+ if (ret) {
+ btrfs_mark_ordered_io_finished(BTRFS_I(inode), page,
+ cur, cur_len, !ret);
+ btrfs_page_clear_uptodate(fs_info, page, cur, cur_len);
+ mapping_set_error(page->mapping, ret);
}
+ btrfs_page_unlock_writer(fs_info, page, cur, cur_len);
+ if (ret < 0)
+ found_error = true;
next_page:
put_page(page);
cur = cur_end + 1;
}
submit_write_bio(&bio_ctrl, found_error ? ret : 0);
-
- if (found_error)
- return first_error;
- return ret;
}
int extent_writepages(struct address_space *mapping,
@@ -3285,8 +3232,8 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
return NULL;
}
WARN_ON(PageDirty(p));
- copy_page(page_address(p), page_address(src->pages[i]));
}
+ copy_extent_buffer_full(new, src);
set_extent_buffer_uptodate(new);
return new;
@@ -3529,6 +3476,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
struct extent_buffer *exists = NULL;
struct page *p;
struct address_space *mapping = fs_info->btree_inode->i_mapping;
+ struct btrfs_subpage *prealloc = NULL;
u64 lockdep_owner = owner_root;
int uptodate = 1;
int ret;
@@ -3565,36 +3513,30 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
btrfs_set_buffer_lockdep_class(lockdep_owner, eb, level);
num_pages = num_extent_pages(eb);
- for (i = 0; i < num_pages; i++, index++) {
- struct btrfs_subpage *prealloc = NULL;
+ /*
+ * Preallocate page->private for subpage case, so that we won't
+ * allocate memory with private_lock nor page lock hold.
+ *
+ * The memory will be freed by attach_extent_buffer_page() or freed
+ * manually if we exit earlier.
+ */
+ if (fs_info->nodesize < PAGE_SIZE) {
+ prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
+ if (IS_ERR(prealloc)) {
+ exists = ERR_CAST(prealloc);
+ goto free_eb;
+ }
+ }
+
+ for (i = 0; i < num_pages; i++, index++) {
p = find_or_create_page(mapping, index, GFP_NOFS|__GFP_NOFAIL);
if (!p) {
exists = ERR_PTR(-ENOMEM);
+ btrfs_free_subpage(prealloc);
goto free_eb;
}
- /*
- * Preallocate page->private for subpage case, so that we won't
- * allocate memory with private_lock hold. The memory will be
- * freed by attach_extent_buffer_page() or freed manually if
- * we exit earlier.
- *
- * Although we have ensured one subpage eb can only have one
- * page, but it may change in the future for 16K page size
- * support, so we still preallocate the memory in the loop.
- */
- if (fs_info->nodesize < PAGE_SIZE) {
- prealloc = btrfs_alloc_subpage(fs_info, BTRFS_SUBPAGE_METADATA);
- if (IS_ERR(prealloc)) {
- ret = PTR_ERR(prealloc);
- unlock_page(p);
- put_page(p);
- exists = ERR_PTR(ret);
- goto free_eb;
- }
- }
-
spin_lock(&mapping->private_lock);
exists = grab_extent_buffer(fs_info, p);
if (exists) {
@@ -4180,30 +4122,9 @@ static void assert_eb_page_uptodate(const struct extent_buffer *eb,
}
}
-void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
- const void *srcv)
-{
- char *kaddr;
-
- assert_eb_page_uptodate(eb, eb->pages[0]);
- kaddr = page_address(eb->pages[0]) +
- get_eb_offset_in_page(eb, offsetof(struct btrfs_header,
- chunk_tree_uuid));
- memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
-}
-
-void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *srcv)
-{
- char *kaddr;
-
- assert_eb_page_uptodate(eb, eb->pages[0]);
- kaddr = page_address(eb->pages[0]) +
- get_eb_offset_in_page(eb, offsetof(struct btrfs_header, fsid));
- memcpy(kaddr, srcv, BTRFS_FSID_SIZE);
-}
-
-void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
- unsigned long start, unsigned long len)
+static void __write_extent_buffer(const struct extent_buffer *eb,
+ const void *srcv, unsigned long start,
+ unsigned long len, bool use_memmove)
{
size_t cur;
size_t offset;
@@ -4211,6 +4132,8 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
char *kaddr;
char *src = (char *)srcv;
unsigned long i = get_eb_page_index(start);
+ /* For unmapped (dummy) ebs, no need to check their uptodate status. */
+ const bool check_uptodate = !test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);
WARN_ON(test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags));
@@ -4221,11 +4144,15 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
while (len > 0) {
page = eb->pages[i];
- assert_eb_page_uptodate(eb, page);
+ if (check_uptodate)
+ assert_eb_page_uptodate(eb, page);
cur = min(len, PAGE_SIZE - offset);
kaddr = page_address(page);
- memcpy(kaddr + offset, src, cur);
+ if (use_memmove)
+ memmove(kaddr + offset, src, cur);
+ else
+ memcpy(kaddr + offset, src, cur);
src += cur;
len -= cur;
@@ -4234,55 +4161,54 @@ void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
}
}
-void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
- unsigned long len)
+void write_extent_buffer(const struct extent_buffer *eb, const void *srcv,
+ unsigned long start, unsigned long len)
{
- size_t cur;
- size_t offset;
- struct page *page;
- char *kaddr;
- unsigned long i = get_eb_page_index(start);
+ return __write_extent_buffer(eb, srcv, start, len, false);
+}
- if (check_eb_range(eb, start, len))
- return;
+static void memset_extent_buffer(const struct extent_buffer *eb, int c,
+ unsigned long start, unsigned long len)
+{
+ unsigned long cur = start;
- offset = get_eb_offset_in_page(eb, start);
+ while (cur < start + len) {
+ unsigned long index = get_eb_page_index(cur);
+ unsigned int offset = get_eb_offset_in_page(eb, cur);
+ unsigned int cur_len = min(start + len - cur, PAGE_SIZE - offset);
+ struct page *page = eb->pages[index];
- while (len > 0) {
- page = eb->pages[i];
assert_eb_page_uptodate(eb, page);
+ memset(page_address(page) + offset, c, cur_len);
- cur = min(len, PAGE_SIZE - offset);
- kaddr = page_address(page);
- memset(kaddr + offset, 0, cur);
-
- len -= cur;
- offset = 0;
- i++;
+ cur += cur_len;
}
}
+void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
+ unsigned long len)
+{
+ if (check_eb_range(eb, start, len))
+ return;
+ return memset_extent_buffer(eb, 0, start, len);
+}
+
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src)
{
- int i;
- int num_pages;
+ unsigned long cur = 0;
ASSERT(dst->len == src->len);
- if (dst->fs_info->nodesize >= PAGE_SIZE) {
- num_pages = num_extent_pages(dst);
- for (i = 0; i < num_pages; i++)
- copy_page(page_address(dst->pages[i]),
- page_address(src->pages[i]));
- } else {
- size_t src_offset = get_eb_offset_in_page(src, 0);
- size_t dst_offset = get_eb_offset_in_page(dst, 0);
+ while (cur < src->len) {
+ unsigned long index = get_eb_page_index(cur);
+ unsigned long offset = get_eb_offset_in_page(src, cur);
+ unsigned long cur_len = min(src->len, PAGE_SIZE - offset);
+ void *addr = page_address(src->pages[index]) + offset;
+
+ write_extent_buffer(dst, addr, cur, cur_len);
- ASSERT(src->fs_info->nodesize < PAGE_SIZE);
- memcpy(page_address(dst->pages[0]) + dst_offset,
- page_address(src->pages[0]) + src_offset,
- src->len);
+ cur += cur_len;
}
}
@@ -4376,6 +4302,15 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
}
+static u8 *extent_buffer_get_byte(const struct extent_buffer *eb, unsigned long bytenr)
+{
+ unsigned long index = get_eb_page_index(bytenr);
+
+ if (check_eb_range(eb, bytenr, 1))
+ return NULL;
+ return page_address(eb->pages[index]) + get_eb_offset_in_page(eb, bytenr);
+}
+
/*
* Set an area of a bitmap to 1.
*
@@ -4387,35 +4322,28 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
unsigned long pos, unsigned long len)
{
+ unsigned int first_byte = start + BIT_BYTE(pos);
+ unsigned int last_byte = start + BIT_BYTE(pos + len - 1);
+ const bool same_byte = (first_byte == last_byte);
+ u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
u8 *kaddr;
- struct page *page;
- unsigned long i;
- size_t offset;
- const unsigned int size = pos + len;
- int bits_to_set = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
- u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(pos);
- eb_bitmap_offset(eb, start, pos, &i, &offset);
- page = eb->pages[i];
- assert_eb_page_uptodate(eb, page);
- kaddr = page_address(page);
+ if (same_byte)
+ mask &= BITMAP_LAST_BYTE_MASK(pos + len);
- while (len >= bits_to_set) {
- kaddr[offset] |= mask_to_set;
- len -= bits_to_set;
- bits_to_set = BITS_PER_BYTE;
- mask_to_set = ~0;
- if (++offset >= PAGE_SIZE && len > 0) {
- offset = 0;
- page = eb->pages[++i];
- assert_eb_page_uptodate(eb, page);
- kaddr = page_address(page);
- }
- }
- if (len) {
- mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
- kaddr[offset] |= mask_to_set;
- }
+ /* Handle the first byte. */
+ kaddr = extent_buffer_get_byte(eb, first_byte);
+ *kaddr |= mask;
+ if (same_byte)
+ return;
+
+ /* Handle the byte aligned part. */
+ ASSERT(first_byte + 1 <= last_byte);
+ memset_extent_buffer(eb, 0xff, first_byte + 1, last_byte - first_byte - 1);
+
+ /* Handle the last byte. */
+ kaddr = extent_buffer_get_byte(eb, last_byte);
+ *kaddr |= BITMAP_LAST_BYTE_MASK(pos + len);
}
@@ -4431,35 +4359,28 @@ void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len)
{
+ unsigned int first_byte = start + BIT_BYTE(pos);
+ unsigned int last_byte = start + BIT_BYTE(pos + len - 1);
+ const bool same_byte = (first_byte == last_byte);
+ u8 mask = BITMAP_FIRST_BYTE_MASK(pos);
u8 *kaddr;
- struct page *page;
- unsigned long i;
- size_t offset;
- const unsigned int size = pos + len;
- int bits_to_clear = BITS_PER_BYTE - (pos % BITS_PER_BYTE);
- u8 mask_to_clear = BITMAP_FIRST_BYTE_MASK(pos);
- eb_bitmap_offset(eb, start, pos, &i, &offset);
- page = eb->pages[i];
- assert_eb_page_uptodate(eb, page);
- kaddr = page_address(page);
+ if (same_byte)
+ mask &= BITMAP_LAST_BYTE_MASK(pos + len);
- while (len >= bits_to_clear) {
- kaddr[offset] &= ~mask_to_clear;
- len -= bits_to_clear;
- bits_to_clear = BITS_PER_BYTE;
- mask_to_clear = ~0;
- if (++offset >= PAGE_SIZE && len > 0) {
- offset = 0;
- page = eb->pages[++i];
- assert_eb_page_uptodate(eb, page);
- kaddr = page_address(page);
- }
- }
- if (len) {
- mask_to_clear &= BITMAP_LAST_BYTE_MASK(size);
- kaddr[offset] &= ~mask_to_clear;
- }
+ /* Handle the first byte. */
+ kaddr = extent_buffer_get_byte(eb, first_byte);
+ *kaddr &= ~mask;
+ if (same_byte)
+ return;
+
+ /* Handle the byte aligned part. */
+ ASSERT(first_byte + 1 <= last_byte);
+ memset_extent_buffer(eb, 0, first_byte + 1, last_byte - first_byte - 1);
+
+ /* Handle the last byte. */
+ kaddr = extent_buffer_get_byte(eb, last_byte);
+ *kaddr &= ~BITMAP_LAST_BYTE_MASK(pos + len);
}
static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len)
@@ -4468,60 +4389,29 @@ static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned
return distance < len;
}
-static void copy_pages(struct page *dst_page, struct page *src_page,
- unsigned long dst_off, unsigned long src_off,
- unsigned long len)
-{
- char *dst_kaddr = page_address(dst_page);
- char *src_kaddr;
- int must_memmove = 0;
-
- if (dst_page != src_page) {
- src_kaddr = page_address(src_page);
- } else {
- src_kaddr = dst_kaddr;
- if (areas_overlap(src_off, dst_off, len))
- must_memmove = 1;
- }
-
- if (must_memmove)
- memmove(dst_kaddr + dst_off, src_kaddr + src_off, len);
- else
- memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len);
-}
-
void memcpy_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
- size_t cur;
- size_t dst_off_in_page;
- size_t src_off_in_page;
- unsigned long dst_i;
- unsigned long src_i;
+ unsigned long cur_off = 0;
if (check_eb_range(dst, dst_offset, len) ||
check_eb_range(dst, src_offset, len))
return;
- while (len > 0) {
- dst_off_in_page = get_eb_offset_in_page(dst, dst_offset);
- src_off_in_page = get_eb_offset_in_page(dst, src_offset);
-
- dst_i = get_eb_page_index(dst_offset);
- src_i = get_eb_page_index(src_offset);
-
- cur = min(len, (unsigned long)(PAGE_SIZE -
- src_off_in_page));
- cur = min_t(unsigned long, cur,
- (unsigned long)(PAGE_SIZE - dst_off_in_page));
-
- copy_pages(dst->pages[dst_i], dst->pages[src_i],
- dst_off_in_page, src_off_in_page, cur);
-
- src_offset += cur;
- dst_offset += cur;
- len -= cur;
+ while (cur_off < len) {
+ unsigned long cur_src = cur_off + src_offset;
+ unsigned long pg_index = get_eb_page_index(cur_src);
+ unsigned long pg_off = get_eb_offset_in_page(dst, cur_src);
+ unsigned long cur_len = min(src_offset + len - cur_src,
+ PAGE_SIZE - pg_off);
+ void *src_addr = page_address(dst->pages[pg_index]) + pg_off;
+ const bool use_memmove = areas_overlap(src_offset + cur_off,
+ dst_offset + cur_off, cur_len);
+
+ __write_extent_buffer(dst, src_addr, dst_offset + cur_off, cur_len,
+ use_memmove);
+ cur_off += cur_len;
}
}
@@ -4529,23 +4419,26 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
unsigned long dst_offset, unsigned long src_offset,
unsigned long len)
{
- size_t cur;
- size_t dst_off_in_page;
- size_t src_off_in_page;
unsigned long dst_end = dst_offset + len - 1;
unsigned long src_end = src_offset + len - 1;
- unsigned long dst_i;
- unsigned long src_i;
if (check_eb_range(dst, dst_offset, len) ||
check_eb_range(dst, src_offset, len))
return;
+
if (dst_offset < src_offset) {
memcpy_extent_buffer(dst, dst_offset, src_offset, len);
return;
}
+
while (len > 0) {
- dst_i = get_eb_page_index(dst_end);
+ unsigned long src_i;
+ size_t cur;
+ size_t dst_off_in_page;
+ size_t src_off_in_page;
+ void *src_addr;
+ bool use_memmove;
+
src_i = get_eb_page_index(src_end);
dst_off_in_page = get_eb_offset_in_page(dst, dst_end);
@@ -4553,9 +4446,14 @@ void memmove_extent_buffer(const struct extent_buffer *dst,
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);
- copy_pages(dst->pages[dst_i], dst->pages[src_i],
- dst_off_in_page - cur + 1,
- src_off_in_page - cur + 1, cur);
+
+ src_addr = page_address(dst->pages[src_i]) + src_off_in_page -
+ cur + 1;
+ use_memmove = areas_overlap(src_end - cur + 1, dst_end - cur + 1,
+ cur);
+
+ __write_extent_buffer(dst, src_addr, dst_end - cur + 1, cur,
+ use_memmove);
dst_end -= cur;
src_end -= cur;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index c5fae3a7d911..68368ba99321 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -40,7 +40,6 @@ enum {
ENUM_BIT(PAGE_START_WRITEBACK),
ENUM_BIT(PAGE_END_WRITEBACK),
ENUM_BIT(PAGE_SET_ORDERED),
- ENUM_BIT(PAGE_LOCK),
};
/*
@@ -94,6 +93,13 @@ struct extent_buffer {
#endif
};
+struct btrfs_eb_write_context {
+ struct writeback_control *wbc;
+ struct extent_buffer *eb;
+ /* Block group @eb resides in. Only used for zoned mode. */
+ struct btrfs_block_group *zoned_bg;
+};
+
/*
* Get the correct offset inside the page of extent buffer.
*
@@ -178,8 +184,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int btrfs_read_folio(struct file *file, struct folio *folio);
-int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
- struct writeback_control *wbc);
+void extent_write_locked_range(struct inode *inode, struct page *locked_page,
+ u64 start, u64 end, struct writeback_control *wbc,
+ bool pages_dirty);
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
@@ -236,11 +243,24 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
void __user *dst, unsigned long start,
unsigned long len);
-void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
-void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
- const void *src);
void write_extent_buffer(const struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);
+
+static inline void write_extent_buffer_chunk_tree_uuid(
+ const struct extent_buffer *eb, const void *chunk_tree_uuid)
+{
+ write_extent_buffer(eb, chunk_tree_uuid,
+ offsetof(struct btrfs_header, chunk_tree_uuid),
+ BTRFS_FSID_SIZE);
+}
+
+static inline void write_extent_buffer_fsid(const struct extent_buffer *eb,
+ const void *fsid)
+{
+ write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
+ BTRFS_FSID_SIZE);
+}
+
void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src);
void copy_extent_buffer(const struct extent_buffer *dst,
@@ -266,7 +286,6 @@ void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
-void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
u32 bits_to_clear, unsigned long page_ops);
@@ -277,8 +296,6 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);
-void end_extent_writepage(struct page *page, int err, u64 start, u64 end);
-
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 0cdb3e86f29b..a6d8368ed0ed 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -760,8 +760,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
start = em_end;
- if (end != (u64)-1)
- len = start + len - em_end;
goto next;
}
@@ -829,8 +827,8 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
if (!split)
goto remove_em;
}
- split->start = start + len;
- split->len = em_end - (start + len);
+ split->start = end;
+ split->len = em_end - end;
split->block_start = em->block_start;
split->flags = flags;
split->compress_type = em->compress_type;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 696bf695d8eb..1ce5dd154499 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -597,29 +597,37 @@ fail:
* Each bit represents a sector. Thus caller should ensure @csum_buf passed
* in is large enough to contain all csums.
*/
-int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
- u8 *csum_buf, unsigned long *csum_bitmap,
- bool search_commit)
+int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
+ u64 start, u64 end, u8 *csum_buf,
+ unsigned long *csum_bitmap)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
- struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_csum_item *item;
const u64 orig_start = start;
+ bool free_path = false;
int ret;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(end + 1, fs_info->sectorsize));
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
+ if (!path) {
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ free_path = true;
+ }
- if (search_commit) {
- path->skip_locking = 1;
- path->reada = READA_FORWARD;
- path->search_commit_root = 1;
+ /* Check if we can reuse the previous path. */
+ if (path->nodes[0]) {
+ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
+
+ if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
+ key.type == BTRFS_EXTENT_CSUM_KEY &&
+ key.offset <= start)
+ goto search_forward;
+ btrfs_release_path(path);
}
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -656,6 +664,7 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
}
}
+search_forward:
while (start <= end) {
u64 csum_end;
@@ -712,7 +721,8 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
}
ret = 0;
fail:
- btrfs_free_path(path);
+ if (free_path)
+ btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h
index 4ec669b69008..04bd2d34efb1 100644
--- a/fs/btrfs/file-item.h
+++ b/fs/btrfs/file-item.h
@@ -57,9 +57,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
bool nowait);
-int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
- u8 *csum_buf, unsigned long *csum_bitmap,
- bool search_commit);
+int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
+ u64 start, u64 end, u8 *csum_buf,
+ unsigned long *csum_bitmap);
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fd03e689a6be..ca46a529d56b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -876,9 +876,9 @@ static int prepare_uptodate_page(struct inode *inode,
return 0;
}
-static unsigned int get_prepare_fgp_flags(bool nowait)
+static fgf_t get_prepare_fgp_flags(bool nowait)
{
- unsigned int fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
+ fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;
if (nowait)
fgp_flags |= FGP_NOWAIT;
@@ -910,7 +910,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
int i;
unsigned long index = pos >> PAGE_SHIFT;
gfp_t mask = get_prepare_gfp_flags(inode, nowait);
- unsigned int fgp_flags = get_prepare_fgp_flags(nowait);
+ fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
int err = 0;
int faili;
@@ -1106,24 +1106,6 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
btrfs_drew_write_unlock(&inode->root->snapshot_lock);
}
-static void update_time_for_write(struct inode *inode)
-{
- struct timespec64 now;
-
- if (IS_NOCMTIME(inode))
- return;
-
- now = current_time(inode);
- if (!timespec64_equal(&inode->i_mtime, &now))
- inode->i_mtime = now;
-
- if (!timespec64_equal(&inode->i_ctime, &now))
- inode->i_ctime = now;
-
- if (IS_I_VERSION(inode))
- inode_inc_iversion(inode);
-}
-
static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
size_t count)
{
@@ -1155,7 +1137,10 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
* need to start yet another transaction to update the inode as we will
* update the inode when we finish writing whatever data we write.
*/
- update_time_for_write(inode);
+ if (!IS_NOCMTIME(inode)) {
+ inode->i_mtime = inode_set_ctime_current(inode);
+ inode_inc_iversion(inode);
+ }
start_pos = round_down(pos, fs_info->sectorsize);
oldsize = i_size_read(inode);
@@ -2459,10 +2444,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
*/
inode_inc_iversion(&inode->vfs_inode);
- if (!extent_info || extent_info->update_times) {
- inode->vfs_inode.i_mtime = current_time(&inode->vfs_inode);
- inode->vfs_inode.i_ctime = inode->vfs_inode.i_mtime;
- }
+ if (!extent_info || extent_info->update_times)
+ inode->vfs_inode.i_mtime = inode_set_ctime_current(&inode->vfs_inode);
ret = btrfs_update_inode(trans, root, inode);
if (ret)
@@ -2703,8 +2686,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
ASSERT(trans != NULL);
inode_inc_iversion(inode);
- inode->i_mtime = current_time(inode);
- inode->i_ctime = inode->i_mtime;
+ inode->i_mtime = inode_set_ctime_current(inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
updated_inode = true;
btrfs_end_transaction(trans);
@@ -2721,11 +2703,10 @@ out_only_mutex:
* for detecting, at fsync time, if the inode isn't yet in the
* log tree or it's there but not up to date.
*/
- struct timespec64 now = current_time(inode);
+ struct timespec64 now = inode_set_ctime_current(inode);
inode_inc_iversion(inode);
inode->i_mtime = now;
- inode->i_ctime = now;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -2796,7 +2777,7 @@ static int btrfs_fallocate_update_isize(struct inode *inode,
if (IS_ERR(trans))
return PTR_ERR(trans);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
i_size_write(inode, end);
btrfs_inode_safe_disk_i_size_write(BTRFS_I(inode), 0);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
@@ -3018,7 +2999,7 @@ static long btrfs_fallocate(struct file *file, int mode,
struct extent_changeset *data_reserved = NULL;
struct falloc_range *range;
struct falloc_range *tmp;
- struct list_head reserve_list;
+ LIST_HEAD(reserve_list);
u64 cur_offset;
u64 last_byte;
u64 alloc_start;
@@ -3110,7 +3091,6 @@ static long btrfs_fallocate(struct file *file, int mode,
btrfs_assert_inode_range_clean(BTRFS_I(inode), alloc_start, locked_end);
/* First, check if we exceed the qgroup limit */
- INIT_LIST_HEAD(&reserve_list);
while (cur_offset < alloc_end) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
alloc_end - cur_offset);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 880800418075..27fad70451aa 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1219,10 +1219,9 @@ static noinline_for_stack int write_pinned_extent_entries(
start = block_group->start;
while (start < block_group->start + block_group->length) {
- ret = find_first_extent_bit(unpin, start,
- &extent_start, &extent_end,
- EXTENT_DIRTY, NULL);
- if (ret)
+ if (!find_first_extent_bit(unpin, start,
+ &extent_start, &extent_end,
+ EXTENT_DIRTY, NULL))
return 0;
/* This pinned extent is out of our range */
@@ -2705,13 +2704,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);
spin_lock(&ctl->tree_lock);
- /* Count initial region as zone_unusable until it gets activated. */
if (!used)
to_free = size;
- else if (initial &&
- test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) &&
- (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
- to_free = 0;
else if (initial)
to_free = block_group->zone_capacity;
else if (offset >= block_group->alloc_offset)
@@ -2739,8 +2733,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
reclaimable_unusable = block_group->zone_unusable -
(block_group->length - block_group->zone_capacity);
/* All the region is now unusable. Mark it as unused and reclaim */
- if (block_group->zone_unusable == block_group->length &&
- block_group->alloc_offset) {
+ if (block_group->zone_unusable == block_group->length) {
btrfs_mark_bg_unused(block_group);
} else if (bg_reclaim_threshold &&
reclaimable_unusable >=
@@ -2944,7 +2937,8 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
btrfs_info(fs_info, "block group has cluster?: %s",
list_empty(&block_group->cluster_list) ? "no" : "yes");
btrfs_info(fs_info,
- "%d blocks of free space at or bigger than bytes is", count);
+ "%d free space entries at or bigger than %llu bytes",
+ count, bytes);
}
void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index 045ddce32eca..c0e734082dcc 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1515,9 +1515,15 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
if (prev_bit == 0 && bit == 1) {
extent_start = offset;
} else if (prev_bit == 1 && bit == 0) {
- total_found += add_new_free_space(block_group,
- extent_start,
- offset);
+ u64 space_added;
+
+ ret = btrfs_add_new_free_space(block_group,
+ extent_start,
+ offset,
+ &space_added);
+ if (ret)
+ goto out;
+ total_found += space_added;
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
wake_up(&caching_ctl->wait);
@@ -1529,8 +1535,9 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
}
}
if (prev_bit == 1) {
- total_found += add_new_free_space(block_group, extent_start,
- end);
+ ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
+ if (ret)
+ goto out;
extent_count++;
}
@@ -1569,6 +1576,8 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
end = block_group->start + block_group->length;
while (1) {
+ u64 space_added;
+
ret = btrfs_next_item(root, path);
if (ret < 0)
goto out;
@@ -1583,8 +1592,12 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
ASSERT(key.objectid < end && key.objectid + key.offset <= end);
- total_found += add_new_free_space(block_group, key.objectid,
- key.objectid + key.offset);
+ ret = btrfs_add_new_free_space(block_group, key.objectid,
+ key.objectid + key.offset,
+ &space_added);
+ if (ret)
+ goto out;
+ total_found += space_added;
if (total_found > CACHING_CTL_WAKE_UP) {
total_found = 0;
wake_up(&caching_ctl->wait);
diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 203d2a267828..a523d64d5491 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -46,8 +46,6 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
* Runtime (in-memory) states of filesystem
*/
enum {
- /* Global indicator of serious filesystem errors */
- BTRFS_FS_STATE_ERROR,
/*
* Filesystem is being remounted, allow to skip some operations, like
* defrag
@@ -686,6 +684,12 @@ struct btrfs_fs_info {
bool qgroup_rescan_running;
u8 qgroup_drop_subtree_thres;
+ /*
+ * If this is not 0, then it indicates a serious filesystem error has
+ * happened and it contains that error (negative errno value).
+ */
+ int fs_error;
+
/* Filesystem state */
unsigned long fs_state;
@@ -766,6 +770,9 @@ struct btrfs_fs_info {
u64 data_reloc_bg;
struct mutex zoned_data_reloc_io_lock;
+ struct btrfs_block_group *active_meta_bg;
+ struct btrfs_block_group *active_system_bg;
+
u64 nr_global_roots;
spinlock_t zone_active_bgs_lock;
@@ -962,8 +969,8 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
}
-#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
- &(fs_info)->fs_state)))
+#define BTRFS_FS_ERROR(fs_info) (READ_ONCE((fs_info)->fs_error))
+
#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \
(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
&(fs_info)->fs_state)))
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dbbb67293e34..f09fbdc43f0f 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -124,11 +124,11 @@ static struct kmem_cache *btrfs_inode_cachep;
static int btrfs_setsize(struct inode *inode, struct iattr *attr);
static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback);
-static noinline int cow_file_range(struct btrfs_inode *inode,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written, int unlock,
- u64 *done_offset);
+
+static noinline int run_delalloc_cow(struct btrfs_inode *inode,
+ struct page *locked_page, u64 start,
+ u64 end, struct writeback_control *wbc,
+ bool pages_dirty);
static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 len, u64 orig_start, u64 block_start,
u64 block_len, u64 orig_block_len,
@@ -423,11 +423,10 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
while (index <= end_index) {
/*
- * For locked page, we will call end_extent_writepage() on it
- * in run_delalloc_range() for the error handling. That
- * end_extent_writepage() function will call
- * btrfs_mark_ordered_io_finished() to clear page Ordered and
- * run the ordered extent accounting.
+ * For locked page, we will call btrfs_mark_ordered_io_finished
+ * through btrfs_mark_ordered_io_finished() on it
+ * in run_delalloc_range() for the error handling, which will
+ * clear page Ordered and run the ordered extent accounting.
*
* Here we can't just clear the Ordered bit, or
* btrfs_mark_ordered_io_finished() would skip the accounting
@@ -815,24 +814,22 @@ static inline void inode_should_defrag(struct btrfs_inode *inode,
}
/*
- * we create compressed extents in two phases. The first
- * phase compresses a range of pages that have already been
- * locked (both pages and state bits are locked).
+ * Work queue call back to started compression on a file and pages.
*
- * This is done inside an ordered work queue, and the compression
- * is spread across many cpus. The actual IO submission is step
- * two, and the ordered work queue takes care of making sure that
- * happens in the same order things were put onto the queue by
- * writepages and friends.
+ * This is done inside an ordered work queue, and the compression is spread
+ * across many cpus. The actual IO submission is step two, and the ordered work
+ * queue takes care of making sure that happens in the same order things were
+ * put onto the queue by writepages and friends.
*
- * If this code finds it can't get good compression, it puts an
- * entry onto the work queue to write the uncompressed bytes. This
- * makes sure that both compressed inodes and uncompressed inodes
- * are written in the same order that the flusher thread sent them
- * down.
+ * If this code finds it can't get good compression, it puts an entry onto the
+ * work queue to write the uncompressed bytes. This makes sure that both
+ * compressed inodes and uncompressed inodes are written in the same order that
+ * the flusher thread sent them down.
*/
-static noinline int compress_file_range(struct async_chunk *async_chunk)
+static void compress_file_range(struct btrfs_work *work)
{
+ struct async_chunk *async_chunk =
+ container_of(work, struct async_chunk, work);
struct btrfs_inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct address_space *mapping = inode->vfs_inode.i_mapping;
@@ -842,19 +839,24 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
u64 actual_end;
u64 i_size;
int ret = 0;
- struct page **pages = NULL;
+ struct page **pages;
unsigned long nr_pages;
unsigned long total_compressed = 0;
unsigned long total_in = 0;
+ unsigned int poff;
int i;
- int will_compress;
int compress_type = fs_info->compress_type;
- int compressed_extents = 0;
- int redirty = 0;
inode_should_defrag(inode, start, end, end - start + 1, SZ_16K);
/*
+ * We need to call clear_page_dirty_for_io on each page in the range.
+ * Otherwise applications with the file mmap'd can wander in and change
+ * the page contents while we are compressing them.
+ */
+ extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end);
+
+ /*
* We need to save i_size before now because it could change in between
* us evaluating the size and assigning it. This is because we lock and
* unlock the page in truncate and fallocate, and then modify the i_size
@@ -868,7 +870,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
barrier();
actual_end = min_t(u64, i_size, end + 1);
again:
- will_compress = 0;
+ pages = NULL;
nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
nr_pages = min_t(unsigned long, nr_pages, BTRFS_MAX_COMPRESSED_PAGES);
@@ -912,78 +914,57 @@ again:
ret = 0;
/*
- * we do compression for mount -o compress and when the
- * inode has not been flagged as nocompress. This flag can
- * change at any time if we discover bad compression ratios.
+ * We do compression for mount -o compress and when the inode has not
+ * been flagged as NOCOMPRESS. This flag can change at any time if we
+ * discover bad compression ratios.
*/
- if (inode_need_compress(inode, start, end)) {
- WARN_ON(pages);
- pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
- if (!pages) {
- /* just bail out to the uncompressed code */
- nr_pages = 0;
- goto cont;
- }
-
- if (inode->defrag_compress)
- compress_type = inode->defrag_compress;
- else if (inode->prop_compress)
- compress_type = inode->prop_compress;
+ if (!inode_need_compress(inode, start, end))
+ goto cleanup_and_bail_uncompressed;
+ pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
+ if (!pages) {
/*
- * we need to call clear_page_dirty_for_io on each
- * page in the range. Otherwise applications with the file
- * mmap'd can wander in and change the page contents while
- * we are compressing them.
- *
- * If the compression fails for any reason, we set the pages
- * dirty again later on.
- *
- * Note that the remaining part is redirtied, the start pointer
- * has moved, the end is the original one.
+ * Memory allocation failure is not a fatal error, we can fall
+ * back to uncompressed code.
*/
- if (!redirty) {
- extent_range_clear_dirty_for_io(&inode->vfs_inode, start, end);
- redirty = 1;
- }
+ goto cleanup_and_bail_uncompressed;
+ }
- /* Compression level is applied here and only here */
- ret = btrfs_compress_pages(
- compress_type | (fs_info->compress_level << 4),
- mapping, start,
- pages,
- &nr_pages,
- &total_in,
- &total_compressed);
+ if (inode->defrag_compress)
+ compress_type = inode->defrag_compress;
+ else if (inode->prop_compress)
+ compress_type = inode->prop_compress;
+
+ /* Compression level is applied here. */
+ ret = btrfs_compress_pages(compress_type | (fs_info->compress_level << 4),
+ mapping, start, pages, &nr_pages, &total_in,
+ &total_compressed);
+ if (ret)
+ goto mark_incompressible;
- if (!ret) {
- unsigned long offset = offset_in_page(total_compressed);
- struct page *page = pages[nr_pages - 1];
+ /*
+ * Zero the tail end of the last page, as we might be sending it down
+ * to disk.
+ */
+ poff = offset_in_page(total_compressed);
+ if (poff)
+ memzero_page(pages[nr_pages - 1], poff, PAGE_SIZE - poff);
- /* zero the tail end of the last page, we might be
- * sending it down to disk
- */
- if (offset)
- memzero_page(page, offset, PAGE_SIZE - offset);
- will_compress = 1;
- }
- }
-cont:
/*
+ * Try to create an inline extent.
+ *
+ * If we didn't compress the entire range, try to create an uncompressed
+ * inline extent, else a compressed one.
+ *
* Check cow_file_range() for why we don't even try to create inline
- * extent for subpage case.
+ * extent for the subpage case.
*/
if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
- /* lets try to make an inline extent */
- if (ret || total_in < actual_end) {
- /* we didn't compress the entire range, try
- * to make an uncompressed inline extent.
- */
- ret = cow_file_range_inline(inode, actual_end,
- 0, BTRFS_COMPRESS_NONE,
- NULL, false);
+ if (total_in < actual_end) {
+ ret = cow_file_range_inline(inode, actual_end, 0,
+ BTRFS_COMPRESS_NONE, NULL,
+ false);
} else {
- /* try making a compressed inline extent */
ret = cow_file_range_inline(inode, actual_end,
total_compressed,
compress_type, pages,
@@ -1013,99 +994,52 @@ cont:
PAGE_UNLOCK |
PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
-
- /*
- * Ensure we only free the compressed pages if we have
- * them allocated, as we can still reach here with
- * inode_need_compress() == false.
- */
- if (pages) {
- for (i = 0; i < nr_pages; i++) {
- WARN_ON(pages[i]->mapping);
- put_page(pages[i]);
- }
- kfree(pages);
- }
- return 0;
+ goto free_pages;
}
}
- if (will_compress) {
- /*
- * we aren't doing an inline extent round the compressed size
- * up to a block size boundary so the allocator does sane
- * things
- */
- total_compressed = ALIGN(total_compressed, blocksize);
+ /*
+ * We aren't doing an inline extent. Round the compressed size up to a
+ * block size boundary so the allocator does sane things.
+ */
+ total_compressed = ALIGN(total_compressed, blocksize);
- /*
- * one last check to make sure the compression is really a
- * win, compare the page count read with the blocks on disk,
- * compression must free at least one sector size
- */
- total_in = round_up(total_in, fs_info->sectorsize);
- if (total_compressed + blocksize <= total_in) {
- compressed_extents++;
+ /*
+ * One last check to make sure the compression is really a win, compare
+ * the page count read with the blocks on disk, compression must free at
+ * least one sector.
+ */
+ total_in = round_up(total_in, fs_info->sectorsize);
+ if (total_compressed + blocksize > total_in)
+ goto mark_incompressible;
- /*
- * The async work queues will take care of doing actual
- * allocation on disk for these compressed pages, and
- * will submit them to the elevator.
- */
- add_async_extent(async_chunk, start, total_in,
- total_compressed, pages, nr_pages,
- compress_type);
-
- if (start + total_in < end) {
- start += total_in;
- pages = NULL;
- cond_resched();
- goto again;
- }
- return compressed_extents;
- }
+ /*
+ * The async work queues will take care of doing actual allocation on
+ * disk for these compressed pages, and will submit the bios.
+ */
+ add_async_extent(async_chunk, start, total_in, total_compressed, pages,
+ nr_pages, compress_type);
+ if (start + total_in < end) {
+ start += total_in;
+ cond_resched();
+ goto again;
}
+ return;
+
+mark_incompressible:
+ if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) && !inode->prop_compress)
+ inode->flags |= BTRFS_INODE_NOCOMPRESS;
+cleanup_and_bail_uncompressed:
+ add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
+ BTRFS_COMPRESS_NONE);
+free_pages:
if (pages) {
- /*
- * the compression code ran but failed to make things smaller,
- * free any pages it allocated and our page pointer array
- */
for (i = 0; i < nr_pages; i++) {
WARN_ON(pages[i]->mapping);
put_page(pages[i]);
}
kfree(pages);
- pages = NULL;
- total_compressed = 0;
- nr_pages = 0;
-
- /* flag the file so we don't compress in the future */
- if (!btrfs_test_opt(fs_info, FORCE_COMPRESS) &&
- !(inode->prop_compress)) {
- inode->flags |= BTRFS_INODE_NOCOMPRESS;
- }
- }
-cleanup_and_bail_uncompressed:
- /*
- * No compression, but we still need to write the pages in the file
- * we've been given so far. redirty the locked page if it corresponds
- * to our extent and set things up for the async work queue to run
- * cow_file_range to do the normal delalloc dance.
- */
- if (async_chunk->locked_page &&
- (page_offset(async_chunk->locked_page) >= start &&
- page_offset(async_chunk->locked_page)) <= end) {
- __set_page_dirty_nobuffers(async_chunk->locked_page);
- /* unlocked later on in the async handlers */
}
-
- if (redirty)
- extent_range_redirty_for_io(&inode->vfs_inode, start, end);
- add_async_extent(async_chunk, start, end - start + 1, 0, NULL, 0,
- BTRFS_COMPRESS_NONE);
- compressed_extents++;
-
- return compressed_extents;
}
static void free_async_extent_pages(struct async_extent *async_extent)
@@ -1124,14 +1058,12 @@ static void free_async_extent_pages(struct async_extent *async_extent)
async_extent->pages = NULL;
}
-static int submit_uncompressed_range(struct btrfs_inode *inode,
- struct async_extent *async_extent,
- struct page *locked_page)
+static void submit_uncompressed_range(struct btrfs_inode *inode,
+ struct async_extent *async_extent,
+ struct page *locked_page)
{
u64 start = async_extent->start;
u64 end = async_extent->start + async_extent->ram_size - 1;
- unsigned long nr_written = 0;
- int page_started = 0;
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
@@ -1140,45 +1072,33 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
.no_cgroup_owner = 1,
};
- /*
- * Call cow_file_range() to run the delalloc range directly, since we
- * won't go to NOCOW or async path again.
- *
- * Also we call cow_file_range() with @unlock_page == 0, so that we
- * can directly submit them without interruption.
- */
- ret = cow_file_range(inode, locked_page, start, end, &page_started,
- &nr_written, 0, NULL);
- /* Inline extent inserted, page gets unlocked and everything is done */
- if (page_started)
- return 0;
-
+ wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
+ ret = run_delalloc_cow(inode, locked_page, start, end, &wbc, false);
+ wbc_detach_inode(&wbc);
if (ret < 0) {
btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
if (locked_page) {
const u64 page_start = page_offset(locked_page);
- const u64 page_end = page_start + PAGE_SIZE - 1;
set_page_writeback(locked_page);
end_page_writeback(locked_page);
- end_extent_writepage(locked_page, ret, page_start, page_end);
+ btrfs_mark_ordered_io_finished(inode, locked_page,
+ page_start, PAGE_SIZE,
+ !ret);
+ btrfs_page_clear_uptodate(inode->root->fs_info,
+ locked_page, page_start,
+ PAGE_SIZE);
+ mapping_set_error(locked_page->mapping, ret);
unlock_page(locked_page);
}
- return ret;
}
-
- /* All pages will be unlocked, including @locked_page */
- wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
- ret = extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
- wbc_detach_inode(&wbc);
- return ret;
}
-static int submit_one_async_extent(struct btrfs_inode *inode,
- struct async_chunk *async_chunk,
- struct async_extent *async_extent,
- u64 *alloc_hint)
+static void submit_one_async_extent(struct async_chunk *async_chunk,
+ struct async_extent *async_extent,
+ u64 *alloc_hint)
{
+ struct btrfs_inode *inode = async_chunk->inode;
struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1206,9 +1126,8 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
}
lock_extent(io_tree, start, end, NULL);
- /* We have fall back to uncompressed write */
- if (!async_extent->pages) {
- ret = submit_uncompressed_range(inode, async_extent, locked_page);
+ if (async_extent->compress_type == BTRFS_COMPRESS_NONE) {
+ submit_uncompressed_range(inode, async_extent, locked_page);
goto done;
}
@@ -1217,7 +1136,6 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
async_extent->compressed_size,
0, *alloc_hint, &ins, 1, 1);
if (ret) {
- free_async_extent_pages(async_extent);
/*
* Here we used to try again by going back to non-compressed
* path for ENOSPC. But we can't reserve space even for
@@ -1272,7 +1190,7 @@ done:
if (async_chunk->blkcg_css)
kthread_associate_blkcg(NULL);
kfree(async_extent);
- return ret;
+ return;
out_free_reserve:
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1286,39 +1204,13 @@ out_free:
PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
free_async_extent_pages(async_extent);
- goto done;
-}
-
-/*
- * Phase two of compressed writeback. This is the ordered portion of the code,
- * which only gets called in the order the work was queued. We walk all the
- * async extents created by compress_file_range and send them down to the disk.
- */
-static noinline void submit_compressed_extents(struct async_chunk *async_chunk)
-{
- struct btrfs_inode *inode = async_chunk->inode;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct async_extent *async_extent;
- u64 alloc_hint = 0;
- int ret = 0;
-
- while (!list_empty(&async_chunk->extents)) {
- u64 extent_start;
- u64 ram_size;
-
- async_extent = list_entry(async_chunk->extents.next,
- struct async_extent, list);
- list_del(&async_extent->list);
- extent_start = async_extent->start;
- ram_size = async_extent->ram_size;
-
- ret = submit_one_async_extent(inode, async_chunk, async_extent,
- &alloc_hint);
- btrfs_debug(fs_info,
+ if (async_chunk->blkcg_css)
+ kthread_associate_blkcg(NULL);
+ btrfs_debug(fs_info,
"async extent submission failed root=%lld inode=%llu start=%llu len=%llu ret=%d",
- inode->root->root_key.objectid,
- btrfs_ino(inode), extent_start, ram_size, ret);
- }
+ root->root_key.objectid, btrfs_ino(inode), start,
+ async_extent->ram_size, ret);
+ kfree(async_extent);
}
static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
@@ -1362,25 +1254,18 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
* locked_page is the page that writepage had locked already. We use
* it to make sure we don't do extra locks or unlocks.
*
- * *page_started is set to one if we unlock locked_page and do everything
- * required to start IO on it. It may be clean and already done with
- * IO when we return.
- *
- * When unlock == 1, we unlock the pages in successfully allocated regions.
- * When unlock == 0, we leave them locked for writing them out.
+ * When this function fails, it unlocks all pages except @locked_page.
*
- * However, we unlock all the pages except @locked_page in case of failure.
+ * When this function successfully creates an inline extent, it returns 1 and
+ * unlocks all pages including locked_page and starts I/O on them.
+ * (In reality inline extents are limited to a single page, so locked_page is
+ * the only page handled anyway).
*
- * In summary, page locking state will be as follow:
+ * When this function succeed and creates a normal extent, the page locking
+ * status depends on the passed in flags:
*
- * - page_started == 1 (return value)
- * - All the pages are unlocked. IO is started.
- * - Note that this can happen only on success
- * - unlock == 1
- * - All the pages except @locked_page are unlocked in any case
- * - unlock == 0
- * - On success, all the pages are locked for writing out them
- * - On failure, all the pages except @locked_page are unlocked
+ * - If @keep_locked is set, all pages are kept locked.
+ * - Else all pages except for @locked_page are unlocked.
*
* When a failure happens in the second or later iteration of the
* while-loop, the ordered extents created in previous iterations are kept
@@ -1389,10 +1274,9 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
* example.
*/
static noinline int cow_file_range(struct btrfs_inode *inode,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written, int unlock,
- u64 *done_offset)
+ struct page *locked_page, u64 start, u64 end,
+ u64 *done_offset,
+ bool keep_locked, bool no_inline)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -1431,7 +1315,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
* This means we can trigger inline extent even if we didn't want to.
* So here we skip inline extent creation completely.
*/
- if (start == 0 && fs_info->sectorsize == PAGE_SIZE) {
+ if (start == 0 && fs_info->sectorsize == PAGE_SIZE && !no_inline) {
u64 actual_end = min_t(u64, i_size_read(&inode->vfs_inode),
end + 1);
@@ -1451,9 +1335,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
PAGE_START_WRITEBACK | PAGE_END_WRITEBACK);
- *nr_written = *nr_written +
- (end - start + PAGE_SIZE) / PAGE_SIZE;
- *page_started = 1;
/*
* locked_page is locked by the caller of
* writepage_delalloc(), not locked by
@@ -1463,11 +1344,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
* as it doesn't have any subpage::writers recorded.
*
* Here we manually unlock the page, since the caller
- * can't use page_started to determine if it's an
- * inline extent or a compressed extent.
+ * can't determine if it's an inline extent or a
+ * compressed extent.
*/
unlock_page(locked_page);
- goto out;
+ ret = 1;
+ goto done;
} else if (ret < 0) {
goto out_unlock;
}
@@ -1498,6 +1380,31 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
min_alloc_size, 0, alloc_hint,
&ins, 1, 1);
+ if (ret == -EAGAIN) {
+ /*
+ * btrfs_reserve_extent only returns -EAGAIN for zoned
+ * file systems, which is an indication that there are
+ * no active zones to allocate from at the moment.
+ *
+ * If this is the first loop iteration, wait for at
+ * least one zone to finish before retrying the
+ * allocation. Otherwise ask the caller to write out
+ * the already allocated blocks before coming back to
+ * us, or return -ENOSPC if it can't handle retries.
+ */
+ ASSERT(btrfs_is_zoned(fs_info));
+ if (start == orig_start) {
+ wait_on_bit_io(&inode->root->fs_info->flags,
+ BTRFS_FS_NEED_ZONE_FINISH,
+ TASK_UNINTERRUPTIBLE);
+ continue;
+ }
+ if (done_offset) {
+ *done_offset = start - 1;
+ return 0;
+ }
+ ret = -ENOSPC;
+ }
if (ret < 0)
goto out_unlock;
cur_alloc_size = ins.offset;
@@ -1558,7 +1465,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
* Do set the Ordered (Private2) bit so we know this page was
* properly setup for writepage.
*/
- page_ops = unlock ? PAGE_UNLOCK : 0;
+ page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
page_ops |= PAGE_SET_ORDERED;
extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
@@ -1581,7 +1488,9 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
if (ret)
goto out_unlock;
}
-out:
+done:
+ if (done_offset)
+ *done_offset = end;
return ret;
out_drop_extent_cache:
@@ -1591,21 +1500,6 @@ out_reserve:
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_unlock:
/*
- * If done_offset is non-NULL and ret == -EAGAIN, we expect the
- * caller to write out the successfully allocated region and retry.
- */
- if (done_offset && ret == -EAGAIN) {
- if (orig_start < start)
- *done_offset = start - 1;
- else
- *done_offset = start;
- return ret;
- } else if (ret == -EAGAIN) {
- /* Convert to -ENOSPC since the caller cannot retry. */
- ret = -ENOSPC;
- }
-
- /*
* Now, we have three regions to clean up:
*
* |-------(1)----|---(2)---|-------------(3)----------|
@@ -1627,10 +1521,10 @@ out_unlock:
* EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
* function.
*
- * However, in case of unlock == 0, we still need to unlock the pages
+ * However, in case of @keep_locked, we still need to unlock the pages
* (except @locked_page) to ensure all the pages are unlocked.
*/
- if (!unlock && orig_start < start) {
+ if (keep_locked && orig_start < start) {
if (!locked_page)
mapping_set_error(inode->vfs_inode.i_mapping, ret);
extent_clear_unlock_delalloc(inode, orig_start, start - 1,
@@ -1654,8 +1548,6 @@ out_unlock:
clear_bits,
page_ops);
start += cur_alloc_size;
- if (start >= end)
- return ret;
}
/*
@@ -1664,50 +1556,37 @@ out_unlock:
* space_info's bytes_may_use counter, reserved in
* btrfs_check_data_free_space().
*/
- extent_clear_unlock_delalloc(inode, start, end, locked_page,
- clear_bits | EXTENT_CLEAR_DATA_RESV,
- page_ops);
- return ret;
-}
-
-/*
- * work queue call back to started compression on a file and pages
- */
-static noinline void async_cow_start(struct btrfs_work *work)
-{
- struct async_chunk *async_chunk;
- int compressed_extents;
-
- async_chunk = container_of(work, struct async_chunk, work);
-
- compressed_extents = compress_file_range(async_chunk);
- if (compressed_extents == 0) {
- btrfs_add_delayed_iput(async_chunk->inode);
- async_chunk->inode = NULL;
+ if (start < end) {
+ clear_bits |= EXTENT_CLEAR_DATA_RESV;
+ extent_clear_unlock_delalloc(inode, start, end, locked_page,
+ clear_bits, page_ops);
}
+ return ret;
}
/*
- * work queue call back to submit previously compressed pages
+ * Phase two of compressed writeback. This is the ordered portion of the code,
+ * which only gets called in the order the work was queued. We walk all the
+ * async extents created by compress_file_range and send them down to the disk.
*/
-static noinline void async_cow_submit(struct btrfs_work *work)
+static noinline void submit_compressed_extents(struct btrfs_work *work)
{
struct async_chunk *async_chunk = container_of(work, struct async_chunk,
work);
struct btrfs_fs_info *fs_info = btrfs_work_owner(work);
+ struct async_extent *async_extent;
unsigned long nr_pages;
+ u64 alloc_hint = 0;
nr_pages = (async_chunk->end - async_chunk->start + PAGE_SIZE) >>
PAGE_SHIFT;
- /*
- * ->inode could be NULL if async_chunk_start has failed to compress,
- * in which case we don't have anything to submit, yet we need to
- * always adjust ->async_delalloc_pages as its paired with the init
- * happening in run_delalloc_compressed
- */
- if (async_chunk->inode)
- submit_compressed_extents(async_chunk);
+ while (!list_empty(&async_chunk->extents)) {
+ async_extent = list_entry(async_chunk->extents.next,
+ struct async_extent, list);
+ list_del(&async_extent->list);
+ submit_one_async_extent(async_chunk, async_extent, &alloc_hint);
+ }
/* atomic_sub_return implies a barrier */
if (atomic_sub_return(nr_pages, &fs_info->async_delalloc_pages) <
@@ -1721,8 +1600,7 @@ static noinline void async_cow_free(struct btrfs_work *work)
struct async_cow *async_cow;
async_chunk = container_of(work, struct async_chunk, work);
- if (async_chunk->inode)
- btrfs_add_delayed_iput(async_chunk->inode);
+ btrfs_add_delayed_iput(async_chunk->inode);
if (async_chunk->blkcg_css)
css_put(async_chunk->blkcg_css);
@@ -1732,10 +1610,8 @@ static noinline void async_cow_free(struct btrfs_work *work)
}
static bool run_delalloc_compressed(struct btrfs_inode *inode,
- struct writeback_control *wbc,
- struct page *locked_page,
- u64 start, u64 end, int *page_started,
- unsigned long *nr_written)
+ struct page *locked_page, u64 start,
+ u64 end, struct writeback_control *wbc)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
@@ -1809,65 +1685,42 @@ static bool run_delalloc_compressed(struct btrfs_inode *inode,
async_chunk[i].blkcg_css = NULL;
}
- btrfs_init_work(&async_chunk[i].work, async_cow_start,
- async_cow_submit, async_cow_free);
+ btrfs_init_work(&async_chunk[i].work, compress_file_range,
+ submit_compressed_extents, async_cow_free);
nr_pages = DIV_ROUND_UP(cur_end - start, PAGE_SIZE);
atomic_add(nr_pages, &fs_info->async_delalloc_pages);
btrfs_queue_work(fs_info->delalloc_workers, &async_chunk[i].work);
- *nr_written += nr_pages;
start = cur_end + 1;
}
- *page_started = 1;
return true;
}
-static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
- struct page *locked_page, u64 start,
- u64 end, int *page_started,
- unsigned long *nr_written,
- struct writeback_control *wbc)
+/*
+ * Run the delalloc range from start to end, and write back any dirty pages
+ * covered by the range.
+ */
+static noinline int run_delalloc_cow(struct btrfs_inode *inode,
+ struct page *locked_page, u64 start,
+ u64 end, struct writeback_control *wbc,
+ bool pages_dirty)
{
u64 done_offset = end;
int ret;
- bool locked_page_done = false;
while (start <= end) {
- ret = cow_file_range(inode, locked_page, start, end, page_started,
- nr_written, 0, &done_offset);
- if (ret && ret != -EAGAIN)
+ ret = cow_file_range(inode, locked_page, start, end, &done_offset,
+ true, false);
+ if (ret)
return ret;
-
- if (*page_started) {
- ASSERT(ret == 0);
- return 0;
- }
-
- if (ret == 0)
- done_offset = end;
-
- if (done_offset == start) {
- wait_on_bit_io(&inode->root->fs_info->flags,
- BTRFS_FS_NEED_ZONE_FINISH,
- TASK_UNINTERRUPTIBLE);
- continue;
- }
-
- if (!locked_page_done) {
- __set_page_dirty_nobuffers(locked_page);
- account_page_redirty(locked_page);
- }
- locked_page_done = true;
- extent_write_locked_range(&inode->vfs_inode, start, done_offset,
- wbc);
+ extent_write_locked_range(&inode->vfs_inode, locked_page, start,
+ done_offset, wbc, pages_dirty);
start = done_offset + 1;
}
- *page_started = 1;
-
- return 0;
+ return 1;
}
static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
@@ -1894,8 +1747,7 @@ static noinline int csum_exist_in_range(struct btrfs_fs_info *fs_info,
}
static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
- const u64 start, const u64 end,
- int *page_started, unsigned long *nr_written)
+ const u64 start, const u64 end)
{
const bool is_space_ino = btrfs_is_free_space_inode(inode);
const bool is_reloc_ino = btrfs_is_data_reloc_root(inode->root);
@@ -1903,6 +1755,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
struct extent_io_tree *io_tree = &inode->io_tree;
u64 range_start = start;
u64 count;
+ int ret;
/*
* If EXTENT_NORESERVE is set it means that when the buffered write was
@@ -1955,8 +1808,14 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
NULL);
}
- return cow_file_range(inode, locked_page, start, end, page_started,
- nr_written, 1, NULL);
+ /*
+ * Don't try to create inline extents, as a mix of inline extent that
+ * is written out and unlocked directly and a normal NOCOW extent
+ * doesn't work.
+ */
+ ret = cow_file_range(inode, locked_page, start, end, NULL, false, true);
+ ASSERT(ret != 1);
+ return ret;
}
struct can_nocow_file_extent_args {
@@ -2105,9 +1964,7 @@ static int can_nocow_file_extent(struct btrfs_path *path,
*/
static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
struct page *locked_page,
- const u64 start, const u64 end,
- int *page_started,
- unsigned long *nr_written)
+ const u64 start, const u64 end)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *root = inode->root;
@@ -2117,25 +1974,26 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
int ret;
bool check_prev = true;
u64 ino = btrfs_ino(inode);
- struct btrfs_block_group *bg;
- bool nocow = false;
struct can_nocow_file_extent_args nocow_args = { 0 };
+ /*
+ * Normally on a zoned device we're only doing COW writes, but in case
+ * of relocation on a zoned filesystem serializes I/O so that we're only
+ * writing sequentially and can end up here as well.
+ */
+ ASSERT(!btrfs_is_zoned(fs_info) || btrfs_is_data_reloc_root(root));
+
path = btrfs_alloc_path();
if (!path) {
- extent_clear_unlock_delalloc(inode, start, end, locked_page,
- EXTENT_LOCKED | EXTENT_DELALLOC |
- EXTENT_DO_ACCOUNTING |
- EXTENT_DEFRAG, PAGE_UNLOCK |
- PAGE_START_WRITEBACK |
- PAGE_END_WRITEBACK);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto error;
}
nocow_args.end = end;
nocow_args.writeback_path = true;
while (1) {
+ struct btrfs_block_group *nocow_bg = NULL;
struct btrfs_ordered_extent *ordered;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
@@ -2146,8 +2004,6 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
int extent_type;
bool is_prealloc;
- nocow = false;
-
ret = btrfs_lookup_file_extent(NULL, root, path, ino,
cur_offset, 0);
if (ret < 0)
@@ -2172,11 +2028,8 @@ next_slot:
leaf = path->nodes[0];
if (path->slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root, path);
- if (ret < 0) {
- if (cow_start != (u64)-1)
- cur_offset = cow_start;
+ if (ret < 0)
goto error;
- }
if (ret > 0)
break;
leaf = path->nodes[0];
@@ -2209,7 +2062,7 @@ next_slot:
if (found_key.offset > cur_offset) {
extent_end = found_key.offset;
extent_type = 0;
- goto out_check;
+ goto must_cow;
}
/*
@@ -2239,24 +2092,22 @@ next_slot:
nocow_args.start = cur_offset;
ret = can_nocow_file_extent(path, &found_key, inode, &nocow_args);
- if (ret < 0) {
- if (cow_start != (u64)-1)
- cur_offset = cow_start;
+ if (ret < 0)
goto error;
- } else if (ret == 0) {
- goto out_check;
- }
+ if (ret == 0)
+ goto must_cow;
ret = 0;
- bg = btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr);
- if (bg)
- nocow = true;
-out_check:
- /*
- * If nocow is false then record the beginning of the range
- * that needs to be COWed
- */
- if (!nocow) {
+ nocow_bg = btrfs_inc_nocow_writers(fs_info, nocow_args.disk_bytenr);
+ if (!nocow_bg) {
+must_cow:
+ /*
+ * If we can't perform NOCOW writeback for the range,
+ * then record the beginning of the range that needs to
+ * be COWed. It will be written out before the next
+ * NOCOW range if we find one, or when exiting this
+ * loop.
+ */
if (cow_start == (u64)-1)
cow_start = cur_offset;
cur_offset = extent_end;
@@ -2275,11 +2126,12 @@ out_check:
*/
if (cow_start != (u64)-1) {
ret = fallback_to_cow(inode, locked_page,
- cow_start, found_key.offset - 1,
- page_started, nr_written);
- if (ret)
- goto error;
+ cow_start, found_key.offset - 1);
cow_start = (u64)-1;
+ if (ret) {
+ btrfs_dec_nocow_writers(nocow_bg);
+ goto error;
+ }
}
nocow_end = cur_offset + nocow_args.num_bytes - 1;
@@ -2296,6 +2148,7 @@ out_check:
ram_bytes, BTRFS_COMPRESS_NONE,
BTRFS_ORDERED_PREALLOC);
if (IS_ERR(em)) {
+ btrfs_dec_nocow_writers(nocow_bg);
ret = PTR_ERR(em);
goto error;
}
@@ -2309,6 +2162,7 @@ out_check:
? (1 << BTRFS_ORDERED_PREALLOC)
: (1 << BTRFS_ORDERED_NOCOW),
BTRFS_COMPRESS_NONE);
+ btrfs_dec_nocow_writers(nocow_bg);
if (IS_ERR(ordered)) {
if (is_prealloc) {
btrfs_drop_extent_map_range(inode, cur_offset,
@@ -2318,11 +2172,6 @@ out_check:
goto error;
}
- if (nocow) {
- btrfs_dec_nocow_writers(bg);
- nocow = false;
- }
-
if (btrfs_is_data_reloc_root(root))
/*
* Error handled later, as we must prevent
@@ -2357,17 +2206,24 @@ out_check:
if (cow_start != (u64)-1) {
cur_offset = end;
- ret = fallback_to_cow(inode, locked_page, cow_start, end,
- page_started, nr_written);
+ ret = fallback_to_cow(inode, locked_page, cow_start, end);
+ cow_start = (u64)-1;
if (ret)
goto error;
}
-error:
- if (nocow)
- btrfs_dec_nocow_writers(bg);
+ btrfs_free_path(path);
+ return 0;
- if (ret && cur_offset < end)
+error:
+ /*
+ * If an error happened while a COW region is outstanding, cur_offset
+ * needs to be reset to cow_start to ensure the COW region is unlocked
+ * as well.
+ */
+ if (cow_start != (u64)-1)
+ cur_offset = cow_start;
+ if (cur_offset < end)
extent_clear_unlock_delalloc(inode, cur_offset, end,
locked_page, EXTENT_LOCKED |
EXTENT_DELALLOC | EXTENT_DEFRAG |
@@ -2395,49 +2251,37 @@ static bool should_nocow(struct btrfs_inode *inode, u64 start, u64 end)
* being touched for the first time.
*/
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
- u64 start, u64 end, int *page_started, unsigned long *nr_written,
- struct writeback_control *wbc)
+ u64 start, u64 end, struct writeback_control *wbc)
{
- int ret = 0;
const bool zoned = btrfs_is_zoned(inode->root->fs_info);
+ int ret;
/*
- * The range must cover part of the @locked_page, or the returned
- * @page_started can confuse the caller.
+ * The range must cover part of the @locked_page, or a return of 1
+ * can confuse the caller.
*/
ASSERT(!(end <= page_offset(locked_page) ||
start >= page_offset(locked_page) + PAGE_SIZE));
if (should_nocow(inode, start, end)) {
- /*
- * Normally on a zoned device we're only doing COW writes, but
- * in case of relocation on a zoned filesystem we have taken
- * precaution, that we're only writing sequentially. It's safe
- * to use run_delalloc_nocow() here, like for regular
- * preallocated inodes.
- */
- ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
- ret = run_delalloc_nocow(inode, locked_page, start, end,
- page_started, nr_written);
+ ret = run_delalloc_nocow(inode, locked_page, start, end);
goto out;
}
if (btrfs_inode_can_compress(inode) &&
inode_need_compress(inode, start, end) &&
- run_delalloc_compressed(inode, wbc, locked_page, start,
- end, page_started, nr_written))
- goto out;
+ run_delalloc_compressed(inode, locked_page, start, end, wbc))
+ return 1;
if (zoned)
- ret = run_delalloc_zoned(inode, locked_page, start, end,
- page_started, nr_written, wbc);
+ ret = run_delalloc_cow(inode, locked_page, start, end, wbc,
+ true);
else
- ret = cow_file_range(inode, locked_page, start, end,
- page_started, nr_written, 1, NULL);
+ ret = cow_file_range(inode, locked_page, start, end, NULL,
+ false, false);
out:
- ASSERT(ret <= 0);
- if (ret)
+ if (ret < 0)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
end - start + 1);
return ret;
@@ -2840,23 +2684,19 @@ struct btrfs_writepage_fixup {
static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
{
- struct btrfs_writepage_fixup *fixup;
+ struct btrfs_writepage_fixup *fixup =
+ container_of(work, struct btrfs_writepage_fixup, work);
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
- struct page *page;
- struct btrfs_inode *inode;
- u64 page_start;
- u64 page_end;
+ struct page *page = fixup->page;
+ struct btrfs_inode *inode = fixup->inode;
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ u64 page_start = page_offset(page);
+ u64 page_end = page_offset(page) + PAGE_SIZE - 1;
int ret = 0;
bool free_delalloc_space = true;
- fixup = container_of(work, struct btrfs_writepage_fixup, work);
- page = fixup->page;
- inode = fixup->inode;
- page_start = page_offset(page);
- page_end = page_offset(page) + PAGE_SIZE - 1;
-
/*
* This is similar to page_mkwrite, we need to reserve the space before
* we take the page lock.
@@ -2949,10 +2789,12 @@ out_page:
* to reflect the errors and clean the page.
*/
mapping_set_error(page->mapping, ret);
- end_extent_writepage(page, ret, page_start, page_end);
+ btrfs_mark_ordered_io_finished(inode, page, page_start,
+ PAGE_SIZE, !ret);
+ btrfs_page_clear_uptodate(fs_info, page, page_start, PAGE_SIZE);
clear_page_dirty_for_io(page);
}
- btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
+ btrfs_page_clear_checked(fs_info, page, page_start, PAGE_SIZE);
unlock_page(page);
put_page(page);
kfree(fixup);
@@ -3359,6 +3201,13 @@ out:
btrfs_free_reserved_extent(fs_info,
ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes, 1);
+ /*
+ * Actually free the qgroup rsv which was released when
+ * the ordered extent was created.
+ */
+ btrfs_qgroup_free_refroot(fs_info, inode->root->root_key.objectid,
+ ordered_extent->qgroup_rsv,
+ BTRFS_QGROUP_RSV_DATA);
}
}
@@ -3384,15 +3233,6 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
return btrfs_finish_one_ordered(ordered);
}
-void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
- struct page *page, u64 start,
- u64 end, bool uptodate)
-{
- trace_btrfs_writepage_end_io_hook(inode, start, end, uptodate);
-
- btrfs_mark_ordered_io_finished(inode, page, start, end + 1 - start, uptodate);
-}
-
/*
* Verify the checksum for a single sector without any extra action that depend
* on the type of I/O.
@@ -3482,15 +3322,21 @@ zeroit:
void btrfs_add_delayed_iput(struct btrfs_inode *inode)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ unsigned long flags;
if (atomic_add_unless(&inode->vfs_inode.i_count, -1, 1))
return;
atomic_inc(&fs_info->nr_delayed_iputs);
- spin_lock(&fs_info->delayed_iput_lock);
+ /*
+ * Need to be irq safe here because we can be called from either an irq
+ * context (see bio.c and btrfs_put_ordered_extent()) or a non-irq
+ * context.
+ */
+ spin_lock_irqsave(&fs_info->delayed_iput_lock, flags);
ASSERT(list_empty(&inode->delayed_iput));
list_add_tail(&inode->delayed_iput, &fs_info->delayed_iputs);
- spin_unlock(&fs_info->delayed_iput_lock);
+ spin_unlock_irqrestore(&fs_info->delayed_iput_lock, flags);
if (!test_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags))
wake_up_process(fs_info->cleaner_kthread);
}
@@ -3499,37 +3345,46 @@ static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode)
{
list_del_init(&inode->delayed_iput);
- spin_unlock(&fs_info->delayed_iput_lock);
+ spin_unlock_irq(&fs_info->delayed_iput_lock);
iput(&inode->vfs_inode);
if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
wake_up(&fs_info->delayed_iputs_wait);
- spin_lock(&fs_info->delayed_iput_lock);
+ spin_lock_irq(&fs_info->delayed_iput_lock);
}
static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
struct btrfs_inode *inode)
{
if (!list_empty(&inode->delayed_iput)) {
- spin_lock(&fs_info->delayed_iput_lock);
+ spin_lock_irq(&fs_info->delayed_iput_lock);
if (!list_empty(&inode->delayed_iput))
run_delayed_iput_locked(fs_info, inode);
- spin_unlock(&fs_info->delayed_iput_lock);
+ spin_unlock_irq(&fs_info->delayed_iput_lock);
}
}
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
{
-
- spin_lock(&fs_info->delayed_iput_lock);
+ /*
+ * btrfs_put_ordered_extent() can run in irq context (see bio.c), which
+ * calls btrfs_add_delayed_iput() and that needs to lock
+ * fs_info->delayed_iput_lock. So we need to disable irqs here to
+ * prevent a deadlock.
+ */
+ spin_lock_irq(&fs_info->delayed_iput_lock);
while (!list_empty(&fs_info->delayed_iputs)) {
struct btrfs_inode *inode;
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
run_delayed_iput_locked(fs_info, inode);
- cond_resched_lock(&fs_info->delayed_iput_lock);
+ if (need_resched()) {
+ spin_unlock_irq(&fs_info->delayed_iput_lock);
+ cond_resched();
+ spin_lock_irq(&fs_info->delayed_iput_lock);
+ }
}
- spin_unlock(&fs_info->delayed_iput_lock);
+ spin_unlock_irq(&fs_info->delayed_iput_lock);
}
/*
@@ -3647,9 +3502,16 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
*/
if (found_key.offset == last_objectid) {
+ /*
+ * We found the same inode as before. This means we were
+ * not able to remove its items via eviction triggered
+ * by an iput(). A transaction abort may have happened,
+ * due to -ENOSPC for example, so try to grab the error
+ * that lead to a transaction abort, if any.
+ */
btrfs_err(fs_info,
"Error removing orphan entry, stopping orphan cleanup");
- ret = -EINVAL;
+ ret = BTRFS_FS_ERROR(fs_info) ?: -EINVAL;
goto out;
}
@@ -3659,11 +3521,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
inode = btrfs_iget(fs_info->sb, last_objectid, root);
- ret = PTR_ERR_OR_ZERO(inode);
- if (ret && ret != -ENOENT)
- goto out;
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ inode = NULL;
+ if (ret != -ENOENT)
+ goto out;
+ }
- if (ret == -ENOENT && root == fs_info->tree_root) {
+ if (!inode && root == fs_info->tree_root) {
struct btrfs_root *dead_root;
int is_dead_root = 0;
@@ -3724,17 +3589,17 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* deleted but wasn't. The inode number may have been reused,
* but either way, we can delete the orphan item.
*/
- if (ret == -ENOENT || inode->i_nlink) {
- if (!ret) {
+ if (!inode || inode->i_nlink) {
+ if (inode) {
ret = btrfs_drop_verity_items(BTRFS_I(inode));
iput(inode);
+ inode = NULL;
if (ret)
goto out;
}
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- iput(inode);
goto out;
}
btrfs_debug(fs_info, "auto deleting %Lu",
@@ -3742,10 +3607,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
ret = btrfs_del_orphan_item(trans, root,
found_key.objectid);
btrfs_end_transaction(trans);
- if (ret) {
- iput(inode);
+ if (ret)
goto out;
- }
continue;
}
@@ -3901,8 +3764,8 @@ static int btrfs_read_locked_inode(struct inode *inode,
inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->mtime);
inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->mtime);
- inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, &inode_item->ctime);
- inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, &inode_item->ctime);
+ inode_set_ctime(inode, btrfs_timespec_sec(leaf, &inode_item->ctime),
+ btrfs_timespec_nsec(leaf, &inode_item->ctime));
BTRFS_I(inode)->i_otime.tv_sec =
btrfs_timespec_sec(leaf, &inode_item->otime);
@@ -4073,9 +3936,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
inode->i_mtime.tv_nsec);
btrfs_set_token_timespec_sec(&token, &item->ctime,
- inode->i_ctime.tv_sec);
+ inode_get_ctime(inode).tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->ctime,
- inode->i_ctime.tv_nsec);
+ inode_get_ctime(inode).tv_nsec);
btrfs_set_token_timespec_sec(&token, &item->otime,
BTRFS_I(inode)->i_otime.tv_sec);
@@ -4273,9 +4136,8 @@ err:
btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2);
inode_inc_iversion(&inode->vfs_inode);
inode_inc_iversion(&dir->vfs_inode);
- inode->vfs_inode.i_ctime = current_time(&inode->vfs_inode);
- dir->vfs_inode.i_mtime = inode->vfs_inode.i_ctime;
- dir->vfs_inode.i_ctime = inode->vfs_inode.i_ctime;
+ inode_set_ctime_current(&inode->vfs_inode);
+ dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode);
ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
@@ -4448,8 +4310,7 @@ static int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_i_size_write(dir, dir->vfs_inode.i_size - fname.disk_name.len * 2);
inode_inc_iversion(&dir->vfs_inode);
- dir->vfs_inode.i_mtime = current_time(&dir->vfs_inode);
- dir->vfs_inode.i_ctime = dir->vfs_inode.i_mtime;
+ dir->vfs_inode.i_mtime = inode_set_ctime_current(&dir->vfs_inode);
ret = btrfs_update_inode_fallback(trans, root, dir);
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -4847,9 +4708,6 @@ again:
ret = -ENOMEM;
goto out;
}
- ret = set_page_extent_mapped(page);
- if (ret < 0)
- goto out_unlock;
if (!PageUptodate(page)) {
ret = btrfs_read_folio(NULL, page_folio(page));
@@ -4864,6 +4722,17 @@ again:
goto out_unlock;
}
}
+
+ /*
+ * We unlock the page after the io is completed and then re-lock it
+ * above. release_folio() could have come in between that and cleared
+ * PagePrivate(), but left the page in the mapping. Set the page mapped
+ * here to make sure it's properly set for the subpage stuff.
+ */
+ ret = set_page_extent_mapped(page);
+ if (ret < 0)
+ goto out_unlock;
+
wait_on_page_writeback(page);
lock_extent(io_tree, block_start, block_end, &cached_state);
@@ -5091,8 +4960,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
if (newsize != oldsize) {
inode_inc_iversion(inode);
if (!(mask & (ATTR_CTIME | ATTR_MTIME))) {
- inode->i_mtime = current_time(inode);
- inode->i_ctime = inode->i_mtime;
+ inode->i_mtime = inode_set_ctime_current(inode);
}
}
@@ -5714,11 +5582,11 @@ struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root
return btrfs_iget_path(s, ino, root, NULL);
}
-static struct inode *new_simple_dir(struct super_block *s,
+static struct inode *new_simple_dir(struct inode *dir,
struct btrfs_key *key,
struct btrfs_root *root)
{
- struct inode *inode = new_inode(s);
+ struct inode *inode = new_inode(dir->i_sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -5736,10 +5604,11 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_opflags &= ~IOP_XATTR;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
- inode->i_mtime = current_time(inode);
- inode->i_atime = inode->i_mtime;
- inode->i_ctime = inode->i_mtime;
+ inode->i_mtime = inode_set_ctime_current(inode);
+ inode->i_atime = dir->i_atime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
+ inode->i_uid = dir->i_uid;
+ inode->i_gid = dir->i_gid;
return inode;
}
@@ -5798,7 +5667,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
if (ret != -ENOENT)
inode = ERR_PTR(ret);
else
- inode = new_simple_dir(dir->i_sb, &location, root);
+ inode = new_simple_dir(dir, &location, root);
} else {
inode = btrfs_iget(dir->i_sb, location.objectid, sub_root);
btrfs_put_root(sub_root);
@@ -5849,6 +5718,74 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
}
/*
+ * Find the highest existing sequence number in a directory and then set the
+ * in-memory index_cnt variable to the first free sequence number.
+ */
+static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
+{
+ struct btrfs_root *root = inode->root;
+ struct btrfs_key key, found_key;
+ struct btrfs_path *path;
+ struct extent_buffer *leaf;
+ int ret;
+
+ key.objectid = btrfs_ino(inode);
+ key.type = BTRFS_DIR_INDEX_KEY;
+ key.offset = (u64)-1;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto out;
+ /* FIXME: we should be able to handle this */
+ if (ret == 0)
+ goto out;
+ ret = 0;
+
+ if (path->slots[0] == 0) {
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+ path->slots[0]--;
+
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+
+ if (found_key.objectid != btrfs_ino(inode) ||
+ found_key.type != BTRFS_DIR_INDEX_KEY) {
+ inode->index_cnt = BTRFS_DIR_START_INDEX;
+ goto out;
+ }
+
+ inode->index_cnt = found_key.offset + 1;
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+static int btrfs_get_dir_last_index(struct btrfs_inode *dir, u64 *index)
+{
+ if (dir->index_cnt == (u64)-1) {
+ int ret;
+
+ ret = btrfs_inode_delayed_dir_index_count(dir);
+ if (ret) {
+ ret = btrfs_set_inode_index_count(dir);
+ if (ret)
+ return ret;
+ }
+ }
+
+ *index = dir->index_cnt;
+
+ return 0;
+}
+
+/*
* All this infrastructure exists because dir_emit can fault, and we are holding
* the tree lock when doing readdir. For now just allocate a buffer and copy
* our information into that, and then dir_emit from the buffer. This is
@@ -5860,10 +5797,17 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
static int btrfs_opendir(struct inode *inode, struct file *file)
{
struct btrfs_file_private *private;
+ u64 last_index;
+ int ret;
+
+ ret = btrfs_get_dir_last_index(BTRFS_I(inode), &last_index);
+ if (ret)
+ return ret;
private = kzalloc(sizeof(struct btrfs_file_private), GFP_KERNEL);
if (!private)
return -ENOMEM;
+ private->last_index = last_index;
private->filldir_buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!private->filldir_buf) {
kfree(private);
@@ -5908,8 +5852,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
struct btrfs_key found_key;
struct btrfs_path *path;
void *addr;
- struct list_head ins_list;
- struct list_head del_list;
+ LIST_HEAD(ins_list);
+ LIST_HEAD(del_list);
int ret;
char *name_ptr;
int name_len;
@@ -5928,9 +5872,8 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
addr = private->filldir_buf;
path->reada = READA_FORWARD;
- INIT_LIST_HEAD(&ins_list);
- INIT_LIST_HEAD(&del_list);
- put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
+ put = btrfs_readdir_get_delayed_items(inode, private->last_index,
+ &ins_list, &del_list);
again:
key.type = BTRFS_DIR_INDEX_KEY;
@@ -5948,6 +5891,8 @@ again:
break;
if (found_key.offset < ctx->pos)
continue;
+ if (found_key.offset > private->last_index)
+ break;
if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
continue;
di = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dir_item);
@@ -6063,8 +6008,7 @@ static int btrfs_dirty_inode(struct btrfs_inode *inode)
* This is a copy of file_update_time. We need this so we can return error on
* ENOSPC for updating the inode in the case of file write and mmap writes.
*/
-static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
- int flags)
+static int btrfs_update_time(struct inode *inode, int flags)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
bool dirty = flags & ~S_VERSION;
@@ -6072,69 +6016,11 @@ static int btrfs_update_time(struct inode *inode, struct timespec64 *now,
if (btrfs_root_readonly(root))
return -EROFS;
- if (flags & S_VERSION)
- dirty |= inode_maybe_inc_iversion(inode, dirty);
- if (flags & S_CTIME)
- inode->i_ctime = *now;
- if (flags & S_MTIME)
- inode->i_mtime = *now;
- if (flags & S_ATIME)
- inode->i_atime = *now;
+ dirty = inode_update_timestamps(inode, flags);
return dirty ? btrfs_dirty_inode(BTRFS_I(inode)) : 0;
}
/*
- * find the highest existing sequence number in a directory
- * and then set the in-memory index_cnt variable to reflect
- * free sequence numbers
- */
-static int btrfs_set_inode_index_count(struct btrfs_inode *inode)
-{
- struct btrfs_root *root = inode->root;
- struct btrfs_key key, found_key;
- struct btrfs_path *path;
- struct extent_buffer *leaf;
- int ret;
-
- key.objectid = btrfs_ino(inode);
- key.type = BTRFS_DIR_INDEX_KEY;
- key.offset = (u64)-1;
-
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
- ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
- if (ret < 0)
- goto out;
- /* FIXME: we should be able to handle this */
- if (ret == 0)
- goto out;
- ret = 0;
-
- if (path->slots[0] == 0) {
- inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
- }
-
- path->slots[0]--;
-
- leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
-
- if (found_key.objectid != btrfs_ino(inode) ||
- found_key.type != BTRFS_DIR_INDEX_KEY) {
- inode->index_cnt = BTRFS_DIR_START_INDEX;
- goto out;
- }
-
- inode->index_cnt = found_key.offset + 1;
-out:
- btrfs_free_path(path);
- return ret;
-}
-
-/*
* helper to find a free sequence number in a given directory. This current
* code is very simple, later versions will do smarter things in the btree
*/
@@ -6378,9 +6264,8 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
goto discard;
}
- inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
inode->i_atime = inode->i_mtime;
- inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
/*
@@ -6545,12 +6430,10 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
* log replay procedure is responsible for setting them to their correct
* values (the ones it had when the fsync was done).
*/
- if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) {
- struct timespec64 now = current_time(&parent_inode->vfs_inode);
+ if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags))
+ parent_inode->vfs_inode.i_mtime =
+ inode_set_ctime_current(&parent_inode->vfs_inode);
- parent_inode->vfs_inode.i_mtime = now;
- parent_inode->vfs_inode.i_ctime = now;
- }
ret = btrfs_update_inode(trans, root, parent_inode);
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -6690,7 +6573,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
BTRFS_I(inode)->dir_index = 0ULL;
inc_nlink(inode);
inode_inc_iversion(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
ihold(inode);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
@@ -7849,8 +7732,11 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
if (ret) {
- bbio->bio.bi_status = errno_to_blk_status(ret);
- btrfs_dio_end_io(bbio);
+ btrfs_finish_ordered_extent(dio_data->ordered, NULL,
+ file_offset, dip->bytes,
+ !ret);
+ bio->bi_status = errno_to_blk_status(ret);
+ iomap_dio_bio_end_io(bio);
return;
}
}
@@ -8753,7 +8639,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
spin_lock(&BTRFS_I(inode)->lock);
@@ -8777,7 +8663,6 @@ static int btrfs_rename_exchange(struct inode *old_dir,
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
struct inode *new_inode = new_dentry->d_inode;
struct inode *old_inode = old_dentry->d_inode;
- struct timespec64 ctime = current_time(old_inode);
struct btrfs_rename_ctx old_rename_ctx;
struct btrfs_rename_ctx new_rename_ctx;
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
@@ -8908,12 +8793,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
inode_inc_iversion(new_dir);
inode_inc_iversion(old_inode);
inode_inc_iversion(new_inode);
- old_dir->i_mtime = ctime;
- old_dir->i_ctime = ctime;
- new_dir->i_mtime = ctime;
- new_dir->i_ctime = ctime;
- old_inode->i_ctime = ctime;
- new_inode->i_ctime = ctime;
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
@@ -9177,11 +9057,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
inode_inc_iversion(old_inode);
- old_dir->i_mtime = current_time(old_dir);
- old_dir->i_ctime = old_dir->i_mtime;
- new_dir->i_mtime = old_dir->i_mtime;
- new_dir->i_ctime = old_dir->i_mtime;
- old_inode->i_ctime = old_dir->i_mtime;
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
@@ -9203,7 +9079,6 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (new_inode) {
inode_inc_iversion(new_inode);
- new_inode->i_ctime = current_time(new_inode);
if (unlikely(btrfs_ino(BTRFS_I(new_inode)) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(new_dir), new_dentry);
@@ -9336,14 +9211,11 @@ static int start_delalloc_inodes(struct btrfs_root *root,
struct btrfs_inode *binode;
struct inode *inode;
struct btrfs_delalloc_work *work, *next;
- struct list_head works;
- struct list_head splice;
+ LIST_HEAD(works);
+ LIST_HEAD(splice);
int ret = 0;
bool full_flush = wbc->nr_to_write == LONG_MAX;
- INIT_LIST_HEAD(&works);
- INIT_LIST_HEAD(&splice);
-
mutex_lock(&root->delalloc_mutex);
spin_lock(&root->delalloc_lock);
list_splice_init(&root->delalloc_inodes, &splice);
@@ -9431,14 +9303,12 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
.range_end = LLONG_MAX,
};
struct btrfs_root *root;
- struct list_head splice;
+ LIST_HEAD(splice);
int ret;
if (BTRFS_FS_ERROR(fs_info))
return -EROFS;
- INIT_LIST_HEAD(&splice);
-
mutex_lock(&fs_info->delalloc_root_mutex);
spin_lock(&fs_info->delalloc_root_lock);
list_splice_init(&fs_info->delalloc_roots, &splice);
@@ -9743,7 +9613,7 @@ next:
*alloc_hint = ins.objectid + ins.offset;
inode_inc_iversion(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
(actual_len > inode->i_size) &&
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index a895d105464b..a18ee7b5a166 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -384,7 +384,7 @@ update_flags:
binode->flags = binode_flags;
btrfs_sync_inode_flags_to_i_flags(inode);
inode_inc_iversion(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
out_end_trans:
diff --git a/fs/btrfs/messages.c b/fs/btrfs/messages.c
index 23fc11af498a..7695decc7243 100644
--- a/fs/btrfs/messages.c
+++ b/fs/btrfs/messages.c
@@ -10,14 +10,13 @@
#ifdef CONFIG_PRINTK
#define STATE_STRING_PREFACE ": state "
-#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1)
/*
* Characters to print to indicate error conditions or uncommon filesystem state.
* RO is not an error.
*/
static const char fs_state_chars[] = {
- [BTRFS_FS_STATE_ERROR] = 'E',
[BTRFS_FS_STATE_REMOUNTING] = 'M',
[BTRFS_FS_STATE_RO] = 0,
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
@@ -37,6 +36,11 @@ static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
curr += sizeof(STATE_STRING_PREFACE) - 1;
+ if (BTRFS_FS_ERROR(info)) {
+ *curr++ = 'E';
+ states_printed = true;
+ }
+
for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
@@ -155,7 +159,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
* Today we only save the error info to memory. Long term we'll also
* send it down to the disk.
*/
- set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
+ WRITE_ONCE(fs_info->fs_error, errno);
/* Don't go through full error handling during mount. */
if (!(sb->s_flags & SB_BORN))
@@ -252,12 +256,6 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
}
#endif
-void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
-{
- btrfs_err(fs_info,
-"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
-}
-
#if BITS_PER_LONG == 32
void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
{
diff --git a/fs/btrfs/messages.h b/fs/btrfs/messages.h
index deedc1a168e2..1ae6f8e23e07 100644
--- a/fs/btrfs/messages.h
+++ b/fs/btrfs/messages.h
@@ -181,8 +181,6 @@ do { \
#define ASSERT(expr) (void)(expr)
#endif
-void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info);
-
__printf(5, 6)
__cold
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index a629532283bc..b46ab348e8e5 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -410,6 +410,10 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
unsigned long flags;
u64 cur = file_offset;
+ trace_btrfs_writepage_end_io_hook(inode, file_offset,
+ file_offset + num_bytes - 1,
+ uptodate);
+
spin_lock_irqsave(&tree->lock, flags);
while (cur < file_offset + num_bytes) {
u64 entry_end;
@@ -736,11 +740,9 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len)
{
struct btrfs_root *root;
- struct list_head splice;
+ LIST_HEAD(splice);
u64 done;
- INIT_LIST_HEAD(&splice);
-
mutex_lock(&fs_info->ordered_operations_mutex);
spin_lock(&fs_info->ordered_root_lock);
list_splice_init(&fs_info->ordered_roots, &splice);
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index aa06d9ca911d..0c93439e929f 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -95,8 +95,10 @@ static void print_extent_item(const struct extent_buffer *eb, int slot, int type
int ref_index = 0;
if (unlikely(item_size < sizeof(*ei))) {
- btrfs_print_v0_err(eb->fs_info);
- btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
+ btrfs_err(eb->fs_info,
+ "unexpected extent item size, has %u expect >= %zu",
+ item_size, sizeof(*ei));
+ btrfs_handle_fs_error(eb->fs_info, -EUCLEAN, NULL);
}
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
@@ -291,10 +293,6 @@ void btrfs_print_leaf(const struct extent_buffer *l)
btrfs_file_extent_num_bytes(l, fi),
btrfs_file_extent_ram_bytes(l, fi));
break;
- case BTRFS_EXTENT_REF_V0_KEY:
- btrfs_print_v0_err(fs_info);
- btrfs_handle_fs_error(fs_info, -EINVAL, NULL);
- break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
struct btrfs_block_group_item);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index da1f84a0eb29..b99230db3c82 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3590,15 +3590,16 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
* going to clear all tracking information for a clean start.
*/
- trans = btrfs_join_transaction(fs_info->fs_root);
- if (IS_ERR(trans)) {
+ trans = btrfs_attach_transaction_barrier(fs_info->fs_root);
+ if (IS_ERR(trans) && trans != ERR_PTR(-ENOENT)) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
return PTR_ERR(trans);
- }
- ret = btrfs_commit_transaction(trans);
- if (ret) {
- fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
- return ret;
+ } else if (trans != ERR_PTR(-ENOENT)) {
+ ret = btrfs_commit_transaction(trans);
+ if (ret) {
+ fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+ return ret;
+ }
}
qgroup_rescan_zero_tracking(fs_info);
@@ -3757,9 +3758,11 @@ static int try_flush_qgroup(struct btrfs_root *root)
goto out;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
- trans = btrfs_join_transaction(root);
+ trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ if (ret == -ENOENT)
+ ret = 0;
goto out;
}
@@ -4445,4 +4448,5 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
ulist_free(entry->old_roots);
kfree(entry);
}
+ *root = RB_ROOT;
}
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index f37b925d587f..3e014b9370a3 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -71,7 +71,7 @@ static void rmw_rbio_work_locked(struct work_struct *work);
static void index_rbio_pages(struct btrfs_raid_bio *rbio);
static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check);
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
static void scrub_rbio_work_locked(struct work_struct *work);
static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
@@ -584,8 +584,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
return 0;
- if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
- last->operation == BTRFS_RBIO_READ_REBUILD)
+ if (last->operation == BTRFS_RBIO_READ_REBUILD)
return 0;
return 1;
@@ -784,10 +783,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
spin_unlock(&rbio->bio_list_lock);
spin_unlock(&h->lock);
- if (next->operation == BTRFS_RBIO_READ_REBUILD)
- start_async_work(next, recover_rbio_work_locked);
- else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
- steal_rbio(rbio, next);
+ if (next->operation == BTRFS_RBIO_READ_REBUILD) {
start_async_work(next, recover_rbio_work_locked);
} else if (next->operation == BTRFS_RBIO_WRITE) {
steal_rbio(rbio, next);
@@ -1517,11 +1513,11 @@ static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio,
while ((bio = bio_list_pop(bio_list))) {
bio->bi_end_io = raid_wait_read_end_io;
- if (trace_raid56_scrub_read_recover_enabled()) {
+ if (trace_raid56_read_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };
bio_get_trace_info(rbio, bio, &trace_info);
- trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
+ trace_raid56_read(rbio, bio, &trace_info);
}
submit_bio(bio);
}
@@ -1698,8 +1694,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
* If we're rebuilding a read, we have to use pages from the
* bio list if possible.
*/
- if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
- rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
+ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
} else {
sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
@@ -1763,8 +1758,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
* If we're rebuilding a read, we have to use pages from the
* bio list if possible.
*/
- if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
- rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
+ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
} else {
sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
@@ -1897,8 +1891,7 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)
goto out;
}
- if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
- rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
+ if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
spin_lock(&rbio->bio_list_lock);
set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
spin_unlock(&rbio->bio_list_lock);
@@ -2112,8 +2105,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
goto error;
}
- ret = btrfs_lookup_csums_bitmap(csum_root, start, start + len - 1,
- rbio->csum_buf, rbio->csum_bitmap, false);
+ ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1,
+ rbio->csum_buf, rbio->csum_bitmap);
if (ret < 0)
goto error;
if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits))
@@ -2198,11 +2191,11 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
while ((bio = bio_list_pop(bio_list))) {
bio->bi_end_io = raid_wait_write_end_io;
- if (trace_raid56_write_stripe_enabled()) {
+ if (trace_raid56_write_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };
bio_get_trace_info(rbio, bio, &trace_info);
- trace_raid56_write_stripe(rbio, bio, &trace_info);
+ trace_raid56_write(rbio, bio, &trace_info);
}
submit_bio(bio);
}
@@ -2404,7 +2397,7 @@ static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
return 0;
}
-static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
+static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
{
struct btrfs_io_context *bioc = rbio->bioc;
const u32 sectorsize = bioc->fs_info->sectorsize;
@@ -2445,9 +2438,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
*/
clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
- if (!need_check)
- goto writeback;
-
p_sector.page = alloc_page(GFP_NOFS);
if (!p_sector.page)
return -ENOMEM;
@@ -2516,7 +2506,6 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check)
q_sector.page = NULL;
}
-writeback:
/*
* time to start writing. Make bios for everything from the
* higher layers (the bio_list in our rbio) and our p/q. Ignore
@@ -2699,7 +2688,6 @@ static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
static void scrub_rbio(struct btrfs_raid_bio *rbio)
{
- bool need_check = false;
int sector_nr;
int ret;
@@ -2722,7 +2710,7 @@ static void scrub_rbio(struct btrfs_raid_bio *rbio)
* We have every sector properly prepared. Can finish the scrub
* and writeback the good content.
*/
- ret = finish_parity_scrub(rbio, need_check);
+ ret = finish_parity_scrub(rbio);
wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
int found_errors;
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h
index 0e84c9c9293f..45e6ff78316f 100644
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -14,7 +14,6 @@ enum btrfs_rbio_ops {
BTRFS_RBIO_WRITE,
BTRFS_RBIO_READ_REBUILD,
BTRFS_RBIO_PARITY_SCRUB,
- BTRFS_RBIO_REBUILD_MISSING,
};
struct btrfs_raid_bio {
diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c
index 0474bbe39da7..65d2bd6910f2 100644
--- a/fs/btrfs/reflink.c
+++ b/fs/btrfs/reflink.c
@@ -30,8 +30,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
inode_inc_iversion(inode);
if (!no_time_update) {
- inode->i_mtime = current_time(inode);
- inode->i_ctime = inode->i_mtime;
+ inode->i_mtime = inode_set_ctime_current(inode);
}
/*
* We round up to the block size at eof when determining which
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 25a3361caedc..9951a0caf5bb 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1916,7 +1916,39 @@ again:
err = PTR_ERR(root);
break;
}
- ASSERT(root->reloc_root == reloc_root);
+
+ if (unlikely(root->reloc_root != reloc_root)) {
+ if (root->reloc_root) {
+ btrfs_err(fs_info,
+"reloc tree mismatch, root %lld has reloc root key (%lld %u %llu) gen %llu, expect reloc root key (%lld %u %llu) gen %llu",
+ root->root_key.objectid,
+ root->reloc_root->root_key.objectid,
+ root->reloc_root->root_key.type,
+ root->reloc_root->root_key.offset,
+ btrfs_root_generation(
+ &root->reloc_root->root_item),
+ reloc_root->root_key.objectid,
+ reloc_root->root_key.type,
+ reloc_root->root_key.offset,
+ btrfs_root_generation(
+ &reloc_root->root_item));
+ } else {
+ btrfs_err(fs_info,
+"reloc tree mismatch, root %lld has no reloc root, expect reloc root key (%lld %u %llu) gen %llu",
+ root->root_key.objectid,
+ reloc_root->root_key.objectid,
+ reloc_root->root_key.type,
+ reloc_root->root_key.offset,
+ btrfs_root_generation(
+ &reloc_root->root_item));
+ }
+ list_add(&reloc_root->root_list, &reloc_roots);
+ btrfs_put_root(root);
+ btrfs_abort_transaction(trans, -EUCLEAN);
+ if (!err)
+ err = -EUCLEAN;
+ break;
+ }
/*
* set reference count to 1, so btrfs_recover_relocation
@@ -1989,7 +2021,7 @@ again:
root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
false);
if (btrfs_root_refs(&reloc_root->root_item) > 0) {
- if (IS_ERR(root)) {
+ if (WARN_ON(IS_ERR(root))) {
/*
* For recovery we read the fs roots on mount,
* and if we didn't find the root then we marked
@@ -1998,17 +2030,14 @@ again:
* memory. However there's no reason we can't
* handle the error properly here just in case.
*/
- ASSERT(0);
ret = PTR_ERR(root);
goto out;
}
- if (root->reloc_root != reloc_root) {
+ if (WARN_ON(root->reloc_root != reloc_root)) {
/*
- * This is actually impossible without something
- * going really wrong (like weird race condition
- * or cosmic rays).
+ * This can happen if on-disk metadata has some
+ * corruption, e.g. bad reloc tree key offset.
*/
- ASSERT(0);
ret = -EINVAL;
goto out;
}
@@ -2977,9 +3006,6 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
if (!page)
return -ENOMEM;
}
- ret = set_page_extent_mapped(page);
- if (ret < 0)
- goto release_page;
if (PageReadahead(page))
page_cache_async_readahead(inode->i_mapping, ra, NULL,
@@ -2995,6 +3021,15 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
}
}
+ /*
+ * We could have lost page private when we dropped the lock to read the
+ * page above, make sure we set_page_extent_mapped here so we have any
+ * of the subpage blocksize stuff we need in place.
+ */
+ ret = set_page_extent_mapped(page);
+ if (ret < 0)
+ goto release_page;
+
page_start = page_offset(page);
page_end = page_start + PAGE_SIZE - 1;
@@ -3221,12 +3256,13 @@ static int add_tree_block(struct reloc_control *rc,
if (type == BTRFS_TREE_BLOCK_REF_KEY)
owner = btrfs_extent_inline_ref_offset(eb, iref);
}
- } else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
- btrfs_print_v0_err(eb->fs_info);
- btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
- return -EINVAL;
} else {
- BUG();
+ btrfs_print_leaf(eb);
+ btrfs_err(rc->block_group->fs_info,
+ "unrecognized tree backref at tree block %llu slot %u",
+ eb->start, path->slots[0]);
+ btrfs_release_path(path);
+ return -EUCLEAN;
}
btrfs_release_path(path);
@@ -3469,6 +3505,8 @@ int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
last = rc->block_group->start + rc->block_group->length;
while (1) {
+ bool block_found;
+
cond_resched();
if (rc->search_start >= last) {
ret = 1;
@@ -3519,11 +3557,11 @@ next:
goto next;
}
- ret = find_first_extent_bit(&rc->processed_blocks,
- key.objectid, &start, &end,
- EXTENT_DIRTY, NULL);
+ block_found = find_first_extent_bit(&rc->processed_blocks,
+ key.objectid, &start, &end,
+ EXTENT_DIRTY, NULL);
- if (ret == 0 && start <= key.objectid) {
+ if (block_found && start <= key.objectid) {
btrfs_release_path(path);
rc->search_start = end + 1;
} else {
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 4cae41bd6de0..b877203f1dc5 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -43,9 +43,20 @@ struct scrub_ctx;
/*
* The following value only influences the performance.
*
- * This determines the batch size for stripe submitted in one go.
+ * This detemines how many stripes would be submitted in one go,
+ * which is 512KiB (BTRFS_STRIPE_LEN * SCRUB_STRIPES_PER_GROUP).
*/
-#define SCRUB_STRIPES_PER_SCTX 8 /* That would be 8 64K stripe per-device. */
+#define SCRUB_STRIPES_PER_GROUP 8
+
+/*
+ * How many groups we have for each sctx.
+ *
+ * This would be 8M per device, the same value as the old scrub in-flight bios
+ * size limit.
+ */
+#define SCRUB_GROUPS_PER_SCTX 16
+
+#define SCRUB_TOTAL_STRIPES (SCRUB_GROUPS_PER_SCTX * SCRUB_STRIPES_PER_GROUP)
/*
* The following value times PAGE_SIZE needs to be large enough to match the
@@ -172,9 +183,11 @@ struct scrub_stripe {
};
struct scrub_ctx {
- struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX];
+ struct scrub_stripe stripes[SCRUB_TOTAL_STRIPES];
struct scrub_stripe *raid56_data_stripes;
struct btrfs_fs_info *fs_info;
+ struct btrfs_path extent_path;
+ struct btrfs_path csum_path;
int first_free;
int cur_stripe;
atomic_t cancel_req;
@@ -315,10 +328,10 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
if (!sctx)
return;
- for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
+ for (i = 0; i < SCRUB_TOTAL_STRIPES; i++)
release_scrub_stripe(&sctx->stripes[i]);
- kfree(sctx);
+ kvfree(sctx);
}
static void scrub_put_ctx(struct scrub_ctx *sctx)
@@ -333,13 +346,20 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
struct scrub_ctx *sctx;
int i;
- sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
+ /* Since sctx has inline 128 stripes, it can go beyond 64K easily. Use
+ * kvzalloc().
+ */
+ sctx = kvzalloc(sizeof(*sctx), GFP_KERNEL);
if (!sctx)
goto nomem;
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->fs_info = fs_info;
- for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
+ sctx->extent_path.search_commit_root = 1;
+ sctx->extent_path.skip_locking = 1;
+ sctx->csum_path.search_commit_root = 1;
+ sctx->csum_path.skip_locking = 1;
+ for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) {
int ret;
ret = init_scrub_stripe(fs_info, &sctx->stripes[i]);
@@ -605,7 +625,8 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
btrfs_stack_header_bytenr(header), logical);
return;
}
- if (memcmp(header->fsid, fs_info->fs_devices->fsid, BTRFS_FSID_SIZE) != 0) {
+ if (memcmp(header->fsid, fs_info->fs_devices->metadata_uuid,
+ BTRFS_FSID_SIZE) != 0) {
bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
btrfs_warn_rl(fs_info,
@@ -969,6 +990,9 @@ skip:
spin_unlock(&sctx->stat_lock);
}
+static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe,
+ unsigned long write_bitmap, bool dev_replace);
+
/*
* The main entrance for all read related scrub work, including:
*
@@ -977,13 +1001,16 @@ skip:
* - Go through the remaining mirrors and try to read as large blocksize as
* possible
* - Go through all mirrors (including the failed mirror) sector-by-sector
+ * - Submit writeback for repaired sectors
*
- * Writeback does not happen here, it needs extra synchronization.
+ * Writeback for dev-replace does not happen here, it needs extra
+ * synchronization for zoned devices.
*/
static void scrub_stripe_read_repair_worker(struct work_struct *work)
{
struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work);
- struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
+ struct scrub_ctx *sctx = stripe->sctx;
+ struct btrfs_fs_info *fs_info = sctx->fs_info;
int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
stripe->bg->length);
int mirror;
@@ -1048,7 +1075,23 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
goto out;
}
out:
- scrub_stripe_report_errors(stripe->sctx, stripe);
+ /*
+ * Submit the repaired sectors. For zoned case, we cannot do repair
+ * in-place, but queue the bg to be relocated.
+ */
+ if (btrfs_is_zoned(fs_info)) {
+ if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
+ btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start);
+ } else if (!sctx->readonly) {
+ unsigned long repaired;
+
+ bitmap_andnot(&repaired, &stripe->init_error_bitmap,
+ &stripe->error_bitmap, stripe->nr_sectors);
+ scrub_write_sectors(sctx, stripe, repaired, false);
+ wait_scrub_stripe_io(stripe);
+ }
+
+ scrub_stripe_report_errors(sctx, stripe);
set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
wake_up(&stripe->repair_wait);
}
@@ -1261,7 +1304,6 @@ static int get_raid56_logic_offset(u64 physical, int num,
/* Work out the disk rotation on this stripe-set */
rot = stripe_nr % map->num_stripes;
- stripe_nr /= map->num_stripes;
/* calculate which stripe this data locates */
rot += i;
stripe_index = rot % map->num_stripes;
@@ -1467,6 +1509,8 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
* Return <0 for error.
*/
static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
+ struct btrfs_path *extent_path,
+ struct btrfs_path *csum_path,
struct btrfs_device *dev, u64 physical,
int mirror_num, u64 logical_start,
u32 logical_len,
@@ -1476,7 +1520,6 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start);
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start);
const u64 logical_end = logical_start + logical_len;
- struct btrfs_path path = { 0 };
u64 cur_logical = logical_start;
u64 stripe_end;
u64 extent_start;
@@ -1492,14 +1535,13 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
/* The range must be inside the bg. */
ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
- path.search_commit_root = 1;
- path.skip_locking = 1;
-
- ret = find_first_extent_item(extent_root, &path, logical_start, logical_len);
+ ret = find_first_extent_item(extent_root, extent_path, logical_start,
+ logical_len);
/* Either error or not found. */
if (ret)
goto out;
- get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen);
+ get_extent_info(extent_path, &extent_start, &extent_len, &extent_flags,
+ &extent_gen);
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
stripe->nr_meta_extents++;
if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
@@ -1527,7 +1569,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
/* Fill the extent info for the remaining sectors. */
while (cur_logical <= stripe_end) {
- ret = find_first_extent_item(extent_root, &path, cur_logical,
+ ret = find_first_extent_item(extent_root, extent_path, cur_logical,
stripe_end - cur_logical + 1);
if (ret < 0)
goto out;
@@ -1535,7 +1577,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
ret = 0;
break;
}
- get_extent_info(&path, &extent_start, &extent_len,
+ get_extent_info(extent_path, &extent_start, &extent_len,
&extent_flags, &extent_gen);
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
stripe->nr_meta_extents++;
@@ -1560,9 +1602,9 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
*/
ASSERT(BITS_PER_LONG >= BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);
- ret = btrfs_lookup_csums_bitmap(csum_root, stripe->logical,
- stripe_end, stripe->csums,
- &csum_bitmap, true);
+ ret = btrfs_lookup_csums_bitmap(csum_root, csum_path,
+ stripe->logical, stripe_end,
+ stripe->csums, &csum_bitmap);
if (ret < 0)
goto out;
if (ret > 0)
@@ -1575,7 +1617,6 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
}
set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
out:
- btrfs_release_path(&path);
return ret;
}
@@ -1653,6 +1694,28 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
return false;
}
+static void submit_initial_group_read(struct scrub_ctx *sctx,
+ unsigned int first_slot,
+ unsigned int nr_stripes)
+{
+ struct blk_plug plug;
+
+ ASSERT(first_slot < SCRUB_TOTAL_STRIPES);
+ ASSERT(first_slot + nr_stripes <= SCRUB_TOTAL_STRIPES);
+
+ scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
+ btrfs_stripe_nr_to_offset(nr_stripes));
+ blk_start_plug(&plug);
+ for (int i = 0; i < nr_stripes; i++) {
+ struct scrub_stripe *stripe = &sctx->stripes[first_slot + i];
+
+ /* Those stripes should be initialized. */
+ ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
+ scrub_submit_initial_read(sctx, stripe);
+ }
+ blk_finish_plug(&plug);
+}
+
static int flush_scrub_stripes(struct scrub_ctx *sctx)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
@@ -1665,11 +1728,11 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));
- scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
- btrfs_stripe_nr_to_offset(nr_stripes));
- for (int i = 0; i < nr_stripes; i++) {
- stripe = &sctx->stripes[i];
- scrub_submit_initial_read(sctx, stripe);
+ /* Submit the stripes which are populated but not submitted. */
+ if (nr_stripes % SCRUB_STRIPES_PER_GROUP) {
+ const int first_slot = round_down(nr_stripes, SCRUB_STRIPES_PER_GROUP);
+
+ submit_initial_group_read(sctx, first_slot, nr_stripes - first_slot);
}
for (int i = 0; i < nr_stripes; i++) {
@@ -1679,32 +1742,6 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state));
}
- /*
- * Submit the repaired sectors. For zoned case, we cannot do repair
- * in-place, but queue the bg to be relocated.
- */
- if (btrfs_is_zoned(fs_info)) {
- for (int i = 0; i < nr_stripes; i++) {
- stripe = &sctx->stripes[i];
-
- if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) {
- btrfs_repair_one_zone(fs_info,
- sctx->stripes[0].bg->start);
- break;
- }
- }
- } else if (!sctx->readonly) {
- for (int i = 0; i < nr_stripes; i++) {
- unsigned long repaired;
-
- stripe = &sctx->stripes[i];
-
- bitmap_andnot(&repaired, &stripe->init_error_bitmap,
- &stripe->error_bitmap, stripe->nr_sectors);
- scrub_write_sectors(sctx, stripe, repaired, false);
- }
- }
-
/* Submit for dev-replace. */
if (sctx->is_dev_replace) {
/*
@@ -1749,28 +1786,40 @@ static void raid56_scrub_wait_endio(struct bio *bio)
static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg,
struct btrfs_device *dev, int mirror_num,
- u64 logical, u32 length, u64 physical)
+ u64 logical, u32 length, u64 physical,
+ u64 *found_logical_ret)
{
struct scrub_stripe *stripe;
int ret;
- /* No available slot, submit all stripes and wait for them. */
- if (sctx->cur_stripe >= SCRUB_STRIPES_PER_SCTX) {
- ret = flush_scrub_stripes(sctx);
- if (ret < 0)
- return ret;
- }
+ /*
+ * There should always be one slot left, as caller filling the last
+ * slot should flush them all.
+ */
+ ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES);
stripe = &sctx->stripes[sctx->cur_stripe];
-
- /* We can queue one stripe using the remaining slot. */
scrub_reset_stripe(stripe);
- ret = scrub_find_fill_first_stripe(bg, dev, physical, mirror_num,
- logical, length, stripe);
+ ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
+ &sctx->csum_path, dev, physical,
+ mirror_num, logical, length, stripe);
/* Either >0 as no more extents or <0 for error. */
if (ret)
return ret;
+ if (found_logical_ret)
+ *found_logical_ret = stripe->logical;
sctx->cur_stripe++;
+
+ /* We filled one group, submit it. */
+ if (sctx->cur_stripe % SCRUB_STRIPES_PER_GROUP == 0) {
+ const int first_slot = sctx->cur_stripe - SCRUB_STRIPES_PER_GROUP;
+
+ submit_initial_group_read(sctx, first_slot, SCRUB_STRIPES_PER_GROUP);
+ }
+
+ /* Last slot used, flush them all. */
+ if (sctx->cur_stripe == SCRUB_TOTAL_STRIPES)
+ return flush_scrub_stripes(sctx);
return 0;
}
@@ -1784,6 +1833,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_raid_bio *rbio;
struct btrfs_io_context *bioc = NULL;
+ struct btrfs_path extent_path = { 0 };
+ struct btrfs_path csum_path = { 0 };
struct bio *bio;
struct scrub_stripe *stripe;
bool all_empty = true;
@@ -1794,6 +1845,16 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
ASSERT(sctx->raid56_data_stripes);
+ /*
+ * For data stripe search, we cannot re-use the same extent/csum paths,
+ * as the data stripe bytenr may be smaller than previous extent. Thus
+ * we have to use our own extent/csum paths.
+ */
+ extent_path.search_commit_root = 1;
+ extent_path.skip_locking = 1;
+ csum_path.search_commit_root = 1;
+ csum_path.skip_locking = 1;
+
for (int i = 0; i < data_stripes; i++) {
int stripe_index;
int rot;
@@ -1808,7 +1869,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
scrub_reset_stripe(stripe);
set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
- ret = scrub_find_fill_first_stripe(bg,
+ ret = scrub_find_fill_first_stripe(bg, &extent_path, &csum_path,
map->stripes[stripe_index].dev, physical, 1,
full_stripe_start + btrfs_stripe_nr_to_offset(i),
BTRFS_STRIPE_LEN, stripe);
@@ -1853,24 +1914,6 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
/* For now, no zoned support for RAID56. */
ASSERT(!btrfs_is_zoned(sctx->fs_info));
- /* Writeback for the repaired sectors. */
- for (int i = 0; i < data_stripes; i++) {
- unsigned long repaired;
-
- stripe = &sctx->raid56_data_stripes[i];
-
- bitmap_andnot(&repaired, &stripe->init_error_bitmap,
- &stripe->error_bitmap, stripe->nr_sectors);
- scrub_write_sectors(sctx, stripe, repaired, false);
- }
-
- /* Wait for the above writebacks to finish. */
- for (int i = 0; i < data_stripes; i++) {
- stripe = &sctx->raid56_data_stripes[i];
-
- wait_scrub_stripe_io(stripe);
- }
-
/*
* Now all data stripes are properly verified. Check if we have any
* unrepaired, if so abort immediately or we could further corrupt the
@@ -1936,6 +1979,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bio_put(bio);
btrfs_bio_counter_dec(fs_info);
+ btrfs_release_path(&extent_path);
+ btrfs_release_path(&csum_path);
out:
return ret;
}
@@ -1957,18 +2002,15 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
const u64 logical_end = logical_start + logical_length;
- /* An artificial limit, inherit from old scrub behavior */
- struct btrfs_path path = { 0 };
u64 cur_logical = logical_start;
int ret;
/* The range must be inside the bg */
ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);
- path.search_commit_root = 1;
- path.skip_locking = 1;
/* Go through each extent items inside the logical range */
while (cur_logical < logical_end) {
+ u64 found_logical;
u64 cur_physical = physical + cur_logical - logical_start;
/* Canceled? */
@@ -1993,7 +2035,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
ret = queue_scrub_stripe(sctx, bg, device, mirror_num,
cur_logical, logical_end - cur_logical,
- cur_physical);
+ cur_physical, &found_logical);
if (ret > 0) {
/* No more extent, just update the accounting */
sctx->stat.last_physical = physical + logical_length;
@@ -2003,14 +2045,11 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
if (ret < 0)
break;
- ASSERT(sctx->cur_stripe > 0);
- cur_logical = sctx->stripes[sctx->cur_stripe - 1].logical
- + BTRFS_STRIPE_LEN;
+ cur_logical = found_logical + BTRFS_STRIPE_LEN;
/* Don't hold CPU for too long time */
cond_resched();
}
- btrfs_release_path(&path);
return ret;
}
@@ -2108,6 +2147,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 stripe_logical;
int stop_loop = 0;
+ /* Extent_path should be released by now. */
+ ASSERT(sctx->extent_path.nodes[0] == NULL);
+
scrub_blocked_if_needed(fs_info);
if (sctx->is_dev_replace &&
@@ -2226,6 +2268,9 @@ out:
ret2 = flush_scrub_stripes(sctx);
if (!ret)
ret = ret2;
+ btrfs_release_path(&sctx->extent_path);
+ btrfs_release_path(&sctx->csum_path);
+
if (sctx->raid56_data_stripes) {
for (int i = 0; i < nr_data_stripes(map); i++)
release_scrub_stripe(&sctx->raid56_data_stripes[i]);
@@ -2710,8 +2755,7 @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info)
/*
* get a reference count on fs_info->scrub_workers. start worker if necessary
*/
-static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
- int is_dev_replace)
+static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info)
{
struct workqueue_struct *scrub_workers = NULL;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
@@ -2721,10 +2765,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
return 0;
- if (is_dev_replace)
- scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags);
- else
- scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
+ scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
if (!scrub_workers)
return -ENOMEM;
@@ -2776,7 +2817,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (IS_ERR(sctx))
return PTR_ERR(sctx);
- ret = scrub_workers_get(fs_info, is_dev_replace);
+ ret = scrub_workers_get(fs_info);
if (ret)
goto out_free_ctx;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 8bfd44750efe..3a566150c531 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -3685,7 +3685,7 @@ static void tail_append_pending_moves(struct send_ctx *sctx,
static int apply_children_dir_moves(struct send_ctx *sctx)
{
struct pending_dir_move *pm;
- struct list_head stack;
+ LIST_HEAD(stack);
u64 parent_ino = sctx->cur_ino;
int ret = 0;
@@ -3693,7 +3693,6 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
if (!pm)
return 0;
- INIT_LIST_HEAD(&stack);
tail_append_pending_moves(sctx, pm, &stack);
while (!list_empty(&stack)) {
@@ -4165,7 +4164,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
int ret = 0;
struct recorded_ref *cur;
struct recorded_ref *cur2;
- struct list_head check_dirs;
+ LIST_HEAD(check_dirs);
struct fs_path *valid_path = NULL;
u64 ow_inode = 0;
u64 ow_gen;
@@ -4184,7 +4183,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* which is always '..'
*/
BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
- INIT_LIST_HEAD(&check_dirs);
valid_path = fs_path_alloc();
if (!valid_path) {
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index 75e7fa337e66..d7e8cd4f140c 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -389,11 +389,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
return 0;
used = btrfs_space_info_used(space_info, true);
- if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
- (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
- avail = 0;
- else
- avail = calc_available_free_space(fs_info, space_info, flush);
+ avail = calc_available_free_space(fs_info, space_info, flush);
if (used + bytes < space_info->total_bytes + avail)
return 1;
@@ -510,6 +506,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
int dump_block_groups)
{
struct btrfs_block_group *cache;
+ u64 total_avail = 0;
int index = 0;
spin_lock(&info->lock);
@@ -523,18 +520,27 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
down_read(&info->groups_sem);
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
+ u64 avail;
+
spin_lock(&cache->lock);
+ avail = cache->length - cache->used - cache->pinned -
+ cache->reserved - cache->delalloc_bytes -
+ cache->bytes_super - cache->zone_unusable;
btrfs_info(fs_info,
- "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
- cache->start, cache->length, cache->used, cache->pinned,
- cache->reserved, cache->zone_unusable,
- cache->ro ? "[readonly]" : "");
+"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s",
+ cache->start, cache->length, cache->used, cache->pinned,
+ cache->reserved, cache->delalloc_bytes,
+ cache->bytes_super, cache->zone_unusable,
+ avail, cache->ro ? "[readonly]" : "");
spin_unlock(&cache->lock);
btrfs_dump_free_space(cache, bytes);
+ total_avail += avail;
}
if (++index < BTRFS_NR_RAID_TYPES)
goto again;
up_read(&info->groups_sem);
+
+ btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
}
static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
@@ -715,9 +721,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
else
nr = -1;
- trans = btrfs_join_transaction(root);
+ trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ if (ret == -ENOENT)
+ ret = 0;
break;
}
ret = btrfs_run_delayed_items_nr(trans, nr);
@@ -733,9 +741,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case FLUSH_DELAYED_REFS_NR:
case FLUSH_DELAYED_REFS:
- trans = btrfs_join_transaction(root);
+ trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ if (ret == -ENOENT)
+ ret = 0;
break;
}
if (state == FLUSH_DELAYED_REFS_NR)
@@ -747,18 +757,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case ALLOC_CHUNK:
case ALLOC_CHUNK_FORCE:
- /*
- * For metadata space on zoned filesystem, reaching here means we
- * don't have enough space left in active_total_bytes. Try to
- * activate a block group first, because we may have inactive
- * block group already allocated.
- */
- ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false);
- if (ret < 0)
- break;
- else if (ret == 1)
- break;
-
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -770,22 +768,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
CHUNK_ALLOC_FORCE);
btrfs_end_transaction(trans);
- /*
- * For metadata space on zoned filesystem, allocating a new chunk
- * is not enough. We still need to activate the block * group.
- * Active the newly allocated block group by (maybe) finishing
- * a block group.
- */
- if (ret == 1) {
- ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
- /*
- * Revert to the original ret regardless we could finish
- * one block group or not.
- */
- if (ret >= 0)
- ret = 1;
- }
-
if (ret > 0 || ret == -ENOSPC)
ret = 0;
break;
@@ -800,9 +782,18 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case COMMIT_TRANS:
ASSERT(current->journal_info == NULL);
- trans = btrfs_join_transaction(root);
+ /*
+ * We don't want to start a new transaction, just attach to the
+ * current one or wait it fully commits in case its commit is
+ * happening at the moment. Note: we don't use a nostart join
+ * because that does not wait for a transaction to fully commit
+ * (only for it to be unblocked, state TRANS_STATE_UNBLOCKED).
+ */
+ trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ if (ret == -ENOENT)
+ ret = 0;
break;
}
ret = btrfs_commit_transaction(trans);
@@ -1408,8 +1399,18 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
}
}
- /* Attempt to steal from the global rsv if we can. */
- if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
+ /*
+ * Attempt to steal from the global rsv if we can, except if the fs was
+ * turned into error mode due to a transaction abort when flushing space
+ * above, in that case fail with the abort error instead of returning
+ * success to the caller if we can steal from the global rsv - this is
+ * just to have caller fail immeditelly instead of later when trying to
+ * modify the fs, making it easier to debug -ENOSPC problems.
+ */
+ if (BTRFS_FS_ERROR(fs_info)) {
+ ticket->error = BTRFS_FS_ERROR(fs_info);
+ remove_ticket(space_info, ticket);
+ } else if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
ticket->error = -ENOSPC;
remove_ticket(space_info, ticket);
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f1dd172d8d5b..09bfe68d2ea3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -709,12 +709,16 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
case Opt_check_integrity_including_extent_data:
+ btrfs_warn(info,
+ "integrity checker is deprecated and will be removed in 6.7");
btrfs_info(info,
"enabling check integrity including extent data");
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA);
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
break;
case Opt_check_integrity:
+ btrfs_warn(info,
+ "integrity checker is deprecated and will be removed in 6.7");
btrfs_info(info, "enabling check integrity");
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
break;
@@ -727,6 +731,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
goto out;
}
info->check_integrity_print_mask = intarg;
+ btrfs_warn(info,
+ "integrity checker is deprecated and will be removed in 6.7");
btrfs_info(info, "check_integrity_print_mask 0x%x",
info->check_integrity_print_mask);
break;
@@ -2144,7 +2150,7 @@ static struct file_system_type btrfs_fs_type = {
.name = "btrfs",
.mount = btrfs_mount,
.kill_sb = btrfs_kill_super,
- .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA,
+ .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_MGTIME,
};
static struct file_system_type btrfs_root_fs_type = {
@@ -2152,7 +2158,8 @@ static struct file_system_type btrfs_root_fs_type = {
.name = "btrfs",
.mount = btrfs_mount_root,
.kill_sb = btrfs_kill_super,
- .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP,
+ .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA |
+ FS_ALLOW_IDMAP | FS_MGTIME,
};
MODULE_ALIAS_FS("btrfs");
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 25294e624851..b1d1ac25237b 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -414,6 +414,12 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
BTRFS_ATTR(static_feature, supported_sectorsizes,
supported_sectorsizes_show);
+static ssize_t acl_show(struct kobject *kobj, struct kobj_attribute *a, char *buf)
+{
+ return sysfs_emit(buf, "%d\n", !!IS_ENABLED(CONFIG_BTRFS_FS_POSIX_ACL));
+}
+BTRFS_ATTR(static_feature, acl, acl_show);
+
/*
* Features which only depend on kernel version.
*
@@ -421,6 +427,7 @@ BTRFS_ATTR(static_feature, supported_sectorsizes,
* btrfs_supported_feature_attrs.
*/
static struct attribute *btrfs_supported_static_feature_attrs[] = {
+ BTRFS_ATTR_PTR(static_feature, acl),
BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
BTRFS_ATTR_PTR(static_feature, supported_checksums),
BTRFS_ATTR_PTR(static_feature, send_stream_version),
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index f6bc6d738555..1cc86af97dc6 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -319,86 +319,139 @@ out:
return ret;
}
-static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
- unsigned long len)
+static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb)
{
unsigned long i;
- for (i = 0; i < len * BITS_PER_BYTE; i++) {
+ for (i = 0; i < eb->len * BITS_PER_BYTE; i++) {
int bit, bit1;
bit = !!test_bit(i, bitmap);
bit1 = !!extent_buffer_test_bit(eb, 0, i);
if (bit1 != bit) {
- test_err("bits do not match");
+ u8 has;
+ u8 expect;
+
+ read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
+ expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));
+
+ test_err(
+ "bits do not match, start byte 0 bit %lu, byte %lu has 0x%02x expect 0x%02x",
+ i, i / BITS_PER_BYTE, has, expect);
return -EINVAL;
}
bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
i % BITS_PER_BYTE);
if (bit1 != bit) {
- test_err("offset bits do not match");
+ u8 has;
+ u8 expect;
+
+ read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
+ expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));
+
+ test_err(
+ "bits do not match, start byte %lu bit %lu, byte %lu has 0x%02x expect 0x%02x",
+ i / BITS_PER_BYTE, i % BITS_PER_BYTE,
+ i / BITS_PER_BYTE, has, expect);
return -EINVAL;
}
}
return 0;
}
-static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
- unsigned long len)
+static int test_bitmap_set(const char *name, unsigned long *bitmap,
+ struct extent_buffer *eb,
+ unsigned long byte_start, unsigned long bit_start,
+ unsigned long bit_len)
+{
+ int ret;
+
+ bitmap_set(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
+ extent_buffer_bitmap_set(eb, byte_start, bit_start, bit_len);
+ ret = check_eb_bitmap(bitmap, eb);
+ if (ret < 0)
+ test_err("%s test failed", name);
+ return ret;
+}
+
+static int test_bitmap_clear(const char *name, unsigned long *bitmap,
+ struct extent_buffer *eb,
+ unsigned long byte_start, unsigned long bit_start,
+ unsigned long bit_len)
+{
+ int ret;
+
+ bitmap_clear(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
+ extent_buffer_bitmap_clear(eb, byte_start, bit_start, bit_len);
+ ret = check_eb_bitmap(bitmap, eb);
+ if (ret < 0)
+ test_err("%s test failed", name);
+ return ret;
+}
+static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb)
{
unsigned long i, j;
+ unsigned long byte_len = eb->len;
u32 x;
int ret;
- memset(bitmap, 0, len);
- memzero_extent_buffer(eb, 0, len);
- if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
- test_err("bitmap was not zeroed");
- return -EINVAL;
- }
+ ret = test_bitmap_clear("clear all run 1", bitmap, eb, 0, 0,
+ byte_len * BITS_PER_BYTE);
+ if (ret < 0)
+ return ret;
- bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
- ret = check_eb_bitmap(bitmap, eb, len);
- if (ret) {
- test_err("setting all bits failed");
+ ret = test_bitmap_set("set all", bitmap, eb, 0, 0, byte_len * BITS_PER_BYTE);
+ if (ret < 0)
return ret;
- }
- bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
- ret = check_eb_bitmap(bitmap, eb, len);
- if (ret) {
- test_err("clearing all bits failed");
+ ret = test_bitmap_clear("clear all run 2", bitmap, eb, 0, 0,
+ byte_len * BITS_PER_BYTE);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_set("same byte set", bitmap, eb, 0, 2, 4);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_clear("same byte partial clear", bitmap, eb, 0, 4, 1);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_set("cross byte set", bitmap, eb, 2, 4, 8);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_set("cross multi byte set", bitmap, eb, 4, 4, 24);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_clear("cross byte clear", bitmap, eb, 2, 6, 4);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_clear("cross multi byte clear", bitmap, eb, 4, 6, 20);
+ if (ret < 0)
return ret;
- }
/* Straddling pages test */
- if (len > PAGE_SIZE) {
- bitmap_set(bitmap,
- (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
- sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
- sizeof(long) * BITS_PER_BYTE);
- ret = check_eb_bitmap(bitmap, eb, len);
- if (ret) {
- test_err("setting straddling pages failed");
+ if (byte_len > PAGE_SIZE) {
+ ret = test_bitmap_set("cross page set", bitmap, eb,
+ PAGE_SIZE - sizeof(long) / 2, 0,
+ sizeof(long) * BITS_PER_BYTE);
+ if (ret < 0)
+ return ret;
+
+ ret = test_bitmap_set("cross page set all", bitmap, eb, 0, 0,
+ byte_len * BITS_PER_BYTE);
+ if (ret < 0)
return ret;
- }
- bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
- bitmap_clear(bitmap,
- (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
- sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
+ ret = test_bitmap_clear("cross page clear", bitmap, eb,
+ PAGE_SIZE - sizeof(long) / 2, 0,
sizeof(long) * BITS_PER_BYTE);
- ret = check_eb_bitmap(bitmap, eb, len);
- if (ret) {
- test_err("clearing straddling pages failed");
+ if (ret < 0)
return ret;
- }
}
/*
@@ -406,9 +459,12 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
* something repetitive that could miss some hypothetical off-by-n bug.
*/
x = 0;
- bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
- for (i = 0; i < len * BITS_PER_BYTE / 32; i++) {
+ ret = test_bitmap_clear("clear all run 3", bitmap, eb, 0, 0,
+ byte_len * BITS_PER_BYTE);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < byte_len * BITS_PER_BYTE / 32; i++) {
x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU;
for (j = 0; j < 32; j++) {
if (x & (1U << j)) {
@@ -418,7 +474,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
}
}
- ret = check_eb_bitmap(bitmap, eb, len);
+ ret = check_eb_bitmap(bitmap, eb);
if (ret) {
test_err("random bit pattern failed");
return ret;
@@ -456,7 +512,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
goto out;
}
- ret = __test_eb_bitmaps(bitmap, eb, nodesize);
+ ret = __test_eb_bitmaps(bitmap, eb);
if (ret)
goto out;
@@ -473,7 +529,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
goto out;
}
- ret = __test_eb_bitmaps(bitmap, eb, nodesize);
+ ret = __test_eb_bitmaps(bitmap, eb);
out:
free_extent_buffer(eb);
kfree(bitmap);
@@ -592,6 +648,146 @@ out:
return ret;
}
+static void dump_eb_and_memory_contents(struct extent_buffer *eb, void *memory,
+ const char *test_name)
+{
+ for (int i = 0; i < eb->len; i++) {
+ struct page *page = eb->pages[i >> PAGE_SHIFT];
+ void *addr = page_address(page) + offset_in_page(i);
+
+ if (memcmp(addr, memory + i, 1) != 0) {
+ test_err("%s failed", test_name);
+ test_err("eb and memory diffs at byte %u, eb has 0x%02x memory has 0x%02x",
+ i, *(u8 *)addr, *(u8 *)(memory + i));
+ return;
+ }
+ }
+}
+
+static int verify_eb_and_memory(struct extent_buffer *eb, void *memory,
+ const char *test_name)
+{
+ for (int i = 0; i < (eb->len >> PAGE_SHIFT); i++) {
+ void *eb_addr = page_address(eb->pages[i]);
+
+ if (memcmp(memory + (i << PAGE_SHIFT), eb_addr, PAGE_SIZE) != 0) {
+ dump_eb_and_memory_contents(eb, memory, test_name);
+ return -EUCLEAN;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Init both memory and extent buffer contents to the same randomly generated
+ * contents.
+ */
+static void init_eb_and_memory(struct extent_buffer *eb, void *memory)
+{
+ get_random_bytes(memory, eb->len);
+ write_extent_buffer(eb, memory, 0, eb->len);
+}
+
+static int test_eb_mem_ops(u32 sectorsize, u32 nodesize)
+{
+ struct btrfs_fs_info *fs_info;
+ struct extent_buffer *eb = NULL;
+ void *memory = NULL;
+ int ret;
+
+ test_msg("running extent buffer memory operation tests");
+
+ fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
+ if (!fs_info) {
+ test_std_err(TEST_ALLOC_FS_INFO);
+ return -ENOMEM;
+ }
+
+ memory = kvzalloc(nodesize, GFP_KERNEL);
+ if (!memory) {
+ test_err("failed to allocate memory");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ eb = __alloc_dummy_extent_buffer(fs_info, SZ_1M, nodesize);
+ if (!eb) {
+ test_std_err(TEST_ALLOC_EXTENT_BUFFER);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ init_eb_and_memory(eb, memory);
+ ret = verify_eb_and_memory(eb, memory, "full eb write");
+ if (ret < 0)
+ goto out;
+
+ memcpy(memory, memory + 16, 16);
+ memcpy_extent_buffer(eb, 0, 16, 16);
+ ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 1");
+ if (ret < 0)
+ goto out;
+
+ memcpy(memory, memory + 2048, 16);
+ memcpy_extent_buffer(eb, 0, 2048, 16);
+ ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 2");
+ if (ret < 0)
+ goto out;
+ memcpy(memory, memory + 2048, 2048);
+ memcpy_extent_buffer(eb, 0, 2048, 2048);
+ ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 3");
+ if (ret < 0)
+ goto out;
+
+ memmove(memory + 512, memory + 256, 512);
+ memmove_extent_buffer(eb, 512, 256, 512);
+ ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 1");
+ if (ret < 0)
+ goto out;
+
+ memmove(memory + 2048, memory + 512, 2048);
+ memmove_extent_buffer(eb, 2048, 512, 2048);
+ ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 2");
+ if (ret < 0)
+ goto out;
+ memmove(memory + 512, memory + 2048, 2048);
+ memmove_extent_buffer(eb, 512, 2048, 2048);
+ ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 3");
+ if (ret < 0)
+ goto out;
+
+ if (nodesize > PAGE_SIZE) {
+ memcpy(memory, memory + 4096 - 128, 256);
+ memcpy_extent_buffer(eb, 0, 4096 - 128, 256);
+ ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 1");
+ if (ret < 0)
+ goto out;
+
+ memcpy(memory + 4096 - 128, memory + 4096 + 128, 256);
+ memcpy_extent_buffer(eb, 4096 - 128, 4096 + 128, 256);
+ ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 2");
+ if (ret < 0)
+ goto out;
+
+ memmove(memory + 4096 - 128, memory + 4096 - 64, 256);
+ memmove_extent_buffer(eb, 4096 - 128, 4096 - 64, 256);
+ ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 1");
+ if (ret < 0)
+ goto out;
+
+ memmove(memory + 4096 - 64, memory + 4096 - 128, 256);
+ memmove_extent_buffer(eb, 4096 - 64, 4096 - 128, 256);
+ ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 2");
+ if (ret < 0)
+ goto out;
+ }
+out:
+ free_extent_buffer(eb);
+ kvfree(memory);
+ btrfs_free_dummy_fs_info(fs_info);
+ return ret;
+}
+
int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
{
int ret;
@@ -607,6 +803,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
goto out;
ret = test_eb_bitmaps(sectorsize, nodesize);
+ if (ret)
+ goto out;
+
+ ret = test_eb_mem_ops(sectorsize, nodesize);
out:
return ret;
}
diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c
index ed0f36ae5346..29bdd08b241f 100644
--- a/fs/btrfs/tests/extent-map-tests.c
+++ b/fs/btrfs/tests/extent-map-tests.c
@@ -6,6 +6,7 @@
#include <linux/types.h>
#include "btrfs-tests.h"
#include "../ctree.h"
+#include "../btrfs_inode.h"
#include "../volumes.h"
#include "../disk-io.h"
#include "../block-group.h"
@@ -442,6 +443,406 @@ static int test_case_4(struct btrfs_fs_info *fs_info,
return ret;
}
+static int add_compressed_extent(struct extent_map_tree *em_tree,
+ u64 start, u64 len, u64 block_start)
+{
+ struct extent_map *em;
+ int ret;
+
+ em = alloc_extent_map();
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ return -ENOMEM;
+ }
+
+ em->start = start;
+ em->len = len;
+ em->block_start = block_start;
+ em->block_len = SZ_4K;
+ set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
+ free_extent_map(em);
+ if (ret < 0) {
+ test_err("cannot add extent map [%llu, %llu)", start, start + len);
+ return ret;
+ }
+
+ return 0;
+}
+
+struct extent_range {
+ u64 start;
+ u64 len;
+};
+
+/* The valid states of the tree after every drop, as described below. */
+struct extent_range valid_ranges[][7] = {
+ {
+ { .start = 0, .len = SZ_8K }, /* [0, 8K) */
+ { .start = SZ_4K * 3, .len = SZ_4K * 3}, /* [12k, 24k) */
+ { .start = SZ_4K * 6, .len = SZ_4K * 3}, /* [24k, 36k) */
+ { .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
+ { .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
+ },
+ {
+ { .start = 0, .len = SZ_8K }, /* [0, 8K) */
+ { .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
+ { .start = SZ_4K * 6, .len = SZ_4K * 3}, /* [24k, 36k) */
+ { .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
+ { .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
+ },
+ {
+ { .start = 0, .len = SZ_8K }, /* [0, 8K) */
+ { .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
+ { .start = SZ_4K * 6, .len = SZ_4K}, /* [24k, 28k) */
+ { .start = SZ_32K, .len = SZ_4K}, /* [32k, 36k) */
+ { .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
+ { .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
+ },
+ {
+ { .start = 0, .len = SZ_8K}, /* [0, 8K) */
+ { .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
+ { .start = SZ_4K * 6, .len = SZ_4K}, /* [24k, 28k) */
+ }
+};
+
+static int validate_range(struct extent_map_tree *em_tree, int index)
+{
+ struct rb_node *n;
+ int i;
+
+ for (i = 0, n = rb_first_cached(&em_tree->map);
+ valid_ranges[index][i].len && n;
+ i++, n = rb_next(n)) {
+ struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);
+
+ if (entry->start != valid_ranges[index][i].start) {
+ test_err("mapping has start %llu expected %llu",
+ entry->start, valid_ranges[index][i].start);
+ return -EINVAL;
+ }
+
+ if (entry->len != valid_ranges[index][i].len) {
+ test_err("mapping has len %llu expected %llu",
+ entry->len, valid_ranges[index][i].len);
+ return -EINVAL;
+ }
+ }
+
+ /*
+ * We exited because we don't have any more entries in the extent_map
+ * but we still expect more valid entries.
+ */
+ if (valid_ranges[index][i].len) {
+ test_err("missing an entry");
+ return -EINVAL;
+ }
+
+ /* We exited the loop but still have entries in the extent map. */
+ if (n) {
+ test_err("we have a left over entry in the extent map we didn't expect");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Test scenario:
+ *
+ * Test the various edge cases of btrfs_drop_extent_map_range, create the
+ * following ranges
+ *
+ * [0, 12k)[12k, 24k)[24k, 36k)[36k, 40k)[40k,64k)
+ *
+ * And then we'll drop:
+ *
+ * [8k, 12k) - test the single front split
+ * [12k, 20k) - test the single back split
+ * [28k, 32k) - test the double split
+ * [32k, 64k) - test whole em dropping
+ *
+ * They'll have the EXTENT_FLAG_COMPRESSED flag set to keep the em tree from
+ * merging the em's.
+ */
+static int test_case_5(void)
+{
+ struct extent_map_tree *em_tree;
+ struct inode *inode;
+ u64 start, end;
+ int ret;
+
+ test_msg("Running btrfs_drop_extent_map_range tests");
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_std_err(TEST_ALLOC_INODE);
+ return -ENOMEM;
+ }
+
+ em_tree = &BTRFS_I(inode)->extent_tree;
+
+ /* [0, 12k) */
+ ret = add_compressed_extent(em_tree, 0, SZ_4K * 3, 0);
+ if (ret) {
+ test_err("cannot add extent range [0, 12K)");
+ goto out;
+ }
+
+ /* [12k, 24k) */
+ ret = add_compressed_extent(em_tree, SZ_4K * 3, SZ_4K * 3, SZ_4K);
+ if (ret) {
+ test_err("cannot add extent range [12k, 24k)");
+ goto out;
+ }
+
+ /* [24k, 36k) */
+ ret = add_compressed_extent(em_tree, SZ_4K * 6, SZ_4K * 3, SZ_8K);
+ if (ret) {
+ test_err("cannot add extent range [12k, 24k)");
+ goto out;
+ }
+
+ /* [36k, 40k) */
+ ret = add_compressed_extent(em_tree, SZ_32K + SZ_4K, SZ_4K, SZ_4K * 3);
+ if (ret) {
+ test_err("cannot add extent range [12k, 24k)");
+ goto out;
+ }
+
+ /* [40k, 64k) */
+ ret = add_compressed_extent(em_tree, SZ_4K * 10, SZ_4K * 6, SZ_16K);
+ if (ret) {
+ test_err("cannot add extent range [12k, 24k)");
+ goto out;
+ }
+
+ /* Drop [8k, 12k) */
+ start = SZ_8K;
+ end = (3 * SZ_4K) - 1;
+ btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+ ret = validate_range(&BTRFS_I(inode)->extent_tree, 0);
+ if (ret)
+ goto out;
+
+ /* Drop [12k, 20k) */
+ start = SZ_4K * 3;
+ end = SZ_16K + SZ_4K - 1;
+ btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+ ret = validate_range(&BTRFS_I(inode)->extent_tree, 1);
+ if (ret)
+ goto out;
+
+ /* Drop [28k, 32k) */
+ start = SZ_32K - SZ_4K;
+ end = SZ_32K - 1;
+ btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+ ret = validate_range(&BTRFS_I(inode)->extent_tree, 2);
+ if (ret)
+ goto out;
+
+ /* Drop [32k, 64k) */
+ start = SZ_32K;
+ end = SZ_64K - 1;
+ btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
+ ret = validate_range(&BTRFS_I(inode)->extent_tree, 3);
+ if (ret)
+ goto out;
+out:
+ iput(inode);
+ return ret;
+}
+
+/*
+ * Test the btrfs_add_extent_mapping helper which will attempt to create an em
+ * for areas between two existing ems. Validate it doesn't do this when there
+ * are two unmerged em's side by side.
+ */
+static int test_case_6(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree)
+{
+ struct extent_map *em = NULL;
+ int ret;
+
+ ret = add_compressed_extent(em_tree, 0, SZ_4K, 0);
+ if (ret)
+ goto out;
+
+ ret = add_compressed_extent(em_tree, SZ_4K, SZ_4K, 0);
+ if (ret)
+ goto out;
+
+ em = alloc_extent_map();
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ return -ENOMEM;
+ }
+
+ em->start = SZ_4K;
+ em->len = SZ_4K;
+ em->block_start = SZ_16K;
+ em->block_len = SZ_16K;
+ write_lock(&em_tree->lock);
+ ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, 0, SZ_8K);
+ write_unlock(&em_tree->lock);
+
+ if (ret != 0) {
+ test_err("got an error when adding our em: %d", ret);
+ goto out;
+ }
+
+ ret = -EINVAL;
+ if (em->start != 0) {
+ test_err("unexpected em->start at %llu, wanted 0", em->start);
+ goto out;
+ }
+ if (em->len != SZ_4K) {
+ test_err("unexpected em->len %llu, expected 4K", em->len);
+ goto out;
+ }
+ ret = 0;
+out:
+ free_extent_map(em);
+ free_extent_map_tree(em_tree);
+ return ret;
+}
+
+/*
+ * Regression test for btrfs_drop_extent_map_range. Calling with skip_pinned ==
+ * true would mess up the start/end calculations and subsequent splits would be
+ * incorrect.
+ */
+static int test_case_7(void)
+{
+ struct extent_map_tree *em_tree;
+ struct extent_map *em;
+ struct inode *inode;
+ int ret;
+
+ test_msg("Running btrfs_drop_extent_cache with pinned");
+
+ inode = btrfs_new_test_inode();
+ if (!inode) {
+ test_std_err(TEST_ALLOC_INODE);
+ return -ENOMEM;
+ }
+
+ em_tree = &BTRFS_I(inode)->extent_tree;
+
+ em = alloc_extent_map();
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* [0, 16K), pinned */
+ em->start = 0;
+ em->len = SZ_16K;
+ em->block_start = 0;
+ em->block_len = SZ_4K;
+ set_bit(EXTENT_FLAG_PINNED, &em->flags);
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
+ if (ret < 0) {
+ test_err("couldn't add extent map");
+ goto out;
+ }
+ free_extent_map(em);
+
+ em = alloc_extent_map();
+ if (!em) {
+ test_std_err(TEST_ALLOC_EXTENT_MAP);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* [32K, 48K), not pinned */
+ em->start = SZ_32K;
+ em->len = SZ_16K;
+ em->block_start = SZ_32K;
+ em->block_len = SZ_16K;
+ write_lock(&em_tree->lock);
+ ret = add_extent_mapping(em_tree, em, 0);
+ write_unlock(&em_tree->lock);
+ if (ret < 0) {
+ test_err("couldn't add extent map");
+ goto out;
+ }
+ free_extent_map(em);
+
+ /*
+ * Drop [0, 36K) This should skip the [0, 4K) extent and then split the
+ * [32K, 48K) extent.
+ */
+ btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (36 * SZ_1K) - 1, true);
+
+ /* Make sure our extent maps look sane. */
+ ret = -EINVAL;
+
+ em = lookup_extent_mapping(em_tree, 0, SZ_16K);
+ if (!em) {
+ test_err("didn't find an em at 0 as expected");
+ goto out;
+ }
+
+ if (em->start != 0) {
+ test_err("em->start is %llu, expected 0", em->start);
+ goto out;
+ }
+
+ if (em->len != SZ_16K) {
+ test_err("em->len is %llu, expected 16K", em->len);
+ goto out;
+ }
+
+ free_extent_map(em);
+
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, SZ_16K, SZ_16K);
+ read_unlock(&em_tree->lock);
+ if (em) {
+ test_err("found an em when we weren't expecting one");
+ goto out;
+ }
+
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, SZ_32K, SZ_16K);
+ read_unlock(&em_tree->lock);
+ if (!em) {
+ test_err("didn't find an em at 32K as expected");
+ goto out;
+ }
+
+ if (em->start != (36 * SZ_1K)) {
+ test_err("em->start is %llu, expected 36K", em->start);
+ goto out;
+ }
+
+ if (em->len != (12 * SZ_1K)) {
+ test_err("em->len is %llu, expected 12K", em->len);
+ goto out;
+ }
+
+ free_extent_map(em);
+
+ read_lock(&em_tree->lock);
+ em = lookup_extent_mapping(em_tree, 48 * SZ_1K, (u64)-1);
+ read_unlock(&em_tree->lock);
+ if (em) {
+ test_err("found an unexpected em above 48K");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ free_extent_map(em);
+ iput(inode);
+ return ret;
+}
+
struct rmap_test_vector {
u64 raid_type;
u64 physical_start;
@@ -619,6 +1020,17 @@ int btrfs_test_extent_map(void)
if (ret)
goto out;
ret = test_case_4(fs_info, em_tree);
+ if (ret)
+ goto out;
+ ret = test_case_5();
+ if (ret)
+ goto out;
+ ret = test_case_6(fs_info, em_tree);
+ if (ret)
+ goto out;
+ ret = test_case_7();
+ if (ret)
+ goto out;
test_msg("running rmap tests");
for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cf306351b148..874e4394df86 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -292,10 +292,11 @@ loop:
spin_unlock(&fs_info->trans_lock);
/*
- * If we are ATTACH, we just want to catch the current transaction,
- * and commit it. If there is no transaction, just return ENOENT.
+ * If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
+ * current transaction, and commit it. If there is no transaction, just
+ * return ENOENT.
*/
- if (type == TRANS_ATTACH)
+ if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
return -ENOENT;
/*
@@ -591,8 +592,13 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
u64 delayed_refs_bytes = 0;
qgroup_reserved = num_items * fs_info->nodesize;
- ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
- enforce_qgroups);
+ /*
+ * Use prealloc for now, as there might be a currently running
+ * transaction that could free this reserved space prematurely
+ * by committing.
+ */
+ ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserved,
+ enforce_qgroups, false);
if (ret)
return ERR_PTR(ret);
@@ -705,6 +711,14 @@ again:
h->reloc_reserved = reloc_reserved;
}
+ /*
+ * Now that we have found a transaction to be a part of, convert the
+ * qgroup reservation from prealloc to pertrans. A different transaction
+ * can't race in and free our pertrans out from under us.
+ */
+ if (qgroup_reserved)
+ btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
+
got_it:
if (!current->journal_info)
current->journal_info = h;
@@ -752,7 +766,7 @@ alloc_fail:
btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
num_bytes, NULL);
reserve_fail:
- btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
+ btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
return ERR_PTR(ret);
}
@@ -785,7 +799,10 @@ struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *
/*
* Similar to regular join but it never starts a transaction when none is
- * running or after waiting for the current one to finish.
+ * running or when there's a running one at a state >= TRANS_STATE_UNBLOCKED.
+ * This is similar to btrfs_attach_transaction() but it allows the join to
+ * happen if the transaction commit already started but it's not yet in the
+ * "doing" phase (the state is < TRANS_STATE_COMMIT_DOING).
*/
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
{
@@ -826,8 +843,13 @@ btrfs_attach_transaction_barrier(struct btrfs_root *root)
trans = start_transaction(root, 0, TRANS_ATTACH,
BTRFS_RESERVE_NO_FLUSH, true);
- if (trans == ERR_PTR(-ENOENT))
- btrfs_wait_for_commit(root->fs_info, 0);
+ if (trans == ERR_PTR(-ENOENT)) {
+ int ret;
+
+ ret = btrfs_wait_for_commit(root->fs_info, 0);
+ if (ret)
+ return ERR_PTR(ret);
+ }
return trans;
}
@@ -931,6 +953,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid)
}
wait_for_commit(cur_trans, TRANS_STATE_COMPLETED);
+ ret = cur_trans->aborted;
btrfs_put_transaction(cur_trans);
out:
return ret;
@@ -1054,8 +1077,8 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0;
u64 end;
- while (!find_first_extent_bit(dirty_pages, start, &start, &end,
- mark, &cached_state)) {
+ while (find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark, &cached_state)) {
bool wait_writeback = false;
err = convert_extent_bit(dirty_pages, start, end,
@@ -1108,8 +1131,8 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0;
u64 end;
- while (!find_first_extent_bit(dirty_pages, start, &start, &end,
- EXTENT_NEED_WAIT, &cached_state)) {
+ while (find_first_extent_bit(dirty_pages, start, &start, &end,
+ EXTENT_NEED_WAIT, &cached_state)) {
/*
* Ignore -ENOMEM errors returned by clear_extent_bit().
* When committing the transaction, we'll remove any entries
@@ -1831,8 +1854,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_i_size_write(BTRFS_I(parent_inode), parent_inode->i_size +
fname.disk_name.len * 2);
- parent_inode->i_mtime = current_time(parent_inode);
- parent_inode->i_ctime = parent_inode->i_mtime;
+ parent_inode->i_mtime = inode_set_ctime_current(parent_inode);
ret = btrfs_update_inode_fallback(trans, parent_root, BTRFS_I(parent_inode));
if (ret) {
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index 038dfa8f1788..ab08a0b01311 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -446,6 +446,20 @@ static int check_root_key(struct extent_buffer *leaf, struct btrfs_key *key,
btrfs_item_key_to_cpu(leaf, &item_key, slot);
is_root_item = (item_key.type == BTRFS_ROOT_ITEM_KEY);
+ /*
+ * Bad rootid for reloc trees.
+ *
+ * Reloc trees are only for subvolume trees, other trees only need
+ * to be COWed to be relocated.
+ */
+ if (unlikely(is_root_item && key->objectid == BTRFS_TREE_RELOC_OBJECTID &&
+ !is_fstree(key->offset))) {
+ generic_err(leaf, slot,
+ "invalid reloc tree for root %lld, root id is not a subvolume tree",
+ key->offset);
+ return -EUCLEAN;
+ }
+
/* No such tree id */
if (unlikely(key->objectid == 0)) {
if (is_root_item)
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 365a1cc0a3c3..d1e46b839519 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -4148,9 +4148,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
inode->i_mtime.tv_nsec);
btrfs_set_token_timespec_sec(&token, &item->ctime,
- inode->i_ctime.tv_sec);
+ inode_get_ctime(inode).tv_sec);
btrfs_set_token_timespec_nsec(&token, &item->ctime,
- inode->i_ctime.tv_nsec);
+ inode_get_ctime(inode).tv_nsec);
/*
* We do not need to set the nbytes field, in fact during a fast fsync
@@ -4841,13 +4841,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_extent *tmp;
struct extent_map *em, *n;
- struct list_head extents;
+ LIST_HEAD(extents);
struct extent_map_tree *tree = &inode->extent_tree;
int ret = 0;
int num = 0;
- INIT_LIST_HEAD(&extents);
-
write_lock(&tree->lock);
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
@@ -6794,8 +6792,8 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
while (true) {
struct btrfs_fs_info *fs_info = root->fs_info;
- struct extent_buffer *leaf = path->nodes[0];
- int slot = path->slots[0];
+ struct extent_buffer *leaf;
+ int slot;
struct btrfs_key search_key;
struct inode *inode;
u64 ino;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 73f9ea7672db..9621455edebc 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -681,6 +681,14 @@ error_free_page:
return -EINVAL;
}
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
+{
+ bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
+ BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
+
+ return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
+}
+
/*
* Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
* being created with a disk that has already completed its fsid change. Such
@@ -833,15 +841,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
found_transid > fs_devices->latest_generation) {
memcpy(fs_devices->fsid, disk_super->fsid,
BTRFS_FSID_SIZE);
-
- if (has_metadata_uuid)
- memcpy(fs_devices->metadata_uuid,
- disk_super->metadata_uuid,
- BTRFS_FSID_SIZE);
- else
- memcpy(fs_devices->metadata_uuid,
- disk_super->fsid, BTRFS_FSID_SIZE);
-
+ memcpy(fs_devices->metadata_uuid,
+ btrfs_sb_fsid_ptr(disk_super), BTRFS_FSID_SIZE);
fs_devices->fsid_change = false;
}
}
@@ -851,8 +852,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (fs_devices->opened) {
btrfs_err(NULL,
- "device %s belongs to fsid %pU, and the fs is already mounted",
- path, fs_devices->fsid);
+"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
+ path, fs_devices->fsid, current->comm,
+ task_pid_nr(current));
mutex_unlock(&fs_devices->device_list_mutex);
return ERR_PTR(-EBUSY);
}
@@ -1424,9 +1426,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
lockdep_assert_held(&device->fs_info->chunk_mutex);
- if (!find_first_extent_bit(&device->alloc_state, *start,
- &physical_start, &physical_end,
- CHUNK_ALLOCATED, NULL)) {
+ if (find_first_extent_bit(&device->alloc_state, *start,
+ &physical_start, &physical_end,
+ CHUNK_ALLOCATED, NULL)) {
if (in_range(physical_start, *start, len) ||
in_range(*start, physical_start,
@@ -1438,18 +1440,18 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
return false;
}
-static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
+static u64 dev_extent_search_start(struct btrfs_device *device)
{
switch (device->fs_devices->chunk_alloc_policy) {
case BTRFS_CHUNK_ALLOC_REGULAR:
- return max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
+ return BTRFS_DEVICE_RANGE_RESERVED;
case BTRFS_CHUNK_ALLOC_ZONED:
/*
* We don't care about the starting region like regular
* allocator, because we anyway use/reserve the first two zones
* for superblock logging.
*/
- return ALIGN(start, device->zone_info->zone_size);
+ return 0;
default:
BUG();
}
@@ -1581,15 +1583,15 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
* correct usable device space, as device extent freed in current transaction
* is not reported as available.
*/
-static int find_free_dev_extent_start(struct btrfs_device *device,
- u64 num_bytes, u64 search_start, u64 *start,
- u64 *len)
+static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
+ u64 *start, u64 *len)
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_root *root = fs_info->dev_root;
struct btrfs_key key;
struct btrfs_dev_extent *dev_extent;
struct btrfs_path *path;
+ u64 search_start;
u64 hole_size;
u64 max_hole_start;
u64 max_hole_size;
@@ -1599,7 +1601,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
int slot;
struct extent_buffer *l;
- search_start = dev_extent_search_start(device, search_start);
+ search_start = dev_extent_search_start(device);
WARN_ON(device->zone_info &&
!IS_ALIGNED(num_bytes, device->zone_info->zone_size));
@@ -1725,13 +1727,6 @@ out:
return ret;
}
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
- u64 *start, u64 *len)
-{
- /* FIXME use last free of some kind */
- return find_free_dev_extent_start(device, num_bytes, 0, start, len);
-}
-
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device,
u64 start, u64 *dev_extent_len)
@@ -1917,15 +1912,13 @@ out:
static void update_dev_time(const char *device_path)
{
struct path path;
- struct timespec64 now;
int ret;
ret = kern_path(device_path, LOOKUP_FOLLOW, &path);
if (ret)
return;
- now = current_time(d_inode(path.dentry));
- inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME | S_VERSION);
+ inode_update_time(d_inode(path.dentry), S_MTIME | S_CTIME | S_VERSION);
path_put(&path);
}
@@ -4078,14 +4071,6 @@ static int alloc_profile_is_valid(u64 flags, int extended)
return has_single_bit_set(flags);
}
-static inline int balance_need_close(struct btrfs_fs_info *fs_info)
-{
- /* cancel requested || normal exit path */
- return atomic_read(&fs_info->balance_cancel_req) ||
- (atomic_read(&fs_info->balance_pause_req) == 0 &&
- atomic_read(&fs_info->balance_cancel_req) == 0);
-}
-
/*
* Validate target profile against allowed profiles and return true if it's OK.
* Otherwise print the error message and return false.
@@ -4275,6 +4260,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
u64 num_devices;
unsigned seq;
bool reducing_redundancy;
+ bool paused = false;
int i;
if (btrfs_fs_closing(fs_info) ||
@@ -4405,6 +4391,7 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
if (ret == -ECANCELED && atomic_read(&fs_info->balance_pause_req)) {
btrfs_info(fs_info, "balance: paused");
btrfs_exclop_balance(fs_info, BTRFS_EXCLOP_BALANCE_PAUSED);
+ paused = true;
}
/*
* Balance can be canceled by:
@@ -4433,8 +4420,8 @@ int btrfs_balance(struct btrfs_fs_info *fs_info,
btrfs_update_ioctl_balance_args(fs_info, bargs);
}
- if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
- balance_need_close(fs_info)) {
+ /* We didn't pause, we can clean everything up. */
+ if (!paused) {
reset_balance_state(fs_info);
btrfs_exclop_finish(fs_info);
}
@@ -4644,8 +4631,7 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
}
}
- BUG_ON(fs_info->balance_ctl ||
- test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
+ ASSERT(!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags));
atomic_dec(&fs_info->balance_cancel_req);
mutex_unlock(&fs_info->balance_mutex);
return 0;
@@ -6226,6 +6212,45 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
}
+/*
+ * Map one logical range to one or more physical ranges.
+ *
+ * @length: (Mandatory) mapped length of this run.
+ * One logical range can be split into different segments
+ * due to factors like zones and RAID0/5/6/10 stripe
+ * boundaries.
+ *
+ * @bioc_ret: (Mandatory) returned btrfs_io_context structure.
+ * which has one or more physical ranges (btrfs_io_stripe)
+ * recorded inside.
+ * Caller should call btrfs_put_bioc() to free it after use.
+ *
+ * @smap: (Optional) single physical range optimization.
+ * If the map request can be fulfilled by one single
+ * physical range, and this is parameter is not NULL,
+ * then @bioc_ret would be NULL, and @smap would be
+ * updated.
+ *
+ * @mirror_num_ret: (Mandatory) returned mirror number if the original
+ * value is 0.
+ *
+ * Mirror number 0 means to choose any live mirrors.
+ *
+ * For non-RAID56 profiles, non-zero mirror_num means
+ * the Nth mirror. (e.g. mirror_num 1 means the first
+ * copy).
+ *
+ * For RAID56 profile, mirror 1 means rebuild from P and
+ * the remaining data stripes.
+ *
+ * For RAID6 profile, mirror > 2 means mark another
+ * data/P stripe error and rebuild from the remaining
+ * stripes..
+ *
+ * @need_raid_map: (Used only for integrity checker) whether the map wants
+ * a full stripe map (including all data and P/Q stripes)
+ * for RAID56. Should always be 1 except integrity checker.
+ */
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
@@ -6400,11 +6425,10 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* I/O context structure.
*/
if (smap && num_alloc_stripes == 1 &&
- !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
- (op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
- !dev_replace->tgtdev)) {
+ !((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
- *mirror_num_ret = mirror_num;
+ if (mirror_num_ret)
+ *mirror_num_ret = mirror_num;
*bioc_ret = NULL;
ret = 0;
goto out;
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index b8c51f16ba86..2128a032c3b7 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -650,8 +650,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
int btrfs_uuid_scan_kthread(void *data);
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
-int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
- u64 *start, u64 *max_avail);
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
struct btrfs_ioctl_get_dev_stats *stats);
@@ -749,5 +747,6 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
+u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb);
#endif
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index fc4b20c2688a..96828a13dd43 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -264,7 +264,7 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
goto out;
inode_inc_iversion(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret)
btrfs_abort_transaction(trans, ret);
@@ -407,7 +407,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
ret = btrfs_set_prop(trans, inode, name, value, size, flags);
if (!ret) {
inode_inc_iversion(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
if (ret)
btrfs_abort_transaction(trans, ret);
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 85b8b332add9..09bc325d075d 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -65,6 +65,9 @@
#define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
+static void wait_eb_writebacks(struct btrfs_block_group *block_group);
+static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written);
+
static inline bool sb_zone_is_full(const struct blk_zone *zone)
{
return (zone->cond == BLK_ZONE_COND_FULL) ||
@@ -465,8 +468,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
* use the cache.
*/
if (populate_cache && bdev_is_zoned(device->bdev)) {
- zone_info->zone_cache = vzalloc(sizeof(struct blk_zone) *
- zone_info->nr_zones);
+ zone_info->zone_cache = vcalloc(zone_info->nr_zones,
+ sizeof(struct blk_zone));
if (!zone_info->zone_cache) {
btrfs_err_in_rcu(device->fs_info,
"zoned: failed to allocate zone cache for %s",
@@ -805,6 +808,9 @@ int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
return -EINVAL;
}
+ btrfs_clear_and_info(info, DISCARD_ASYNC,
+ "zoned: async discard ignored and disabled for zoned mode");
+
return 0;
}
@@ -1580,19 +1586,9 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
return;
WARN_ON(cache->bytes_super != 0);
-
- /* Check for block groups never get activated */
- if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) &&
- cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) &&
- !test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) &&
- cache->alloc_offset == 0) {
- unusable = cache->length;
- free = 0;
- } else {
- unusable = (cache->alloc_offset - cache->used) +
- (cache->length - cache->zone_capacity);
- free = cache->zone_capacity - cache->alloc_offset;
- }
+ unusable = (cache->alloc_offset - cache->used) +
+ (cache->length - cache->zone_capacity);
+ free = cache->zone_capacity - cache->alloc_offset;
/* We only need ->free_space in ALLOC_SEQ block groups */
cache->cached = BTRFS_CACHE_FINISHED;
@@ -1704,10 +1700,21 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- struct btrfs_ordered_sum *sum =
- list_first_entry(&ordered->list, typeof(*sum), list);
- u64 logical = sum->logical;
- u64 len = sum->len;
+ struct btrfs_ordered_sum *sum;
+ u64 logical, len;
+
+ /*
+ * Write to pre-allocated region is for the data relocation, and so
+ * it should use WRITE operation. No split/rewrite are necessary.
+ */
+ if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
+ return;
+
+ ASSERT(!list_empty(&ordered->list));
+ /* The ordered->list can be empty in the above pre-alloc case. */
+ sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list);
+ logical = sum->logical;
+ len = sum->len;
while (len < ordered->disk_num_bytes) {
sum = list_next_entry(sum, list);
@@ -1744,41 +1751,121 @@ out:
}
}
-bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb,
- struct btrfs_block_group **cache_ret)
+static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
+ struct btrfs_block_group **active_bg)
{
- struct btrfs_block_group *cache;
- bool ret = true;
+ const struct writeback_control *wbc = ctx->wbc;
+ struct btrfs_block_group *block_group = ctx->zoned_bg;
+ struct btrfs_fs_info *fs_info = block_group->fs_info;
- if (!btrfs_is_zoned(fs_info))
+ if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
return true;
- cache = btrfs_lookup_block_group(fs_info, eb->start);
- if (!cache)
- return true;
+ if (fs_info->treelog_bg == block_group->start) {
+ if (!btrfs_zone_activate(block_group)) {
+ int ret_fin = btrfs_zone_finish_one_bg(fs_info);
- if (cache->meta_write_pointer != eb->start) {
- btrfs_put_block_group(cache);
- cache = NULL;
- ret = false;
- } else {
- cache->meta_write_pointer = eb->start + eb->len;
- }
+ if (ret_fin != 1 || !btrfs_zone_activate(block_group))
+ return false;
+ }
+ } else if (*active_bg != block_group) {
+ struct btrfs_block_group *tgt = *active_bg;
- *cache_ret = cache;
+ /* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */
+ lockdep_assert_held(&fs_info->zoned_meta_io_lock);
- return ret;
+ if (tgt) {
+ /*
+ * If there is an unsent IO left in the allocated area,
+ * we cannot wait for them as it may cause a deadlock.
+ */
+ if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) {
+ if (wbc->sync_mode == WB_SYNC_NONE ||
+ (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync))
+ return false;
+ }
+
+ /* Pivot active metadata/system block group. */
+ btrfs_zoned_meta_io_unlock(fs_info);
+ wait_eb_writebacks(tgt);
+ do_zone_finish(tgt, true);
+ btrfs_zoned_meta_io_lock(fs_info);
+ if (*active_bg == tgt) {
+ btrfs_put_block_group(tgt);
+ *active_bg = NULL;
+ }
+ }
+ if (!btrfs_zone_activate(block_group))
+ return false;
+ if (*active_bg != block_group) {
+ ASSERT(*active_bg == NULL);
+ *active_bg = block_group;
+ btrfs_get_block_group(block_group);
+ }
+ }
+
+ return true;
}
-void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
- struct extent_buffer *eb)
+/*
+ * Check if @ctx->eb is aligned to the write pointer.
+ *
+ * Return:
+ * 0: @ctx->eb is at the write pointer. You can write it.
+ * -EAGAIN: There is a hole. The caller should handle the case.
+ * -EBUSY: There is a hole, but the caller can just bail out.
+ */
+int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+ struct btrfs_eb_write_context *ctx)
{
- if (!btrfs_is_zoned(eb->fs_info) || !cache)
- return;
+ const struct writeback_control *wbc = ctx->wbc;
+ const struct extent_buffer *eb = ctx->eb;
+ struct btrfs_block_group *block_group = ctx->zoned_bg;
+
+ if (!btrfs_is_zoned(fs_info))
+ return 0;
+
+ if (block_group) {
+ if (block_group->start > eb->start ||
+ block_group->start + block_group->length <= eb->start) {
+ btrfs_put_block_group(block_group);
+ block_group = NULL;
+ ctx->zoned_bg = NULL;
+ }
+ }
+
+ if (!block_group) {
+ block_group = btrfs_lookup_block_group(fs_info, eb->start);
+ if (!block_group)
+ return 0;
+ ctx->zoned_bg = block_group;
+ }
+
+ if (block_group->meta_write_pointer == eb->start) {
+ struct btrfs_block_group **tgt;
+
+ if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
+ return 0;
+
+ if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)
+ tgt = &fs_info->active_system_bg;
+ else
+ tgt = &fs_info->active_meta_bg;
+ if (check_bg_is_active(ctx, tgt))
+ return 0;
+ }
+
+ /*
+ * Since we may release fs_info->zoned_meta_io_lock, someone can already
+ * start writing this eb. In that case, we can just bail out.
+ */
+ if (block_group->meta_write_pointer > eb->start)
+ return -EBUSY;
- ASSERT(cache->meta_write_pointer == eb->start + eb->len);
- cache->meta_write_pointer = eb->start;
+ /* If for_sync, this hole will be filled with trasnsaction commit. */
+ if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
+ return -EAGAIN;
+ return -EBUSY;
}
int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length)
@@ -1876,10 +1963,10 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
bool btrfs_zone_activate(struct btrfs_block_group *block_group)
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
- struct btrfs_space_info *space_info = block_group->space_info;
struct map_lookup *map;
struct btrfs_device *device;
u64 physical;
+ const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA);
bool ret;
int i;
@@ -1888,7 +1975,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
map = block_group->physical_map;
- spin_lock(&space_info->lock);
spin_lock(&block_group->lock);
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
ret = true;
@@ -1901,30 +1987,44 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
goto out_unlock;
}
+ spin_lock(&fs_info->zone_active_bgs_lock);
for (i = 0; i < map->num_stripes; i++) {
+ struct btrfs_zoned_device_info *zinfo;
+ int reserved = 0;
+
device = map->stripes[i].dev;
physical = map->stripes[i].physical;
+ zinfo = device->zone_info;
- if (device->zone_info->max_active_zones == 0)
+ if (zinfo->max_active_zones == 0)
continue;
+ if (is_data)
+ reserved = zinfo->reserved_active_zones;
+ /*
+ * For the data block group, leave active zones for one
+ * metadata block group and one system block group.
+ */
+ if (atomic_read(&zinfo->active_zones_left) <= reserved) {
+ ret = false;
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+ goto out_unlock;
+ }
+
if (!btrfs_dev_set_active_zone(device, physical)) {
/* Cannot activate the zone */
ret = false;
+ spin_unlock(&fs_info->zone_active_bgs_lock);
goto out_unlock;
}
+ if (!is_data)
+ zinfo->reserved_active_zones--;
}
+ spin_unlock(&fs_info->zone_active_bgs_lock);
/* Successfully activated all the zones */
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
- WARN_ON(block_group->alloc_offset != 0);
- if (block_group->zone_unusable == block_group->length) {
- block_group->zone_unusable = block_group->length - block_group->zone_capacity;
- space_info->bytes_zone_unusable -= block_group->zone_capacity;
- }
spin_unlock(&block_group->lock);
- btrfs_try_granting_tickets(fs_info, space_info);
- spin_unlock(&space_info->lock);
/* For the active block group list */
btrfs_get_block_group(block_group);
@@ -1937,7 +2037,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
out_unlock:
spin_unlock(&block_group->lock);
- spin_unlock(&space_info->lock);
return ret;
}
@@ -2003,6 +2102,10 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
* and block_group->meta_write_pointer for metadata.
*/
if (!fully_written) {
+ if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
+ spin_unlock(&block_group->lock);
+ return -EAGAIN;
+ }
spin_unlock(&block_group->lock);
ret = btrfs_inc_block_group_ro(block_group, false);
@@ -2031,7 +2134,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
return 0;
}
- if (block_group->reserved) {
+ if (block_group->reserved ||
+ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
+ &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
btrfs_dec_block_group_ro(block_group);
return -EAGAIN;
@@ -2040,6 +2145,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
block_group->alloc_offset = block_group->zone_capacity;
+ if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
+ block_group->meta_write_pointer = block_group->start +
+ block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
btrfs_clear_treelog_bg(block_group);
btrfs_clear_data_reloc_bg(block_group);
@@ -2049,18 +2157,21 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev;
const u64 physical = map->stripes[i].physical;
+ struct btrfs_zoned_device_info *zinfo = device->zone_info;
- if (device->zone_info->max_active_zones == 0)
+ if (zinfo->max_active_zones == 0)
continue;
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
physical >> SECTOR_SHIFT,
- device->zone_info->zone_size >> SECTOR_SHIFT,
+ zinfo->zone_size >> SECTOR_SHIFT,
GFP_NOFS);
if (ret)
return ret;
+ if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
+ zinfo->reserved_active_zones++;
btrfs_dev_clear_active_zone(device, physical);
}
@@ -2099,8 +2210,10 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
/* Check if there is a device with active zones left */
mutex_lock(&fs_info->chunk_mutex);
+ spin_lock(&fs_info->zone_active_bgs_lock);
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
struct btrfs_zoned_device_info *zinfo = device->zone_info;
+ int reserved = 0;
if (!device->bdev)
continue;
@@ -2110,17 +2223,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
break;
}
+ if (flags & BTRFS_BLOCK_GROUP_DATA)
+ reserved = zinfo->reserved_active_zones;
+
switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
case 0: /* single */
- ret = (atomic_read(&zinfo->active_zones_left) >= 1);
+ ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved));
break;
case BTRFS_BLOCK_GROUP_DUP:
- ret = (atomic_read(&zinfo->active_zones_left) >= 2);
+ ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved));
break;
}
if (ret)
break;
}
+ spin_unlock(&fs_info->zone_active_bgs_lock);
mutex_unlock(&fs_info->chunk_mutex);
if (!ret)
@@ -2262,7 +2379,10 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
/* All relocation extents are written. */
if (block_group->start + block_group->alloc_offset == logical + length) {
- /* Now, release this block group for further allocations. */
+ /*
+ * Now, release this block group for further allocations and
+ * zone finish.
+ */
clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
&block_group->runtime_flags);
}
@@ -2286,7 +2406,8 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
spin_lock(&block_group->lock);
if (block_group->reserved || block_group->alloc_offset == 0 ||
- (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
+ (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
+ test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
spin_unlock(&block_group->lock);
continue;
}
@@ -2362,3 +2483,55 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
return 0;
}
+
+/*
+ * Reserve zones for one metadata block group, one tree-log block group, and one
+ * system block group.
+ */
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
+ struct btrfs_block_group *block_group;
+ struct btrfs_device *device;
+ /* Reserve zones for normal SINGLE metadata and tree-log block group. */
+ unsigned int metadata_reserve = 2;
+ /* Reserve a zone for SINGLE system block group. */
+ unsigned int system_reserve = 1;
+
+ if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
+ return;
+
+ /*
+ * This function is called from the mount context. So, there is no
+ * parallel process touching the bits. No need for read_seqretry().
+ */
+ if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+ metadata_reserve = 4;
+ if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
+ system_reserve = 2;
+
+ /* Apply the reservation on all the devices. */
+ mutex_lock(&fs_devices->device_list_mutex);
+ list_for_each_entry(device, &fs_devices->devices, dev_list) {
+ if (!device->bdev)
+ continue;
+
+ device->zone_info->reserved_active_zones =
+ metadata_reserve + system_reserve;
+ }
+ mutex_unlock(&fs_devices->device_list_mutex);
+
+ /* Release reservation for currently active block groups. */
+ spin_lock(&fs_info->zone_active_bgs_lock);
+ list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
+ struct map_lookup *map = block_group->physical_map;
+
+ if (!(block_group->flags &
+ (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
+ continue;
+
+ for (int i = 0; i < map->num_stripes; i++)
+ map->stripes[i].dev->zone_info->reserved_active_zones--;
+ }
+ spin_unlock(&fs_info->zone_active_bgs_lock);
+}
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index 27322b926038..b9cec523b778 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -22,6 +22,11 @@ struct btrfs_zoned_device_info {
u8 zone_size_shift;
u32 nr_zones;
unsigned int max_active_zones;
+ /*
+ * Reserved active zones for one metadata and one system block group.
+ * It can vary per-device depending on the allocation status.
+ */
+ int reserved_active_zones;
atomic_t active_zones_left;
unsigned long *seq_zones;
unsigned long *empty_zones;
@@ -58,11 +63,8 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb);
bool btrfs_use_zone_append(struct btrfs_bio *bbio);
void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
-bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb,
- struct btrfs_block_group **cache_ret);
-void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
- struct extent_buffer *eb);
+int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+ struct btrfs_eb_write_context *ctx);
int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length);
int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
u64 physical_start, u64 physical_pos);
@@ -81,6 +83,7 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
struct btrfs_space_info *space_info, bool do_finish);
+void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
@@ -189,17 +192,10 @@ static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
{
}
-static inline bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb,
- struct btrfs_block_group **cache_ret)
-{
- return true;
-}
-
-static inline void btrfs_revert_meta_write_pointer(
- struct btrfs_block_group *cache,
- struct extent_buffer *eb)
+static inline int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
+ struct btrfs_eb_write_context *ctx)
{
+ return 0;
}
static inline int btrfs_zoned_issue_zeroout(struct btrfs_device *device,
@@ -262,6 +258,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
return 0;
}
+static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
+
#endif
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
diff --git a/fs/buffer.c b/fs/buffer.c
index bd091329026c..084a6ade108a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -49,6 +49,7 @@
#include <trace/events/block.h>
#include <linux/fscrypt.h>
#include <linux/fsverity.h>
+#include <linux/sched/isolation.h>
#include "internal.h"
@@ -1225,19 +1226,14 @@ EXPORT_SYMBOL(mark_buffer_dirty);
void mark_buffer_write_io_error(struct buffer_head *bh)
{
- struct super_block *sb;
-
set_buffer_write_io_error(bh);
/* FIXME: do we need to set this in both places? */
if (bh->b_folio && bh->b_folio->mapping)
mapping_set_error(bh->b_folio->mapping, -EIO);
- if (bh->b_assoc_map)
+ if (bh->b_assoc_map) {
mapping_set_error(bh->b_assoc_map, -EIO);
- rcu_read_lock();
- sb = READ_ONCE(bh->b_bdev->bd_super);
- if (sb)
- errseq_set(&sb->s_wb_err, -EIO);
- rcu_read_unlock();
+ errseq_set(&bh->b_assoc_map->host->i_sb->s_wb_err, -EIO);
+ }
}
EXPORT_SYMBOL(mark_buffer_write_io_error);
@@ -1352,7 +1348,7 @@ static void bh_lru_install(struct buffer_head *bh)
* failing page migration.
* Skip putting upcoming bh into bh_lru until migration is done.
*/
- if (lru_cache_disabled()) {
+ if (lru_cache_disabled() || cpu_is_isolated(smp_processor_id())) {
bh_lru_unlock();
return;
}
@@ -1382,6 +1378,10 @@ lookup_bh_lru(struct block_device *bdev, sector_t block, unsigned size)
check_irqs_on();
bh_lru_lock();
+ if (cpu_is_isolated(smp_processor_id())) {
+ bh_lru_unlock();
+ return NULL;
+ }
for (i = 0; i < BH_LRU_SIZE; i++) {
struct buffer_head *bh = __this_cpu_read(bh_lrus.bhs[i]);
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 175a25fcade8..009d23cd435b 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -259,9 +259,7 @@ static void cachefiles_write_complete(struct kiocb *iocb, long ret)
_enter("%ld", ret);
- /* Tell lockdep we inherited freeze protection from submission thread */
- __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
- __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
+ kiocb_end_write(iocb);
if (ret < 0)
trace_cachefiles_io_error(object, inode, ret,
@@ -286,7 +284,6 @@ int __cachefiles_write(struct cachefiles_object *object,
{
struct cachefiles_cache *cache;
struct cachefiles_kiocb *ki;
- struct inode *inode;
unsigned int old_nofs;
ssize_t ret;
size_t len = iov_iter_count(iter);
@@ -322,19 +319,12 @@ int __cachefiles_write(struct cachefiles_object *object,
ki->iocb.ki_complete = cachefiles_write_complete;
atomic_long_add(ki->b_writing, &cache->b_writing);
- /* Open-code file_start_write here to grab freeze protection, which
- * will be released by another thread in aio_complete_rw(). Fool
- * lockdep by telling it the lock got released so that it doesn't
- * complain about the held lock when we return to userspace.
- */
- inode = file_inode(file);
- __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
- __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+ kiocb_start_write(&ki->iocb);
get_file(ki->iocb.ki_filp);
cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
- trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
+ trace_cachefiles_write(object, file_inode(file), ki->iocb.ki_pos, len);
old_nofs = memalloc_nofs_save();
ret = cachefiles_inject_write_error();
if (ret == 0)
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 6945a938d396..c91b293267d7 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -93,7 +93,7 @@ int ceph_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
char *value = NULL;
struct iattr newattrs;
struct inode *inode = d_inode(dentry);
- struct timespec64 old_ctime = inode->i_ctime;
+ struct timespec64 old_ctime = inode_get_ctime(inode);
umode_t new_mode = inode->i_mode, old_mode = inode->i_mode;
if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index e2bb0d0072da..09cd6d334604 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1400,7 +1400,7 @@ static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
arg->mtime = inode->i_mtime;
arg->atime = inode->i_atime;
- arg->ctime = inode->i_ctime;
+ arg->ctime = inode_get_ctime(inode);
arg->btime = ci->i_btime;
arg->change_attr = inode_peek_iversion_raw(inode);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 4a2b39d9a61a..bdcffb04513f 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -2019,9 +2019,10 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
}
}
+WRAP_DIR_ITER(ceph_readdir) // FIXME!
const struct file_operations ceph_dir_fops = {
.read = ceph_read_dir,
- .iterate = ceph_readdir,
+ .iterate_shared = shared_ceph_readdir,
.llseek = ceph_dir_llseek,
.open = ceph_open,
.release = ceph_release,
@@ -2033,7 +2034,7 @@ const struct file_operations ceph_dir_fops = {
};
const struct file_operations ceph_snapdir_fops = {
- .iterate = ceph_readdir,
+ .iterate_shared = shared_ceph_readdir,
.llseek = ceph_dir_llseek,
.open = ceph_open,
.release = ceph_release,
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 8e5f41d45283..fd05d68e2990 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -100,7 +100,7 @@ struct inode *ceph_get_snapdir(struct inode *parent)
inode->i_uid = parent->i_uid;
inode->i_gid = parent->i_gid;
inode->i_mtime = parent->i_mtime;
- inode->i_ctime = parent->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(parent));
inode->i_atime = parent->i_atime;
ci->i_rbytes = 0;
ci->i_btime = ceph_inode(parent)->i_btime;
@@ -688,6 +688,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
struct timespec64 *mtime, struct timespec64 *atime)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct timespec64 ictime = inode_get_ctime(inode);
int warn = 0;
if (issued & (CEPH_CAP_FILE_EXCL|
@@ -696,11 +697,11 @@ void ceph_fill_file_time(struct inode *inode, int issued,
CEPH_CAP_AUTH_EXCL|
CEPH_CAP_XATTR_EXCL)) {
if (ci->i_version == 0 ||
- timespec64_compare(ctime, &inode->i_ctime) > 0) {
+ timespec64_compare(ctime, &ictime) > 0) {
dout("ctime %lld.%09ld -> %lld.%09ld inc w/ cap\n",
- inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ ictime.tv_sec, ictime.tv_nsec,
ctime->tv_sec, ctime->tv_nsec);
- inode->i_ctime = *ctime;
+ inode_set_ctime_to_ts(inode, *ctime);
}
if (ci->i_version == 0 ||
ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) > 0) {
@@ -738,7 +739,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
} else {
/* we have no write|excl caps; whatever the MDS says is true */
if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
- inode->i_ctime = *ctime;
+ inode_set_ctime_to_ts(inode, *ctime);
inode->i_mtime = *mtime;
inode->i_atime = *atime;
ci->i_time_warp_seq = time_warp_seq;
@@ -2166,7 +2167,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
bool only = (ia_valid & (ATTR_SIZE|ATTR_MTIME|ATTR_ATIME|
ATTR_MODE|ATTR_UID|ATTR_GID)) == 0;
dout("setattr %p ctime %lld.%ld -> %lld.%ld (%s)\n", inode,
- inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
+ inode_get_ctime(inode).tv_sec,
+ inode_get_ctime(inode).tv_nsec,
attr->ia_ctime.tv_sec, attr->ia_ctime.tv_nsec,
only ? "ctime only" : "ignored");
if (only) {
@@ -2191,7 +2193,7 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
if (dirtied) {
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
- inode->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
inode_inc_iversion_raw(inode);
}
@@ -2465,7 +2467,7 @@ int ceph_getattr(struct mnt_idmap *idmap, const struct path *path,
return err;
}
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->ino = ceph_present_inode(inode);
/*
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 66048a86c480..5fb367b1d4b0 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
dout("mdsc delayed_work\n");
- if (mdsc->stopping)
+ if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
return;
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
{
dout("pre_umount\n");
- mdsc->stopping = 1;
+ mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 724307ff89cd..86d2965e68a1 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -380,6 +380,11 @@ struct cap_wait {
int want;
};
+enum {
+ CEPH_MDSC_STOPPING_BEGIN = 1,
+ CEPH_MDSC_STOPPING_FLUSHED = 2,
+};
+
/*
* mds client state
*/
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index cce78d769f55..6d3584f16f9a 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -216,7 +216,7 @@ static void metric_delayed_work(struct work_struct *work)
struct ceph_mds_client *mdsc =
container_of(m, struct ceph_mds_client, metric);
- if (mdsc->stopping)
+ if (mdsc->stopping || disable_send_metrics)
return;
if (!m->session || !check_session_state(m->session)) {
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 343d738448dc..c9920ade15f5 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -660,7 +660,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
capsnap->size = i_size_read(inode);
capsnap->mtime = inode->i_mtime;
capsnap->atime = inode->i_atime;
- capsnap->ctime = inode->i_ctime;
+ capsnap->ctime = inode_get_ctime(inode);
capsnap->btime = ci->i_btime;
capsnap->change_attr = inode_peek_iversion_raw(inode);
capsnap->time_warp_seq = ci->i_time_warp_seq;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 3fc48b43cab0..a5f52013314d 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
ceph_mdsc_pre_umount(fsc->mdsc);
flush_fs_workqueues(fsc);
+ /*
+ * Though the kill_anon_super() will finally trigger the
+ * sync_filesystem() anyway, we still need to do it here
+ * and then bump the stage of shutdown to stop the work
+ * queue as earlier as possible.
+ */
+ sync_filesystem(s);
+
+ fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
+
kill_anon_super(s);
fsc->client->extra_mon_dispatch = NULL;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 806183959c47..1cbd84cc82a8 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -1238,7 +1238,7 @@ retry:
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
&prealloc_cf);
ci->i_xattrs.dirty = true;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
}
spin_unlock(&ci->i_ceph_lock);
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 903ca8fa4b9b..ae023853a98f 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -127,7 +127,8 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
if (attr->va_mtime.tv_sec != -1)
inode->i_mtime = coda_to_timespec64(attr->va_mtime);
if (attr->va_ctime.tv_sec != -1)
- inode->i_ctime = coda_to_timespec64(attr->va_ctime);
+ inode_set_ctime_to_ts(inode,
+ coda_to_timespec64(attr->va_ctime));
}
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 8450b1bd354b..cb512b10473b 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -111,7 +111,7 @@ static inline void coda_dir_update_mtime(struct inode *dir)
/* optimistically we can also act as if our nose bleeds. The
* granularity of the mtime is coarse anyways so we might actually be
* right most of the time. Note: we only do this for directories. */
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
#endif
}
@@ -429,21 +429,14 @@ static int coda_readdir(struct file *coda_file, struct dir_context *ctx)
cfi = coda_ftoc(coda_file);
host_file = cfi->cfi_container;
- if (host_file->f_op->iterate || host_file->f_op->iterate_shared) {
+ if (host_file->f_op->iterate_shared) {
struct inode *host_inode = file_inode(host_file);
ret = -ENOENT;
if (!IS_DEADDIR(host_inode)) {
- if (host_file->f_op->iterate_shared) {
- inode_lock_shared(host_inode);
- ret = host_file->f_op->iterate_shared(host_file, ctx);
- file_accessed(host_file);
- inode_unlock_shared(host_inode);
- } else {
- inode_lock(host_inode);
- ret = host_file->f_op->iterate(host_file, ctx);
- file_accessed(host_file);
- inode_unlock(host_inode);
- }
+ inode_lock_shared(host_inode);
+ ret = host_file->f_op->iterate_shared(host_file, ctx);
+ file_accessed(host_file);
+ inode_unlock_shared(host_inode);
}
return ret;
}
@@ -585,10 +578,11 @@ const struct inode_operations coda_dir_inode_operations = {
.setattr = coda_setattr,
};
+WRAP_DIR_ITER(coda_readdir) // FIXME!
const struct file_operations coda_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = coda_readdir,
+ .iterate_shared = shared_coda_readdir,
.open = coda_open,
.release = coda_release,
.fsync = coda_fsync,
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 12b26bd13564..42346618b4ed 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -84,7 +84,7 @@ coda_file_write_iter(struct kiocb *iocb, struct iov_iter *to)
ret = vfs_iter_write(cfi->cfi_container, to, &iocb->ki_pos, 0);
coda_inode->i_size = file_inode(host_file)->i_size;
coda_inode->i_blocks = (coda_inode->i_size + 511) >> 9;
- coda_inode->i_mtime = coda_inode->i_ctime = current_time(coda_inode);
+ coda_inode->i_mtime = inode_set_ctime_current(coda_inode);
inode_unlock(coda_inode);
file_end_write(host_file);
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index d661e6cf17ac..0c7c2528791e 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -256,7 +256,8 @@ int coda_getattr(struct mnt_idmap *idmap, const struct path *path,
{
int err = coda_revalidate_inode(d_inode(path->dentry));
if (!err)
- generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask,
+ d_inode(path->dentry), stat);
return err;
}
@@ -269,7 +270,7 @@ int coda_setattr(struct mnt_idmap *idmap, struct dentry *de,
memset(&vattr, 0, sizeof(vattr));
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
coda_iattr_to_vattr(iattr, &vattr);
vattr.va_type = C_VNON; /* cannot set type */
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index 1c15edbe70ff..fbdcb3582926 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -88,8 +88,7 @@ int configfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
{
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime =
- inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
@@ -99,7 +98,7 @@ static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
inode->i_gid = iattr->ia_gid;
inode->i_atime = iattr->ia_atime;
inode->i_mtime = iattr->ia_mtime;
- inode->i_ctime = iattr->ia_ctime;
+ inode_set_ctime_to_ts(inode, iattr->ia_ctime);
}
struct inode *configfs_new_inode(umode_t mode, struct configfs_dirent *sd,
@@ -172,7 +171,7 @@ struct inode *configfs_create(struct dentry *dentry, umode_t mode)
return ERR_PTR(-ENOMEM);
p_inode = d_inode(dentry->d_parent);
- p_inode->i_mtime = p_inode->i_ctime = current_time(p_inode);
+ p_inode->i_mtime = inode_set_ctime_current(p_inode);
configfs_set_inode_lock_class(sd, inode);
return inode;
}
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 27c6597aa1be..5ee7d7bbb361 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -133,7 +133,8 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
}
/* Struct copy intentional */
- inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime;
+ inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+ zerotime);
/* inode->i_nlink is left 1 - arguably wrong for directories,
but it's the best we can do without reading the directory
contents. 1 yields the right result in GNU find, even
@@ -485,12 +486,16 @@ static void cramfs_kill_sb(struct super_block *sb)
{
struct cramfs_sb_info *sbi = CRAMFS_SB(sb);
+ generic_shutdown_super(sb);
+
if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) {
if (sbi && sbi->mtd_point_size)
mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size);
- kill_mtd_super(sb);
+ put_mtd_device(sb->s_mtd);
+ sb->s_mtd = NULL;
} else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) {
- kill_block_super(sb);
+ sync_blockdev(sb->s_bdev);
+ blkdev_put(sb->s_bdev, sb);
}
kfree(sbi);
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 52e6d5fdab6b..25ac74d30bff 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1664,7 +1664,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
if (dentry == _data && dentry->d_lockref.count == 1)
return D_WALK_CONTINUE;
- printk(KERN_ERR "BUG: Dentry %p{i=%lx,n=%pd} "
+ WARN(1, "BUG: Dentry %p{i=%lx,n=%pd} "
" still in use (%d) [unmount of %s %s]\n",
dentry,
dentry->d_inode ?
@@ -1673,7 +1673,6 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry)
dentry->d_lockref.count,
dentry->d_sb->s_type->name,
dentry->d_sb->s_id);
- WARN_ON(1);
return D_WALK_CONTINUE;
}
@@ -3247,8 +3246,6 @@ void d_genocide(struct dentry *parent)
d_walk(parent, parent, d_genocide_kill);
}
-EXPORT_SYMBOL(d_genocide);
-
void d_tmpfile(struct file *file, struct inode *inode)
{
struct dentry *dentry = file->f_path.dentry;
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 3f81f73c241a..83e57e9f9fa0 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -72,8 +72,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb)
struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
- inode->i_atime = inode->i_mtime =
- inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
return inode;
}
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index fe3db0eda8e4..299c295a27a0 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -338,7 +338,7 @@ static int mknod_ptmx(struct super_block *sb)
}
inode->i_ino = 2;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
mode = S_IFCHR|opts->ptmxmode;
init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2));
@@ -451,7 +451,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
if (!inode)
goto fail;
inode->i_ino = 1;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
@@ -534,12 +534,12 @@ void devpts_kill_index(struct pts_fs_info *fsi, int idx)
/**
* devpts_pty_new -- create a new inode in /dev/pts/
- * @ptmx_inode: inode of the master
- * @device: major+minor of the node to be created
+ * @fsi: Filesystem info for this instance.
* @index: used as a name of the node
* @priv: what's given back by devpts_get_priv
*
- * The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
+ * The dentry for the created inode is returned.
+ * Remove it from /dev/pts/ with devpts_pty_kill().
*/
struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
{
@@ -560,7 +560,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
inode->i_ino = index + 3;
inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
init_special_inode(inode, S_IFCHR|opts->mode, MKDEV(UNIX98_PTY_SLAVE_MAJOR, index));
sprintf(s, "%d", index);
@@ -580,7 +580,7 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
/**
* devpts_get_priv -- get private data for a slave
- * @pts_inode: inode of the slave
+ * @dentry: dentry of the slave
*
* Returns whatever was passed as priv in devpts_pty_new for a given inode.
*/
@@ -593,7 +593,7 @@ void *devpts_get_priv(struct dentry *dentry)
/**
* devpts_pty_kill -- remove inode form /dev/pts/
- * @inode: inode of the slave to be removed
+ * @dentry: dentry of the slave to be removed
*
* This is an inverse operation of devpts_pty_new.
*/
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c16f0d660cb7..03bd55069d86 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -441,10 +441,10 @@ int ecryptfs_encrypt_page(struct page *page)
}
lower_offset = lower_offset_for_page(crypt_stat, page);
- enc_extent_virt = kmap(enc_extent_page);
+ enc_extent_virt = kmap_local_page(enc_extent_page);
rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
PAGE_SIZE);
- kunmap(enc_extent_page);
+ kunmap_local(enc_extent_virt);
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
"Error attempting to write lower page; rc = [%d]\n",
@@ -490,10 +490,10 @@ int ecryptfs_decrypt_page(struct page *page)
BUG_ON(!(crypt_stat->flags & ECRYPTFS_ENCRYPTED));
lower_offset = lower_offset_for_page(crypt_stat, page);
- page_virt = kmap(page);
+ page_virt = kmap_local_page(page);
rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE,
ecryptfs_inode);
- kunmap(page);
+ kunmap_local(page_virt);
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
"Error attempting to read lower page; rc = [%d]\n",
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 83274915ba6d..992d9c7e64ae 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -148,7 +148,7 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
}
fsstack_copy_attr_times(dir, lower_dir);
set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
out_unlock:
dput(lower_dentry);
inode_unlock(lower_dir);
@@ -982,7 +982,7 @@ static int ecryptfs_getattr_link(struct mnt_idmap *idmap,
mount_crypt_stat = &ecryptfs_superblock_to_private(
dentry->d_sb)->mount_crypt_stat;
- generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), stat);
if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
char *target;
size_t targetsiz;
@@ -1011,7 +1011,8 @@ static int ecryptfs_getattr(struct mnt_idmap *idmap,
if (!rc) {
fsstack_copy_attr_all(d_inode(dentry),
ecryptfs_inode_to_lower(d_inode(dentry)));
- generic_fillattr(&nop_mnt_idmap, d_inode(dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask,
+ d_inode(dentry), stat);
stat->blocks = lower_stat.blocks;
}
return rc;
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index 373c3e5747e6..e2483acc4366 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -125,7 +125,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
/* This is a header extent */
char *page_virt;
- page_virt = kmap_atomic(page);
+ page_virt = kmap_local_page(page);
memset(page_virt, 0, PAGE_SIZE);
/* TODO: Support more than one header extent */
if (view_extent_num == 0) {
@@ -138,7 +138,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
crypt_stat,
&written);
}
- kunmap_atomic(page_virt);
+ kunmap_local(page_virt);
flush_dcache_page(page);
if (rc) {
printk(KERN_ERR "%s: Error reading xattr "
@@ -255,7 +255,6 @@ out:
* @mapping: The eCryptfs object
* @pos: The file offset at which to start writing
* @len: Length of the write
- * @flags: Various flags
* @pagep: Pointer to return the page
* @fsdata: Pointer to return fs data (unused)
*
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 60bdcaddcbe5..3458f153a588 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -64,11 +64,11 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT)
+ offset_in_page);
- virt = kmap(page_for_lower);
+ virt = kmap_local_page(page_for_lower);
rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
if (rc > 0)
rc = 0;
- kunmap(page_for_lower);
+ kunmap_local(virt);
return rc;
}
@@ -140,7 +140,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
ecryptfs_page_idx, rc);
goto out;
}
- ecryptfs_page_virt = kmap_atomic(ecryptfs_page);
+ ecryptfs_page_virt = kmap_local_page(ecryptfs_page);
/*
* pos: where we're now writing, offset: where the request was
@@ -163,7 +163,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
(data + data_offset), num_bytes);
data_offset += num_bytes;
}
- kunmap_atomic(ecryptfs_page_virt);
+ kunmap_local(ecryptfs_page_virt);
flush_dcache_page(ecryptfs_page);
SetPageUptodate(ecryptfs_page);
unlock_page(ecryptfs_page);
@@ -253,11 +253,11 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
int rc;
offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page);
- virt = kmap(page_for_ecryptfs);
+ virt = kmap_local_page(page_for_ecryptfs);
rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
if (rc > 0)
rc = 0;
- kunmap(page_for_ecryptfs);
+ kunmap_local(virt);
flush_dcache_page(page_for_ecryptfs);
return rc;
}
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c
index d57ee15874f9..59b52718a3a2 100644
--- a/fs/efivarfs/file.c
+++ b/fs/efivarfs/file.c
@@ -51,7 +51,7 @@ static ssize_t efivarfs_file_write(struct file *file,
} else {
inode_lock(inode);
i_size_write(inode, datasize + sizeof(attributes));
- inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
inode_unlock(inode);
}
diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c
index b973a2c03dde..db9231f0e77b 100644
--- a/fs/efivarfs/inode.c
+++ b/fs/efivarfs/inode.c
@@ -25,7 +25,7 @@ struct inode *efivarfs_get_inode(struct super_block *sb,
if (inode) {
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_flags = is_removable ? 0 : S_IMMUTABLE;
switch (mode & S_IFMT) {
case S_IFREG:
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 3ba94bb005a6..3789d22ba501 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -105,8 +105,8 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
inode->i_size = be32_to_cpu(efs_inode->di_size);
inode->i_atime.tv_sec = be32_to_cpu(efs_inode->di_atime);
inode->i_mtime.tv_sec = be32_to_cpu(efs_inode->di_mtime);
- inode->i_ctime.tv_sec = be32_to_cpu(efs_inode->di_ctime);
- inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+ inode_set_ctime(inode, be32_to_cpu(efs_inode->di_ctime), 0);
+ inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
/* this is the number of blocks in the file */
if (inode->i_size == 0) {
diff --git a/fs/erofs/Kconfig b/fs/erofs/Kconfig
index f259d92c9720..f6dc961e6c2b 100644
--- a/fs/erofs/Kconfig
+++ b/fs/erofs/Kconfig
@@ -38,6 +38,7 @@ config EROFS_FS_DEBUG
config EROFS_FS_XATTR
bool "EROFS extended attributes"
depends on EROFS_FS
+ select XXHASH
default y
help
Extended attributes are name:value pairs associated with inodes by
@@ -99,6 +100,21 @@ config EROFS_FS_ZIP_LZMA
If unsure, say N.
+config EROFS_FS_ZIP_DEFLATE
+ bool "EROFS DEFLATE compressed data support"
+ depends on EROFS_FS_ZIP
+ select ZLIB_INFLATE
+ help
+ Saying Y here includes support for reading EROFS file systems
+ containing DEFLATE compressed data. It gives better compression
+ ratios than the default LZ4 format, while it costs more CPU
+ overhead.
+
+ DEFLATE support is an experimental feature for now and so most
+ file systems will be readable without selecting this option.
+
+ If unsure, say N.
+
config EROFS_FS_ONDEMAND
bool "EROFS fscache-based on-demand read support"
depends on CACHEFILES_ONDEMAND && (EROFS_FS=m && FSCACHE || EROFS_FS=y && FSCACHE=y)
diff --git a/fs/erofs/Makefile b/fs/erofs/Makefile
index a3a98fc3e481..994d0b9deddf 100644
--- a/fs/erofs/Makefile
+++ b/fs/erofs/Makefile
@@ -5,4 +5,5 @@ erofs-objs := super.o inode.o data.o namei.o dir.o utils.o sysfs.o
erofs-$(CONFIG_EROFS_FS_XATTR) += xattr.o
erofs-$(CONFIG_EROFS_FS_ZIP) += decompressor.o zmap.o zdata.o pcpubuf.o
erofs-$(CONFIG_EROFS_FS_ZIP_LZMA) += decompressor_lzma.o
+erofs-$(CONFIG_EROFS_FS_ZIP_DEFLATE) += decompressor_deflate.o
erofs-$(CONFIG_EROFS_FS_ONDEMAND) += fscache.o
diff --git a/fs/erofs/compress.h b/fs/erofs/compress.h
index b1b846504027..349c3316ae6b 100644
--- a/fs/erofs/compress.h
+++ b/fs/erofs/compress.h
@@ -94,4 +94,6 @@ extern const struct z_erofs_decompressor erofs_decompressors[];
/* prototypes for specific algorithms */
int z_erofs_lzma_decompress(struct z_erofs_decompress_req *rq,
struct page **pagepool);
+int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+ struct page **pagepool);
#endif
diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index cfad1eac7fd9..332ec5f74002 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -379,4 +379,10 @@ const struct z_erofs_decompressor erofs_decompressors[] = {
.name = "lzma"
},
#endif
+#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
+ [Z_EROFS_COMPRESSION_DEFLATE] = {
+ .decompress = z_erofs_deflate_decompress,
+ .name = "deflate"
+ },
+#endif
};
diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c
new file mode 100644
index 000000000000..19e5bdeb30b6
--- /dev/null
+++ b/fs/erofs/decompressor_deflate.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/module.h>
+#include <linux/zlib.h>
+#include "compress.h"
+
+struct z_erofs_deflate {
+ struct z_erofs_deflate *next;
+ struct z_stream_s z;
+ u8 bounce[PAGE_SIZE];
+};
+
+static DEFINE_SPINLOCK(z_erofs_deflate_lock);
+static unsigned int z_erofs_deflate_nstrms, z_erofs_deflate_avail_strms;
+static struct z_erofs_deflate *z_erofs_deflate_head;
+static DECLARE_WAIT_QUEUE_HEAD(z_erofs_deflate_wq);
+
+module_param_named(deflate_streams, z_erofs_deflate_nstrms, uint, 0444);
+
+void z_erofs_deflate_exit(void)
+{
+ /* there should be no running fs instance */
+ while (z_erofs_deflate_avail_strms) {
+ struct z_erofs_deflate *strm;
+
+ spin_lock(&z_erofs_deflate_lock);
+ strm = z_erofs_deflate_head;
+ if (!strm) {
+ spin_unlock(&z_erofs_deflate_lock);
+ continue;
+ }
+ z_erofs_deflate_head = NULL;
+ spin_unlock(&z_erofs_deflate_lock);
+
+ while (strm) {
+ struct z_erofs_deflate *n = strm->next;
+
+ vfree(strm->z.workspace);
+ kfree(strm);
+ --z_erofs_deflate_avail_strms;
+ strm = n;
+ }
+ }
+}
+
+int __init z_erofs_deflate_init(void)
+{
+ /* by default, use # of possible CPUs instead */
+ if (!z_erofs_deflate_nstrms)
+ z_erofs_deflate_nstrms = num_possible_cpus();
+
+ for (; z_erofs_deflate_avail_strms < z_erofs_deflate_nstrms;
+ ++z_erofs_deflate_avail_strms) {
+ struct z_erofs_deflate *strm;
+
+ strm = kzalloc(sizeof(*strm), GFP_KERNEL);
+ if (!strm)
+ goto out_failed;
+
+ /* XXX: in-kernel zlib cannot shrink windowbits currently */
+ strm->z.workspace = vmalloc(zlib_inflate_workspacesize());
+ if (!strm->z.workspace) {
+ kfree(strm);
+ goto out_failed;
+ }
+
+ spin_lock(&z_erofs_deflate_lock);
+ strm->next = z_erofs_deflate_head;
+ z_erofs_deflate_head = strm;
+ spin_unlock(&z_erofs_deflate_lock);
+ }
+ return 0;
+
+out_failed:
+ pr_err("failed to allocate zlib workspace\n");
+ z_erofs_deflate_exit();
+ return -ENOMEM;
+}
+
+int z_erofs_load_deflate_config(struct super_block *sb,
+ struct erofs_super_block *dsb,
+ struct z_erofs_deflate_cfgs *dfl, int size)
+{
+ if (!dfl || size < sizeof(struct z_erofs_deflate_cfgs)) {
+ erofs_err(sb, "invalid deflate cfgs, size=%u", size);
+ return -EINVAL;
+ }
+
+ if (dfl->windowbits > MAX_WBITS) {
+ erofs_err(sb, "unsupported windowbits %u", dfl->windowbits);
+ return -EOPNOTSUPP;
+ }
+
+ erofs_info(sb, "EXPERIMENTAL DEFLATE feature in use. Use at your own risk!");
+ return 0;
+}
+
+int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq,
+ struct page **pagepool)
+{
+ const unsigned int nrpages_out =
+ PAGE_ALIGN(rq->pageofs_out + rq->outputsize) >> PAGE_SHIFT;
+ const unsigned int nrpages_in =
+ PAGE_ALIGN(rq->inputsize) >> PAGE_SHIFT;
+ struct super_block *sb = rq->sb;
+ unsigned int insz, outsz, pofs;
+ struct z_erofs_deflate *strm;
+ u8 *kin, *kout = NULL;
+ bool bounced = false;
+ int no = -1, ni = 0, j = 0, zerr, err;
+
+ /* 1. get the exact DEFLATE compressed size */
+ kin = kmap_local_page(*rq->in);
+ err = z_erofs_fixup_insize(rq, kin + rq->pageofs_in,
+ min_t(unsigned int, rq->inputsize,
+ sb->s_blocksize - rq->pageofs_in));
+ if (err) {
+ kunmap_local(kin);
+ return err;
+ }
+
+ /* 2. get an available DEFLATE context */
+again:
+ spin_lock(&z_erofs_deflate_lock);
+ strm = z_erofs_deflate_head;
+ if (!strm) {
+ spin_unlock(&z_erofs_deflate_lock);
+ wait_event(z_erofs_deflate_wq, READ_ONCE(z_erofs_deflate_head));
+ goto again;
+ }
+ z_erofs_deflate_head = strm->next;
+ spin_unlock(&z_erofs_deflate_lock);
+
+ /* 3. multi-call decompress */
+ insz = rq->inputsize;
+ outsz = rq->outputsize;
+ zerr = zlib_inflateInit2(&strm->z, -MAX_WBITS);
+ if (zerr != Z_OK) {
+ err = -EIO;
+ goto failed_zinit;
+ }
+
+ pofs = rq->pageofs_out;
+ strm->z.avail_in = min_t(u32, insz, PAGE_SIZE - rq->pageofs_in);
+ insz -= strm->z.avail_in;
+ strm->z.next_in = kin + rq->pageofs_in;
+ strm->z.avail_out = 0;
+
+ while (1) {
+ if (!strm->z.avail_out) {
+ if (++no >= nrpages_out || !outsz) {
+ erofs_err(sb, "insufficient space for decompressed data");
+ err = -EFSCORRUPTED;
+ break;
+ }
+
+ if (kout)
+ kunmap_local(kout);
+ strm->z.avail_out = min_t(u32, outsz, PAGE_SIZE - pofs);
+ outsz -= strm->z.avail_out;
+ if (!rq->out[no]) {
+ rq->out[no] = erofs_allocpage(pagepool,
+ GFP_KERNEL | __GFP_NOFAIL);
+ set_page_private(rq->out[no],
+ Z_EROFS_SHORTLIVED_PAGE);
+ }
+ kout = kmap_local_page(rq->out[no]);
+ strm->z.next_out = kout + pofs;
+ pofs = 0;
+ }
+
+ if (!strm->z.avail_in && insz) {
+ if (++ni >= nrpages_in) {
+ erofs_err(sb, "invalid compressed data");
+ err = -EFSCORRUPTED;
+ break;
+ }
+
+ if (kout) { /* unlike kmap(), take care of the orders */
+ j = strm->z.next_out - kout;
+ kunmap_local(kout);
+ }
+ kunmap_local(kin);
+ strm->z.avail_in = min_t(u32, insz, PAGE_SIZE);
+ insz -= strm->z.avail_in;
+ kin = kmap_local_page(rq->in[ni]);
+ strm->z.next_in = kin;
+ bounced = false;
+ if (kout) {
+ kout = kmap_local_page(rq->out[no]);
+ strm->z.next_out = kout + j;
+ }
+ }
+
+ /*
+ * Handle overlapping: Use bounced buffer if the compressed
+ * data is under processing; Or use short-lived pages from the
+ * on-stack pagepool where pages share among the same request
+ * and not _all_ inplace I/O pages are needed to be doubled.
+ */
+ if (!bounced && rq->out[no] == rq->in[ni]) {
+ memcpy(strm->bounce, strm->z.next_in, strm->z.avail_in);
+ strm->z.next_in = strm->bounce;
+ bounced = true;
+ }
+
+ for (j = ni + 1; j < nrpages_in; ++j) {
+ struct page *tmppage;
+
+ if (rq->out[no] != rq->in[j])
+ continue;
+
+ DBG_BUGON(erofs_page_is_managed(EROFS_SB(sb),
+ rq->in[j]));
+ tmppage = erofs_allocpage(pagepool,
+ GFP_KERNEL | __GFP_NOFAIL);
+ set_page_private(tmppage, Z_EROFS_SHORTLIVED_PAGE);
+ copy_highpage(tmppage, rq->in[j]);
+ rq->in[j] = tmppage;
+ }
+
+ zerr = zlib_inflate(&strm->z, Z_SYNC_FLUSH);
+ if (zerr != Z_OK || !(outsz + strm->z.avail_out)) {
+ if (zerr == Z_OK && rq->partial_decoding)
+ break;
+ if (zerr == Z_STREAM_END && !outsz)
+ break;
+ erofs_err(sb, "failed to decompress %d in[%u] out[%u]",
+ zerr, rq->inputsize, rq->outputsize);
+ err = -EFSCORRUPTED;
+ break;
+ }
+ }
+
+ if (zlib_inflateEnd(&strm->z) != Z_OK && !err)
+ err = -EIO;
+ if (kout)
+ kunmap_local(kout);
+failed_zinit:
+ kunmap_local(kin);
+ /* 4. push back DEFLATE stream context to the global list */
+ spin_lock(&z_erofs_deflate_lock);
+ strm->next = z_erofs_deflate_head;
+ z_erofs_deflate_head = strm;
+ spin_unlock(&z_erofs_deflate_lock);
+ wake_up(&z_erofs_deflate_wq);
+ return err;
+}
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 2c7b16e340fe..a03ec70ba6f2 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -13,6 +13,7 @@
#define EROFS_FEATURE_COMPAT_SB_CHKSUM 0x00000001
#define EROFS_FEATURE_COMPAT_MTIME 0x00000002
+#define EROFS_FEATURE_COMPAT_XATTR_FILTER 0x00000004
/*
* Any bits that aren't in EROFS_ALL_FEATURE_INCOMPAT should
@@ -81,7 +82,8 @@ struct erofs_super_block {
__u8 xattr_prefix_count; /* # of long xattr name prefixes */
__le32 xattr_prefix_start; /* start of long xattr prefixes */
__le64 packed_nid; /* nid of the special packed inode */
- __u8 reserved2[24];
+ __u8 xattr_filter_reserved; /* reserved for xattr name filter */
+ __u8 reserved2[23];
};
/*
@@ -200,7 +202,7 @@ struct erofs_inode_extended {
* for read-only fs, no need to introduce h_refcount
*/
struct erofs_xattr_ibody_header {
- __le32 h_reserved;
+ __le32 h_name_filter; /* bit value 1 indicates not-present */
__u8 h_shared_count;
__u8 h_reserved2[7];
__le32 h_shared_xattrs[]; /* shared xattr id array */
@@ -221,6 +223,10 @@ struct erofs_xattr_ibody_header {
#define EROFS_XATTR_LONG_PREFIX 0x80
#define EROFS_XATTR_LONG_PREFIX_MASK 0x7f
+#define EROFS_XATTR_FILTER_BITS 32
+#define EROFS_XATTR_FILTER_DEFAULT UINT32_MAX
+#define EROFS_XATTR_FILTER_SEED 0x25BBE08F
+
/* xattr entry (for both inline & shared xattrs) */
struct erofs_xattr_entry {
__u8 e_name_len; /* length of name */
@@ -289,6 +295,7 @@ struct erofs_dirent {
enum {
Z_EROFS_COMPRESSION_LZ4 = 0,
Z_EROFS_COMPRESSION_LZMA = 1,
+ Z_EROFS_COMPRESSION_DEFLATE = 2,
Z_EROFS_COMPRESSION_MAX
};
#define Z_EROFS_ALL_COMPR_ALGS ((1 << Z_EROFS_COMPRESSION_MAX) - 1)
@@ -309,6 +316,12 @@ struct z_erofs_lzma_cfgs {
#define Z_EROFS_LZMA_MAX_DICT_SIZE (8 * Z_EROFS_PCLUSTER_MAX_SIZE)
+/* 6 bytes (+ length field = 8 bytes) */
+struct z_erofs_deflate_cfgs {
+ u8 windowbits; /* 8..15 for DEFLATE */
+ u8 reserved[5];
+} __packed;
+
/*
* bit 0 : COMPACTED_2B indexes (0 - off; 1 - on)
* e.g. for 4k logical cluster size, 4B if compacted 2B is off;
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index e12592727a54..edc8ec7581b8 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -105,8 +105,8 @@ static void *erofs_read_inode(struct erofs_buf *buf,
set_nlink(inode, le32_to_cpu(die->i_nlink));
/* extended inode has its own timestamp */
- inode->i_ctime.tv_sec = le64_to_cpu(die->i_mtime);
- inode->i_ctime.tv_nsec = le32_to_cpu(die->i_mtime_nsec);
+ inode_set_ctime(inode, le64_to_cpu(die->i_mtime),
+ le32_to_cpu(die->i_mtime_nsec));
inode->i_size = le64_to_cpu(die->i_size);
@@ -148,8 +148,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
set_nlink(inode, le16_to_cpu(dic->i_nlink));
/* use build time for compact inodes */
- inode->i_ctime.tv_sec = sbi->build_time;
- inode->i_ctime.tv_nsec = sbi->build_time_nsec;
+ inode_set_ctime(inode, sbi->build_time, sbi->build_time_nsec);
inode->i_size = le32_to_cpu(dic->i_size);
if (erofs_inode_is_data_compressed(vi->datalayout))
@@ -176,10 +175,7 @@ static void *erofs_read_inode(struct erofs_buf *buf,
vi->chunkbits = sb->s_blocksize_bits +
(vi->chunkformat & EROFS_CHUNK_FORMAT_BLKBITS_MASK);
}
- inode->i_mtime.tv_sec = inode->i_ctime.tv_sec;
- inode->i_atime.tv_sec = inode->i_ctime.tv_sec;
- inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec;
- inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec;
+ inode->i_mtime = inode->i_atime = inode_get_ctime(inode);
inode->i_flags &= ~S_DAX;
if (test_opt(&sbi->opt, DAX_ALWAYS) && S_ISREG(inode->i_mode) &&
@@ -373,7 +369,7 @@ int erofs_getattr(struct mnt_idmap *idmap, const struct path *path,
stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
STATX_ATTR_IMMUTABLE);
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
return 0;
}
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 36e32fa542f0..4ff88d0dd980 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -151,6 +151,7 @@ struct erofs_sb_info {
u32 xattr_prefix_start;
u8 xattr_prefix_count;
struct erofs_xattr_prefix_item *xattr_prefixes;
+ unsigned int xattr_filter_reserved;
#endif
u16 device_id_mask; /* valid bits of device id to be used */
@@ -251,6 +252,7 @@ EROFS_FEATURE_FUNCS(fragments, incompat, INCOMPAT_FRAGMENTS)
EROFS_FEATURE_FUNCS(dedupe, incompat, INCOMPAT_DEDUPE)
EROFS_FEATURE_FUNCS(xattr_prefixes, incompat, INCOMPAT_XATTR_PREFIXES)
EROFS_FEATURE_FUNCS(sb_chksum, compat, COMPAT_SB_CHKSUM)
+EROFS_FEATURE_FUNCS(xattr_filter, compat, COMPAT_XATTR_FILTER)
/* atomic flag definitions */
#define EROFS_I_EA_INITED_BIT 0
@@ -270,6 +272,7 @@ struct erofs_inode {
unsigned char inode_isize;
unsigned int xattr_isize;
+ unsigned int xattr_name_filter;
unsigned int xattr_shared_count;
unsigned int *xattr_shared_xattrs;
@@ -519,6 +522,26 @@ static inline int z_erofs_load_lzma_config(struct super_block *sb,
}
#endif /* !CONFIG_EROFS_FS_ZIP_LZMA */
+#ifdef CONFIG_EROFS_FS_ZIP_DEFLATE
+int __init z_erofs_deflate_init(void);
+void z_erofs_deflate_exit(void);
+int z_erofs_load_deflate_config(struct super_block *sb,
+ struct erofs_super_block *dsb,
+ struct z_erofs_deflate_cfgs *dfl, int size);
+#else
+static inline int z_erofs_deflate_init(void) { return 0; }
+static inline int z_erofs_deflate_exit(void) { return 0; }
+static inline int z_erofs_load_deflate_config(struct super_block *sb,
+ struct erofs_super_block *dsb,
+ struct z_erofs_deflate_cfgs *dfl, int size) {
+ if (dfl) {
+ erofs_err(sb, "deflate algorithm isn't enabled");
+ return -EINVAL;
+ }
+ return 0;
+}
+#endif /* !CONFIG_EROFS_FS_ZIP_DEFLATE */
+
#ifdef CONFIG_EROFS_FS_ONDEMAND
int erofs_fscache_register_fs(struct super_block *sb);
void erofs_fscache_unregister_fs(struct super_block *sb);
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 9d6a3c6158bd..44a24d573f1f 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -19,10 +19,8 @@
#include <trace/events/erofs.h>
static struct kmem_cache *erofs_inode_cachep __read_mostly;
-struct file_system_type erofs_fs_type;
-void _erofs_err(struct super_block *sb, const char *function,
- const char *fmt, ...)
+void _erofs_err(struct super_block *sb, const char *func, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -32,12 +30,11 @@ void _erofs_err(struct super_block *sb, const char *function,
vaf.fmt = fmt;
vaf.va = &args;
- pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf);
+ pr_err("(device %s): %s: %pV", sb->s_id, func, &vaf);
va_end(args);
}
-void _erofs_info(struct super_block *sb, const char *function,
- const char *fmt, ...)
+void _erofs_info(struct super_block *sb, const char *func, const char *fmt, ...)
{
struct va_format vaf;
va_list args;
@@ -102,11 +99,9 @@ static void erofs_free_inode(struct inode *inode)
{
struct erofs_inode *vi = EROFS_I(inode);
- /* be careful of RCU symlink path */
if (inode->i_op == &erofs_fast_symlink_iops)
kfree(inode->i_link);
kfree(vi->xattr_shared_xattrs);
-
kmem_cache_free(erofs_inode_cachep, vi);
}
@@ -119,8 +114,7 @@ static bool check_layout_compatibility(struct super_block *sb,
/* check if current kernel meets all mandatory requirements */
if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
- erofs_err(sb,
- "unidentified incompatible feature %x, please upgrade kernel version",
+ erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel",
feature & ~EROFS_ALL_FEATURE_INCOMPAT);
return false;
}
@@ -201,6 +195,9 @@ static int erofs_load_compr_cfgs(struct super_block *sb,
case Z_EROFS_COMPRESSION_LZMA:
ret = z_erofs_load_lzma_config(sb, dsb, data, size);
break;
+ case Z_EROFS_COMPRESSION_DEFLATE:
+ ret = z_erofs_load_deflate_config(sb, dsb, data, size);
+ break;
default:
DBG_BUGON(1);
ret = -EFAULT;
@@ -388,6 +385,7 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
sbi->xattr_prefix_start = le32_to_cpu(dsb->xattr_prefix_start);
sbi->xattr_prefix_count = dsb->xattr_prefix_count;
+ sbi->xattr_filter_reserved = dsb->xattr_filter_reserved;
#endif
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
sbi->root_nid = le16_to_cpu(dsb->root_nid);
@@ -420,16 +418,11 @@ static int erofs_read_superblock(struct super_block *sb)
if (erofs_is_fscache_mode(sb))
erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
- if (erofs_sb_has_fragments(sbi))
- erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
- if (erofs_sb_has_dedupe(sbi))
- erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!");
out:
erofs_put_metabuf(&buf);
return ret;
}
-/* set up default EROFS parameters */
static void erofs_default_options(struct erofs_fs_context *ctx)
{
#ifdef CONFIG_EROFS_FS_ZIP
@@ -731,7 +724,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
xa_init(&sbi->managed_pslots);
#endif
- /* get the root inode */
inode = erofs_iget(sb, ROOT_NID(sbi));
if (IS_ERR(inode))
return PTR_ERR(inode);
@@ -748,7 +740,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
return -ENOMEM;
erofs_shrinker_register(sb);
- /* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
if (erofs_sb_has_fragments(sbi) && sbi->packed_nid) {
sbi->packed_inode = erofs_iget(sb, sbi->packed_nid);
if (IS_ERR(sbi->packed_inode)) {
@@ -881,16 +872,10 @@ static int erofs_init_fs_context(struct fs_context *fc)
return 0;
}
-/*
- * could be triggered after deactivate_locked_super()
- * is called, thus including umount and failed to initialize.
- */
static void erofs_kill_sb(struct super_block *sb)
{
struct erofs_sb_info *sbi;
- WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);
-
/* pseudo mount for anon inodes */
if (sb->s_flags & SB_KERNMOUNT) {
kill_anon_super(sb);
@@ -915,7 +900,6 @@ static void erofs_kill_sb(struct super_block *sb)
sb->s_fs_info = NULL;
}
-/* called when ->s_root is non-NULL */
static void erofs_put_super(struct super_block *sb)
{
struct erofs_sb_info *const sbi = EROFS_SB(sb);
@@ -952,9 +936,9 @@ static int __init erofs_module_init(void)
erofs_check_ondisk_layout_definitions();
erofs_inode_cachep = kmem_cache_create("erofs_inode",
- sizeof(struct erofs_inode), 0,
- SLAB_RECLAIM_ACCOUNT,
- erofs_inode_init_once);
+ sizeof(struct erofs_inode), 0,
+ SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT,
+ erofs_inode_init_once);
if (!erofs_inode_cachep)
return -ENOMEM;
@@ -966,6 +950,10 @@ static int __init erofs_module_init(void)
if (err)
goto lzma_err;
+ err = z_erofs_deflate_init();
+ if (err)
+ goto deflate_err;
+
erofs_pcpubuf_init();
err = z_erofs_init_zip_subsystem();
if (err)
@@ -986,6 +974,8 @@ fs_err:
sysfs_err:
z_erofs_exit_zip_subsystem();
zip_err:
+ z_erofs_deflate_exit();
+deflate_err:
z_erofs_lzma_exit();
lzma_err:
erofs_exit_shrinker();
@@ -1003,13 +993,13 @@ static void __exit erofs_module_exit(void)
erofs_exit_sysfs();
z_erofs_exit_zip_subsystem();
+ z_erofs_deflate_exit();
z_erofs_lzma_exit();
erofs_exit_shrinker();
kmem_cache_destroy(erofs_inode_cachep);
erofs_pcpubuf_exit();
}
-/* get filesystem statistics */
static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c
index 40178b6e0688..09d341675e89 100644
--- a/fs/erofs/xattr.c
+++ b/fs/erofs/xattr.c
@@ -5,6 +5,7 @@
* Copyright (C) 2021-2022, Alibaba Cloud
*/
#include <linux/security.h>
+#include <linux/xxhash.h>
#include "xattr.h"
struct erofs_xattr_iter {
@@ -87,6 +88,7 @@ static int erofs_init_inode_xattrs(struct inode *inode)
}
ih = it.kaddr + erofs_blkoff(sb, it.pos);
+ vi->xattr_name_filter = le32_to_cpu(ih->h_name_filter);
vi->xattr_shared_count = ih->h_shared_count;
vi->xattr_shared_xattrs = kmalloc_array(vi->xattr_shared_count,
sizeof(uint), GFP_KERNEL);
@@ -392,7 +394,10 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
void *buffer, size_t buffer_size)
{
int ret;
+ unsigned int hashbit;
struct erofs_xattr_iter it;
+ struct erofs_inode *vi = EROFS_I(inode);
+ struct erofs_sb_info *sbi = EROFS_SB(inode->i_sb);
if (!name)
return -EINVAL;
@@ -401,6 +406,15 @@ int erofs_getxattr(struct inode *inode, int index, const char *name,
if (ret)
return ret;
+ /* reserved flag is non-zero if there's any change of on-disk format */
+ if (erofs_sb_has_xattr_filter(sbi) && !sbi->xattr_filter_reserved) {
+ hashbit = xxh32(name, strlen(name),
+ EROFS_XATTR_FILTER_SEED + index);
+ hashbit &= EROFS_XATTR_FILTER_BITS - 1;
+ if (vi->xattr_name_filter & (1U << hashbit))
+ return -ENOATTR;
+ }
+
it.index = index;
it.name = (struct qstr)QSTR_INIT(name, strlen(name));
if (it.name.len > EROFS_NAME_LEN)
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index b69d89a11dd0..036f610e044b 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -143,22 +143,17 @@ static inline void z_erofs_onlinepage_split(struct page *page)
atomic_inc((atomic_t *)&page->private);
}
-static inline void z_erofs_page_mark_eio(struct page *page)
+static void z_erofs_onlinepage_endio(struct page *page, int err)
{
- int orig;
+ int orig, v;
+
+ DBG_BUGON(!PagePrivate(page));
do {
orig = atomic_read((atomic_t *)&page->private);
- } while (atomic_cmpxchg((atomic_t *)&page->private, orig,
- orig | Z_EROFS_PAGE_EIO) != orig);
-}
-
-static inline void z_erofs_onlinepage_endio(struct page *page)
-{
- unsigned int v;
+ v = (orig - 1) | (err ? Z_EROFS_PAGE_EIO : 0);
+ } while (atomic_cmpxchg((atomic_t *)&page->private, orig, v) != orig);
- DBG_BUGON(!PagePrivate(page));
- v = atomic_dec_return((atomic_t *)&page->private);
if (!(v & ~Z_EROFS_PAGE_EIO)) {
set_page_private(page, 0);
ClearPagePrivate(page);
@@ -507,19 +502,17 @@ enum z_erofs_pclustermode {
*/
Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE,
/*
- * The current collection has been linked with the owned chain, and
- * could also be linked with the remaining collections, which means
- * if the processing page is the tail page of the collection, thus
- * the current collection can safely use the whole page (since
- * the previous collection is under control) for in-place I/O, as
- * illustrated below:
- * ________________________________________________________________
- * | tail (partial) page | head (partial) page |
- * | (of the current cl) | (of the previous collection) |
- * | | |
- * |__PCLUSTER_FOLLOWED___|___________PCLUSTER_FOLLOWED____________|
+ * The pcluster was just linked to a decompression chain by us. It can
+ * also be linked with the remaining pclusters, which means if the
+ * processing page is the tail page of a pcluster, this pcluster can
+ * safely use the whole page (since the previous pcluster is within the
+ * same chain) for in-place I/O, as illustrated below:
+ * ___________________________________________________
+ * | tail (partial) page | head (partial) page |
+ * | (of the current pcl) | (of the previous pcl) |
+ * |___PCLUSTER_FOLLOWED___|_____PCLUSTER_FOLLOWED_____|
*
- * [ (*) the above page can be used as inplace I/O. ]
+ * [ (*) the page above can be used as inplace I/O. ]
*/
Z_EROFS_PCLUSTER_FOLLOWED,
};
@@ -535,8 +528,6 @@ struct z_erofs_decompress_frontend {
z_erofs_next_pcluster_t owned_head;
enum z_erofs_pclustermode mode;
- /* used for applying cache strategy on the fly */
- bool backmost;
erofs_off_t headoffset;
/* a pointer used to pick up inplace I/O pages */
@@ -545,7 +536,7 @@ struct z_erofs_decompress_frontend {
#define DECOMPRESS_FRONTEND_INIT(__i) { \
.inode = __i, .owned_head = Z_EROFS_PCLUSTER_TAIL, \
- .mode = Z_EROFS_PCLUSTER_FOLLOWED, .backmost = true }
+ .mode = Z_EROFS_PCLUSTER_FOLLOWED }
static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
{
@@ -554,7 +545,7 @@ static bool z_erofs_should_alloc_cache(struct z_erofs_decompress_frontend *fe)
if (cachestrategy <= EROFS_ZIP_CACHE_DISABLED)
return false;
- if (fe->backmost)
+ if (!(fe->map.m_flags & EROFS_MAP_FULL_MAPPED))
return true;
if (cachestrategy >= EROFS_ZIP_CACHE_READAROUND &&
@@ -851,9 +842,11 @@ err_out:
return err;
}
-static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
+static int z_erofs_pcluster_begin(struct z_erofs_decompress_frontend *fe)
{
struct erofs_map_blocks *map = &fe->map;
+ struct super_block *sb = fe->inode->i_sb;
+ erofs_blk_t blknr = erofs_blknr(sb, map->m_pa);
struct erofs_workgroup *grp = NULL;
int ret;
@@ -863,8 +856,7 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
DBG_BUGON(fe->owned_head == Z_EROFS_PCLUSTER_NIL);
if (!(map->m_flags & EROFS_MAP_META)) {
- grp = erofs_find_workgroup(fe->inode->i_sb,
- map->m_pa >> PAGE_SHIFT);
+ grp = erofs_find_workgroup(sb, blknr);
} else if ((map->m_pa & ~PAGE_MASK) + map->m_plen > PAGE_SIZE) {
DBG_BUGON(1);
return -EFSCORRUPTED;
@@ -883,9 +875,26 @@ static int z_erofs_collector_begin(struct z_erofs_decompress_frontend *fe)
} else if (ret) {
return ret;
}
+
z_erofs_bvec_iter_begin(&fe->biter, &fe->pcl->bvset,
Z_EROFS_INLINE_BVECS, fe->pcl->vcnt);
- /* since file-backed online pages are traversed in reverse order */
+ if (!z_erofs_is_inline_pcluster(fe->pcl)) {
+ /* bind cache first when cached decompression is preferred */
+ z_erofs_bind_cache(fe);
+ } else {
+ void *mptr;
+
+ mptr = erofs_read_metabuf(&map->buf, sb, blknr, EROFS_NO_KMAP);
+ if (IS_ERR(mptr)) {
+ ret = PTR_ERR(mptr);
+ erofs_err(sb, "failed to get inline data %d", ret);
+ return ret;
+ }
+ get_page(map->buf.page);
+ WRITE_ONCE(fe->pcl->compressed_bvecs[0].page, map->buf.page);
+ fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
+ }
+ /* file-backed inplace I/O pages are traversed in reverse order */
fe->icur = z_erofs_pclusterpages(fe->pcl);
return 0;
}
@@ -908,12 +917,12 @@ void erofs_workgroup_free_rcu(struct erofs_workgroup *grp)
call_rcu(&pcl->rcu, z_erofs_rcu_callback);
}
-static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
+static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
{
struct z_erofs_pcluster *pcl = fe->pcl;
if (!pcl)
- return false;
+ return;
z_erofs_bvec_iter_end(&fe->biter);
mutex_unlock(&pcl->lock);
@@ -929,37 +938,29 @@ static bool z_erofs_collector_end(struct z_erofs_decompress_frontend *fe)
erofs_workgroup_put(&pcl->obj);
fe->pcl = NULL;
- return true;
}
-static int z_erofs_read_fragment(struct inode *inode, erofs_off_t pos,
- struct page *page, unsigned int pageofs,
- unsigned int len)
+static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
+ unsigned int cur, unsigned int end, erofs_off_t pos)
{
- struct super_block *sb = inode->i_sb;
- struct inode *packed_inode = EROFS_I_SB(inode)->packed_inode;
+ struct inode *packed_inode = EROFS_SB(sb)->packed_inode;
struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
- u8 *src, *dst;
- unsigned int i, cnt;
+ unsigned int cnt;
+ u8 *src;
if (!packed_inode)
return -EFSCORRUPTED;
buf.inode = packed_inode;
- pos += EROFS_I(inode)->z_fragmentoff;
- for (i = 0; i < len; i += cnt) {
- cnt = min_t(unsigned int, len - i,
+ for (; cur < end; cur += cnt, pos += cnt) {
+ cnt = min_t(unsigned int, end - cur,
sb->s_blocksize - erofs_blkoff(sb, pos));
src = erofs_bread(&buf, erofs_blknr(sb, pos), EROFS_KMAP);
if (IS_ERR(src)) {
erofs_put_metabuf(&buf);
return PTR_ERR(src);
}
-
- dst = kmap_local_page(page);
- memcpy(dst + pageofs + i, src + erofs_blkoff(sb, pos), cnt);
- kunmap_local(dst);
- pos += cnt;
+ memcpy_to_page(page, cur, src + erofs_blkoff(sb, pos), cnt);
}
erofs_put_metabuf(&buf);
return 0;
@@ -972,94 +973,60 @@ static int z_erofs_do_read_page(struct z_erofs_decompress_frontend *fe,
struct erofs_map_blocks *const map = &fe->map;
const loff_t offset = page_offset(page);
bool tight = true, exclusive;
- unsigned int cur, end, spiltted;
+ unsigned int cur, end, len, split;
int err = 0;
- /* register locked file pages as online pages in pack */
z_erofs_onlinepage_init(page);
- spiltted = 0;
+ split = 0;
end = PAGE_SIZE;
repeat:
- cur = end - 1;
-
- if (offset + cur < map->m_la ||
- offset + cur >= map->m_la + map->m_llen) {
- if (z_erofs_collector_end(fe))
- fe->backmost = false;
- map->m_la = offset + cur;
+ if (offset + end - 1 < map->m_la ||
+ offset + end - 1 >= map->m_la + map->m_llen) {
+ z_erofs_pcluster_end(fe);
+ map->m_la = offset + end - 1;
map->m_llen = 0;
err = z_erofs_map_blocks_iter(inode, map, 0);
if (err)
goto out;
- } else {
- if (fe->pcl)
- goto hitted;
- /* didn't get a valid pcluster previously (very rare) */
}
- if (!(map->m_flags & EROFS_MAP_MAPPED) ||
- map->m_flags & EROFS_MAP_FRAGMENT)
- goto hitted;
-
- err = z_erofs_collector_begin(fe);
- if (err)
- goto out;
+ cur = offset > map->m_la ? 0 : map->m_la - offset;
+ /* bump split parts first to avoid several separate cases */
+ ++split;
- if (z_erofs_is_inline_pcluster(fe->pcl)) {
- void *mp;
-
- mp = erofs_read_metabuf(&fe->map.buf, inode->i_sb,
- erofs_blknr(inode->i_sb, map->m_pa),
- EROFS_NO_KMAP);
- if (IS_ERR(mp)) {
- err = PTR_ERR(mp);
- erofs_err(inode->i_sb,
- "failed to get inline page, err %d", err);
- goto out;
- }
- get_page(fe->map.buf.page);
- WRITE_ONCE(fe->pcl->compressed_bvecs[0].page,
- fe->map.buf.page);
- fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
- } else {
- /* bind cache first when cached decompression is preferred */
- z_erofs_bind_cache(fe);
- }
-hitted:
- /*
- * Ensure the current partial page belongs to this submit chain rather
- * than other concurrent submit chains or the noio(bypass) chain since
- * those chains are handled asynchronously thus the page cannot be used
- * for inplace I/O or bvpage (should be processed in a strict order.)
- */
- tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
-
- cur = end - min_t(erofs_off_t, offset + end - map->m_la, end);
if (!(map->m_flags & EROFS_MAP_MAPPED)) {
zero_user_segment(page, cur, end);
+ tight = false;
goto next_part;
}
+
if (map->m_flags & EROFS_MAP_FRAGMENT) {
- unsigned int pageofs, skip, len;
+ erofs_off_t fpos = offset + cur - map->m_la;
- if (offset > map->m_la) {
- pageofs = 0;
- skip = offset - map->m_la;
- } else {
- pageofs = map->m_la & ~PAGE_MASK;
- skip = 0;
- }
- len = min_t(unsigned int, map->m_llen - skip, end - cur);
- err = z_erofs_read_fragment(inode, skip, page, pageofs, len);
+ len = min_t(unsigned int, map->m_llen - fpos, end - cur);
+ err = z_erofs_read_fragment(inode->i_sb, page, cur, cur + len,
+ EROFS_I(inode)->z_fragmentoff + fpos);
if (err)
goto out;
- ++spiltted;
tight = false;
goto next_part;
}
- exclusive = (!cur && (!spiltted || tight));
+ if (!fe->pcl) {
+ err = z_erofs_pcluster_begin(fe);
+ if (err)
+ goto out;
+ }
+
+ /*
+ * Ensure the current partial page belongs to this submit chain rather
+ * than other concurrent submit chains or the noio(bypass) chain since
+ * those chains are handled asynchronously thus the page cannot be used
+ * for inplace I/O or bvpage (should be processed in a strict order.)
+ */
+ tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
+ exclusive = (!cur && ((split <= 1) || tight));
if (cur)
tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
@@ -1072,8 +1039,6 @@ hitted:
goto out;
z_erofs_onlinepage_split(page);
- /* bump up the number of spiltted parts of a page */
- ++spiltted;
if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
fe->pcl->multibases = true;
if (fe->pcl->length < offset + end - map->m_la) {
@@ -1094,9 +1059,7 @@ next_part:
goto repeat;
out:
- if (err)
- z_erofs_page_mark_eio(page);
- z_erofs_onlinepage_endio(page);
+ z_erofs_onlinepage_endio(page, err);
return err;
}
@@ -1144,10 +1107,11 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
struct z_erofs_bvec *bvec)
{
struct z_erofs_bvec_item *item;
+ unsigned int pgnr;
- if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
- unsigned int pgnr;
-
+ if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) &&
+ (bvec->end == PAGE_SIZE ||
+ bvec->offset + bvec->end == be->pcl->length)) {
pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
DBG_BUGON(pgnr >= be->nr_pages);
if (!be->decompressed_pages[pgnr]) {
@@ -1198,9 +1162,7 @@ static void z_erofs_fill_other_copies(struct z_erofs_decompress_backend *be,
cur += len;
}
kunmap_local(dst);
- if (err)
- z_erofs_page_mark_eio(bvi->bvec.page);
- z_erofs_onlinepage_endio(bvi->bvec.page);
+ z_erofs_onlinepage_endio(bvi->bvec.page, err);
list_del(p);
kfree(bvi);
}
@@ -1371,9 +1333,7 @@ out:
/* recycle all individual short-lived pages */
if (z_erofs_put_shortlivedpage(be->pagepool, page))
continue;
- if (err)
- z_erofs_page_mark_eio(page);
- z_erofs_onlinepage_endio(page);
+ z_erofs_onlinepage_endio(page, err);
}
if (be->decompressed_pages != be->onstack_pages)
@@ -1409,7 +1369,10 @@ static void z_erofs_decompress_queue(const struct z_erofs_decompressqueue *io,
owned = READ_ONCE(be.pcl->next);
z_erofs_decompress_pcluster(&be, io->eio ? -EIO : 0);
- erofs_workgroup_put(&be.pcl->obj);
+ if (z_erofs_is_inline_pcluster(be.pcl))
+ z_erofs_free_pcluster(be.pcl);
+ else
+ erofs_workgroup_put(&be.pcl->obj);
}
}
@@ -1847,15 +1810,10 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
if (page) {
- if (PageUptodate(page)) {
+ if (PageUptodate(page))
unlock_page(page);
- } else {
- err = z_erofs_do_read_page(f, page);
- if (err)
- erofs_err(inode->i_sb,
- "readmore error at page %lu @ nid %llu",
- index, EROFS_I(inode)->nid);
- }
+ else
+ (void)z_erofs_do_read_page(f, page);
put_page(page);
}
@@ -1867,25 +1825,25 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
static int z_erofs_read_folio(struct file *file, struct folio *folio)
{
- struct page *page = &folio->page;
- struct inode *const inode = page->mapping->host;
+ struct inode *const inode = folio->mapping->host;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
int err;
- trace_erofs_readpage(page, false);
- f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
+ trace_erofs_read_folio(folio, false);
+ f.headoffset = (erofs_off_t)folio->index << PAGE_SHIFT;
z_erofs_pcluster_readmore(&f, NULL, true);
- err = z_erofs_do_read_page(&f, page);
+ err = z_erofs_do_read_page(&f, &folio->page);
z_erofs_pcluster_readmore(&f, NULL, false);
- (void)z_erofs_collector_end(&f);
+ z_erofs_pcluster_end(&f);
/* if some compressed cluster ready, need submit them anyway */
z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, 0), false);
- if (err)
- erofs_err(inode->i_sb, "failed to read, err [%d]", err);
+ if (err && err != -EINTR)
+ erofs_err(inode->i_sb, "read error %d @ %lu of nid %llu",
+ err, folio->index, EROFS_I(inode)->nid);
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&f.pagepool);
@@ -1897,38 +1855,35 @@ static void z_erofs_readahead(struct readahead_control *rac)
struct inode *const inode = rac->mapping->host;
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
- struct page *head = NULL, *page;
- unsigned int nr_pages;
+ struct folio *head = NULL, *folio;
+ unsigned int nr_folios;
+ int err;
f.headoffset = readahead_pos(rac);
z_erofs_pcluster_readmore(&f, rac, true);
- nr_pages = readahead_count(rac);
- trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
+ nr_folios = readahead_count(rac);
+ trace_erofs_readpages(inode, readahead_index(rac), nr_folios, false);
- while ((page = readahead_page(rac))) {
- set_page_private(page, (unsigned long)head);
- head = page;
+ while ((folio = readahead_folio(rac))) {
+ folio->private = head;
+ head = folio;
}
+ /* traverse in reverse order for best metadata I/O performance */
while (head) {
- struct page *page = head;
- int err;
-
- /* traversal in reverse order */
- head = (void *)page_private(page);
+ folio = head;
+ head = folio_get_private(folio);
- err = z_erofs_do_read_page(&f, page);
- if (err)
- erofs_err(inode->i_sb,
- "readahead error at page %lu @ nid %llu",
- page->index, EROFS_I(inode)->nid);
- put_page(page);
+ err = z_erofs_do_read_page(&f, &folio->page);
+ if (err && err != -EINTR)
+ erofs_err(inode->i_sb, "readahead error at folio %lu @ nid %llu",
+ folio->index, EROFS_I(inode)->nid);
}
z_erofs_pcluster_readmore(&f, rac, false);
- (void)z_erofs_collector_end(&f);
+ z_erofs_pcluster_end(&f);
- z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_pages), true);
+ z_erofs_runqueue(&f, z_erofs_is_sync_decompress(sbi, nr_folios), true);
erofs_put_metabuf(&f.map.buf);
erofs_release_pages(&f.pagepool);
}
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index 1909ddafd9c7..7b55111fd533 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -561,8 +561,9 @@ static int z_erofs_do_map_blocks(struct inode *inode,
if ((flags & EROFS_GET_BLOCKS_FIEMAP) ||
((flags & EROFS_GET_BLOCKS_READMORE) &&
- map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA &&
- map->m_llen >= i_blocksize(inode))) {
+ (map->m_algorithmformat == Z_EROFS_COMPRESSION_LZMA ||
+ map->m_algorithmformat == Z_EROFS_COMPRESSION_DEFLATE) &&
+ map->m_llen >= i_blocksize(inode))) {
err = z_erofs_get_extent_decompressedlen(&m);
if (!err)
map->m_flags |= EROFS_MAP_FULL_MAPPED;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8aa36cd37351..33a918f9566c 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -189,7 +189,7 @@ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
{
lockdep_assert_held(&ctx->wqh.lock);
- *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+ *cnt = ((ctx->flags & EFD_SEMAPHORE) && ctx->count) ? 1 : ctx->count;
ctx->count -= *cnt;
}
EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 4b1b3362f697..1d9a71a0c4c1 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -975,15 +975,11 @@ again:
static int ep_alloc(struct eventpoll **pep)
{
- int error;
- struct user_struct *user;
struct eventpoll *ep;
- user = get_current_user();
- error = -ENOMEM;
ep = kzalloc(sizeof(*ep), GFP_KERNEL);
if (unlikely(!ep))
- goto free_uid;
+ return -ENOMEM;
mutex_init(&ep->mtx);
rwlock_init(&ep->lock);
@@ -992,16 +988,12 @@ static int ep_alloc(struct eventpoll **pep)
INIT_LIST_HEAD(&ep->rdllist);
ep->rbr = RB_ROOT_CACHED;
ep->ovflist = EP_UNACTIVE_PTR;
- ep->user = user;
+ ep->user = get_current_user();
refcount_set(&ep->refcount, 1);
*pep = ep;
return 0;
-
-free_uid:
- free_uid(user);
- return error;
}
/*
diff --git a/fs/exfat/balloc.c b/fs/exfat/balloc.c
index 9f42f25fab92..e918decb3735 100644
--- a/fs/exfat/balloc.c
+++ b/fs/exfat/balloc.c
@@ -69,7 +69,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
}
sbi->map_sectors = ((need_map_size - 1) >>
(sb->s_blocksize_bits)) + 1;
- sbi->vol_amap = kmalloc_array(sbi->map_sectors,
+ sbi->vol_amap = kvmalloc_array(sbi->map_sectors,
sizeof(struct buffer_head *), GFP_KERNEL);
if (!sbi->vol_amap)
return -ENOMEM;
@@ -84,7 +84,7 @@ static int exfat_allocate_bitmap(struct super_block *sb,
while (j < i)
brelse(sbi->vol_amap[j++]);
- kfree(sbi->vol_amap);
+ kvfree(sbi->vol_amap);
sbi->vol_amap = NULL;
return -EIO;
}
@@ -138,7 +138,7 @@ void exfat_free_bitmap(struct exfat_sb_info *sbi)
for (i = 0; i < sbi->map_sectors; i++)
__brelse(sbi->vol_amap[i]);
- kfree(sbi->vol_amap);
+ kvfree(sbi->vol_amap);
}
int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 957574180a5e..e1586bba6d86 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -34,6 +34,7 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
{
int i, err;
struct exfat_entry_set_cache es;
+ unsigned int uni_len = 0, len;
err = exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES);
if (err)
@@ -52,7 +53,10 @@ static int exfat_get_uniname_from_ext_entry(struct super_block *sb,
if (exfat_get_entry_type(ep) != TYPE_EXTEND)
break;
- exfat_extract_uni_name(ep, uniname);
+ len = exfat_extract_uni_name(ep, uniname);
+ uni_len += len;
+ if (len != EXFAT_FILE_NAME_LEN || uni_len >= MAX_NAME_LENGTH)
+ break;
uniname += EXFAT_FILE_NAME_LEN;
}
@@ -214,7 +218,10 @@ static void exfat_free_namebuf(struct exfat_dentry_namebuf *nb)
exfat_init_namebuf(nb);
}
-/* skip iterating emit_dots when dir is empty */
+/*
+ * Before calling dir_emit*(), sbi->s_lock should be released
+ * because page fault can occur in dir_emit*().
+ */
#define ITER_POS_FILLED_DOTS (2)
static int exfat_iterate(struct file *file, struct dir_context *ctx)
{
@@ -229,11 +236,10 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
int err = 0, fake_offset = 0;
exfat_init_namebuf(nb);
- mutex_lock(&EXFAT_SB(sb)->s_lock);
cpos = ctx->pos;
if (!dir_emit_dots(file, ctx))
- goto unlock;
+ goto out;
if (ctx->pos == ITER_POS_FILLED_DOTS) {
cpos = 0;
@@ -245,16 +251,18 @@ static int exfat_iterate(struct file *file, struct dir_context *ctx)
/* name buffer should be allocated before use */
err = exfat_alloc_namebuf(nb);
if (err)
- goto unlock;
+ goto out;
get_new:
+ mutex_lock(&EXFAT_SB(sb)->s_lock);
+
if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode))
goto end_of_dir;
err = exfat_readdir(inode, &cpos, &de);
if (err) {
/*
- * At least we tried to read a sector. Move cpos to next sector
- * position (should be aligned).
+ * At least we tried to read a sector.
+ * Move cpos to next sector position (should be aligned).
*/
if (err == -EIO) {
cpos += 1 << (sb->s_blocksize_bits);
@@ -277,16 +285,10 @@ get_new:
inum = iunique(sb, EXFAT_ROOT_INO);
}
- /*
- * Before calling dir_emit(), sb_lock should be released.
- * Because page fault can occur in dir_emit() when the size
- * of buffer given from user is larger than one page size.
- */
mutex_unlock(&EXFAT_SB(sb)->s_lock);
if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum,
(de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
- goto out_unlocked;
- mutex_lock(&EXFAT_SB(sb)->s_lock);
+ goto out;
ctx->pos = cpos;
goto get_new;
@@ -294,9 +296,8 @@ end_of_dir:
if (!cpos && fake_offset)
cpos = ITER_POS_FILLED_DOTS;
ctx->pos = cpos;
-unlock:
mutex_unlock(&EXFAT_SB(sb)->s_lock);
-out_unlocked:
+out:
/*
* To improve performance, free namebuf after unlock sb_lock.
* If namebuf is not allocated, this function do nothing
@@ -305,10 +306,11 @@ out_unlocked:
return err;
}
+WRAP_DIR_ITER(exfat_iterate) // FIXME!
const struct file_operations exfat_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = exfat_iterate,
+ .iterate_shared = shared_exfat_iterate,
.unlocked_ioctl = exfat_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = exfat_compat_ioctl,
@@ -1079,7 +1081,8 @@ rewind:
if (entry_type == TYPE_EXTEND) {
unsigned short entry_uniname[16], unichar;
- if (step != DIRENT_STEP_NAME) {
+ if (step != DIRENT_STEP_NAME ||
+ name_len >= MAX_NAME_LENGTH) {
step = DIRENT_STEP_FILE;
continue;
}
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 729ada9e26e8..f55498e5c23d 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -273,8 +273,6 @@ struct exfat_sb_info {
spinlock_t inode_hash_lock;
struct hlist_head inode_hashtable[EXFAT_HASH_SIZE];
-
- struct rcu_head rcu;
};
#define EXFAT_CACHE_VALID 0
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 3cbd270e0cba..32395ef686a2 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -22,7 +22,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size)
if (err)
return err;
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
if (!IS_SYNC(inode))
@@ -232,7 +232,7 @@ int exfat_getattr(struct mnt_idmap *idmap, const struct path *path,
struct inode *inode = d_backing_inode(path->dentry);
struct exfat_inode_info *ei = EXFAT_I(inode);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
exfat_truncate_atime(&stat->atime);
stat->result_mask |= STATX_BTIME;
stat->btime.tv_sec = ei->i_crtime.tv_sec;
@@ -290,7 +290,7 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
}
if (attr->ia_valid & ATTR_SIZE)
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
setattr_copy(&nop_mnt_idmap, inode, attr);
exfat_truncate_atime(&inode->i_atime);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 481dd338f2b8..13329baeafbc 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -355,7 +355,7 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to)
if (to > i_size_read(inode)) {
truncate_pagecache(inode, i_size_read(inode));
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
exfat_truncate(inode);
}
}
@@ -398,7 +398,7 @@ static int exfat_write_end(struct file *file, struct address_space *mapping,
exfat_write_failed(mapping, pos+len);
if (!(err < 0) && !(ei->attr & ATTR_ARCHIVE)) {
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
ei->attr |= ATTR_ARCHIVE;
mark_inode_dirty(inode);
}
@@ -577,7 +577,7 @@ static int exfat_fill_inode(struct inode *inode, struct exfat_dir_entry *info)
inode->i_blocks = round_up(i_size_read(inode), sbi->cluster_size) >> 9;
inode->i_mtime = info->mtime;
- inode->i_ctime = info->mtime;
+ inode_set_ctime_to_ts(inode, info->mtime);
ei->i_crtime = info->crtime;
inode->i_atime = info->atime;
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index e0ff9d156f6f..1b9f587f6cca 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -569,7 +569,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
goto unlock;
inode_inc_iversion(dir);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
if (IS_DIRSYNC(dir))
exfat_sync_inode(dir);
else
@@ -582,8 +582,7 @@ static int exfat_create(struct mnt_idmap *idmap, struct inode *dir,
goto unlock;
inode_inc_iversion(inode);
- inode->i_mtime = inode->i_atime = inode->i_ctime =
- EXFAT_I(inode)->i_crtime = current_time(inode);
+ inode->i_mtime = inode->i_atime = EXFAT_I(inode)->i_crtime = inode_set_ctime_current(inode);
exfat_truncate_atime(&inode->i_atime);
/* timestamp is already written, so mark_inode_dirty() is unneeded. */
@@ -817,7 +816,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
ei->dir.dir = DIR_DELETED;
inode_inc_iversion(dir);
- dir->i_mtime = dir->i_atime = current_time(dir);
+ dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
exfat_truncate_atime(&dir->i_atime);
if (IS_DIRSYNC(dir))
exfat_sync_inode(dir);
@@ -825,7 +824,7 @@ static int exfat_unlink(struct inode *dir, struct dentry *dentry)
mark_inode_dirty(dir);
clear_nlink(inode);
- inode->i_mtime = inode->i_atime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
exfat_truncate_atime(&inode->i_atime);
exfat_unhash_inode(inode);
exfat_d_version_set(dentry, inode_query_iversion(dir));
@@ -852,7 +851,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
goto unlock;
inode_inc_iversion(dir);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
if (IS_DIRSYNC(dir))
exfat_sync_inode(dir);
else
@@ -866,8 +865,7 @@ static int exfat_mkdir(struct mnt_idmap *idmap, struct inode *dir,
goto unlock;
inode_inc_iversion(inode);
- inode->i_mtime = inode->i_atime = inode->i_ctime =
- EXFAT_I(inode)->i_crtime = current_time(inode);
+ inode->i_mtime = inode->i_atime = EXFAT_I(inode)->i_crtime = inode_set_ctime_current(inode);
exfat_truncate_atime(&inode->i_atime);
/* timestamp is already written, so mark_inode_dirty() is unneeded. */
@@ -979,7 +977,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
ei->dir.dir = DIR_DELETED;
inode_inc_iversion(dir);
- dir->i_mtime = dir->i_atime = current_time(dir);
+ dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
exfat_truncate_atime(&dir->i_atime);
if (IS_DIRSYNC(dir))
exfat_sync_inode(dir);
@@ -988,7 +986,7 @@ static int exfat_rmdir(struct inode *dir, struct dentry *dentry)
drop_nlink(dir);
clear_nlink(inode);
- inode->i_mtime = inode->i_atime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
exfat_truncate_atime(&inode->i_atime);
exfat_unhash_inode(inode);
exfat_d_version_set(dentry, inode_query_iversion(dir));
@@ -1312,8 +1310,8 @@ static int exfat_rename(struct mnt_idmap *idmap,
goto unlock;
inode_inc_iversion(new_dir);
- new_dir->i_ctime = new_dir->i_mtime = new_dir->i_atime =
- EXFAT_I(new_dir)->i_crtime = current_time(new_dir);
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
+ EXFAT_I(new_dir)->i_crtime = current_time(new_dir);
exfat_truncate_atime(&new_dir->i_atime);
if (IS_DIRSYNC(new_dir))
exfat_sync_inode(new_dir);
@@ -1336,7 +1334,6 @@ static int exfat_rename(struct mnt_idmap *idmap,
}
inode_inc_iversion(old_dir);
- old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
if (IS_DIRSYNC(old_dir))
exfat_sync_inode(old_dir);
else
@@ -1354,8 +1351,7 @@ static int exfat_rename(struct mnt_idmap *idmap,
exfat_warn(sb, "abnormal access to an inode dropped");
WARN_ON(new_inode->i_nlink == 0);
}
- new_inode->i_ctime = EXFAT_I(new_inode)->i_crtime =
- current_time(new_inode);
+ EXFAT_I(new_inode)->i_crtime = current_time(new_inode);
}
unlock:
diff --git a/fs/exfat/super.c b/fs/exfat/super.c
index 8c32460e031e..2778bd9b631e 100644
--- a/fs/exfat/super.c
+++ b/fs/exfat/super.c
@@ -31,16 +31,6 @@ static void exfat_free_iocharset(struct exfat_sb_info *sbi)
kfree(sbi->options.iocharset);
}
-static void exfat_delayed_free(struct rcu_head *p)
-{
- struct exfat_sb_info *sbi = container_of(p, struct exfat_sb_info, rcu);
-
- unload_nls(sbi->nls_io);
- exfat_free_iocharset(sbi);
- exfat_free_upcase_table(sbi);
- kfree(sbi);
-}
-
static void exfat_put_super(struct super_block *sb)
{
struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -50,7 +40,8 @@ static void exfat_put_super(struct super_block *sb)
brelse(sbi->boot_bh);
mutex_unlock(&sbi->s_lock);
- call_rcu(&sbi->rcu, exfat_delayed_free);
+ unload_nls(sbi->nls_io);
+ exfat_free_upcase_table(sbi);
}
static int exfat_sync_fs(struct super_block *sb, int wait)
@@ -379,8 +370,7 @@ static int exfat_read_root(struct inode *inode)
ei->i_size_ondisk = i_size_read(inode);
exfat_save_attr(inode, ATTR_SUBDIR);
- inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
- current_time(inode);
+ inode->i_mtime = inode->i_atime = ei->i_crtime = inode_set_ctime_current(inode);
exfat_truncate_atime(&inode->i_atime);
return 0;
}
@@ -710,9 +700,6 @@ free_table:
check_nls_io:
unload_nls(sbi->nls_io);
- exfat_free_iocharset(sbi);
- sb->s_fs_info = NULL;
- kfree(sbi);
return err;
}
@@ -721,14 +708,18 @@ static int exfat_get_tree(struct fs_context *fc)
return get_tree_bdev(fc, exfat_fill_super);
}
+static void exfat_free_sbi(struct exfat_sb_info *sbi)
+{
+ exfat_free_iocharset(sbi);
+ kfree(sbi);
+}
+
static void exfat_free(struct fs_context *fc)
{
struct exfat_sb_info *sbi = fc->s_fs_info;
- if (sbi) {
- exfat_free_iocharset(sbi);
- kfree(sbi);
- }
+ if (sbi)
+ exfat_free_sbi(sbi);
}
static int exfat_reconfigure(struct fs_context *fc)
@@ -773,12 +764,21 @@ static int exfat_init_fs_context(struct fs_context *fc)
return 0;
}
+static void exfat_kill_sb(struct super_block *sb)
+{
+ struct exfat_sb_info *sbi = sb->s_fs_info;
+
+ kill_block_super(sb);
+ if (sbi)
+ exfat_free_sbi(sbi);
+}
+
static struct file_system_type exfat_fs_type = {
.owner = THIS_MODULE,
.name = "exfat",
.init_fs_context = exfat_init_fs_context,
.parameters = exfat_parameters,
- .kill_sb = kill_block_super,
+ .kill_sb = exfat_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 40e624cf7e92..d1dbe47c7975 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -315,7 +315,7 @@ static int get_name(const struct path *path, char *name, struct dentry *child)
goto out;
error = -EINVAL;
- if (!file->f_op->iterate && !file->f_op->iterate_shared)
+ if (!file->f_op->iterate_shared)
goto out_close;
buffer.sequence = 0;
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 82b17d7fc93f..7e54c31589c7 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -237,7 +237,7 @@ ext2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
error = __ext2_set_acl(inode, acl, type);
if (!error && update_mode) {
inode->i_mode = mode;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
}
return error;
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 42db804794bd..b335f17f682f 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -468,7 +468,7 @@ int ext2_set_link(struct inode *dir, struct ext2_dir_entry_2 *de,
ext2_set_de_type(de, inode);
ext2_commit_chunk(page, pos, len);
if (update_times)
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
mark_inode_dirty(dir);
return ext2_handle_dirsync(dir);
@@ -555,7 +555,7 @@ got_it:
de->inode = cpu_to_le32(inode->i_ino);
ext2_set_de_type (de, inode);
ext2_commit_chunk(page, pos, rec_len);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
mark_inode_dirty(dir);
err = ext2_handle_dirsync(dir);
@@ -606,7 +606,7 @@ int ext2_delete_entry(struct ext2_dir_entry_2 *dir, struct page *page)
pde->rec_len = ext2_rec_len_to_disk(to - from);
dir->inode = 0;
ext2_commit_chunk(page, pos, to - from);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
EXT2_I(inode)->i_flags &= ~EXT2_BTREE_FL;
mark_inode_dirty(inode);
return ext2_handle_dirsync(inode);
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index a4e1d7a9c544..124df89689e1 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -549,7 +549,7 @@ got:
inode->i_ino = ino;
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
memset(ei->i_data, 0, sizeof(ei->i_data));
ei->i_flags =
ext2_mask_flags(mode, EXT2_I(dir)->i_flags & EXT2_FL_INHERITED);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 75983215c7a1..acbab27fe957 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -595,7 +595,7 @@ static void ext2_splice_branch(struct inode *inode,
if (where->bh)
mark_buffer_dirty_inode(where->bh, inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
}
@@ -1287,7 +1287,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
__ext2_truncate_blocks(inode, newsize);
filemap_invalidate_unlock(inode->i_mapping);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
if (inode_needs_sync(inode)) {
sync_mapping_buffers(inode->i_mapping);
sync_inode_metadata(inode, 1);
@@ -1409,9 +1409,9 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
- inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
+ inode_set_ctime(inode, (signed)le32_to_cpu(raw_inode->i_ctime), 0);
inode->i_mtime.tv_sec = (signed)le32_to_cpu(raw_inode->i_mtime);
- inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+ inode->i_atime.tv_nsec = inode->i_mtime.tv_nsec = 0;
ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
/* We now have enough fields to check if the inode was active or not.
* This is needed because nfsd might try to access dead inodes
@@ -1541,7 +1541,7 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le32(inode->i_size);
raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec);
- raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec);
+ raw_inode->i_ctime = cpu_to_le32(inode_get_ctime(inode).tv_sec);
raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec);
raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
@@ -1628,7 +1628,7 @@ int ext2_getattr(struct mnt_idmap *idmap, const struct path *path,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
return 0;
}
diff --git a/fs/ext2/ioctl.c b/fs/ext2/ioctl.c
index cc87d413eb43..44e04484e570 100644
--- a/fs/ext2/ioctl.c
+++ b/fs/ext2/ioctl.c
@@ -44,7 +44,7 @@ int ext2_fileattr_set(struct mnt_idmap *idmap,
(fa->flags & EXT2_FL_USER_MODIFIABLE);
ext2_set_inode_flags(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
return 0;
@@ -77,7 +77,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
inode_lock(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode->i_generation = generation;
inode_unlock(inode);
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 937dd8f60f96..059517068adc 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -211,7 +211,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
if (err)
return err;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_link_count(inode);
ihold(inode);
@@ -291,7 +291,7 @@ static int ext2_unlink(struct inode *dir, struct dentry *dentry)
if (err)
goto out;
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
inode_dec_link_count(inode);
err = 0;
out:
@@ -367,7 +367,7 @@ static int ext2_rename (struct mnt_idmap * idmap,
ext2_put_page(new_page, new_de);
if (err)
goto out_dir;
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
if (dir_de)
drop_nlink(new_inode);
inode_dec_link_count(new_inode);
@@ -383,7 +383,7 @@ static int ext2_rename (struct mnt_idmap * idmap,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = current_time(old_inode);
+ inode_set_ctime_current(old_inode);
mark_inode_dirty(old_inode);
err = ext2_delete_entry(old_de, old_page);
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 2959afc7541c..aaf3e3e88cb2 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1572,7 +1572,7 @@ out:
if (inode->i_size < off+len-towrite)
i_size_write(inode, off+len-towrite);
inode_inc_iversion(inode);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
return len - towrite;
}
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 8906ba479aaf..1c9187188d68 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -773,7 +773,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
/* Update the inode. */
EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (IS_SYNC(inode)) {
error = sync_inode_metadata(inode, 1);
/* In case sync failed due to ENOSPC the inode was actually
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 27fcbddfb148..3bffe862f954 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -259,7 +259,7 @@ retry:
error = __ext4_set_acl(handle, inode, type, acl, 0 /* xattr_flags */);
if (!error && update_mode) {
inode->i_mode = mode;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
error = ext4_mark_inode_dirty(handle, inode);
}
out_stop:
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0a2d55faa095..1e2259d9967d 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -868,64 +868,70 @@ struct ext4_inode {
* affected filesystem before 2242.
*/
-static inline __le32 ext4_encode_extra_time(struct timespec64 *time)
+static inline __le32 ext4_encode_extra_time(struct timespec64 ts)
{
- u32 extra =((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK;
- return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS));
+ u32 extra = ((ts.tv_sec - (s32)ts.tv_sec) >> 32) & EXT4_EPOCH_MASK;
+ return cpu_to_le32(extra | (ts.tv_nsec << EXT4_EPOCH_BITS));
}
-static inline void ext4_decode_extra_time(struct timespec64 *time,
- __le32 extra)
+static inline struct timespec64 ext4_decode_extra_time(__le32 base,
+ __le32 extra)
{
+ struct timespec64 ts = { .tv_sec = (signed)le32_to_cpu(base) };
+
if (unlikely(extra & cpu_to_le32(EXT4_EPOCH_MASK)))
- time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
- time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+ ts.tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32;
+ ts.tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS;
+ return ts;
}
-#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
+#define EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, ts) \
do { \
- if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) {\
- (raw_inode)->xtime = cpu_to_le32((inode)->xtime.tv_sec); \
- (raw_inode)->xtime ## _extra = \
- ext4_encode_extra_time(&(inode)->xtime); \
- } \
- else \
- (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (inode)->xtime.tv_sec, S32_MIN, S32_MAX)); \
+ if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \
+ (raw_inode)->xtime = cpu_to_le32((ts).tv_sec); \
+ (raw_inode)->xtime ## _extra = ext4_encode_extra_time(ts); \
+ } else \
+ (raw_inode)->xtime = cpu_to_le32(clamp_t(int32_t, (ts).tv_sec, S32_MIN, S32_MAX)); \
} while (0)
-#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
-do { \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
- (raw_inode)->xtime = cpu_to_le32((einode)->xtime.tv_sec); \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
- (raw_inode)->xtime ## _extra = \
- ext4_encode_extra_time(&(einode)->xtime); \
-} while (0)
+#define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \
+ EXT4_INODE_SET_XTIME_VAL(xtime, inode, raw_inode, (inode)->xtime)
+
+#define EXT4_INODE_SET_CTIME(inode, raw_inode) \
+ EXT4_INODE_SET_XTIME_VAL(i_ctime, inode, raw_inode, inode_get_ctime(inode))
+
+#define EXT4_EINODE_SET_XTIME(xtime, einode, raw_inode) \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ EXT4_INODE_SET_XTIME_VAL(xtime, &((einode)->vfs_inode), \
+ raw_inode, (einode)->xtime)
+
+#define EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode) \
+ (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra) ? \
+ ext4_decode_extra_time((raw_inode)->xtime, \
+ (raw_inode)->xtime ## _extra) : \
+ (struct timespec64) { \
+ .tv_sec = (signed)le32_to_cpu((raw_inode)->xtime) \
+ })
#define EXT4_INODE_GET_XTIME(xtime, inode, raw_inode) \
do { \
- (inode)->xtime.tv_sec = (signed)le32_to_cpu((raw_inode)->xtime); \
- if (EXT4_FITS_IN_INODE(raw_inode, EXT4_I(inode), xtime ## _extra)) { \
- ext4_decode_extra_time(&(inode)->xtime, \
- raw_inode->xtime ## _extra); \
- } \
- else \
- (inode)->xtime.tv_nsec = 0; \
+ (inode)->xtime = EXT4_INODE_GET_XTIME_VAL(xtime, inode, raw_inode); \
} while (0)
+#define EXT4_INODE_GET_CTIME(inode, raw_inode) \
+do { \
+ inode_set_ctime_to_ts(inode, \
+ EXT4_INODE_GET_XTIME_VAL(i_ctime, inode, raw_inode)); \
+} while (0)
-#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
-do { \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
- (einode)->xtime.tv_sec = \
- (signed)le32_to_cpu((raw_inode)->xtime); \
- else \
- (einode)->xtime.tv_sec = 0; \
- if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
- ext4_decode_extra_time(&(einode)->xtime, \
- raw_inode->xtime ## _extra); \
- else \
- (einode)->xtime.tv_nsec = 0; \
+#define EXT4_EINODE_GET_XTIME(xtime, einode, raw_inode) \
+do { \
+ if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
+ (einode)->xtime = \
+ EXT4_INODE_GET_XTIME_VAL(xtime, &(einode->vfs_inode), \
+ raw_inode); \
+ else \
+ (einode)->xtime = (struct timespec64){0, 0}; \
} while (0)
#define i_disk_version osd1.linux1.l_i_version
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 77f318ec8abb..b38d59581411 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -234,8 +234,7 @@ int __ext4_journal_get_write_access(const char *where, unsigned int line,
might_sleep();
- if (bh->b_bdev->bd_super)
- ext4_check_bdev_write_error(bh->b_bdev->bd_super);
+ ext4_check_bdev_write_error(sb);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_get_write_access(handle, bh);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index e4115d338f10..202c76996b62 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4476,12 +4476,12 @@ retry:
map.m_lblk += ret;
map.m_len = len = len - ret;
epos = (loff_t)map.m_lblk << inode->i_blkbits;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (new_size) {
if (epos > new_size)
epos = new_size;
if (ext4_update_inode_size(inode, epos) & 0x1)
- inode->i_mtime = inode->i_ctime;
+ inode->i_mtime = inode_get_ctime(inode);
}
ret2 = ext4_mark_inode_dirty(handle, inode);
ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -4617,7 +4617,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags);
@@ -4642,7 +4642,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
goto out_mutex;
}
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
if (new_size)
ext4_update_inode_size(inode, new_size);
ret = ext4_mark_inode_dirty(handle, inode);
@@ -5378,7 +5378,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
ret = ext4_mark_inode_dirty(handle, inode);
ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -5488,7 +5488,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
/* Expand file to avoid data loss if there is error while shifting */
inode->i_size += len;
EXT4_I(inode)->i_disksize += len;
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
ret = ext4_mark_inode_dirty(handle, inode);
if (ret)
goto out_stop;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 754f961cd9fd..48abef5f23e7 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1250,7 +1250,7 @@ got:
inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
/* This is the optimal IO size (for stat), not the fs block size */
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
ei->i_crtime = inode->i_mtime;
memset(ei->i_data, 0, sizeof(ei->i_data));
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index a4b7e4bc32d4..003861037374 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1037,7 +1037,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
ext4_update_dx_flag(dir);
inode_inc_iversion(dir);
return 1;
@@ -1991,7 +1991,7 @@ out:
ext4_orphan_del(handle, inode);
if (err == 0) {
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
err = ext4_mark_inode_dirty(handle, inode);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
diff --git a/fs/ext4/inode-test.c b/fs/ext4/inode-test.c
index 7935ea6cf92c..f0c0fd507fbc 100644
--- a/fs/ext4/inode-test.c
+++ b/fs/ext4/inode-test.c
@@ -245,9 +245,9 @@ static void inode_test_xtimestamp_decoding(struct kunit *test)
struct timestamp_expectation *test_param =
(struct timestamp_expectation *)(test->param_value);
- timestamp.tv_sec = get_32bit_time(test_param);
- ext4_decode_extra_time(&timestamp,
- cpu_to_le32(test_param->extra_bits));
+ timestamp = ext4_decode_extra_time(
+ cpu_to_le32(get_32bit_time(test_param)),
+ cpu_to_le32(test_param->extra_bits));
KUNIT_EXPECT_EQ_MSG(test,
test_param->expected.tv_sec,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 43775a6ca505..6683076ecb2f 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3986,7 +3986,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
ret2 = ext4_mark_inode_dirty(handle, inode);
if (unlikely(ret2))
ret = ret2;
@@ -4146,7 +4146,7 @@ out_stop:
if (inode->i_nlink)
ext4_orphan_del(handle, inode);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
err2 = ext4_mark_inode_dirty(handle, inode);
if (unlikely(err2 && !err))
err = err2;
@@ -4249,7 +4249,7 @@ static int ext4_fill_raw_inode(struct inode *inode, struct ext4_inode *raw_inode
}
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
- EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+ EXT4_INODE_SET_CTIME(inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
@@ -4858,7 +4858,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
}
}
- EXT4_INODE_GET_XTIME(i_ctime, inode, raw_inode);
+ EXT4_INODE_GET_CTIME(inode, raw_inode);
EXT4_INODE_GET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode);
EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode);
@@ -4981,7 +4981,7 @@ static void __ext4_update_other_inode_time(struct super_block *sb,
spin_unlock(&inode->i_lock);
spin_lock(&ei->i_raw_lock);
- EXT4_INODE_SET_XTIME(i_ctime, inode, raw_inode);
+ EXT4_INODE_SET_CTIME(inode, raw_inode);
EXT4_INODE_SET_XTIME(i_mtime, inode, raw_inode);
EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
ext4_inode_csum_set(inode, raw_inode, ei);
@@ -5376,10 +5376,8 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
* Update c/mtime on truncate up, ext4_truncate() will
* update c/mtime in shrink case below
*/
- if (!shrink) {
- inode->i_mtime = current_time(inode);
- inode->i_ctime = inode->i_mtime;
- }
+ if (!shrink)
+ inode->i_mtime = inode_set_ctime_current(inode);
if (shrink)
ext4_fc_track_range(handle, inode,
@@ -5537,7 +5535,7 @@ int ext4_getattr(struct mnt_idmap *idmap, const struct path *path,
STATX_ATTR_NODUMP |
STATX_ATTR_VERITY);
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
return 0;
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 331859511f80..b0349f451863 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -449,7 +449,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
diff = size - size_bl;
swap_inode_data(inode, inode_bl);
- inode->i_ctime = inode_bl->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
+ inode_set_ctime_current(inode_bl);
inode_inc_iversion(inode);
inode->i_generation = get_random_u32();
@@ -663,7 +664,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
ext4_set_inode_flags(inode, false);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -774,7 +775,7 @@ static int ext4_ioctl_setproject(struct inode *inode, __u32 projid)
}
EXT4_I(inode)->i_projid = kprojid;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
out_dirty:
rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
@@ -1266,7 +1267,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
err = ext4_reserve_inode_write(handle, inode, &iloc);
if (err == 0) {
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
inode->i_generation = generation;
err = ext4_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a2475b8c9fb5..21b903fe546e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1006,14 +1006,11 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
* fls() instead since we need to know the actual length while modifying
* goal length.
*/
- order = fls(ac->ac_g_ex.fe_len);
+ order = fls(ac->ac_g_ex.fe_len) - 1;
min_order = order - sbi->s_mb_best_avail_max_trim_order;
if (min_order < 0)
min_order = 0;
- if (1 << min_order < ac->ac_o_ex.fe_len)
- min_order = fls(ac->ac_o_ex.fe_len) + 1;
-
if (sbi->s_stripe > 0) {
/*
* We are assuming that stripe size is always a multiple of
@@ -1021,9 +1018,16 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context
*/
num_stripe_clusters = EXT4_NUM_B2C(sbi, sbi->s_stripe);
if (1 << min_order < num_stripe_clusters)
- min_order = fls(num_stripe_clusters);
+ /*
+ * We consider 1 order less because later we round
+ * up the goal len to num_stripe_clusters
+ */
+ min_order = fls(num_stripe_clusters) - 1;
}
+ if (1 << min_order < ac->ac_o_ex.fe_len)
+ min_order = fls(ac->ac_o_ex.fe_len);
+
for (i = order; i >= min_order; i--) {
int frag_order;
/*
@@ -4761,8 +4765,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
int order, i;
struct ext4_inode_info *ei = EXT4_I(ac->ac_inode);
struct ext4_locality_group *lg;
- struct ext4_prealloc_space *tmp_pa, *cpa = NULL;
- ext4_lblk_t tmp_pa_start, tmp_pa_end;
+ struct ext4_prealloc_space *tmp_pa = NULL, *cpa = NULL;
+ loff_t tmp_pa_end;
struct rb_node *iter;
ext4_fsblk_t goal_block;
@@ -4770,47 +4774,151 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
return false;
- /* first, try per-file preallocation */
+ /*
+ * first, try per-file preallocation by searching the inode pa rbtree.
+ *
+ * Here, we can't do a direct traversal of the tree because
+ * ext4_mb_discard_group_preallocation() can paralelly mark the pa
+ * deleted and that can cause direct traversal to skip some entries.
+ */
read_lock(&ei->i_prealloc_lock);
+
+ if (RB_EMPTY_ROOT(&ei->i_prealloc_node)) {
+ goto try_group_pa;
+ }
+
+ /*
+ * Step 1: Find a pa with logical start immediately adjacent to the
+ * original logical start. This could be on the left or right.
+ *
+ * (tmp_pa->pa_lstart never changes so we can skip locking for it).
+ */
for (iter = ei->i_prealloc_node.rb_node; iter;
iter = ext4_mb_pa_rb_next_iter(ac->ac_o_ex.fe_logical,
- tmp_pa_start, iter)) {
+ tmp_pa->pa_lstart, iter)) {
tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
pa_node.inode_node);
+ }
- /* all fields in this condition don't change,
- * so we can skip locking for them */
- tmp_pa_start = tmp_pa->pa_lstart;
- tmp_pa_end = tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
-
- /* original request start doesn't lie in this PA */
- if (ac->ac_o_ex.fe_logical < tmp_pa_start ||
- ac->ac_o_ex.fe_logical >= tmp_pa_end)
- continue;
+ /*
+ * Step 2: The adjacent pa might be to the right of logical start, find
+ * the left adjacent pa. After this step we'd have a valid tmp_pa whose
+ * logical start is towards the left of original request's logical start
+ */
+ if (tmp_pa->pa_lstart > ac->ac_o_ex.fe_logical) {
+ struct rb_node *tmp;
+ tmp = rb_prev(&tmp_pa->pa_node.inode_node);
- /* non-extent files can't have physical blocks past 2^32 */
- if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
- (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
- EXT4_MAX_BLOCK_FILE_PHYS)) {
+ if (tmp) {
+ tmp_pa = rb_entry(tmp, struct ext4_prealloc_space,
+ pa_node.inode_node);
+ } else {
/*
- * Since PAs don't overlap, we won't find any
- * other PA to satisfy this.
+ * If there is no adjacent pa to the left then finding
+ * an overlapping pa is not possible hence stop searching
+ * inode pa tree
*/
- break;
+ goto try_group_pa;
}
+ }
+
+ BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
- /* found preallocated blocks, use them */
+ /*
+ * Step 3: If the left adjacent pa is deleted, keep moving left to find
+ * the first non deleted adjacent pa. After this step we should have a
+ * valid tmp_pa which is guaranteed to be non deleted.
+ */
+ for (iter = &tmp_pa->pa_node.inode_node;; iter = rb_prev(iter)) {
+ if (!iter) {
+ /*
+ * no non deleted left adjacent pa, so stop searching
+ * inode pa tree
+ */
+ goto try_group_pa;
+ }
+ tmp_pa = rb_entry(iter, struct ext4_prealloc_space,
+ pa_node.inode_node);
spin_lock(&tmp_pa->pa_lock);
- if (tmp_pa->pa_deleted == 0 && tmp_pa->pa_free &&
- likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
- atomic_inc(&tmp_pa->pa_count);
- ext4_mb_use_inode_pa(ac, tmp_pa);
+ if (tmp_pa->pa_deleted == 0) {
+ /*
+ * We will keep holding the pa_lock from
+ * this point on because we don't want group discard
+ * to delete this pa underneath us. Since group
+ * discard is anyways an ENOSPC operation it
+ * should be okay for it to wait a few more cycles.
+ */
+ break;
+ } else {
spin_unlock(&tmp_pa->pa_lock);
- read_unlock(&ei->i_prealloc_lock);
- return true;
}
+ }
+
+ BUG_ON(!(tmp_pa && tmp_pa->pa_lstart <= ac->ac_o_ex.fe_logical));
+ BUG_ON(tmp_pa->pa_deleted == 1);
+
+ /*
+ * Step 4: We now have the non deleted left adjacent pa. Only this
+ * pa can possibly satisfy the request hence check if it overlaps
+ * original logical start and stop searching if it doesn't.
+ */
+ tmp_pa_end = (loff_t)tmp_pa->pa_lstart + EXT4_C2B(sbi, tmp_pa->pa_len);
+
+ if (ac->ac_o_ex.fe_logical >= tmp_pa_end) {
spin_unlock(&tmp_pa->pa_lock);
+ goto try_group_pa;
+ }
+
+ /* non-extent files can't have physical blocks past 2^32 */
+ if (!(ext4_test_inode_flag(ac->ac_inode, EXT4_INODE_EXTENTS)) &&
+ (tmp_pa->pa_pstart + EXT4_C2B(sbi, tmp_pa->pa_len) >
+ EXT4_MAX_BLOCK_FILE_PHYS)) {
+ /*
+ * Since PAs don't overlap, we won't find any other PA to
+ * satisfy this.
+ */
+ spin_unlock(&tmp_pa->pa_lock);
+ goto try_group_pa;
+ }
+
+ if (tmp_pa->pa_free && likely(ext4_mb_pa_goal_check(ac, tmp_pa))) {
+ atomic_inc(&tmp_pa->pa_count);
+ ext4_mb_use_inode_pa(ac, tmp_pa);
+ spin_unlock(&tmp_pa->pa_lock);
+ read_unlock(&ei->i_prealloc_lock);
+ return true;
+ } else {
+ /*
+ * We found a valid overlapping pa but couldn't use it because
+ * it had no free blocks. This should ideally never happen
+ * because:
+ *
+ * 1. When a new inode pa is added to rbtree it must have
+ * pa_free > 0 since otherwise we won't actually need
+ * preallocation.
+ *
+ * 2. An inode pa that is in the rbtree can only have it's
+ * pa_free become zero when another thread calls:
+ * ext4_mb_new_blocks
+ * ext4_mb_use_preallocated
+ * ext4_mb_use_inode_pa
+ *
+ * 3. Further, after the above calls make pa_free == 0, we will
+ * immediately remove it from the rbtree in:
+ * ext4_mb_new_blocks
+ * ext4_mb_release_context
+ * ext4_mb_put_pa
+ *
+ * 4. Since the pa_free becoming 0 and pa_free getting removed
+ * from tree both happen in ext4_mb_new_blocks, which is always
+ * called with i_data_sem held for data allocations, we can be
+ * sure that another process will never see a pa in rbtree with
+ * pa_free == 0.
+ */
+ WARN_ON_ONCE(tmp_pa->pa_free == 0);
}
+ spin_unlock(&tmp_pa->pa_lock);
+try_group_pa:
read_unlock(&ei->i_prealloc_lock);
/* can we use group allocation? */
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 0caf6c730ce3..933ad03f4f58 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2203,7 +2203,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
* happen is that the times are slightly out of date
* and/or different from the directory change time.
*/
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
ext4_update_dx_flag(dir);
inode_inc_iversion(dir);
err2 = ext4_mark_inode_dirty(handle, dir);
@@ -3197,7 +3197,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
* recovery. */
inode->i_size = 0;
ext4_orphan_add(handle, inode);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ dir->i_mtime = inode_set_ctime_current(dir);
+ inode_set_ctime_current(inode);
retval = ext4_mark_inode_dirty(handle, inode);
if (retval)
goto end_rmdir;
@@ -3271,7 +3272,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
retval = ext4_delete_entry(handle, dir, de, bh);
if (retval)
goto out_handle;
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
ext4_update_dx_flag(dir);
retval = ext4_mark_inode_dirty(handle, dir);
if (retval)
@@ -3286,7 +3287,7 @@ int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
drop_nlink(inode);
if (!inode->i_nlink)
ext4_orphan_add(handle, inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
retval = ext4_mark_inode_dirty(handle, inode);
if (dentry && !retval)
ext4_fc_track_unlink(handle, dentry);
@@ -3463,7 +3464,7 @@ retry:
if (IS_DIRSYNC(dir))
ext4_handle_sync(handle);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
ext4_inc_count(inode);
ihold(inode);
@@ -3641,8 +3642,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
if (ext4_has_feature_filetype(ent->dir->i_sb))
ent->de->file_type = file_type;
inode_inc_iversion(ent->dir);
- ent->dir->i_ctime = ent->dir->i_mtime =
- current_time(ent->dir);
+ ent->dir->i_mtime = inode_set_ctime_current(ent->dir);
retval = ext4_mark_inode_dirty(handle, ent->dir);
BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
if (!ent->inlined) {
@@ -3941,7 +3941,7 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old.inode->i_ctime = current_time(old.inode);
+ inode_set_ctime_current(old.inode);
retval = ext4_mark_inode_dirty(handle, old.inode);
if (unlikely(retval))
goto end_rename;
@@ -3955,9 +3955,9 @@ static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (new.inode) {
ext4_dec_count(new.inode);
- new.inode->i_ctime = current_time(new.inode);
+ inode_set_ctime_current(new.inode);
}
- old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
+ old.dir->i_mtime = inode_set_ctime_current(old.dir);
ext4_update_dx_flag(old.dir);
if (old.dir_bh) {
retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
@@ -4053,7 +4053,6 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
};
u8 new_file_type;
int retval;
- struct timespec64 ctime;
if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
!projid_eq(EXT4_I(new_dir)->i_projid,
@@ -4147,9 +4146,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- ctime = current_time(old.inode);
- old.inode->i_ctime = ctime;
- new.inode->i_ctime = ctime;
+ inode_set_ctime_current(old.inode);
+ inode_set_ctime_current(new.inode);
retval = ext4_mark_inode_dirty(handle, old.inode);
if (unlikely(retval))
goto end_rename;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c94ebf704616..73547d2334fd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -93,6 +93,7 @@ static int ext4_get_tree(struct fs_context *fc);
static int ext4_reconfigure(struct fs_context *fc);
static void ext4_fc_free(struct fs_context *fc);
static int ext4_init_fs_context(struct fs_context *fc);
+static void ext4_kill_sb(struct super_block *sb);
static const struct fs_parameter_spec ext4_param_specs[];
/*
@@ -135,12 +136,12 @@ static struct file_system_type ext2_fs_type = {
.name = "ext2",
.init_fs_context = ext4_init_fs_context,
.parameters = ext4_param_specs,
- .kill_sb = kill_block_super,
+ .kill_sb = ext4_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
-#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
+#define IS_EXT2_SB(sb) ((sb)->s_type == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif
@@ -151,12 +152,12 @@ static struct file_system_type ext3_fs_type = {
.name = "ext3",
.init_fs_context = ext4_init_fs_context,
.parameters = ext4_param_specs,
- .kill_sb = kill_block_super,
+ .kill_sb = ext4_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
-#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
+#define IS_EXT3_SB(sb) ((sb)->s_type == &ext3_fs_type)
static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
@@ -1096,15 +1097,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
*/
}
-static void ext4_bdev_mark_dead(struct block_device *bdev)
-{
- ext4_force_shutdown(bdev->bd_holder, EXT4_GOING_FLAGS_NOLOGFLUSH);
-}
-
-static const struct blk_holder_ops ext4_holder_ops = {
- .mark_dead = ext4_bdev_mark_dead,
-};
-
/*
* Open the external journal device
*/
@@ -1113,7 +1105,7 @@ static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
struct block_device *bdev;
bdev = blkdev_get_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, sb,
- &ext4_holder_ops);
+ &fs_holder_ops);
if (IS_ERR(bdev))
goto fail;
return bdev;
@@ -1125,25 +1117,6 @@ fail:
return NULL;
}
-/*
- * Release the journal device
- */
-static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
-{
- struct block_device *bdev;
- bdev = sbi->s_journal_bdev;
- if (bdev) {
- /*
- * Invalidate the journal device's buffers. We don't want them
- * floating about in memory - the physical journal device may
- * hotswapped, and it breaks the `ro-after' testing code.
- */
- invalidate_bdev(bdev);
- blkdev_put(bdev, sbi->s_sb);
- sbi->s_journal_bdev = NULL;
- }
-}
-
static inline struct inode *orphan_list_entry(struct list_head *l)
{
return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
@@ -1339,8 +1312,13 @@ static void ext4_put_super(struct super_block *sb)
sync_blockdev(sb->s_bdev);
invalidate_bdev(sb->s_bdev);
if (sbi->s_journal_bdev) {
+ /*
+ * Invalidate the journal device's buffers. We don't want them
+ * floating about in memory - the physical journal device may
+ * hotswapped, and it breaks the `ro-after' testing code.
+ */
sync_blockdev(sbi->s_journal_bdev);
- ext4_blkdev_remove(sbi);
+ invalidate_bdev(sbi->s_journal_bdev);
}
ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
@@ -5572,7 +5550,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
spin_lock_init(&sbi->s_bdev_wb_lock);
errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
&sbi->s_bdev_wb_err);
- sb->s_bdev->bd_super = sb;
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
@@ -5664,9 +5641,11 @@ failed_mount:
kfree(get_qf_name(sb, sbi, i));
#endif
fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
- /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
brelse(sbi->s_sbh);
- ext4_blkdev_remove(sbi);
+ if (sbi->s_journal_bdev) {
+ invalidate_bdev(sbi->s_journal_bdev);
+ blkdev_put(sbi->s_journal_bdev, sb);
+ }
out_fail:
invalidate_bdev(sb->s_bdev);
sb->s_fs_info = NULL;
@@ -5854,7 +5833,10 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
return NULL;
+ /* see get_tree_bdev why this is needed and safe */
+ up_write(&sb->s_umount);
bdev = ext4_blkdev_get(j_dev, sb);
+ down_write(&sb->s_umount);
if (bdev == NULL)
return NULL;
@@ -7103,7 +7085,7 @@ static int ext4_quota_off(struct super_block *sb, int type)
}
EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
err = ext4_mark_inode_dirty(handle, inode);
ext4_journal_stop(handle);
out_unlock:
@@ -7273,13 +7255,24 @@ static inline int ext3_feature_set_ok(struct super_block *sb)
return 1;
}
+static void ext4_kill_sb(struct super_block *sb)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct block_device *journal_bdev = sbi ? sbi->s_journal_bdev : NULL;
+
+ kill_block_super(sb);
+
+ if (journal_bdev)
+ blkdev_put(journal_bdev, sb);
+}
+
static struct file_system_type ext4_fs_type = {
.owner = THIS_MODULE,
.name = "ext4",
.init_fs_context = ext4_init_fs_context,
.parameters = ext4_param_specs,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .kill_sb = ext4_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
};
MODULE_ALIAS_FS("ext4");
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 321e3a888c20..281e1bfbbe3e 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -356,13 +356,13 @@ ext4_xattr_inode_hash(struct ext4_sb_info *sbi, const void *buffer, size_t size)
static u64 ext4_xattr_inode_get_ref(struct inode *ea_inode)
{
- return ((u64)ea_inode->i_ctime.tv_sec << 32) |
+ return ((u64) inode_get_ctime(ea_inode).tv_sec << 32) |
(u32) inode_peek_iversion_raw(ea_inode);
}
static void ext4_xattr_inode_set_ref(struct inode *ea_inode, u64 ref_count)
{
- ea_inode->i_ctime.tv_sec = (u32)(ref_count >> 32);
+ inode_set_ctime(ea_inode, (u32)(ref_count >> 32), 0);
inode_set_iversion_raw(ea_inode, ref_count & 0xffffffff);
}
@@ -1782,6 +1782,20 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
memmove(here, (void *)here + size,
(void *)last - (void *)here + sizeof(__u32));
memset(last, 0, size);
+
+ /*
+ * Update i_inline_off - moved ibody region might contain
+ * system.data attribute. Handling a failure here won't
+ * cause other complications for setting an xattr.
+ */
+ if (!is_block && ext4_has_inline_data(inode)) {
+ ret = ext4_find_inline_data_nolock(inode);
+ if (ret) {
+ ext4_warning_inode(inode,
+ "unable to update i_inline_off");
+ goto out;
+ }
+ }
} else if (s->not_found) {
/* Insert new name. */
size_t size = EXT4_XATTR_LEN(name_len);
@@ -2459,7 +2473,7 @@ retry_inode:
}
if (!error) {
ext4_xattr_update_super_block(handle, inode->i_sb);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
if (!value)
no_expand = 0;
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 236d890f560b..0f7df9c11af3 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1045,7 +1045,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc,
struct address_space *mapping = cc->inode->i_mapping;
struct page *page;
sector_t last_block_in_bio;
- unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
+ fgf_t fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT;
pgoff_t start_idx = start_idx_of_cluster(cc);
int i, ret;
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index d635c58cf5a3..8aa29fe2e87b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -455,7 +455,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
de->file_type = fs_umode_to_ftype(inode->i_mode);
set_page_dirty(page);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
f2fs_mark_inode_dirty_sync(dir, false);
f2fs_put_page(page, 1);
}
@@ -609,7 +609,7 @@ void f2fs_update_parent_metadata(struct inode *dir, struct inode *inode,
f2fs_i_links_write(dir, true);
clear_inode_flag(inode, FI_NEW_INODE);
}
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (F2FS_I(dir)->i_current_depth != current_depth)
@@ -858,7 +858,7 @@ void f2fs_drop_nlink(struct inode *dir, struct inode *inode)
if (S_ISDIR(inode->i_mode))
f2fs_i_links_write(dir, false);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
f2fs_i_links_write(inode, false);
if (S_ISDIR(inode->i_mode)) {
@@ -919,7 +919,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
}
f2fs_put_page(page, 1);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index c7cb2177b252..613132339d72 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2736,7 +2736,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping,
static inline struct page *f2fs_pagecache_get_page(
struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp_mask)
+ fgf_t fgp_flags, gfp_t gfp_mask)
{
if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET))
return NULL;
@@ -3303,9 +3303,11 @@ static inline void clear_file(struct inode *inode, int type)
static inline bool f2fs_is_time_consistent(struct inode *inode)
{
+ struct timespec64 ctime = inode_get_ctime(inode);
+
if (!timespec64_equal(F2FS_I(inode)->i_disk_time, &inode->i_atime))
return false;
- if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &inode->i_ctime))
+ if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 1, &ctime))
return false;
if (!timespec64_equal(F2FS_I(inode)->i_disk_time + 2, &inode->i_mtime))
return false;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 093039dee992..35886a52edfb 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -794,7 +794,7 @@ int f2fs_truncate(struct inode *inode)
if (err)
return err;
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, false);
return 0;
}
@@ -882,7 +882,7 @@ int f2fs_getattr(struct mnt_idmap *idmap, const struct path *path,
STATX_ATTR_NODUMP |
STATX_ATTR_VERITY);
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
/* we need to show initial sectors used for inline_data/dentries */
if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
@@ -905,7 +905,7 @@ static void __setattr_copy(struct mnt_idmap *idmap,
if (ia_valid & ATTR_MTIME)
inode->i_mtime = attr->ia_mtime;
if (ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
@@ -1008,7 +1008,7 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
return err;
spin_lock(&F2FS_I(inode)->i_size_lock);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
F2FS_I(inode)->last_disk_size = i_size_read(inode);
spin_unlock(&F2FS_I(inode)->i_size_lock);
}
@@ -1835,7 +1835,7 @@ static long f2fs_fallocate(struct file *file, int mode,
}
if (!ret) {
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, false);
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
}
@@ -1937,7 +1937,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
else
clear_inode_flag(inode, FI_PROJ_INHERIT);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
f2fs_set_inode_flags(inode);
f2fs_mark_inode_dirty_sync(inode, true);
return 0;
@@ -2874,10 +2874,10 @@ out_src:
if (ret)
goto out_unlock;
- src->i_mtime = src->i_ctime = current_time(src);
+ src->i_mtime = inode_set_ctime_current(src);
f2fs_mark_inode_dirty_sync(src, false);
if (src != dst) {
- dst->i_mtime = dst->i_ctime = current_time(dst);
+ dst->i_mtime = inode_set_ctime_current(dst);
f2fs_mark_inode_dirty_sync(dst, false);
}
f2fs_update_time(sbi, REQ_TIME);
@@ -3073,7 +3073,7 @@ static int f2fs_ioc_setproject(struct inode *inode, __u32 projid)
goto out_unlock;
fi->i_projid = kprojid;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, true);
out_unlock:
f2fs_unlock_op(sbi);
@@ -3511,7 +3511,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
}
set_inode_flag(inode, FI_COMPRESS_RELEASED);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, true);
f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -3710,7 +3710,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
if (ret >= 0) {
clear_inode_flag(inode, FI_COMPRESS_RELEASED);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
unlock_inode:
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 01effd3fcb6c..a1ca394bc327 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -2181,12 +2181,14 @@ out_drop_write:
if (err)
return err;
- err = freeze_super(sbi->sb);
+ err = freeze_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
if (err)
return err;
if (f2fs_readonly(sbi->sb)) {
- thaw_super(sbi->sb);
+ err = thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
+ if (err)
+ return err;
return -EROFS;
}
@@ -2240,6 +2242,6 @@ recover_out:
out_err:
f2fs_up_write(&sbi->cp_global_sem);
f2fs_up_write(&sbi->gc_lock);
- thaw_super(sbi->sb);
+ thaw_super(sbi->sb, FREEZE_HOLDER_USERSPACE);
return err;
}
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 4638fee16a91..88fc9208ffa7 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -698,7 +698,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
set_page_dirty(page);
f2fs_put_page(page, 1);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
f2fs_mark_inode_dirty_sync(dir, false);
if (inode)
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 09e986b050c6..c1c2ba9f28e5 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -403,7 +403,7 @@ static void init_idisk_time(struct inode *inode)
struct f2fs_inode_info *fi = F2FS_I(inode);
fi->i_disk_time[0] = inode->i_atime;
- fi->i_disk_time[1] = inode->i_ctime;
+ fi->i_disk_time[1] = inode_get_ctime(inode);
fi->i_disk_time[2] = inode->i_mtime;
}
@@ -434,10 +434,10 @@ static int do_read_inode(struct inode *inode)
inode->i_blocks = SECTOR_FROM_BLOCK(le64_to_cpu(ri->i_blocks) - 1);
inode->i_atime.tv_sec = le64_to_cpu(ri->i_atime);
- inode->i_ctime.tv_sec = le64_to_cpu(ri->i_ctime);
+ inode_set_ctime(inode, le64_to_cpu(ri->i_ctime),
+ le32_to_cpu(ri->i_ctime_nsec));
inode->i_mtime.tv_sec = le64_to_cpu(ri->i_mtime);
inode->i_atime.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
- inode->i_ctime.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
inode->i_generation = le32_to_cpu(ri->i_generation);
if (S_ISDIR(inode->i_mode))
@@ -714,10 +714,10 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
set_raw_inline(inode, ri);
ri->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
- ri->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+ ri->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
ri->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
ri->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
- ri->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ ri->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ri->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
if (S_ISDIR(inode->i_mode))
ri->i_current_depth =
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index bee0568888da..193b22a2d6bf 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -243,7 +243,7 @@ static struct inode *f2fs_new_inode(struct mnt_idmap *idmap,
inode->i_ino = ino;
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
F2FS_I(inode)->i_crtime = inode->i_mtime;
inode->i_generation = get_random_u32();
@@ -420,7 +420,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
f2fs_balance_fs(sbi, true);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
ihold(inode);
set_inode_flag(inode, FI_INC_LINK);
@@ -1052,7 +1052,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
f2fs_set_link(new_dir, new_entry, new_page, old_inode);
new_page = NULL;
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
f2fs_down_write(&F2FS_I(new_inode)->i_sem);
if (old_dir_entry)
f2fs_i_links_write(new_inode, false);
@@ -1086,7 +1086,7 @@ static int f2fs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
f2fs_i_pino_write(old_inode, new_dir->i_ino);
f2fs_up_write(&F2FS_I(old_inode)->i_sem);
- old_inode->i_ctime = current_time(old_inode);
+ inode_set_ctime_current(old_inode);
f2fs_mark_inode_dirty_sync(old_inode, false);
f2fs_delete_entry(old_entry, old_page, old_dir, NULL);
@@ -1251,7 +1251,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_pino_write(old_inode, new_dir->i_ino);
f2fs_up_write(&F2FS_I(old_inode)->i_sem);
- old_dir->i_ctime = current_time(old_dir);
+ inode_set_ctime_current(old_dir);
if (old_nlink) {
f2fs_down_write(&F2FS_I(old_dir)->i_sem);
f2fs_i_links_write(old_dir, old_nlink > 0);
@@ -1270,7 +1270,7 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
f2fs_i_pino_write(new_inode, old_dir->i_ino);
f2fs_up_write(&F2FS_I(new_inode)->i_sem);
- new_dir->i_ctime = current_time(new_dir);
+ inode_set_ctime_current(new_dir);
if (new_nlink) {
f2fs_down_write(&F2FS_I(new_dir)->i_sem);
f2fs_i_links_write(new_dir, new_nlink > 0);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 4e7d4ceeb084..b8637e88d94f 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -321,10 +321,10 @@ static int recover_inode(struct inode *inode, struct page *page)
f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
- inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
+ inode_set_ctime(inode, le64_to_cpu(raw->i_ctime),
+ le32_to_cpu(raw->i_ctime_nsec));
inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
- inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
F2FS_I(inode)->i_advise = raw->i_advise;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index ca31163da00a..aa1f9a3a8037 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1561,7 +1561,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
int i;
for (i = 0; i < sbi->s_ndevs; i++) {
- blkdev_put(FDEV(i).bdev, sbi->sb->s_type);
+ blkdev_put(FDEV(i).bdev, sbi->sb);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
#endif
@@ -2703,7 +2703,7 @@ retry:
if (len == towrite)
return err;
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
f2fs_mark_inode_dirty_sync(inode, false);
return len - towrite;
}
@@ -4198,7 +4198,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
/* Single zoned block device mount */
FDEV(0).bdev =
blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev, mode,
- sbi->sb->s_type, NULL);
+ sbi->sb, NULL);
} else {
/* Multi-device mount */
memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
@@ -4217,8 +4217,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
sbi->log_blocks_per_seg) - 1;
}
FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path, mode,
- sbi->sb->s_type,
- NULL);
+ sbi->sb, NULL);
}
if (IS_ERR(FDEV(i).bdev))
return PTR_ERR(FDEV(i).bdev);
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 476b186b90a6..4ae93e1df421 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -764,7 +764,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
same:
if (is_inode_flag_set(inode, FI_ACL_MODE)) {
inode->i_mode = F2FS_I(inode)->i_acl_mode;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
clear_inode_flag(inode, FI_ACL_MODE);
}
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e3b690b48e3e..66cf4778cf3b 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -460,8 +460,7 @@ extern struct timespec64 fat_truncate_mtime(const struct msdos_sb_info *sbi,
const struct timespec64 *ts);
extern int fat_truncate_time(struct inode *inode, struct timespec64 *now,
int flags);
-extern int fat_update_time(struct inode *inode, struct timespec64 *now,
- int flags);
+extern int fat_update_time(struct inode *inode, int flags);
extern int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs);
int fat_cache_init(void);
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 456477946dd9..e887e9ab7472 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -401,7 +401,7 @@ int fat_getattr(struct mnt_idmap *idmap, const struct path *path,
struct inode *inode = d_inode(path->dentry);
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
stat->blksize = sbi->cluster_size;
if (sbi->options.nfs == FAT_NFS_NOSTALE_RO) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index d99b8549ec8f..cdd39b6020f3 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -562,7 +562,7 @@ int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
& ~((loff_t)sbi->cluster_size - 1)) >> 9;
fat_time_fat2unix(sbi, &inode->i_mtime, de->time, de->date, 0);
- inode->i_ctime = inode->i_mtime;
+ inode_set_ctime_to_ts(inode, inode->i_mtime);
if (sbi->options.isvfat) {
fat_time_fat2unix(sbi, &inode->i_atime, 0, de->adate, 0);
fat_time_fat2unix(sbi, &MSDOS_I(inode)->i_crtime, de->ctime,
@@ -1407,8 +1407,7 @@ static int fat_read_root(struct inode *inode)
MSDOS_I(inode)->mmu_private = inode->i_size;
fat_save_attrs(inode, ATTR_DIR);
- inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = 0;
- inode->i_mtime.tv_nsec = inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = 0;
+ inode->i_mtime = inode->i_atime = inode_set_ctime(inode, 0, 0);
set_nlink(inode, fat_subdirs(inode)+2);
return 0;
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 7e5d6ae305f2..f2304a1054aa 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -332,13 +332,14 @@ int fat_truncate_time(struct inode *inode, struct timespec64 *now, int flags)
* but ctime updates are ignored.
*/
if (flags & S_MTIME)
- inode->i_mtime = inode->i_ctime = fat_truncate_mtime(sbi, now);
+ inode->i_mtime = inode_set_ctime_to_ts(inode,
+ fat_truncate_mtime(sbi, now));
return 0;
}
EXPORT_SYMBOL_GPL(fat_truncate_time);
-int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
+int fat_update_time(struct inode *inode, int flags)
{
int dirty_flags = 0;
@@ -346,16 +347,13 @@ int fat_update_time(struct inode *inode, struct timespec64 *now, int flags)
return 0;
if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
- fat_truncate_time(inode, now, flags);
+ fat_truncate_time(inode, NULL, flags);
if (inode->i_sb->s_flags & SB_LAZYTIME)
dirty_flags |= I_DIRTY_TIME;
else
dirty_flags |= I_DIRTY_SYNC;
}
- if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
- dirty_flags |= I_DIRTY_SYNC;
-
__mark_inode_dirty(inode, dirty_flags);
return 0;
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index b622be119706..e871009f6c88 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -34,7 +34,7 @@
#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
-static int setfl(int fd, struct file * filp, unsigned long arg)
+static int setfl(int fd, struct file * filp, unsigned int arg)
{
struct inode * inode = file_inode(filp);
int error = 0;
@@ -112,11 +112,11 @@ void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
}
EXPORT_SYMBOL(__f_setown);
-int f_setown(struct file *filp, unsigned long arg, int force)
+int f_setown(struct file *filp, int who, int force)
{
enum pid_type type;
struct pid *pid = NULL;
- int who = arg, ret = 0;
+ int ret = 0;
type = PIDTYPE_TGID;
if (who < 0) {
@@ -317,28 +317,29 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
void __user *argp = (void __user *)arg;
+ int argi = (int)arg;
struct flock flock;
long err = -EINVAL;
switch (cmd) {
case F_DUPFD:
- err = f_dupfd(arg, filp, 0);
+ err = f_dupfd(argi, filp, 0);
break;
case F_DUPFD_CLOEXEC:
- err = f_dupfd(arg, filp, O_CLOEXEC);
+ err = f_dupfd(argi, filp, O_CLOEXEC);
break;
case F_GETFD:
err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
break;
case F_SETFD:
err = 0;
- set_close_on_exec(fd, arg & FD_CLOEXEC);
+ set_close_on_exec(fd, argi & FD_CLOEXEC);
break;
case F_GETFL:
err = filp->f_flags;
break;
case F_SETFL:
- err = setfl(fd, filp, arg);
+ err = setfl(fd, filp, argi);
break;
#if BITS_PER_LONG != 32
/* 32-bit arches must use fcntl64() */
@@ -375,7 +376,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
force_successful_syscall_return();
break;
case F_SETOWN:
- err = f_setown(filp, arg, 1);
+ err = f_setown(filp, argi, 1);
break;
case F_GETOWN_EX:
err = f_getown_ex(filp, arg);
@@ -391,28 +392,28 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
break;
case F_SETSIG:
/* arg == 0 restores default behaviour. */
- if (!valid_signal(arg)) {
+ if (!valid_signal(argi)) {
break;
}
err = 0;
- filp->f_owner.signum = arg;
+ filp->f_owner.signum = argi;
break;
case F_GETLEASE:
err = fcntl_getlease(filp);
break;
case F_SETLEASE:
- err = fcntl_setlease(fd, filp, arg);
+ err = fcntl_setlease(fd, filp, argi);
break;
case F_NOTIFY:
- err = fcntl_dirnotify(fd, filp, arg);
+ err = fcntl_dirnotify(fd, filp, argi);
break;
case F_SETPIPE_SZ:
case F_GETPIPE_SZ:
- err = pipe_fcntl(filp, cmd, arg);
+ err = pipe_fcntl(filp, cmd, argi);
break;
case F_ADD_SEALS:
case F_GET_SEALS:
- err = memfd_fcntl(filp, cmd, arg);
+ err = memfd_fcntl(filp, cmd, argi);
break;
case F_GET_RW_HINT:
case F_SET_RW_HINT:
diff --git a/fs/file.c b/fs/file.c
index 7893ea161d77..3e4a4dfa38fc 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -668,7 +668,7 @@ EXPORT_SYMBOL(close_fd); /* for ksys_close() */
/**
* last_fd - return last valid index into fd table
- * @cur_fds: files struct
+ * @fdt: File descriptor table.
*
* Context: Either rcu read lock or files_lock must be held.
*
@@ -693,29 +693,30 @@ static inline void __range_cloexec(struct files_struct *cur_fds,
spin_unlock(&cur_fds->file_lock);
}
-static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
+static inline void __range_close(struct files_struct *files, unsigned int fd,
unsigned int max_fd)
{
+ struct file *file;
unsigned n;
- rcu_read_lock();
- n = last_fd(files_fdtable(cur_fds));
- rcu_read_unlock();
+ spin_lock(&files->file_lock);
+ n = last_fd(files_fdtable(files));
max_fd = min(max_fd, n);
- while (fd <= max_fd) {
- struct file *file;
-
- spin_lock(&cur_fds->file_lock);
- file = pick_file(cur_fds, fd++);
- spin_unlock(&cur_fds->file_lock);
-
+ for (; fd <= max_fd; fd++) {
+ file = pick_file(files, fd);
if (file) {
- /* found a valid file to close */
- filp_close(file, cur_fds);
+ spin_unlock(&files->file_lock);
+ filp_close(file, files);
cond_resched();
+ spin_lock(&files->file_lock);
+ } else if (need_resched()) {
+ spin_unlock(&files->file_lock);
+ cond_resched();
+ spin_lock(&files->file_lock);
}
}
+ spin_unlock(&files->file_lock);
}
/**
@@ -723,6 +724,7 @@ static inline void __range_close(struct files_struct *cur_fds, unsigned int fd,
*
* @fd: starting file descriptor to close
* @max_fd: last file descriptor to close
+ * @flags: CLOSE_RANGE flags.
*
* This closes a range of file descriptors. All file descriptors
* from @fd up to and including @max_fd are closed.
@@ -1036,16 +1038,30 @@ unsigned long __fdget_raw(unsigned int fd)
return __fget_light(fd, 0);
}
+/*
+ * Try to avoid f_pos locking. We only need it if the
+ * file is marked for FMODE_ATOMIC_POS, and it can be
+ * accessed multiple ways.
+ *
+ * Always do it for directories, because pidfd_getfd()
+ * can make a file accessible even if it otherwise would
+ * not be, and for directories this is a correctness
+ * issue, not a "POSIX requirement".
+ */
+static inline bool file_needs_f_pos_lock(struct file *file)
+{
+ return (file->f_mode & FMODE_ATOMIC_POS) &&
+ (file_count(file) > 1 || file->f_op->iterate_shared);
+}
+
unsigned long __fdget_pos(unsigned int fd)
{
unsigned long v = __fdget(fd);
struct file *file = (struct file *)(v & ~3);
- if (file && (file->f_mode & FMODE_ATOMIC_POS)) {
- if (file_count(file) > 1) {
- v |= FDPUT_POS_UNLOCK;
- mutex_lock(&file->f_pos_lock);
- }
+ if (file && file_needs_f_pos_lock(file)) {
+ v |= FDPUT_POS_UNLOCK;
+ mutex_lock(&file->f_pos_lock);
}
return v;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index fc7d677ff5ad..ee21b3da9d08 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -461,11 +461,8 @@ void fput(struct file *file)
*/
void __fput_sync(struct file *file)
{
- if (atomic_long_dec_and_test(&file->f_count)) {
- struct task_struct *task = current;
- BUG_ON(!(task->flags & PF_KTHREAD));
+ if (atomic_long_dec_and_test(&file->f_count))
__fput(file);
- }
}
EXPORT_SYMBOL(fput);
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index ceb6a12649ba..ac5d43b164b5 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -110,10 +110,9 @@ static inline void dip2vip_cpy(struct vxfs_sb_info *sbi,
inode->i_size = vip->vii_size;
inode->i_atime.tv_sec = vip->vii_atime;
- inode->i_ctime.tv_sec = vip->vii_ctime;
+ inode_set_ctime(inode, vip->vii_ctime, 0);
inode->i_mtime.tv_sec = vip->vii_mtime;
inode->i_atime.tv_nsec = 0;
- inode->i_ctime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
inode->i_blocks = vip->vii_blocks;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index aca4b4811394..969ce991b0b0 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1953,9 +1953,9 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
struct inode *inode = wb_inode(wb->b_io.prev);
struct super_block *sb = inode->i_sb;
- if (!trylock_super(sb)) {
+ if (!super_trylock_shared(sb)) {
/*
- * trylock_super() may fail consistently due to
+ * super_trylock_shared() may fail consistently due to
* s_umount being grabbed by someone else. Don't use
* requeue_io() to avoid busy retrying the inode/sb.
*/
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 851214d1d013..a0ad7a0c4680 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -162,6 +162,10 @@ EXPORT_SYMBOL(vfs_parse_fs_param);
/**
* vfs_parse_fs_string - Convenience function to just parse a string.
+ * @fc: Filesystem context.
+ * @key: Parameter name.
+ * @value: Default value.
+ * @v_size: Maximum number of bytes in the value.
*/
int vfs_parse_fs_string(struct fs_context *fc, const char *key,
const char *value, size_t v_size)
@@ -189,7 +193,7 @@ EXPORT_SYMBOL(vfs_parse_fs_string);
/**
* generic_parse_monolithic - Parse key[=val][,key[=val]]* mount data
- * @ctx: The superblock configuration to fill in.
+ * @fc: The superblock configuration to fill in.
* @data: The data to parse
*
* Parse a blob of data that's in key[=val][,key[=val]]* form. This can be
@@ -315,10 +319,31 @@ struct fs_context *fs_context_for_reconfigure(struct dentry *dentry,
}
EXPORT_SYMBOL(fs_context_for_reconfigure);
+/**
+ * fs_context_for_submount: allocate a new fs_context for a submount
+ * @type: file_system_type of the new context
+ * @reference: reference dentry from which to copy relevant info
+ *
+ * Allocate a new fs_context suitable for a submount. This also ensures that
+ * the fc->security object is inherited from @reference (if needed).
+ */
struct fs_context *fs_context_for_submount(struct file_system_type *type,
struct dentry *reference)
{
- return alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
+ struct fs_context *fc;
+ int ret;
+
+ fc = alloc_fs_context(type, reference, 0, 0, FS_CONTEXT_FOR_SUBMOUNT);
+ if (IS_ERR(fc))
+ return fc;
+
+ ret = security_fs_context_submount(fc, reference->d_sb);
+ if (ret) {
+ put_fs_context(fc);
+ return ERR_PTR(ret);
+ }
+
+ return fc;
}
EXPORT_SYMBOL(fs_context_for_submount);
@@ -333,7 +358,7 @@ void fc_drop_locked(struct fs_context *fc)
static void legacy_fs_context_free(struct fs_context *fc);
/**
- * vfs_dup_fc_config: Duplicate a filesystem context.
+ * vfs_dup_fs_context - Duplicate a filesystem context.
* @src_fc: The context to copy.
*/
struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
@@ -379,7 +404,9 @@ EXPORT_SYMBOL(vfs_dup_fs_context);
/**
* logfc - Log a message to a filesystem context
- * @fc: The filesystem context to log to.
+ * @log: The filesystem context to log to, or NULL to use printk.
+ * @prefix: A string to prefix the output with, or NULL.
+ * @level: 'w' for a warning, 'e' for an error. Anything else is a notice.
* @fmt: The format of the buffer.
*/
void logfc(struct fc_log *log, const char *prefix, char level, const char *fmt, ...)
@@ -692,6 +719,7 @@ void vfs_clean_context(struct fs_context *fc)
security_free_mnt_opts(&fc->security);
kfree(fc->source);
fc->source = NULL;
+ fc->exclusive = false;
fc->purpose = FS_CONTEXT_FOR_RECONFIGURE;
fc->phase = FS_CONTEXT_AWAITING_RECONF;
diff --git a/fs/fsopen.c b/fs/fsopen.c
index fc9d2d9fd234..ce03f6521c88 100644
--- a/fs/fsopen.c
+++ b/fs/fsopen.c
@@ -209,6 +209,72 @@ err:
return ret;
}
+static int vfs_cmd_create(struct fs_context *fc, bool exclusive)
+{
+ struct super_block *sb;
+ int ret;
+
+ if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
+ return -EBUSY;
+
+ if (!mount_capable(fc))
+ return -EPERM;
+
+ /* require the new mount api */
+ if (exclusive && fc->ops == &legacy_fs_context_ops)
+ return -EOPNOTSUPP;
+
+ fc->phase = FS_CONTEXT_CREATING;
+ fc->exclusive = exclusive;
+
+ ret = vfs_get_tree(fc);
+ if (ret) {
+ fc->phase = FS_CONTEXT_FAILED;
+ return ret;
+ }
+
+ sb = fc->root->d_sb;
+ ret = security_sb_kern_mount(sb);
+ if (unlikely(ret)) {
+ fc_drop_locked(fc);
+ fc->phase = FS_CONTEXT_FAILED;
+ return ret;
+ }
+
+ /* vfs_get_tree() callchains will have grabbed @s_umount */
+ up_write(&sb->s_umount);
+ fc->phase = FS_CONTEXT_AWAITING_MOUNT;
+ return 0;
+}
+
+static int vfs_cmd_reconfigure(struct fs_context *fc)
+{
+ struct super_block *sb;
+ int ret;
+
+ if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
+ return -EBUSY;
+
+ fc->phase = FS_CONTEXT_RECONFIGURING;
+
+ sb = fc->root->d_sb;
+ if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
+ fc->phase = FS_CONTEXT_FAILED;
+ return -EPERM;
+ }
+
+ down_write(&sb->s_umount);
+ ret = reconfigure_super(fc);
+ up_write(&sb->s_umount);
+ if (ret) {
+ fc->phase = FS_CONTEXT_FAILED;
+ return ret;
+ }
+
+ vfs_clean_context(fc);
+ return 0;
+}
+
/*
* Check the state and apply the configuration. Note that this function is
* allowed to 'steal' the value by setting param->xxx to NULL before returning.
@@ -216,7 +282,6 @@ err:
static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
struct fs_parameter *param)
{
- struct super_block *sb;
int ret;
ret = finish_clean_context(fc);
@@ -224,39 +289,11 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
return ret;
switch (cmd) {
case FSCONFIG_CMD_CREATE:
- if (fc->phase != FS_CONTEXT_CREATE_PARAMS)
- return -EBUSY;
- if (!mount_capable(fc))
- return -EPERM;
- fc->phase = FS_CONTEXT_CREATING;
- ret = vfs_get_tree(fc);
- if (ret)
- break;
- sb = fc->root->d_sb;
- ret = security_sb_kern_mount(sb);
- if (unlikely(ret)) {
- fc_drop_locked(fc);
- break;
- }
- up_write(&sb->s_umount);
- fc->phase = FS_CONTEXT_AWAITING_MOUNT;
- return 0;
+ return vfs_cmd_create(fc, false);
+ case FSCONFIG_CMD_CREATE_EXCL:
+ return vfs_cmd_create(fc, true);
case FSCONFIG_CMD_RECONFIGURE:
- if (fc->phase != FS_CONTEXT_RECONF_PARAMS)
- return -EBUSY;
- fc->phase = FS_CONTEXT_RECONFIGURING;
- sb = fc->root->d_sb;
- if (!ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
- ret = -EPERM;
- break;
- }
- down_write(&sb->s_umount);
- ret = reconfigure_super(fc);
- up_write(&sb->s_umount);
- if (ret)
- break;
- vfs_clean_context(fc);
- return 0;
+ return vfs_cmd_reconfigure(fc);
default:
if (fc->phase != FS_CONTEXT_CREATE_PARAMS &&
fc->phase != FS_CONTEXT_RECONF_PARAMS)
@@ -264,8 +301,6 @@ static int vfs_fsconfig_locked(struct fs_context *fc, int cmd,
return vfs_parse_fs_param(fc, param);
}
- fc->phase = FS_CONTEXT_FAILED;
- return ret;
}
/**
@@ -353,6 +388,7 @@ SYSCALL_DEFINE5(fsconfig,
return -EINVAL;
break;
case FSCONFIG_CMD_CREATE:
+ case FSCONFIG_CMD_CREATE_EXCL:
case FSCONFIG_CMD_RECONFIGURE:
if (_key || _value || aux)
return -EINVAL;
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 247ef4f76761..ab62e4624256 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -235,7 +235,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
inode->i_mode = mode;
inode->i_uid = fc->user_id;
inode->i_gid = fc->group_id;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
/* setting ->i_op to NULL is not allowed */
if (iop)
inode->i_op = iop;
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 35bc174f9ba2..881524b9a55a 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -258,7 +258,7 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
spin_unlock(&fi->lock);
}
kfree(forget);
- if (ret == -ENOMEM)
+ if (ret == -ENOMEM || ret == -EINTR)
goto out;
if (ret || fuse_invalid_attr(&outarg.attr) ||
fuse_stale_inode(inode, outarg.generation, &outarg.attr))
@@ -395,8 +395,6 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name
goto out_put_forget;
err = -EIO;
- if (!outarg->nodeid)
- goto out_put_forget;
if (fuse_invalid_attr(&outarg->attr))
goto out_put_forget;
@@ -935,7 +933,7 @@ void fuse_flush_time_update(struct inode *inode)
static void fuse_update_ctime_in_cache(struct inode *inode)
{
if (!IS_NOCMTIME(inode)) {
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty_sync(inode);
fuse_flush_time_update(inode);
}
@@ -1224,7 +1222,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
forget_all_cached_acls(inode);
err = fuse_do_getattr(inode, stat, file);
} else if (stat) {
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->mode = fi->orig_i_mode;
stat->ino = fi->orig_ino;
}
@@ -1717,8 +1715,8 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
inarg.mtimensec = inode->i_mtime.tv_nsec;
if (fm->fc->minor >= 23) {
inarg.valid |= FATTR_CTIME;
- inarg.ctime = inode->i_ctime.tv_sec;
- inarg.ctimensec = inode->i_ctime.tv_nsec;
+ inarg.ctime = inode_get_ctime(inode).tv_sec;
+ inarg.ctimensec = inode_get_ctime(inode).tv_nsec;
}
if (ff) {
inarg.valid |= FATTR_FH;
@@ -1859,7 +1857,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
if (attr->ia_valid & ATTR_MTIME)
inode->i_mtime = attr->ia_mtime;
if (attr->ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
/* FIXME: clear I_DIRTY_SYNC? */
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index d66070af145d..549358ffea8b 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -194,8 +194,7 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
inode->i_mtime.tv_nsec = attr->mtimensec;
}
if (!(cache_mask & STATX_CTIME)) {
- inode->i_ctime.tv_sec = attr->ctime;
- inode->i_ctime.tv_nsec = attr->ctimensec;
+ inode_set_ctime(inode, attr->ctime, attr->ctimensec);
}
if (attr->blksize != 0)
@@ -259,8 +258,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
attr->mtimensec = inode->i_mtime.tv_nsec;
}
if (cache_mask & STATX_CTIME) {
- attr->ctime = inode->i_ctime.tv_sec;
- attr->ctimensec = inode->i_ctime.tv_nsec;
+ attr->ctime = inode_get_ctime(inode).tv_sec;
+ attr->ctimensec = inode_get_ctime(inode).tv_nsec;
}
if ((attr_version != 0 && fi->attr_version > attr_version) ||
@@ -318,8 +317,7 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr,
inode->i_size = attr->size;
inode->i_mtime.tv_sec = attr->mtime;
inode->i_mtime.tv_nsec = attr->mtimensec;
- inode->i_ctime.tv_sec = attr->ctime;
- inode->i_ctime.tv_nsec = attr->ctimensec;
+ inode_set_ctime(inode, attr->ctime, attr->ctimensec);
if (S_ISREG(inode->i_mode)) {
fuse_init_common(inode);
fuse_init_file_inode(inode, attr->flags);
@@ -1134,7 +1132,10 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
process_init_limits(fc, arg);
if (arg->minor >= 6) {
- u64 flags = arg->flags | (u64) arg->flags2 << 32;
+ u64 flags = arg->flags;
+
+ if (flags & FUSE_INIT_EXT)
+ flags |= (u64) arg->flags2 << 32;
ra_pages = arg->max_readahead / PAGE_SIZE;
if (flags & FUSE_ASYNC_READ)
@@ -1254,7 +1255,8 @@ void fuse_send_init(struct fuse_mount *fm)
FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT | FUSE_INIT_EXT |
- FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP;
+ FUSE_SECURITY_CTX | FUSE_CREATE_SUPP_GROUP |
+ FUSE_HAS_EXPIRE_ONLY;
#ifdef CONFIG_FUSE_DAX
if (fm->fc->dax)
flags |= FUSE_MAP_ALIGNMENT;
@@ -1397,16 +1399,18 @@ EXPORT_SYMBOL_GPL(fuse_dev_free);
static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
const struct fuse_inode *fi)
{
+ struct timespec64 ctime = inode_get_ctime(&fi->inode);
+
*attr = (struct fuse_attr){
.ino = fi->inode.i_ino,
.size = fi->inode.i_size,
.blocks = fi->inode.i_blocks,
.atime = fi->inode.i_atime.tv_sec,
.mtime = fi->inode.i_mtime.tv_sec,
- .ctime = fi->inode.i_ctime.tv_sec,
+ .ctime = ctime.tv_sec,
.atimensec = fi->inode.i_atime.tv_nsec,
.mtimensec = fi->inode.i_mtime.tv_nsec,
- .ctimensec = fi->inode.i_ctime.tv_nsec,
+ .ctimensec = ctime.tv_nsec,
.mode = fi->inode.i_mode,
.nlink = fi->inode.i_nlink,
.uid = fi->inode.i_uid.val,
diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
index 8e01bfdfc430..726640fa439e 100644
--- a/fs/fuse/ioctl.c
+++ b/fs/fuse/ioctl.c
@@ -9,14 +9,23 @@
#include <linux/compat.h>
#include <linux/fileattr.h>
-static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args)
+static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args,
+ struct fuse_ioctl_out *outarg)
{
- ssize_t ret = fuse_simple_request(fm, args);
+ ssize_t ret;
+
+ args->out_args[0].size = sizeof(*outarg);
+ args->out_args[0].value = outarg;
+
+ ret = fuse_simple_request(fm, args);
/* Translate ENOSYS, which shouldn't be returned from fs */
if (ret == -ENOSYS)
ret = -ENOTTY;
+ if (ret >= 0 && outarg->result == -ENOSYS)
+ outarg->result = -ENOTTY;
+
return ret;
}
@@ -264,13 +273,11 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
}
ap.args.out_numargs = 2;
- ap.args.out_args[0].size = sizeof(outarg);
- ap.args.out_args[0].value = &outarg;
ap.args.out_args[1].size = out_size;
ap.args.out_pages = true;
ap.args.out_argvar = true;
- transferred = fuse_send_ioctl(fm, &ap.args);
+ transferred = fuse_send_ioctl(fm, &ap.args, &outarg);
err = transferred;
if (transferred < 0)
goto out;
@@ -399,12 +406,10 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
args.in_args[1].size = inarg.in_size;
args.in_args[1].value = ptr;
args.out_numargs = 2;
- args.out_args[0].size = sizeof(outarg);
- args.out_args[0].value = &outarg;
args.out_args[1].size = inarg.out_size;
args.out_args[1].value = ptr;
- err = fuse_send_ioctl(fm, &args);
+ err = fuse_send_ioctl(fm, &args, &outarg);
if (!err) {
if (outarg.result < 0)
err = outarg.result;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index a392aa0f041d..443640e6fb9c 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -142,7 +142,7 @@ int gfs2_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
ret = __gfs2_set_acl(inode, acl, type);
if (!ret && mode != inode->i_mode) {
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode->i_mode = mode;
mark_inode_dirty(inode);
}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ae49256b7c8c..9c4b26aec580 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -747,7 +747,7 @@ static const struct address_space_operations gfs2_aops = {
.writepages = gfs2_writepages,
.read_folio = gfs2_read_folio,
.readahead = gfs2_readahead,
- .dirty_folio = filemap_dirty_folio,
+ .dirty_folio = iomap_dirty_folio,
.release_folio = iomap_release_folio,
.invalidate_folio = iomap_invalidate_folio,
.bmap = gfs2_bmap,
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 8d611fbcf0bd..f62366be7587 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -971,7 +971,7 @@ gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len)
if (status)
return ERR_PTR(status);
- folio = iomap_get_folio(iter, pos);
+ folio = iomap_get_folio(iter, pos, len);
if (IS_ERR(folio))
gfs2_trans_end(sdp);
return folio;
@@ -1386,7 +1386,7 @@ static int trunc_start(struct inode *inode, u64 newsize)
ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG;
i_size_write(inode, newsize);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
gfs2_dinode_out(ip, dibh->b_data);
if (journaled)
@@ -1583,8 +1583,7 @@ out_unlock:
/* Every transaction boundary, we rewrite the dinode
to keep its di_blocks current in case of failure. */
- ip->i_inode.i_mtime = ip->i_inode.i_ctime =
- current_time(&ip->i_inode);
+ ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -1950,7 +1949,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length)
gfs2_statfs_change(sdp, 0, +btotal, 0);
gfs2_quota_change(ip, -(s64)btotal, ip->i_inode.i_uid,
ip->i_inode.i_gid);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
up_write(&ip->i_rw_mutex);
@@ -1993,7 +1992,7 @@ static int trunc_end(struct gfs2_inode *ip)
gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
gfs2_ordered_del_inode(ip);
}
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
ip->i_diskflags &= ~GFS2_DIF_TRUNC_IN_PROG;
gfs2_trans_add_meta(ip->i_gl, dibh);
@@ -2094,7 +2093,7 @@ static int do_grow(struct inode *inode, u64 size)
goto do_end_trans;
truncate_setsize(inode, size);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 54a6d17b8c25..1a2afa88f8be 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -130,7 +130,7 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf,
memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size);
if (ip->i_inode.i_size < offset + size)
i_size_write(&ip->i_inode, offset + size);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
gfs2_dinode_out(ip, dibh->b_data);
brelse(dibh);
@@ -227,7 +227,7 @@ out:
if (ip->i_inode.i_size < offset + copied)
i_size_write(&ip->i_inode, offset + copied);
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ ip->i_inode.i_mtime = inode_set_ctime_current(&ip->i_inode);
gfs2_trans_add_meta(ip->i_gl, dibh);
gfs2_dinode_out(ip, dibh->b_data);
@@ -1814,7 +1814,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
gfs2_inum_out(nip, dent);
dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode));
dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip));
- tv = current_time(&ip->i_inode);
+ tv = inode_set_ctime_current(&ip->i_inode);
if (ip->i_diskflags & GFS2_DIF_EXHASH) {
leaf = (struct gfs2_leaf *)bh->b_data;
be16_add_cpu(&leaf->lf_entries, 1);
@@ -1825,7 +1825,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name,
da->bh = NULL;
brelse(bh);
ip->i_entries++;
- ip->i_inode.i_mtime = ip->i_inode.i_ctime = tv;
+ ip->i_inode.i_mtime = tv;
if (S_ISDIR(nip->i_inode.i_mode))
inc_nlink(&ip->i_inode);
mark_inode_dirty(inode);
@@ -1876,7 +1876,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
const struct qstr *name = &dentry->d_name;
struct gfs2_dirent *dent, *prev = NULL;
struct buffer_head *bh;
- struct timespec64 tv = current_time(&dip->i_inode);
+ struct timespec64 tv;
/* Returns _either_ the entry (if its first in block) or the
previous entry otherwise */
@@ -1896,6 +1896,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
}
dirent_del(dip, bh, prev, dent);
+ tv = inode_set_ctime_current(&dip->i_inode);
if (dip->i_diskflags & GFS2_DIF_EXHASH) {
struct gfs2_leaf *leaf = (struct gfs2_leaf *)bh->b_data;
u16 entries = be16_to_cpu(leaf->lf_entries);
@@ -1910,7 +1911,7 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
if (!dip->i_entries)
gfs2_consist_inode(dip);
dip->i_entries--;
- dip->i_inode.i_mtime = dip->i_inode.i_ctime = tv;
+ dip->i_inode.i_mtime = tv;
if (d_is_dir(dentry))
drop_nlink(&dip->i_inode);
mark_inode_dirty(&dip->i_inode);
@@ -1951,7 +1952,7 @@ int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename,
dent->de_type = cpu_to_be16(new_type);
brelse(bh);
- dip->i_inode.i_mtime = dip->i_inode.i_ctime = current_time(&dip->i_inode);
+ dip->i_inode.i_mtime = inode_set_ctime_current(&dip->i_inode);
mark_inode_dirty_sync(&dip->i_inode);
return 0;
}
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 1bf3c4453516..766186c80682 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -260,7 +260,7 @@ static int do_gfs2_set_flags(struct inode *inode, u32 reqflags, u32 mask)
error = gfs2_meta_inode_buffer(ip, &bh);
if (error)
goto out_trans_end;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
gfs2_trans_add_meta(ip->i_gl, bh);
ip->i_diskflags = new_flags;
gfs2_dinode_out(ip, bh->b_data);
@@ -1578,7 +1578,7 @@ const struct file_operations gfs2_file_fops = {
.fsync = gfs2_fsync,
.lock = gfs2_lock,
.flock = gfs2_flock,
- .splice_read = filemap_splice_read,
+ .splice_read = copy_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = simple_nosetlease,
.fallocate = gfs2_fallocate,
@@ -1609,7 +1609,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.open = gfs2_open,
.release = gfs2_release,
.fsync = gfs2_fsync,
- .splice_read = filemap_splice_read,
+ .splice_read = copy_splice_read,
.splice_write = gfs2_file_splice_write,
.setlease = generic_setlease,
.fallocate = gfs2_fallocate,
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 54319328b16b..aecdac3cfbe1 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -437,8 +437,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
inode->i_atime = atime;
inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
- inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
- inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+ inode_set_ctime(inode, be64_to_cpu(str->di_ctime),
+ be32_to_cpu(str->di_ctime_nsec));
ip->i_goal = be64_to_cpu(str->di_goal_meta);
ip->i_generation = be64_to_cpu(str->di_generation);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 17c994a0c0d0..a21ac41d6669 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -690,7 +690,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
set_nlink(inode, S_ISDIR(mode) ? 2 : 1);
inode->i_rdev = dev;
inode->i_size = size;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
munge_mode_uid_gid(dip, inode);
check_and_update_goal(dip);
ip->i_goal = dip->i_goal;
@@ -1029,7 +1029,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
gfs2_trans_add_meta(ip->i_gl, dibh);
inc_nlink(&ip->i_inode);
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
ihold(inode);
d_instantiate(dentry, inode);
mark_inode_dirty(inode);
@@ -1114,7 +1114,7 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip,
return error;
ip->i_entries = 0;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (S_ISDIR(inode->i_mode))
clear_nlink(inode);
else
@@ -1371,7 +1371,7 @@ static int update_moved_ino(struct gfs2_inode *ip, struct gfs2_inode *ndip,
if (dir_rename)
return gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR);
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
mark_inode_dirty_sync(&ip->i_inode);
return 0;
}
@@ -2071,7 +2071,7 @@ static int gfs2_getattr(struct mnt_idmap *idmap,
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (gfs2_holder_initialized(&gh))
gfs2_glock_dq_uninit(&gh);
@@ -2139,8 +2139,7 @@ loff_t gfs2_seek_hole(struct file *file, loff_t offset)
return vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
}
-static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
- int flags)
+static int gfs2_update_time(struct inode *inode, int flags)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_glock *gl = ip->i_gl;
@@ -2155,7 +2154,8 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
if (error)
return error;
}
- return generic_update_time(inode, time, flags);
+ generic_update_time(inode, flags);
+ return 0;
}
static const struct inode_operations gfs2_file_iops = {
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 704192b73605..aa5fd06d47bc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -871,7 +871,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
size = loc + sizeof(struct gfs2_quota);
if (size > inode->i_size)
i_size_write(inode, size);
- inode->i_mtime = inode->i_atime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
set_bit(QDF_REFRESH, &qd->qd_flags);
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 9f4d5d6549ee..2f701335e8ee 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -412,7 +412,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
- str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
+ str->di_ctime = cpu_to_be64(inode_get_ctime(inode).tv_sec);
str->di_goal_meta = cpu_to_be64(ip->i_goal);
str->di_goal_data = cpu_to_be64(ip->i_goal);
@@ -429,7 +429,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
str->di_eattr = cpu_to_be64(ip->i_eattr);
str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
- str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+ str->di_ctime_nsec = cpu_to_be32(inode_get_ctime(inode).tv_nsec);
}
/**
@@ -689,7 +689,7 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp)
struct super_block *sb = sdp->sd_vfs;
int error;
- error = freeze_super(sb);
+ error = freeze_super(sb, FREEZE_HOLDER_USERSPACE);
if (error)
return error;
@@ -697,7 +697,9 @@ static int gfs2_freeze_locally(struct gfs2_sbd *sdp)
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
GFS2_LFC_FREEZE_GO_SYNC);
if (gfs2_withdrawn(sdp)) {
- thaw_super(sb);
+ error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ if (error)
+ return error;
return -EIO;
}
}
@@ -712,7 +714,7 @@ static int gfs2_do_thaw(struct gfs2_sbd *sdp)
error = gfs2_freeze_lock_shared(sdp);
if (error)
goto fail;
- error = thaw_super(sb);
+ error = thaw_super(sb, FREEZE_HOLDER_USERSPACE);
if (!error)
return 0;
@@ -761,7 +763,7 @@ out:
*
*/
-static int gfs2_freeze_super(struct super_block *sb)
+static int gfs2_freeze_super(struct super_block *sb, enum freeze_holder who)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
@@ -816,7 +818,7 @@ out:
*
*/
-static int gfs2_thaw_super(struct super_block *sb)
+static int gfs2_thaw_super(struct super_block *sb, enum freeze_holder who)
{
struct gfs2_sbd *sdp = sb->s_fs_info;
int error;
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 2dfbe2f188dd..c60bc7f628e1 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -168,10 +168,10 @@ static ssize_t freeze_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
switch (n) {
case 0:
- error = thaw_super(sdp->sd_vfs);
+ error = thaw_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
break;
case 1:
- error = freeze_super(sdp->sd_vfs);
+ error = freeze_super(sdp->sd_vfs, FREEZE_HOLDER_USERSPACE);
break;
default:
return -EINVAL;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index ec1631257978..7e835be7032d 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -230,9 +230,11 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct super_block *sb = sdp->sd_vfs;
struct gfs2_bufdata *bd;
struct gfs2_meta_header *mh;
struct gfs2_trans *tr = current->journal_info;
+ bool withdraw = false;
lock_buffer(bh);
if (buffer_pinned(bh)) {
@@ -266,13 +268,15 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
(unsigned long long)bd->bd_bh->b_blocknr);
BUG();
}
- if (unlikely(test_bit(SDF_FROZEN, &sdp->sd_flags))) {
- fs_info(sdp, "GFS2:adding buf while frozen\n");
- gfs2_assert_withdraw(sdp, 0);
- }
if (unlikely(gfs2_withdrawn(sdp))) {
fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n",
(unsigned long long)bd->bd_bh->b_blocknr);
+ goto out_unlock;
+ }
+ if (unlikely(sb->s_writers.frozen == SB_FREEZE_COMPLETE)) {
+ fs_info(sdp, "GFS2:adding buf while frozen\n");
+ withdraw = true;
+ goto out_unlock;
}
gfs2_pin(sdp, bd->bd_bh);
mh->__pad0 = cpu_to_be64(0);
@@ -281,6 +285,8 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
tr->tr_num_buf_new++;
out_unlock:
gfs2_log_unlock(sdp);
+ if (withdraw)
+ gfs2_assert_withdraw(sdp, 0);
out:
unlock_buffer(bh);
}
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 93b36d026bb4..4fea70c0fe3d 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -311,7 +311,7 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
ea->ea_num_ptrs = 0;
}
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(sdp);
@@ -763,7 +763,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
if (error)
goto out_end_trans;
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
out_end_trans:
@@ -888,7 +888,7 @@ static int ea_set_simple_noalloc(struct gfs2_inode *ip, struct buffer_head *bh,
if (es->es_el)
ea_set_remove_stuffed(ip, es->es_el);
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(GFS2_SB(&ip->i_inode));
@@ -1106,7 +1106,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
ea->ea_type = GFS2_EATYPE_UNUSED;
}
- ip->i_inode.i_ctime = current_time(&ip->i_inode);
+ inode_set_ctime_current(&ip->i_inode);
__mark_inode_dirty(&ip->i_inode, I_DIRTY_DATASYNC);
gfs2_trans_end(GFS2_SB(&ip->i_inode));
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c
index d365bf0b8c77..632c226a3972 100644
--- a/fs/hfs/catalog.c
+++ b/fs/hfs/catalog.c
@@ -133,7 +133,7 @@ int hfs_cat_create(u32 cnid, struct inode *dir, const struct qstr *str, struct i
goto err1;
dir->i_size++;
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
hfs_find_exit(&fd);
return 0;
@@ -269,7 +269,7 @@ int hfs_cat_delete(u32 cnid, struct inode *dir, const struct qstr *str)
}
dir->i_size--;
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
res = 0;
out:
@@ -337,7 +337,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name,
if (err)
goto out;
dst_dir->i_size++;
- dst_dir->i_mtime = dst_dir->i_ctime = current_time(dst_dir);
+ dst_dir->i_mtime = inode_set_ctime_current(dst_dir);
mark_inode_dirty(dst_dir);
/* finally remove the old entry */
@@ -349,7 +349,7 @@ int hfs_cat_move(u32 cnid, struct inode *src_dir, const struct qstr *src_name,
if (err)
goto out;
src_dir->i_size--;
- src_dir->i_mtime = src_dir->i_ctime = current_time(src_dir);
+ src_dir->i_mtime = inode_set_ctime_current(src_dir);
mark_inode_dirty(src_dir);
type = entry.type;
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c
index 3e1e3dcf0b48..b75c26045df4 100644
--- a/fs/hfs/dir.c
+++ b/fs/hfs/dir.c
@@ -263,7 +263,7 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry)
if (res)
return res;
clear_nlink(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
hfs_delete_inode(inode);
mark_inode_dirty(inode);
return 0;
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 441d7fc952e3..ee349b72cfb3 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -200,7 +200,7 @@ struct inode *hfs_new_inode(struct inode *dir, const struct qstr *name, umode_t
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
set_nlink(inode, 1);
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
HFS_I(inode)->flags = 0;
HFS_I(inode)->rsrc_inode = NULL;
HFS_I(inode)->fs_blocks = 0;
@@ -355,8 +355,8 @@ static int hfs_read_inode(struct inode *inode, void *data)
inode->i_mode |= S_IWUGO;
inode->i_mode &= ~hsb->s_file_umask;
inode->i_mode |= S_IFREG;
- inode->i_ctime = inode->i_atime = inode->i_mtime =
- hfs_m_to_utime(rec->file.MdDat);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode,
+ hfs_m_to_utime(rec->file.MdDat));
inode->i_op = &hfs_file_inode_operations;
inode->i_fop = &hfs_file_operations;
inode->i_mapping->a_ops = &hfs_aops;
@@ -366,8 +366,8 @@ static int hfs_read_inode(struct inode *inode, void *data)
inode->i_size = be16_to_cpu(rec->dir.Val) + 2;
HFS_I(inode)->fs_blocks = 0;
inode->i_mode = S_IFDIR | (S_IRWXUGO & ~hsb->s_dir_umask);
- inode->i_ctime = inode->i_atime = inode->i_mtime =
- hfs_m_to_utime(rec->dir.MdDat);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode,
+ hfs_m_to_utime(rec->dir.MdDat));
inode->i_op = &hfs_dir_inode_operations;
inode->i_fop = &hfs_dir_operations;
break;
@@ -654,8 +654,7 @@ int hfs_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
truncate_setsize(inode, attr->ia_size);
hfs_file_truncate(inode);
- inode->i_atime = inode->i_mtime = inode->i_ctime =
- current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
setattr_copy(&nop_mnt_idmap, inode, attr);
diff --git a/fs/hfs/sysdep.c b/fs/hfs/sysdep.c
index 2875961fdc10..dc27d418fbcd 100644
--- a/fs/hfs/sysdep.c
+++ b/fs/hfs/sysdep.c
@@ -28,7 +28,9 @@ static int hfs_revalidate_dentry(struct dentry *dentry, unsigned int flags)
/* fix up inode on a timezone change */
diff = sys_tz.tz_minuteswest * 60 - HFS_I(inode)->tz_secondswest;
if (diff) {
- inode->i_ctime.tv_sec += diff;
+ struct timespec64 ctime = inode_get_ctime(inode);
+
+ inode_set_ctime(inode, ctime.tv_sec + diff, ctime.tv_nsec);
inode->i_atime.tv_sec += diff;
inode->i_mtime.tv_sec += diff;
HFS_I(inode)->tz_secondswest += diff;
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 35472cba750e..e71ae2537eaa 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -312,7 +312,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
dir->i_size++;
if (S_ISDIR(inode->i_mode))
hfsplus_subfolders_inc(dir);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
hfs_find_exit(&fd);
@@ -417,7 +417,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, const struct qstr *str)
dir->i_size--;
if (type == HFSPLUS_FOLDER)
hfsplus_subfolders_dec(dir);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
hfsplus_mark_inode_dirty(dir, HFSPLUS_I_CAT_DIRTY);
if (type == HFSPLUS_FILE || type == HFSPLUS_FOLDER) {
@@ -494,7 +494,7 @@ int hfsplus_rename_cat(u32 cnid,
dst_dir->i_size++;
if (type == HFSPLUS_FOLDER)
hfsplus_subfolders_inc(dst_dir);
- dst_dir->i_mtime = dst_dir->i_ctime = current_time(dst_dir);
+ dst_dir->i_mtime = inode_set_ctime_current(dst_dir);
/* finally remove the old entry */
err = hfsplus_cat_build_key(sb, src_fd.search_key,
@@ -511,7 +511,7 @@ int hfsplus_rename_cat(u32 cnid,
src_dir->i_size--;
if (type == HFSPLUS_FOLDER)
hfsplus_subfolders_dec(src_dir);
- src_dir->i_mtime = src_dir->i_ctime = current_time(src_dir);
+ src_dir->i_mtime = inode_set_ctime_current(src_dir);
/* remove old thread entry */
hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 56fb5f1312e7..f5c4b3e31a1c 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -346,7 +346,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
inc_nlink(inode);
hfsplus_instantiate(dst_dentry, inode, cnid);
ihold(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
sbi->file_count++;
hfsplus_mark_mdb_dirty(dst_dir->i_sb);
@@ -405,7 +405,7 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
hfsplus_delete_inode(inode);
} else
sbi->file_count--;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
out:
mutex_unlock(&sbi->vh_mutex);
@@ -426,7 +426,7 @@ static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry)
if (res)
goto out;
clear_nlink(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
hfsplus_delete_inode(inode);
mark_inode_dirty(inode);
out:
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 7d1a675e037d..c65c8c4b03dd 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -267,7 +267,7 @@ static int hfsplus_setattr(struct mnt_idmap *idmap,
}
truncate_setsize(inode, attr->ia_size);
hfsplus_file_truncate(inode);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
}
setattr_copy(&nop_mnt_idmap, inode, attr);
@@ -298,7 +298,7 @@ int hfsplus_getattr(struct mnt_idmap *idmap, const struct path *path,
stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP;
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
return 0;
}
@@ -392,7 +392,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
inode->i_ino = sbi->next_cnid++;
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
set_nlink(inode, 1);
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
hip = HFSPLUS_I(inode);
INIT_LIST_HEAD(&hip->open_dir_list);
@@ -523,7 +523,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
inode->i_size = 2 + be32_to_cpu(folder->valence);
inode->i_atime = hfsp_mt2ut(folder->access_date);
inode->i_mtime = hfsp_mt2ut(folder->content_mod_date);
- inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date);
+ inode_set_ctime_to_ts(inode,
+ hfsp_mt2ut(folder->attribute_mod_date));
HFSPLUS_I(inode)->create_date = folder->create_date;
HFSPLUS_I(inode)->fs_blocks = 0;
if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) {
@@ -564,7 +565,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd)
}
inode->i_atime = hfsp_mt2ut(file->access_date);
inode->i_mtime = hfsp_mt2ut(file->content_mod_date);
- inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date);
+ inode_set_ctime_to_ts(inode,
+ hfsp_mt2ut(file->attribute_mod_date));
HFSPLUS_I(inode)->create_date = file->create_date;
} else {
pr_err("bad catalog entry used to create inode\n");
@@ -609,7 +611,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
hfsplus_cat_set_perms(inode, &folder->permissions);
folder->access_date = hfsp_ut2mt(inode->i_atime);
folder->content_mod_date = hfsp_ut2mt(inode->i_mtime);
- folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime);
+ folder->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode));
folder->valence = cpu_to_be32(inode->i_size - 2);
if (folder->flags & cpu_to_be16(HFSPLUS_HAS_FOLDER_COUNT)) {
folder->subfolders =
@@ -644,7 +646,7 @@ int hfsplus_cat_write_inode(struct inode *inode)
file->flags &= cpu_to_be16(~HFSPLUS_FILE_LOCKED);
file->access_date = hfsp_ut2mt(inode->i_atime);
file->content_mod_date = hfsp_ut2mt(inode->i_mtime);
- file->attribute_mod_date = hfsp_ut2mt(inode->i_ctime);
+ file->attribute_mod_date = hfsp_ut2mt(inode_get_ctime(inode));
hfs_bnode_write(fd.bnode, &entry, fd.entryoffset,
sizeof(struct hfsplus_cat_file));
}
@@ -700,7 +702,7 @@ int hfsplus_fileattr_set(struct mnt_idmap *idmap,
else
hip->userflags &= ~HFSPLUS_FLG_NODUMP;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
return 0;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 46387090eb76..dc5a5cea5fae 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -517,8 +517,7 @@ static int hostfs_inode_update(struct inode *ino, const struct hostfs_stat *st)
(struct timespec64){ st->atime.tv_sec, st->atime.tv_nsec };
ino->i_mtime =
(struct timespec64){ st->mtime.tv_sec, st->mtime.tv_nsec };
- ino->i_ctime =
- (struct timespec64){ st->ctime.tv_sec, st->ctime.tv_nsec };
+ inode_set_ctime(ino, st->ctime.tv_sec, st->ctime.tv_nsec);
ino->i_size = st->size;
ino->i_blocks = st->blocks;
return 0;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index f32f15669996..f36566d61215 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -277,10 +277,10 @@ struct dentry *hpfs_lookup(struct inode *dir, struct dentry *dentry, unsigned in
* inode.
*/
- if (!result->i_ctime.tv_sec) {
- if (!(result->i_ctime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date))))
- result->i_ctime.tv_sec = 1;
- result->i_ctime.tv_nsec = 0;
+ if (!inode_get_ctime(result).tv_sec) {
+ time64_t csec = local_to_gmt(dir->i_sb, le32_to_cpu(de->creation_date));
+
+ inode_set_ctime(result, csec ? csec : 1, 0);
result->i_mtime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->write_date));
result->i_mtime.tv_nsec = 0;
result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(de->read_date));
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c
index e50e92a42432..479166378bae 100644
--- a/fs/hpfs/inode.c
+++ b/fs/hpfs/inode.c
@@ -36,7 +36,7 @@ void hpfs_init_inode(struct inode *i)
hpfs_inode->i_rddir_off = NULL;
hpfs_inode->i_dirty = 0;
- i->i_ctime.tv_sec = i->i_ctime.tv_nsec = 0;
+ inode_set_ctime(i, 0, 0);
i->i_mtime.tv_sec = i->i_mtime.tv_nsec = 0;
i->i_atime.tv_sec = i->i_atime.tv_nsec = 0;
}
@@ -232,7 +232,7 @@ void hpfs_write_inode_nolock(struct inode *i)
if (de) {
de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
- de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
+ de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, inode_get_ctime(i).tv_sec));
de->read_only = !(i->i_mode & 0222);
de->ea_size = cpu_to_le32(hpfs_inode->i_ea_size);
hpfs_mark_4buffers_dirty(&qbh);
@@ -242,7 +242,7 @@ void hpfs_write_inode_nolock(struct inode *i)
if ((de = map_dirent(i, hpfs_inode->i_dno, "\001\001", 2, NULL, &qbh))) {
de->write_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_mtime.tv_sec));
de->read_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_atime.tv_sec));
- de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, i->i_ctime.tv_sec));
+ de->creation_date = cpu_to_le32(gmt_to_local(i->i_sb, inode_get_ctime(i).tv_sec));
de->read_only = !(i->i_mode & 0222);
de->ea_size = cpu_to_le32(/*hpfs_inode->i_ea_size*/0);
de->file_size = cpu_to_le32(0);
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index 69fb40b2c99a..f4eb8d6f5989 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -13,10 +13,9 @@ static void hpfs_update_directory_times(struct inode *dir)
{
time64_t t = local_to_gmt(dir->i_sb, local_get_seconds(dir->i_sb));
if (t == dir->i_mtime.tv_sec &&
- t == dir->i_ctime.tv_sec)
+ t == inode_get_ctime(dir).tv_sec)
return;
- dir->i_mtime.tv_sec = dir->i_ctime.tv_sec = t;
- dir->i_mtime.tv_nsec = dir->i_ctime.tv_nsec = 0;
+ dir->i_mtime = inode_set_ctime(dir, t, 0);
hpfs_write_inode_nolock(dir);
}
@@ -59,10 +58,8 @@ static int hpfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
result->i_ino = fno;
hpfs_i(result)->i_parent_dir = dir->i_ino;
hpfs_i(result)->i_dno = dno;
- result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
- result->i_ctime.tv_nsec = 0;
- result->i_mtime.tv_nsec = 0;
- result->i_atime.tv_nsec = 0;
+ result->i_mtime = result->i_atime =
+ inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
hpfs_i(result)->i_ea_size = 0;
result->i_mode |= S_IFDIR;
result->i_op = &hpfs_dir_iops;
@@ -167,10 +164,8 @@ static int hpfs_create(struct mnt_idmap *idmap, struct inode *dir,
result->i_fop = &hpfs_file_ops;
set_nlink(result, 1);
hpfs_i(result)->i_parent_dir = dir->i_ino;
- result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
- result->i_ctime.tv_nsec = 0;
- result->i_mtime.tv_nsec = 0;
- result->i_atime.tv_nsec = 0;
+ result->i_mtime = result->i_atime =
+ inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
hpfs_i(result)->i_ea_size = 0;
if (dee.read_only)
result->i_mode &= ~0222;
@@ -250,10 +245,8 @@ static int hpfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
hpfs_init_inode(result);
result->i_ino = fno;
hpfs_i(result)->i_parent_dir = dir->i_ino;
- result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
- result->i_ctime.tv_nsec = 0;
- result->i_mtime.tv_nsec = 0;
- result->i_atime.tv_nsec = 0;
+ result->i_mtime = result->i_atime =
+ inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
hpfs_i(result)->i_ea_size = 0;
result->i_uid = current_fsuid();
result->i_gid = current_fsgid();
@@ -326,10 +319,8 @@ static int hpfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
result->i_ino = fno;
hpfs_init_inode(result);
hpfs_i(result)->i_parent_dir = dir->i_ino;
- result->i_ctime.tv_sec = result->i_mtime.tv_sec = result->i_atime.tv_sec = local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date));
- result->i_ctime.tv_nsec = 0;
- result->i_mtime.tv_nsec = 0;
- result->i_atime.tv_nsec = 0;
+ result->i_mtime = result->i_atime =
+ inode_set_ctime(result, local_to_gmt(dir->i_sb, le32_to_cpu(dee.creation_date)), 0);
hpfs_i(result)->i_ea_size = 0;
result->i_mode = S_IFLNK | 0777;
result->i_uid = current_fsuid();
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 1cb89595b875..758a51564124 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -729,8 +729,9 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
root->i_atime.tv_nsec = 0;
root->i_mtime.tv_sec = local_to_gmt(s, le32_to_cpu(de->write_date));
root->i_mtime.tv_nsec = 0;
- root->i_ctime.tv_sec = local_to_gmt(s, le32_to_cpu(de->creation_date));
- root->i_ctime.tv_nsec = 0;
+ inode_set_ctime(root,
+ local_to_gmt(s, le32_to_cpu(de->creation_date)),
+ 0);
hpfs_i(root)->i_ea_size = le32_to_cpu(de->ea_size);
hpfs_i(root)->i_parent_dir = root->i_ino;
if (root->i_size == -1)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7b17ccfa039d..93d3bcfd4fc8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -887,7 +887,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size)
i_size_write(inode, offset + len);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
out:
inode_unlock(inode);
return error;
@@ -935,7 +935,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb,
inode->i_mode = S_IFDIR | ctx->mode;
inode->i_uid = ctx->uid;
inode->i_gid = ctx->gid;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_op = &hugetlbfs_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
/* directory inodes start off with i_nlink == 2 (for "." entry) */
@@ -979,7 +979,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
&hugetlbfs_i_mmap_rwsem_key);
inode->i_mapping->a_ops = &hugetlbfs_aops;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_mapping->private_data = resv_map;
info->seals = F_SEAL_SEAL;
switch (mode & S_IFMT) {
@@ -1022,7 +1022,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
inode = hugetlbfs_get_inode(dir->i_sb, dir, mode, dev);
if (!inode)
return -ENOSPC;
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
d_instantiate(dentry, inode);
dget(dentry);/* Extra count - pin the dentry in core */
return 0;
@@ -1054,7 +1054,7 @@ static int hugetlbfs_tmpfile(struct mnt_idmap *idmap,
inode = hugetlbfs_get_inode(dir->i_sb, dir, mode | S_IFREG, 0);
if (!inode)
return -ENOSPC;
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
d_tmpfile(file, inode);
return finish_open_simple(file, 0);
}
@@ -1076,7 +1076,7 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap,
} else
iput(inode);
}
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
return error;
}
diff --git a/fs/inode.c b/fs/inode.c
index 8fefb69e1f84..35fd688168c5 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -16,7 +16,6 @@
#include <linux/fsnotify.h>
#include <linux/mount.h>
#include <linux/posix_acl.h>
-#include <linux/prefetch.h>
#include <linux/buffer_head.h> /* for inode_has_buffers */
#include <linux/ratelimit.h>
#include <linux/list_lru.h>
@@ -752,16 +751,11 @@ EXPORT_SYMBOL_GPL(evict_inodes);
/**
* invalidate_inodes - attempt to free all inodes on a superblock
* @sb: superblock to operate on
- * @kill_dirty: flag to guide handling of dirty inodes
*
- * Attempts to free all inodes for a given superblock. If there were any
- * busy inodes return a non-zero value, else zero.
- * If @kill_dirty is set, discard dirty inodes too, otherwise treat
- * them as busy.
+ * Attempts to free all inodes (including dirty inodes) for a given superblock.
*/
-int invalidate_inodes(struct super_block *sb, bool kill_dirty)
+void invalidate_inodes(struct super_block *sb)
{
- int busy = 0;
struct inode *inode, *next;
LIST_HEAD(dispose);
@@ -773,14 +767,8 @@ again:
spin_unlock(&inode->i_lock);
continue;
}
- if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
- spin_unlock(&inode->i_lock);
- busy = 1;
- continue;
- }
if (atomic_read(&inode->i_count)) {
spin_unlock(&inode->i_lock);
- busy = 1;
continue;
}
@@ -798,8 +786,6 @@ again:
spin_unlock(&sb->s_inode_list_lock);
dispose_list(&dispose);
-
- return busy;
}
/*
@@ -1041,8 +1027,6 @@ struct inode *new_inode(struct super_block *sb)
{
struct inode *inode;
- spin_lock_prefetch(&sb->s_inode_list_lock);
-
inode = new_inode_pseudo(sb);
if (inode)
inode_sb_list_add(inode);
@@ -1853,6 +1837,7 @@ EXPORT_SYMBOL(bmap);
static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
struct timespec64 now)
{
+ struct timespec64 ctime;
if (!(mnt->mnt_flags & MNT_RELATIME))
return 1;
@@ -1864,7 +1849,8 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
/*
* Is ctime younger than or equal to atime? If yes, update atime:
*/
- if (timespec64_compare(&inode->i_ctime, &inode->i_atime) >= 0)
+ ctime = inode_get_ctime(inode);
+ if (timespec64_compare(&ctime, &inode->i_atime) >= 0)
return 1;
/*
@@ -1879,29 +1865,76 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
return 0;
}
-int generic_update_time(struct inode *inode, struct timespec64 *time, int flags)
+/**
+ * inode_update_timestamps - update the timestamps on the inode
+ * @inode: inode to be updated
+ * @flags: S_* flags that needed to be updated
+ *
+ * The update_time function is called when an inode's timestamps need to be
+ * updated for a read or write operation. This function handles updating the
+ * actual timestamps. It's up to the caller to ensure that the inode is marked
+ * dirty appropriately.
+ *
+ * In the case where any of S_MTIME, S_CTIME, or S_VERSION need to be updated,
+ * attempt to update all three of them. S_ATIME updates can be handled
+ * independently of the rest.
+ *
+ * Returns a set of S_* flags indicating which values changed.
+ */
+int inode_update_timestamps(struct inode *inode, int flags)
{
- int dirty_flags = 0;
+ int updated = 0;
+ struct timespec64 now;
- if (flags & (S_ATIME | S_CTIME | S_MTIME)) {
- if (flags & S_ATIME)
- inode->i_atime = *time;
- if (flags & S_CTIME)
- inode->i_ctime = *time;
- if (flags & S_MTIME)
- inode->i_mtime = *time;
-
- if (inode->i_sb->s_flags & SB_LAZYTIME)
- dirty_flags |= I_DIRTY_TIME;
- else
- dirty_flags |= I_DIRTY_SYNC;
+ if (flags & (S_MTIME|S_CTIME|S_VERSION)) {
+ struct timespec64 ctime = inode_get_ctime(inode);
+
+ now = inode_set_ctime_current(inode);
+ if (!timespec64_equal(&now, &ctime))
+ updated |= S_CTIME;
+ if (!timespec64_equal(&now, &inode->i_mtime)) {
+ inode->i_mtime = now;
+ updated |= S_MTIME;
+ }
+ if (IS_I_VERSION(inode) && inode_maybe_inc_iversion(inode, updated))
+ updated |= S_VERSION;
+ } else {
+ now = current_time(inode);
}
- if ((flags & S_VERSION) && inode_maybe_inc_iversion(inode, false))
- dirty_flags |= I_DIRTY_SYNC;
+ if (flags & S_ATIME) {
+ if (!timespec64_equal(&now, &inode->i_atime)) {
+ inode->i_atime = now;
+ updated |= S_ATIME;
+ }
+ }
+ return updated;
+}
+EXPORT_SYMBOL(inode_update_timestamps);
+/**
+ * generic_update_time - update the timestamps on the inode
+ * @inode: inode to be updated
+ * @flags: S_* flags that needed to be updated
+ *
+ * The update_time function is called when an inode's timestamps need to be
+ * updated for a read or write operation. In the case where any of S_MTIME, S_CTIME,
+ * or S_VERSION need to be updated we attempt to update all three of them. S_ATIME
+ * updates can be handled done independently of the rest.
+ *
+ * Returns a S_* mask indicating which fields were updated.
+ */
+int generic_update_time(struct inode *inode, int flags)
+{
+ int updated = inode_update_timestamps(inode, flags);
+ int dirty_flags = 0;
+
+ if (updated & (S_ATIME|S_MTIME|S_CTIME))
+ dirty_flags = inode->i_sb->s_flags & SB_LAZYTIME ? I_DIRTY_TIME : I_DIRTY_SYNC;
+ if (updated & S_VERSION)
+ dirty_flags |= I_DIRTY_SYNC;
__mark_inode_dirty(inode, dirty_flags);
- return 0;
+ return updated;
}
EXPORT_SYMBOL(generic_update_time);
@@ -1909,11 +1942,12 @@ EXPORT_SYMBOL(generic_update_time);
* This does the actual work of updating an inodes time or version. Must have
* had called mnt_want_write() before calling this.
*/
-int inode_update_time(struct inode *inode, struct timespec64 *time, int flags)
+int inode_update_time(struct inode *inode, int flags)
{
if (inode->i_op->update_time)
- return inode->i_op->update_time(inode, time, flags);
- return generic_update_time(inode, time, flags);
+ return inode->i_op->update_time(inode, flags);
+ generic_update_time(inode, flags);
+ return 0;
}
EXPORT_SYMBOL(inode_update_time);
@@ -1965,7 +1999,6 @@ void touch_atime(const struct path *path)
{
struct vfsmount *mnt = path->mnt;
struct inode *inode = d_inode(path->dentry);
- struct timespec64 now;
if (!atime_needs_update(path, inode))
return;
@@ -1984,8 +2017,7 @@ void touch_atime(const struct path *path)
* We may also fail on filesystems that have the ability to make parts
* of the fs read only, e.g. subvolumes in Btrfs.
*/
- now = current_time(inode);
- inode_update_time(inode, &now, S_ATIME);
+ inode_update_time(inode, S_ATIME);
__mnt_drop_write(mnt);
skip_update:
sb_end_write(inode->i_sb);
@@ -2070,18 +2102,63 @@ int file_remove_privs(struct file *file)
}
EXPORT_SYMBOL(file_remove_privs);
-static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
+/**
+ * current_mgtime - Return FS time (possibly fine-grained)
+ * @inode: inode.
+ *
+ * Return the current time truncated to the time granularity supported by
+ * the fs, as suitable for a ctime/mtime change. If the ctime is flagged
+ * as having been QUERIED, get a fine-grained timestamp.
+ */
+struct timespec64 current_mgtime(struct inode *inode)
+{
+ struct timespec64 now, ctime;
+ atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec;
+ long nsec = atomic_long_read(pnsec);
+
+ if (nsec & I_CTIME_QUERIED) {
+ ktime_get_real_ts64(&now);
+ return timestamp_truncate(now, inode);
+ }
+
+ ktime_get_coarse_real_ts64(&now);
+ now = timestamp_truncate(now, inode);
+
+ /*
+ * If we've recently fetched a fine-grained timestamp
+ * then the coarse-grained one may still be earlier than the
+ * existing ctime. Just keep the existing value if so.
+ */
+ ctime = inode_get_ctime(inode);
+ if (timespec64_compare(&ctime, &now) > 0)
+ now = ctime;
+
+ return now;
+}
+EXPORT_SYMBOL(current_mgtime);
+
+static struct timespec64 current_ctime(struct inode *inode)
+{
+ if (is_mgtime(inode))
+ return current_mgtime(inode);
+ return current_time(inode);
+}
+
+static int inode_needs_update_time(struct inode *inode)
{
int sync_it = 0;
+ struct timespec64 now = current_ctime(inode);
+ struct timespec64 ctime;
/* First try to exhaust all avenues to not sync */
if (IS_NOCMTIME(inode))
return 0;
- if (!timespec64_equal(&inode->i_mtime, now))
+ if (!timespec64_equal(&inode->i_mtime, &now))
sync_it = S_MTIME;
- if (!timespec64_equal(&inode->i_ctime, now))
+ ctime = inode_get_ctime(inode);
+ if (!timespec64_equal(&ctime, &now))
sync_it |= S_CTIME;
if (IS_I_VERSION(inode) && inode_iversion_need_inc(inode))
@@ -2090,15 +2167,14 @@ static int inode_needs_update_time(struct inode *inode, struct timespec64 *now)
return sync_it;
}
-static int __file_update_time(struct file *file, struct timespec64 *now,
- int sync_mode)
+static int __file_update_time(struct file *file, int sync_mode)
{
int ret = 0;
struct inode *inode = file_inode(file);
/* try to update time settings */
if (!__mnt_want_write_file(file)) {
- ret = inode_update_time(inode, now, sync_mode);
+ ret = inode_update_time(inode, sync_mode);
__mnt_drop_write_file(file);
}
@@ -2123,13 +2199,12 @@ int file_update_time(struct file *file)
{
int ret;
struct inode *inode = file_inode(file);
- struct timespec64 now = current_time(inode);
- ret = inode_needs_update_time(inode, &now);
+ ret = inode_needs_update_time(inode);
if (ret <= 0)
return ret;
- return __file_update_time(file, &now, ret);
+ return __file_update_time(file, ret);
}
EXPORT_SYMBOL(file_update_time);
@@ -2152,7 +2227,6 @@ static int file_modified_flags(struct file *file, int flags)
{
int ret;
struct inode *inode = file_inode(file);
- struct timespec64 now = current_time(inode);
/*
* Clear the security bits if the process is not being run by root.
@@ -2165,13 +2239,13 @@ static int file_modified_flags(struct file *file, int flags)
if (unlikely(file->f_mode & FMODE_NOCMTIME))
return 0;
- ret = inode_needs_update_time(inode, &now);
+ ret = inode_needs_update_time(inode);
if (ret <= 0)
return ret;
if (flags & IOCB_NOWAIT)
return -EAGAIN;
- return __file_update_time(file, &now, ret);
+ return __file_update_time(file, ret);
}
/**
@@ -2491,15 +2565,59 @@ struct timespec64 current_time(struct inode *inode)
struct timespec64 now;
ktime_get_coarse_real_ts64(&now);
+ return timestamp_truncate(now, inode);
+}
+EXPORT_SYMBOL(current_time);
- if (unlikely(!inode->i_sb)) {
- WARN(1, "current_time() called with uninitialized super_block in the inode");
+/**
+ * inode_set_ctime_current - set the ctime to current_time
+ * @inode: inode
+ *
+ * Set the inode->i_ctime to the current value for the inode. Returns
+ * the current value that was assigned to i_ctime.
+ */
+struct timespec64 inode_set_ctime_current(struct inode *inode)
+{
+ struct timespec64 now;
+ struct timespec64 ctime;
+
+ ctime.tv_nsec = READ_ONCE(inode->__i_ctime.tv_nsec);
+ if (!(ctime.tv_nsec & I_CTIME_QUERIED)) {
+ now = current_time(inode);
+
+ /* Just copy it into place if it's not multigrain */
+ if (!is_mgtime(inode)) {
+ inode_set_ctime_to_ts(inode, now);
+ return now;
+ }
+
+ /*
+ * If we've recently updated with a fine-grained timestamp,
+ * then the coarse-grained one may still be earlier than the
+ * existing ctime. Just keep the existing value if so.
+ */
+ ctime.tv_sec = inode->__i_ctime.tv_sec;
+ if (timespec64_compare(&ctime, &now) > 0)
+ return ctime;
+
+ /*
+ * Ctime updates are usually protected by the inode_lock, but
+ * we can still race with someone setting the QUERIED flag.
+ * Try to swap the new nsec value into place. If it's changed
+ * in the interim, then just go with a fine-grained timestamp.
+ */
+ if (cmpxchg(&inode->__i_ctime.tv_nsec, ctime.tv_nsec,
+ now.tv_nsec) != ctime.tv_nsec)
+ goto fine_grained;
+ inode->__i_ctime.tv_sec = now.tv_sec;
return now;
}
-
- return timestamp_truncate(now, inode);
+fine_grained:
+ ktime_get_real_ts64(&now);
+ inode_set_ctime_to_ts(inode, timestamp_truncate(now, inode));
+ return now;
}
-EXPORT_SYMBOL(current_time);
+EXPORT_SYMBOL(inode_set_ctime_current);
/**
* in_group_or_capable - check whether caller is CAP_FSETID privileged
diff --git a/fs/internal.h b/fs/internal.h
index f7a3dc111026..74d3b161dd2c 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -115,7 +115,7 @@ static inline void put_file_access(struct file *file)
* super.c
*/
extern int reconfigure_super(struct fs_context *);
-extern bool trylock_super(struct super_block *sb);
+extern bool super_trylock_shared(struct super_block *sb);
struct super_block *user_get_super(dev_t, bool excl);
void put_super(struct super_block *sb);
extern bool mount_capable(struct fs_context *);
@@ -201,7 +201,7 @@ void lock_two_inodes(struct inode *inode1, struct inode *inode2,
* fs-writeback.c
*/
extern long get_nr_dirty_inodes(void);
-extern int invalidate_inodes(struct super_block *, bool);
+void invalidate_inodes(struct super_block *sb);
/*
* dcache.c
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 5b2481cd4750..f5fd99d6b0d4 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -109,9 +109,6 @@ static int ioctl_fibmap(struct file *filp, int __user *p)
* Returns 0 on success, -errno on error, 1 if this was the last
* extent that will fit in user array.
*/
-#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC)
-#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED)
-#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
u64 phys, u64 len, u32 flags)
{
@@ -127,6 +124,10 @@ int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical,
if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max)
return 1;
+#define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC)
+#define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED)
+#define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE)
+
if (flags & SET_UNKNOWN_FLAGS)
flags |= FIEMAP_EXTENT_UNKNOWN;
if (flags & SET_NO_UNMOUNTED_IO_FLAGS)
@@ -396,8 +397,8 @@ static int ioctl_fsfreeze(struct file *filp)
/* Freeze */
if (sb->s_op->freeze_super)
- return sb->s_op->freeze_super(sb);
- return freeze_super(sb);
+ return sb->s_op->freeze_super(sb, FREEZE_HOLDER_USERSPACE);
+ return freeze_super(sb, FREEZE_HOLDER_USERSPACE);
}
static int ioctl_fsthaw(struct file *filp)
@@ -409,8 +410,8 @@ static int ioctl_fsthaw(struct file *filp)
/* Thaw */
if (sb->s_op->thaw_super)
- return sb->s_op->thaw_super(sb);
- return thaw_super(sb);
+ return sb->s_op->thaw_super(sb, FREEZE_HOLDER_USERSPACE);
+ return thaw_super(sb, FREEZE_HOLDER_USERSPACE);
}
static int ioctl_file_dedupe_range(struct file *file,
@@ -877,6 +878,9 @@ out:
#ifdef CONFIG_COMPAT
/**
* compat_ptr_ioctl - generic implementation of .compat_ioctl file operation
+ * @file: The file to operate on.
+ * @cmd: The ioctl command number.
+ * @arg: The argument to the ioctl.
*
* This is not normally called as a function, but instead set in struct
* file_operations as
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index adb92cdb24b0..283fb96f6609 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -23,65 +23,169 @@
#define IOEND_BATCH_SIZE 4096
+typedef int (*iomap_punch_t)(struct inode *inode, loff_t offset, loff_t length);
/*
- * Structure allocated for each folio when block size < folio size
- * to track sub-folio uptodate status and I/O completions.
+ * Structure allocated for each folio to track per-block uptodate, dirty state
+ * and I/O completions.
*/
-struct iomap_page {
+struct iomap_folio_state {
atomic_t read_bytes_pending;
atomic_t write_bytes_pending;
- spinlock_t uptodate_lock;
- unsigned long uptodate[];
+ spinlock_t state_lock;
+
+ /*
+ * Each block has two bits in this bitmap:
+ * Bits [0..blocks_per_folio) has the uptodate status.
+ * Bits [b_p_f...(2*b_p_f)) has the dirty status.
+ */
+ unsigned long state[];
};
-static inline struct iomap_page *to_iomap_page(struct folio *folio)
+static struct bio_set iomap_ioend_bioset;
+
+static inline bool ifs_is_fully_uptodate(struct folio *folio,
+ struct iomap_folio_state *ifs)
{
- if (folio_test_private(folio))
- return folio_get_private(folio);
- return NULL;
+ struct inode *inode = folio->mapping->host;
+
+ return bitmap_full(ifs->state, i_blocks_per_folio(inode, folio));
}
-static struct bio_set iomap_ioend_bioset;
+static inline bool ifs_block_is_uptodate(struct iomap_folio_state *ifs,
+ unsigned int block)
+{
+ return test_bit(block, ifs->state);
+}
+
+static void ifs_set_range_uptodate(struct folio *folio,
+ struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned int first_blk = off >> inode->i_blkbits;
+ unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+ unsigned int nr_blks = last_blk - first_blk + 1;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ bitmap_set(ifs->state, first_blk, nr_blks);
+ if (ifs_is_fully_uptodate(folio, ifs))
+ folio_mark_uptodate(folio);
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_set_range_uptodate(struct folio *folio, size_t off,
+ size_t len)
+{
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (ifs)
+ ifs_set_range_uptodate(folio, ifs, off, len);
+ else
+ folio_mark_uptodate(folio);
+}
+
+static inline bool ifs_block_is_dirty(struct folio *folio,
+ struct iomap_folio_state *ifs, int block)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+
+ return test_bit(block + blks_per_folio, ifs->state);
+}
+
+static void ifs_clear_range_dirty(struct folio *folio,
+ struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+ unsigned int first_blk = (off >> inode->i_blkbits);
+ unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+ unsigned int nr_blks = last_blk - first_blk + 1;
+ unsigned long flags;
-static struct iomap_page *
-iomap_page_create(struct inode *inode, struct folio *folio, unsigned int flags)
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ bitmap_clear(ifs->state, first_blk + blks_per_folio, nr_blks);
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_clear_range_dirty(struct folio *folio, size_t off, size_t len)
{
- struct iomap_page *iop = to_iomap_page(folio);
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (ifs)
+ ifs_clear_range_dirty(folio, ifs, off, len);
+}
+
+static void ifs_set_range_dirty(struct folio *folio,
+ struct iomap_folio_state *ifs, size_t off, size_t len)
+{
+ struct inode *inode = folio->mapping->host;
+ unsigned int blks_per_folio = i_blocks_per_folio(inode, folio);
+ unsigned int first_blk = (off >> inode->i_blkbits);
+ unsigned int last_blk = (off + len - 1) >> inode->i_blkbits;
+ unsigned int nr_blks = last_blk - first_blk + 1;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ifs->state_lock, flags);
+ bitmap_set(ifs->state, first_blk + blks_per_folio, nr_blks);
+ spin_unlock_irqrestore(&ifs->state_lock, flags);
+}
+
+static void iomap_set_range_dirty(struct folio *folio, size_t off, size_t len)
+{
+ struct iomap_folio_state *ifs = folio->private;
+
+ if (ifs)
+ ifs_set_range_dirty(folio, ifs, off, len);
+}
+
+static struct iomap_folio_state *ifs_alloc(struct inode *inode,
+ struct folio *folio, unsigned int flags)
+{
+ struct iomap_folio_state *ifs = folio->private;
unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
gfp_t gfp;
- if (iop || nr_blocks <= 1)
- return iop;
+ if (ifs || nr_blocks <= 1)
+ return ifs;
if (flags & IOMAP_NOWAIT)
gfp = GFP_NOWAIT;
else
gfp = GFP_NOFS | __GFP_NOFAIL;
- iop = kzalloc(struct_size(iop, uptodate, BITS_TO_LONGS(nr_blocks)),
- gfp);
- if (iop) {
- spin_lock_init(&iop->uptodate_lock);
- if (folio_test_uptodate(folio))
- bitmap_fill(iop->uptodate, nr_blocks);
- folio_attach_private(folio, iop);
- }
- return iop;
+ /*
+ * ifs->state tracks two sets of state flags when the
+ * filesystem block size is smaller than the folio size.
+ * The first state tracks per-block uptodate and the
+ * second tracks per-block dirty state.
+ */
+ ifs = kzalloc(struct_size(ifs, state,
+ BITS_TO_LONGS(2 * nr_blocks)), gfp);
+ if (!ifs)
+ return ifs;
+
+ spin_lock_init(&ifs->state_lock);
+ if (folio_test_uptodate(folio))
+ bitmap_set(ifs->state, 0, nr_blocks);
+ if (folio_test_dirty(folio))
+ bitmap_set(ifs->state, nr_blocks, nr_blocks);
+ folio_attach_private(folio, ifs);
+
+ return ifs;
}
-static void iomap_page_release(struct folio *folio)
+static void ifs_free(struct folio *folio)
{
- struct iomap_page *iop = folio_detach_private(folio);
- struct inode *inode = folio->mapping->host;
- unsigned int nr_blocks = i_blocks_per_folio(inode, folio);
+ struct iomap_folio_state *ifs = folio_detach_private(folio);
- if (!iop)
+ if (!ifs)
return;
- WARN_ON_ONCE(atomic_read(&iop->read_bytes_pending));
- WARN_ON_ONCE(atomic_read(&iop->write_bytes_pending));
- WARN_ON_ONCE(bitmap_full(iop->uptodate, nr_blocks) !=
+ WARN_ON_ONCE(atomic_read(&ifs->read_bytes_pending));
+ WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending));
+ WARN_ON_ONCE(ifs_is_fully_uptodate(folio, ifs) !=
folio_test_uptodate(folio));
- kfree(iop);
+ kfree(ifs);
}
/*
@@ -90,7 +194,7 @@ static void iomap_page_release(struct folio *folio)
static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
loff_t *pos, loff_t length, size_t *offp, size_t *lenp)
{
- struct iomap_page *iop = to_iomap_page(folio);
+ struct iomap_folio_state *ifs = folio->private;
loff_t orig_pos = *pos;
loff_t isize = i_size_read(inode);
unsigned block_bits = inode->i_blkbits;
@@ -105,12 +209,12 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
* per-block uptodate status and adjust the offset and length if needed
* to avoid reading in already uptodate ranges.
*/
- if (iop) {
+ if (ifs) {
unsigned int i;
/* move forward for each leading block marked uptodate */
for (i = first; i <= last; i++) {
- if (!test_bit(i, iop->uptodate))
+ if (!ifs_block_is_uptodate(ifs, i))
break;
*pos += block_size;
poff += block_size;
@@ -120,7 +224,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
/* truncate len if we find any trailing uptodate block(s) */
for ( ; i <= last; i++) {
- if (test_bit(i, iop->uptodate)) {
+ if (ifs_block_is_uptodate(ifs, i)) {
plen -= (last - i + 1) * block_size;
last = i - 1;
break;
@@ -144,43 +248,19 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio,
*lenp = plen;
}
-static void iomap_iop_set_range_uptodate(struct folio *folio,
- struct iomap_page *iop, size_t off, size_t len)
-{
- struct inode *inode = folio->mapping->host;
- unsigned first = off >> inode->i_blkbits;
- unsigned last = (off + len - 1) >> inode->i_blkbits;
- unsigned long flags;
-
- spin_lock_irqsave(&iop->uptodate_lock, flags);
- bitmap_set(iop->uptodate, first, last - first + 1);
- if (bitmap_full(iop->uptodate, i_blocks_per_folio(inode, folio)))
- folio_mark_uptodate(folio);
- spin_unlock_irqrestore(&iop->uptodate_lock, flags);
-}
-
-static void iomap_set_range_uptodate(struct folio *folio,
- struct iomap_page *iop, size_t off, size_t len)
-{
- if (iop)
- iomap_iop_set_range_uptodate(folio, iop, off, len);
- else
- folio_mark_uptodate(folio);
-}
-
static void iomap_finish_folio_read(struct folio *folio, size_t offset,
size_t len, int error)
{
- struct iomap_page *iop = to_iomap_page(folio);
+ struct iomap_folio_state *ifs = folio->private;
if (unlikely(error)) {
folio_clear_uptodate(folio);
folio_set_error(folio);
} else {
- iomap_set_range_uptodate(folio, iop, offset, len);
+ iomap_set_range_uptodate(folio, offset, len);
}
- if (!iop || atomic_sub_and_test(len, &iop->read_bytes_pending))
+ if (!ifs || atomic_sub_and_test(len, &ifs->read_bytes_pending))
folio_unlock(folio);
}
@@ -213,7 +293,6 @@ struct iomap_readpage_ctx {
static int iomap_read_inline_data(const struct iomap_iter *iter,
struct folio *folio)
{
- struct iomap_page *iop;
const struct iomap *iomap = iomap_iter_srcmap(iter);
size_t size = i_size_read(iter->inode) - iomap->offset;
size_t poff = offset_in_page(iomap->offset);
@@ -231,15 +310,13 @@ static int iomap_read_inline_data(const struct iomap_iter *iter,
if (WARN_ON_ONCE(size > iomap->length))
return -EIO;
if (offset > 0)
- iop = iomap_page_create(iter->inode, folio, iter->flags);
- else
- iop = to_iomap_page(folio);
+ ifs_alloc(iter->inode, folio, iter->flags);
addr = kmap_local_folio(folio, offset);
memcpy(addr, iomap->inline_data, size);
memset(addr + size, 0, PAGE_SIZE - poff - size);
kunmap_local(addr);
- iomap_set_range_uptodate(folio, iop, offset, PAGE_SIZE - poff);
+ iomap_set_range_uptodate(folio, offset, PAGE_SIZE - poff);
return 0;
}
@@ -260,7 +337,7 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
loff_t pos = iter->pos + offset;
loff_t length = iomap_length(iter) - offset;
struct folio *folio = ctx->cur_folio;
- struct iomap_page *iop;
+ struct iomap_folio_state *ifs;
loff_t orig_pos = pos;
size_t poff, plen;
sector_t sector;
@@ -269,20 +346,20 @@ static loff_t iomap_readpage_iter(const struct iomap_iter *iter,
return iomap_read_inline_data(iter, folio);
/* zero post-eof blocks as the page may be mapped */
- iop = iomap_page_create(iter->inode, folio, iter->flags);
+ ifs = ifs_alloc(iter->inode, folio, iter->flags);
iomap_adjust_read_range(iter->inode, folio, &pos, length, &poff, &plen);
if (plen == 0)
goto done;
if (iomap_block_needs_zeroing(iter, pos)) {
folio_zero_range(folio, poff, plen);
- iomap_set_range_uptodate(folio, iop, poff, plen);
+ iomap_set_range_uptodate(folio, poff, plen);
goto done;
}
ctx->cur_folio_in_bio = true;
- if (iop)
- atomic_add(plen, &iop->read_bytes_pending);
+ if (ifs)
+ atomic_add(plen, &ifs->read_bytes_pending);
sector = iomap_sector(iomap, pos);
if (!ctx->bio ||
@@ -436,11 +513,11 @@ EXPORT_SYMBOL_GPL(iomap_readahead);
*/
bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
{
- struct iomap_page *iop = to_iomap_page(folio);
+ struct iomap_folio_state *ifs = folio->private;
struct inode *inode = folio->mapping->host;
unsigned first, last, i;
- if (!iop)
+ if (!ifs)
return false;
/* Caller's range may extend past the end of this folio */
@@ -451,7 +528,7 @@ bool iomap_is_partially_uptodate(struct folio *folio, size_t from, size_t count)
last = (from + count - 1) >> inode->i_blkbits;
for (i = first; i <= last; i++)
- if (!test_bit(i, iop->uptodate))
+ if (!ifs_block_is_uptodate(ifs, i))
return false;
return true;
}
@@ -461,16 +538,18 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate);
* iomap_get_folio - get a folio reference for writing
* @iter: iteration structure
* @pos: start offset of write
+ * @len: Suggested size of folio to create.
*
* Returns a locked reference to the folio at @pos, or an error pointer if the
* folio could not be obtained.
*/
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos)
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len)
{
- unsigned fgp = FGP_WRITEBEGIN | FGP_NOFS;
+ fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS;
if (iter->flags & IOMAP_NOWAIT)
fgp |= FGP_NOWAIT;
+ fgp |= fgf_set_order(len);
return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT,
fgp, mapping_gfp_mask(iter->inode->i_mapping));
@@ -483,14 +562,13 @@ bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags)
folio_size(folio));
/*
- * mm accommodates an old ext3 case where clean folios might
- * not have had the dirty bit cleared. Thus, it can send actual
- * dirty folios to ->release_folio() via shrink_active_list();
- * skip those here.
+ * If the folio is dirty, we refuse to release our metadata because
+ * it may be partially dirty. Once we track per-block dirty state,
+ * we can release the metadata if every block is dirty.
*/
- if (folio_test_dirty(folio) || folio_test_writeback(folio))
+ if (folio_test_dirty(folio))
return false;
- iomap_page_release(folio);
+ ifs_free(folio);
return true;
}
EXPORT_SYMBOL_GPL(iomap_release_folio);
@@ -507,16 +585,22 @@ void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len)
if (offset == 0 && len == folio_size(folio)) {
WARN_ON_ONCE(folio_test_writeback(folio));
folio_cancel_dirty(folio);
- iomap_page_release(folio);
- } else if (folio_test_large(folio)) {
- /* Must release the iop so the page can be split */
- WARN_ON_ONCE(!folio_test_uptodate(folio) &&
- folio_test_dirty(folio));
- iomap_page_release(folio);
+ ifs_free(folio);
}
}
EXPORT_SYMBOL_GPL(iomap_invalidate_folio);
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio)
+{
+ struct inode *inode = mapping->host;
+ size_t len = folio_size(folio);
+
+ ifs_alloc(inode, folio, 0);
+ iomap_set_range_dirty(folio, 0, len);
+ return filemap_dirty_folio(mapping, folio);
+}
+EXPORT_SYMBOL_GPL(iomap_dirty_folio);
+
static void
iomap_write_failed(struct inode *inode, loff_t pos, unsigned len)
{
@@ -547,7 +631,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
size_t len, struct folio *folio)
{
const struct iomap *srcmap = iomap_iter_srcmap(iter);
- struct iomap_page *iop;
+ struct iomap_folio_state *ifs;
loff_t block_size = i_blocksize(iter->inode);
loff_t block_start = round_down(pos, block_size);
loff_t block_end = round_up(pos + len, block_size);
@@ -555,14 +639,23 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
size_t from = offset_in_folio(folio, pos), to = from + len;
size_t poff, plen;
- if (folio_test_uptodate(folio))
+ /*
+ * If the write completely overlaps the current folio, then
+ * entire folio will be dirtied so there is no need for
+ * per-block state tracking structures to be attached to this folio.
+ */
+ if (pos <= folio_pos(folio) &&
+ pos + len >= folio_pos(folio) + folio_size(folio))
return 0;
- folio_clear_error(folio);
- iop = iomap_page_create(iter->inode, folio, iter->flags);
- if ((iter->flags & IOMAP_NOWAIT) && !iop && nr_blocks > 1)
+ ifs = ifs_alloc(iter->inode, folio, iter->flags);
+ if ((iter->flags & IOMAP_NOWAIT) && !ifs && nr_blocks > 1)
return -EAGAIN;
+ if (folio_test_uptodate(folio))
+ return 0;
+ folio_clear_error(folio);
+
do {
iomap_adjust_read_range(iter->inode, folio, &block_start,
block_end - block_start, &poff, &plen);
@@ -589,7 +682,7 @@ static int __iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
if (status)
return status;
}
- iomap_set_range_uptodate(folio, iop, poff, plen);
+ iomap_set_range_uptodate(folio, poff, plen);
} while ((block_start += plen) < block_end);
return 0;
@@ -603,7 +696,7 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos,
if (folio_ops && folio_ops->get_folio)
return folio_ops->get_folio(iter, pos, len);
else
- return iomap_get_folio(iter, pos);
+ return iomap_get_folio(iter, pos, len);
}
static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret,
@@ -696,7 +789,6 @@ out_unlock:
static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
size_t copied, struct folio *folio)
{
- struct iomap_page *iop = to_iomap_page(folio);
flush_dcache_folio(folio);
/*
@@ -712,7 +804,8 @@ static size_t __iomap_write_end(struct inode *inode, loff_t pos, size_t len,
*/
if (unlikely(copied < len && !folio_test_uptodate(folio)))
return 0;
- iomap_set_range_uptodate(folio, iop, offset_in_folio(folio, pos), len);
+ iomap_set_range_uptodate(folio, offset_in_folio(folio, pos), len);
+ iomap_set_range_dirty(folio, offset_in_folio(folio, pos), copied);
filemap_dirty_folio(inode->i_mapping, folio);
return copied;
}
@@ -773,6 +866,7 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
{
loff_t length = iomap_length(iter);
+ size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
loff_t pos = iter->pos;
ssize_t written = 0;
long status = 0;
@@ -781,15 +875,12 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
do {
struct folio *folio;
- struct page *page;
- unsigned long offset; /* Offset into pagecache page */
- unsigned long bytes; /* Bytes to write to page */
+ size_t offset; /* Offset into folio */
+ size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
- offset = offset_in_page(pos);
- bytes = min_t(unsigned long, PAGE_SIZE - offset,
- iov_iter_count(i));
-again:
+ offset = pos & (chunk - 1);
+ bytes = min(chunk - offset, iov_iter_count(i));
status = balance_dirty_pages_ratelimited_flags(mapping,
bdp_flags);
if (unlikely(status))
@@ -819,12 +910,14 @@ again:
if (iter->iomap.flags & IOMAP_F_STALE)
break;
- page = folio_file_page(folio, pos >> PAGE_SHIFT);
- if (mapping_writably_mapped(mapping))
- flush_dcache_page(page);
+ offset = offset_in_folio(folio, pos);
+ if (bytes > folio_size(folio) - offset)
+ bytes = folio_size(folio) - offset;
- copied = copy_page_from_iter_atomic(page, offset, bytes, i);
+ if (mapping_writably_mapped(mapping))
+ flush_dcache_folio(folio);
+ copied = copy_folio_from_iter_atomic(folio, offset, bytes, i);
status = iomap_write_end(iter, pos, bytes, copied, folio);
if (unlikely(copied != status))
@@ -840,11 +933,13 @@ again:
*/
if (copied)
bytes = copied;
- goto again;
+ if (chunk > PAGE_SIZE)
+ chunk /= 2;
+ } else {
+ pos += status;
+ written += status;
+ length -= status;
}
- pos += status;
- written += status;
- length -= status;
} while (iov_iter_count(i) && length);
if (status == -EAGAIN) {
@@ -872,14 +967,84 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
while ((ret = iomap_iter(&iter, ops)) > 0)
iter.processed = iomap_write_iter(&iter, i);
- if (unlikely(ret < 0))
+ if (unlikely(iter.pos == iocb->ki_pos))
return ret;
ret = iter.pos - iocb->ki_pos;
- iocb->ki_pos += ret;
+ iocb->ki_pos = iter.pos;
return ret;
}
EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
+static int iomap_write_delalloc_ifs_punch(struct inode *inode,
+ struct folio *folio, loff_t start_byte, loff_t end_byte,
+ iomap_punch_t punch)
+{
+ unsigned int first_blk, last_blk, i;
+ loff_t last_byte;
+ u8 blkbits = inode->i_blkbits;
+ struct iomap_folio_state *ifs;
+ int ret = 0;
+
+ /*
+ * When we have per-block dirty tracking, there can be
+ * blocks within a folio which are marked uptodate
+ * but not dirty. In that case it is necessary to punch
+ * out such blocks to avoid leaking any delalloc blocks.
+ */
+ ifs = folio->private;
+ if (!ifs)
+ return ret;
+
+ last_byte = min_t(loff_t, end_byte - 1,
+ folio_pos(folio) + folio_size(folio) - 1);
+ first_blk = offset_in_folio(folio, start_byte) >> blkbits;
+ last_blk = offset_in_folio(folio, last_byte) >> blkbits;
+ for (i = first_blk; i <= last_blk; i++) {
+ if (!ifs_block_is_dirty(folio, ifs, i)) {
+ ret = punch(inode, folio_pos(folio) + (i << blkbits),
+ 1 << blkbits);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+
+static int iomap_write_delalloc_punch(struct inode *inode, struct folio *folio,
+ loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
+ iomap_punch_t punch)
+{
+ int ret = 0;
+
+ if (!folio_test_dirty(folio))
+ return ret;
+
+ /* if dirty, punch up to offset */
+ if (start_byte > *punch_start_byte) {
+ ret = punch(inode, *punch_start_byte,
+ start_byte - *punch_start_byte);
+ if (ret)
+ return ret;
+ }
+
+ /* Punch non-dirty blocks within folio */
+ ret = iomap_write_delalloc_ifs_punch(inode, folio, start_byte,
+ end_byte, punch);
+ if (ret)
+ return ret;
+
+ /*
+ * Make sure the next punch start is correctly bound to
+ * the end of this data range, not the end of the folio.
+ */
+ *punch_start_byte = min_t(loff_t, end_byte,
+ folio_pos(folio) + folio_size(folio));
+
+ return ret;
+}
+
/*
* Scan the data range passed to us for dirty page cache folios. If we find a
* dirty folio, punch out the preceeding range and update the offset from which
@@ -899,10 +1064,11 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
*/
static int iomap_write_delalloc_scan(struct inode *inode,
loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
- int (*punch)(struct inode *inode, loff_t offset, loff_t length))
+ iomap_punch_t punch)
{
while (start_byte < end_byte) {
struct folio *folio;
+ int ret;
/* grab locked page */
folio = filemap_lock_folio(inode->i_mapping,
@@ -913,26 +1079,12 @@ static int iomap_write_delalloc_scan(struct inode *inode,
continue;
}
- /* if dirty, punch up to offset */
- if (folio_test_dirty(folio)) {
- if (start_byte > *punch_start_byte) {
- int error;
-
- error = punch(inode, *punch_start_byte,
- start_byte - *punch_start_byte);
- if (error) {
- folio_unlock(folio);
- folio_put(folio);
- return error;
- }
- }
-
- /*
- * Make sure the next punch start is correctly bound to
- * the end of this data range, not the end of the folio.
- */
- *punch_start_byte = min_t(loff_t, end_byte,
- folio_next_index(folio) << PAGE_SHIFT);
+ ret = iomap_write_delalloc_punch(inode, folio, punch_start_byte,
+ start_byte, end_byte, punch);
+ if (ret) {
+ folio_unlock(folio);
+ folio_put(folio);
+ return ret;
}
/* move offset to start of next folio in range */
@@ -977,8 +1129,7 @@ static int iomap_write_delalloc_scan(struct inode *inode,
* the code to subtle off-by-one bugs....
*/
static int iomap_write_delalloc_release(struct inode *inode,
- loff_t start_byte, loff_t end_byte,
- int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+ loff_t start_byte, loff_t end_byte, iomap_punch_t punch)
{
loff_t punch_start_byte = start_byte;
loff_t scan_end_byte = min(i_size_read(inode), end_byte);
@@ -1071,8 +1222,7 @@ out_unlock:
*/
int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
struct iomap *iomap, loff_t pos, loff_t length,
- ssize_t written,
- int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+ ssize_t written, iomap_punch_t punch)
{
loff_t start_byte;
loff_t end_byte;
@@ -1293,17 +1443,17 @@ EXPORT_SYMBOL_GPL(iomap_page_mkwrite);
static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
size_t len, int error)
{
- struct iomap_page *iop = to_iomap_page(folio);
+ struct iomap_folio_state *ifs = folio->private;
if (error) {
folio_set_error(folio);
mapping_set_error(inode->i_mapping, error);
}
- WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !iop);
- WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) <= 0);
+ WARN_ON_ONCE(i_blocks_per_folio(inode, folio) > 1 && !ifs);
+ WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) <= 0);
- if (!iop || atomic_sub_and_test(len, &iop->write_bytes_pending))
+ if (!ifs || atomic_sub_and_test(len, &ifs->write_bytes_pending))
folio_end_writeback(folio);
}
@@ -1570,7 +1720,7 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
*/
static void
iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio,
- struct iomap_page *iop, struct iomap_writepage_ctx *wpc,
+ struct iomap_folio_state *ifs, struct iomap_writepage_ctx *wpc,
struct writeback_control *wbc, struct list_head *iolist)
{
sector_t sector = iomap_sector(&wpc->iomap, pos);
@@ -1588,8 +1738,8 @@ iomap_add_to_ioend(struct inode *inode, loff_t pos, struct folio *folio,
bio_add_folio_nofail(wpc->ioend->io_bio, folio, len, poff);
}
- if (iop)
- atomic_add(len, &iop->write_bytes_pending);
+ if (ifs)
+ atomic_add(len, &ifs->write_bytes_pending);
wpc->ioend->io_size += len;
wbc_account_cgroup_owner(wbc, &folio->page, len);
}
@@ -1615,7 +1765,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
struct writeback_control *wbc, struct inode *inode,
struct folio *folio, u64 end_pos)
{
- struct iomap_page *iop = iomap_page_create(inode, folio, 0);
+ struct iomap_folio_state *ifs = folio->private;
struct iomap_ioend *ioend, *next;
unsigned len = i_blocksize(inode);
unsigned nblocks = i_blocks_per_folio(inode, folio);
@@ -1623,7 +1773,14 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
int error = 0, count = 0, i;
LIST_HEAD(submit_list);
- WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
+ WARN_ON_ONCE(end_pos <= pos);
+
+ if (!ifs && nblocks > 1) {
+ ifs = ifs_alloc(inode, folio, 0);
+ iomap_set_range_dirty(folio, 0, end_pos - pos);
+ }
+
+ WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
/*
* Walk through the folio to find areas to write back. If we
@@ -1631,7 +1788,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
* invalid, grab a new one.
*/
for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
- if (iop && !test_bit(i, iop->uptodate))
+ if (ifs && !ifs_block_is_dirty(folio, ifs, i))
continue;
error = wpc->ops->map_blocks(wpc, inode, pos);
@@ -1642,7 +1799,7 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
continue;
if (wpc->iomap.type == IOMAP_HOLE)
continue;
- iomap_add_to_ioend(inode, pos, folio, iop, wpc, wbc,
+ iomap_add_to_ioend(inode, pos, folio, ifs, wpc, wbc,
&submit_list);
count++;
}
@@ -1675,6 +1832,12 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
}
}
+ /*
+ * We can have dirty bits set past end of file in page_mkwrite path
+ * while mapping the last partial folio. Hence it's better to clear
+ * all the dirty bits in the folio here.
+ */
+ iomap_clear_range_dirty(folio, 0, folio_size(folio));
folio_start_writeback(folio);
folio_unlock(folio);
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index ea3b868c8355..bcd3f8cf5ea4 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -20,10 +20,12 @@
* Private flags for iomap_dio, must not overlap with the public ones in
* iomap.h:
*/
-#define IOMAP_DIO_WRITE_FUA (1 << 28)
-#define IOMAP_DIO_NEED_SYNC (1 << 29)
-#define IOMAP_DIO_WRITE (1 << 30)
-#define IOMAP_DIO_DIRTY (1 << 31)
+#define IOMAP_DIO_CALLER_COMP (1U << 26)
+#define IOMAP_DIO_INLINE_COMP (1U << 27)
+#define IOMAP_DIO_WRITE_THROUGH (1U << 28)
+#define IOMAP_DIO_NEED_SYNC (1U << 29)
+#define IOMAP_DIO_WRITE (1U << 30)
+#define IOMAP_DIO_DIRTY (1U << 31)
struct iomap_dio {
struct kiocb *iocb;
@@ -41,7 +43,6 @@ struct iomap_dio {
struct {
struct iov_iter *iter;
struct task_struct *waiter;
- struct bio *poll_bio;
} submit;
/* used for aio completion: */
@@ -63,12 +64,14 @@ static struct bio *iomap_dio_alloc_bio(const struct iomap_iter *iter,
static void iomap_dio_submit_bio(const struct iomap_iter *iter,
struct iomap_dio *dio, struct bio *bio, loff_t pos)
{
+ struct kiocb *iocb = dio->iocb;
+
atomic_inc(&dio->ref);
/* Sync dio can't be polled reliably */
- if ((dio->iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(dio->iocb)) {
- bio_set_polled(bio, dio->iocb);
- dio->submit.poll_bio = bio;
+ if ((iocb->ki_flags & IOCB_HIPRI) && !is_sync_kiocb(iocb)) {
+ bio_set_polled(bio, iocb);
+ WRITE_ONCE(iocb->private, bio);
}
if (dio->dops && dio->dops->submit_io)
@@ -130,6 +133,11 @@ ssize_t iomap_dio_complete(struct iomap_dio *dio)
}
EXPORT_SYMBOL_GPL(iomap_dio_complete);
+static ssize_t iomap_dio_deferred_complete(void *data)
+{
+ return iomap_dio_complete(data);
+}
+
static void iomap_dio_complete_work(struct work_struct *work)
{
struct iomap_dio *dio = container_of(work, struct iomap_dio, aio.work);
@@ -152,27 +160,69 @@ void iomap_dio_bio_end_io(struct bio *bio)
{
struct iomap_dio *dio = bio->bi_private;
bool should_dirty = (dio->flags & IOMAP_DIO_DIRTY);
+ struct kiocb *iocb = dio->iocb;
if (bio->bi_status)
iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
+ if (!atomic_dec_and_test(&dio->ref))
+ goto release_bio;
- if (atomic_dec_and_test(&dio->ref)) {
- if (dio->wait_for_completion) {
- struct task_struct *waiter = dio->submit.waiter;
- WRITE_ONCE(dio->submit.waiter, NULL);
- blk_wake_io_task(waiter);
- } else if (dio->flags & IOMAP_DIO_WRITE) {
- struct inode *inode = file_inode(dio->iocb->ki_filp);
-
- WRITE_ONCE(dio->iocb->private, NULL);
- INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
- queue_work(inode->i_sb->s_dio_done_wq, &dio->aio.work);
- } else {
- WRITE_ONCE(dio->iocb->private, NULL);
- iomap_dio_complete_work(&dio->aio.work);
- }
+ /*
+ * Synchronous dio, task itself will handle any completion work
+ * that needs after IO. All we need to do is wake the task.
+ */
+ if (dio->wait_for_completion) {
+ struct task_struct *waiter = dio->submit.waiter;
+
+ WRITE_ONCE(dio->submit.waiter, NULL);
+ blk_wake_io_task(waiter);
+ goto release_bio;
+ }
+
+ /*
+ * Flagged with IOMAP_DIO_INLINE_COMP, we can complete it inline
+ */
+ if (dio->flags & IOMAP_DIO_INLINE_COMP) {
+ WRITE_ONCE(iocb->private, NULL);
+ iomap_dio_complete_work(&dio->aio.work);
+ goto release_bio;
+ }
+
+ /*
+ * If this dio is flagged with IOMAP_DIO_CALLER_COMP, then schedule
+ * our completion that way to avoid an async punt to a workqueue.
+ */
+ if (dio->flags & IOMAP_DIO_CALLER_COMP) {
+ /* only polled IO cares about private cleared */
+ iocb->private = dio;
+ iocb->dio_complete = iomap_dio_deferred_complete;
+
+ /*
+ * Invoke ->ki_complete() directly. We've assigned our
+ * dio_complete callback handler, and since the issuer set
+ * IOCB_DIO_CALLER_COMP, we know their ki_complete handler will
+ * notice ->dio_complete being set and will defer calling that
+ * handler until it can be done from a safe task context.
+ *
+ * Note that the 'res' being passed in here is not important
+ * for this case. The actual completion value of the request
+ * will be gotten from dio_complete when that is run by the
+ * issuer.
+ */
+ iocb->ki_complete(iocb, 0);
+ goto release_bio;
}
+ /*
+ * Async DIO completion that requires filesystem level completion work
+ * gets punted to a work queue to complete as the operation may require
+ * more IO to be issued to finalise filesystem metadata changes or
+ * guarantee data integrity.
+ */
+ INIT_WORK(&dio->aio.work, iomap_dio_complete_work);
+ queue_work(file_inode(iocb->ki_filp)->i_sb->s_dio_done_wq,
+ &dio->aio.work);
+release_bio:
if (should_dirty) {
bio_check_pages_dirty(bio);
} else {
@@ -203,7 +253,7 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
/*
* Figure out the bio's operation flags from the dio request, the
* mapping, and whether or not we want FUA. Note that we can end up
- * clearing the WRITE_FUA flag in the dio request.
+ * clearing the WRITE_THROUGH flag in the dio request.
*/
static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
const struct iomap *iomap, bool use_fua)
@@ -217,7 +267,7 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
if (use_fua)
opflags |= REQ_FUA;
else
- dio->flags &= ~IOMAP_DIO_WRITE_FUA;
+ dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
return opflags;
}
@@ -257,12 +307,19 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
* Use a FUA write if we need datasync semantics, this is a pure
* data IO that doesn't require any metadata updates (including
* after IO completion such as unwritten extent conversion) and
- * the underlying device supports FUA. This allows us to avoid
- * cache flushes on IO completion.
+ * the underlying device either supports FUA or doesn't have
+ * a volatile write cache. This allows us to avoid cache flushes
+ * on IO completion. If we can't use writethrough and need to
+ * sync, disable in-task completions as dio completion will
+ * need to call generic_write_sync() which will do a blocking
+ * fsync / cache flush call.
*/
if (!(iomap->flags & (IOMAP_F_SHARED|IOMAP_F_DIRTY)) &&
- (dio->flags & IOMAP_DIO_WRITE_FUA) && bdev_fua(iomap->bdev))
+ (dio->flags & IOMAP_DIO_WRITE_THROUGH) &&
+ (bdev_fua(iomap->bdev) || !bdev_write_cache(iomap->bdev)))
use_fua = true;
+ else if (dio->flags & IOMAP_DIO_NEED_SYNC)
+ dio->flags &= ~IOMAP_DIO_CALLER_COMP;
}
/*
@@ -277,10 +334,23 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
goto out;
/*
- * We can only poll for single bio I/Os.
+ * We can only do deferred completion for pure overwrites that
+ * don't require additional IO at completion. This rules out
+ * writes that need zeroing or extent conversion, extend
+ * the file size, or issue journal IO or cache flushes
+ * during completion processing.
*/
if (need_zeroout ||
+ ((dio->flags & IOMAP_DIO_NEED_SYNC) && !use_fua) ||
((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode)))
+ dio->flags &= ~IOMAP_DIO_CALLER_COMP;
+
+ /*
+ * The rules for polled IO completions follow the guidelines as the
+ * ones we set for inline and deferred completions. If none of those
+ * are available for this IO, clear the polled flag.
+ */
+ if (!(dio->flags & (IOMAP_DIO_INLINE_COMP|IOMAP_DIO_CALLER_COMP)))
dio->iocb->ki_flags &= ~IOCB_HIPRI;
if (need_zeroout) {
@@ -505,12 +575,14 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->submit.iter = iter;
dio->submit.waiter = current;
- dio->submit.poll_bio = NULL;
if (iocb->ki_flags & IOCB_NOWAIT)
iomi.flags |= IOMAP_NOWAIT;
if (iov_iter_rw(iter) == READ) {
+ /* reads can always complete inline */
+ dio->flags |= IOMAP_DIO_INLINE_COMP;
+
if (iomi.pos >= dio->i_size)
goto out_free_dio;
@@ -524,6 +596,15 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
iomi.flags |= IOMAP_WRITE;
dio->flags |= IOMAP_DIO_WRITE;
+ /*
+ * Flag as supporting deferred completions, if the issuer
+ * groks it. This can avoid a workqueue punt for writes.
+ * We may later clear this flag if we need to do other IO
+ * as part of this IO completion.
+ */
+ if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
+ dio->flags |= IOMAP_DIO_CALLER_COMP;
+
if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
ret = -EAGAIN;
if (iomi.pos >= dio->i_size ||
@@ -537,13 +618,16 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
dio->flags |= IOMAP_DIO_NEED_SYNC;
/*
- * For datasync only writes, we optimistically try
- * using FUA for this IO. Any non-FUA write that
- * occurs will clear this flag, hence we know before
- * completion whether a cache flush is necessary.
+ * For datasync only writes, we optimistically try using
+ * WRITE_THROUGH for this IO. This flag requires either
+ * FUA writes through the device's write cache, or a
+ * normal write to a device without a volatile write
+ * cache. For the former, Any non-FUA write that occurs
+ * will clear this flag, hence we know before completion
+ * whether a cache flush is necessary.
*/
if (!(iocb->ki_flags & IOCB_SYNC))
- dio->flags |= IOMAP_DIO_WRITE_FUA;
+ dio->flags |= IOMAP_DIO_WRITE_THROUGH;
}
/*
@@ -605,14 +689,13 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
iomap_dio_set_error(dio, ret);
/*
- * If all the writes we issued were FUA, we don't need to flush the
- * cache on IO completion. Clear the sync flag for this case.
+ * If all the writes we issued were already written through to the
+ * media, we don't need to flush the cache on IO completion. Clear the
+ * sync flag for this case.
*/
- if (dio->flags & IOMAP_DIO_WRITE_FUA)
+ if (dio->flags & IOMAP_DIO_WRITE_THROUGH)
dio->flags &= ~IOMAP_DIO_NEED_SYNC;
- WRITE_ONCE(iocb->private, dio->submit.poll_bio);
-
/*
* We are about to drop our additional submission reference, which
* might be the last reference to the dio. There are three different
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index df9d70588b60..2ee21286ac8f 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1422,13 +1422,8 @@ static int isofs_read_inode(struct inode *inode, int relocated)
inode->i_ino, de->flags[-high_sierra]);
}
#endif
-
- inode->i_mtime.tv_sec =
- inode->i_atime.tv_sec =
- inode->i_ctime.tv_sec = iso_date(de->date, high_sierra);
- inode->i_mtime.tv_nsec =
- inode->i_atime.tv_nsec =
- inode->i_ctime.tv_nsec = 0;
+ inode->i_mtime = inode->i_atime =
+ inode_set_ctime(inode, iso_date(de->date, high_sierra), 0);
ei->i_first_extent = (isonum_733(de->extent) +
isonum_711(de->ext_attr_length));
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 48f58c6c9e69..348783a70f57 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -421,10 +421,9 @@ repeat:
/* Rock ridge never appears on a High Sierra disk */
cnt = 0;
if (rr->u.TF.flags & TF_CREATE) {
- inode->i_ctime.tv_sec =
- iso_date(rr->u.TF.times[cnt++].time,
- 0);
- inode->i_ctime.tv_nsec = 0;
+ inode_set_ctime(inode,
+ iso_date(rr->u.TF.times[cnt++].time, 0),
+ 0);
}
if (rr->u.TF.flags & TF_MODIFY) {
inode->i_mtime.tv_sec =
@@ -439,10 +438,9 @@ repeat:
inode->i_atime.tv_nsec = 0;
}
if (rr->u.TF.flags & TF_ATTRIBUTES) {
- inode->i_ctime.tv_sec =
- iso_date(rr->u.TF.times[cnt++].time,
- 0);
- inode->i_ctime.tv_nsec = 0;
+ inode_set_ctime(inode,
+ iso_date(rr->u.TF.times[cnt++].time, 0),
+ 0);
}
break;
case SIG('S', 'L'):
@@ -534,7 +532,7 @@ repeat:
inode->i_size = reloc->i_size;
inode->i_blocks = reloc->i_blocks;
inode->i_atime = reloc->i_atime;
- inode->i_ctime = reloc->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(reloc));
inode->i_mtime = reloc->i_mtime;
iput(reloc);
break;
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 51bd38da21cd..9ec91017a7f3 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -27,7 +27,7 @@
*
* Called with j_list_lock held.
*/
-static inline void __buffer_unlink_first(struct journal_head *jh)
+static inline void __buffer_unlink(struct journal_head *jh)
{
transaction_t *transaction = jh->b_cp_transaction;
@@ -41,45 +41,6 @@ static inline void __buffer_unlink_first(struct journal_head *jh)
}
/*
- * Unlink a buffer from a transaction checkpoint(io) list.
- *
- * Called with j_list_lock held.
- */
-static inline void __buffer_unlink(struct journal_head *jh)
-{
- transaction_t *transaction = jh->b_cp_transaction;
-
- __buffer_unlink_first(jh);
- if (transaction->t_checkpoint_io_list == jh) {
- transaction->t_checkpoint_io_list = jh->b_cpnext;
- if (transaction->t_checkpoint_io_list == jh)
- transaction->t_checkpoint_io_list = NULL;
- }
-}
-
-/*
- * Move a buffer from the checkpoint list to the checkpoint io list
- *
- * Called with j_list_lock held
- */
-static inline void __buffer_relink_io(struct journal_head *jh)
-{
- transaction_t *transaction = jh->b_cp_transaction;
-
- __buffer_unlink_first(jh);
-
- if (!transaction->t_checkpoint_io_list) {
- jh->b_cpnext = jh->b_cpprev = jh;
- } else {
- jh->b_cpnext = transaction->t_checkpoint_io_list;
- jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
- jh->b_cpprev->b_cpnext = jh;
- jh->b_cpnext->b_cpprev = jh;
- }
- transaction->t_checkpoint_io_list = jh;
-}
-
-/*
* Check a checkpoint buffer could be release or not.
*
* Requires j_list_lock
@@ -183,6 +144,7 @@ __flush_batch(journal_t *journal, int *batch_count)
struct buffer_head *bh = journal->j_chkpt_bhs[i];
BUFFER_TRACE(bh, "brelse");
__brelse(bh);
+ journal->j_chkpt_bhs[i] = NULL;
}
*batch_count = 0;
}
@@ -242,15 +204,6 @@ restart:
jh = transaction->t_checkpoint_list;
bh = jh2bh(jh);
- if (buffer_locked(bh)) {
- get_bh(bh);
- spin_unlock(&journal->j_list_lock);
- wait_on_buffer(bh);
- /* the journal_head may have gone by now */
- BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
- goto retry;
- }
if (jh->b_transaction != NULL) {
transaction_t *t = jh->b_transaction;
tid_t tid = t->t_tid;
@@ -285,30 +238,50 @@ restart:
spin_lock(&journal->j_list_lock);
goto restart;
}
- if (!buffer_dirty(bh)) {
+ if (!trylock_buffer(bh)) {
+ /*
+ * The buffer is locked, it may be writing back, or
+ * flushing out in the last couple of cycles, or
+ * re-adding into a new transaction, need to check
+ * it again until it's unlocked.
+ */
+ get_bh(bh);
+ spin_unlock(&journal->j_list_lock);
+ wait_on_buffer(bh);
+ /* the journal_head may have gone by now */
+ BUFFER_TRACE(bh, "brelse");
+ __brelse(bh);
+ goto retry;
+ } else if (!buffer_dirty(bh)) {
+ unlock_buffer(bh);
BUFFER_TRACE(bh, "remove from checkpoint");
- if (__jbd2_journal_remove_checkpoint(jh))
- /* The transaction was released; we're done */
+ /*
+ * If the transaction was released or the checkpoint
+ * list was empty, we're done.
+ */
+ if (__jbd2_journal_remove_checkpoint(jh) ||
+ !transaction->t_checkpoint_list)
goto out;
- continue;
+ } else {
+ unlock_buffer(bh);
+ /*
+ * We are about to write the buffer, it could be
+ * raced by some other transaction shrink or buffer
+ * re-log logic once we release the j_list_lock,
+ * leave it on the checkpoint list and check status
+ * again to make sure it's clean.
+ */
+ BUFFER_TRACE(bh, "queue");
+ get_bh(bh);
+ J_ASSERT_BH(bh, !buffer_jwrite(bh));
+ journal->j_chkpt_bhs[batch_count++] = bh;
+ transaction->t_chp_stats.cs_written++;
+ transaction->t_checkpoint_list = jh->b_cpnext;
}
- /*
- * Important: we are about to write the buffer, and
- * possibly block, while still holding the journal
- * lock. We cannot afford to let the transaction
- * logic start messing around with this buffer before
- * we write it to disk, as that would break
- * recoverability.
- */
- BUFFER_TRACE(bh, "queue");
- get_bh(bh);
- J_ASSERT_BH(bh, !buffer_jwrite(bh));
- journal->j_chkpt_bhs[batch_count++] = bh;
- __buffer_relink_io(jh);
- transaction->t_chp_stats.cs_written++;
+
if ((batch_count == JBD2_NR_BATCH) ||
- need_resched() ||
- spin_needbreak(&journal->j_list_lock))
+ need_resched() || spin_needbreak(&journal->j_list_lock) ||
+ jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0])
goto unlock_and_flush;
}
@@ -322,38 +295,6 @@ restart:
goto restart;
}
- /*
- * Now we issued all of the transaction's buffers, let's deal
- * with the buffers that are out for I/O.
- */
-restart2:
- /* Did somebody clean up the transaction in the meanwhile? */
- if (journal->j_checkpoint_transactions != transaction ||
- transaction->t_tid != this_tid)
- goto out;
-
- while (transaction->t_checkpoint_io_list) {
- jh = transaction->t_checkpoint_io_list;
- bh = jh2bh(jh);
- if (buffer_locked(bh)) {
- get_bh(bh);
- spin_unlock(&journal->j_list_lock);
- wait_on_buffer(bh);
- /* the journal_head may have gone by now */
- BUFFER_TRACE(bh, "brelse");
- __brelse(bh);
- spin_lock(&journal->j_list_lock);
- goto restart2;
- }
-
- /*
- * Now in whatever state the buffer currently is, we
- * know that it has been written out and so we can
- * drop it from the list
- */
- if (__jbd2_journal_remove_checkpoint(jh))
- break;
- }
out:
spin_unlock(&journal->j_list_lock);
result = jbd2_cleanup_journal_tail(journal);
@@ -409,49 +350,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/* Checkpoint list management */
/*
- * journal_clean_one_cp_list
- *
- * Find all the written-back checkpoint buffers in the given list and
- * release them. If 'destroy' is set, clean all buffers unconditionally.
- *
- * Called with j_list_lock held.
- * Returns 1 if we freed the transaction, 0 otherwise.
- */
-static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
-{
- struct journal_head *last_jh;
- struct journal_head *next_jh = jh;
-
- if (!jh)
- return 0;
-
- last_jh = jh->b_cpprev;
- do {
- jh = next_jh;
- next_jh = jh->b_cpnext;
-
- if (!destroy && __cp_buffer_busy(jh))
- return 0;
-
- if (__jbd2_journal_remove_checkpoint(jh))
- return 1;
- /*
- * This function only frees up some memory
- * if possible so we dont have an obligation
- * to finish processing. Bail out if preemption
- * requested:
- */
- if (need_resched())
- return 0;
- } while (jh != last_jh);
-
- return 0;
-}
-
-/*
* journal_shrink_one_cp_list
*
- * Find 'nr_to_scan' written-back checkpoint buffers in the given list
+ * Find all the written-back checkpoint buffers in the given list
* and try to release them. If the whole transaction is released, set
* the 'released' parameter. Return the number of released checkpointed
* buffers.
@@ -459,15 +360,15 @@ static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
* Called with j_list_lock held.
*/
static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
- unsigned long *nr_to_scan,
- bool *released)
+ bool destroy, bool *released)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
unsigned long nr_freed = 0;
int ret;
- if (!jh || *nr_to_scan == 0)
+ *released = false;
+ if (!jh)
return 0;
last_jh = jh->b_cpprev;
@@ -475,12 +376,15 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
jh = next_jh;
next_jh = jh->b_cpnext;
- (*nr_to_scan)--;
- if (__cp_buffer_busy(jh))
- continue;
+ if (destroy) {
+ ret = __jbd2_journal_remove_checkpoint(jh);
+ } else {
+ ret = jbd2_journal_try_remove_checkpoint(jh);
+ if (ret < 0)
+ continue;
+ }
nr_freed++;
- ret = __jbd2_journal_remove_checkpoint(jh);
if (ret) {
*released = true;
break;
@@ -488,7 +392,7 @@ static unsigned long journal_shrink_one_cp_list(struct journal_head *jh,
if (need_resched())
break;
- } while (jh != last_jh && *nr_to_scan);
+ } while (jh != last_jh);
return nr_freed;
}
@@ -506,11 +410,11 @@ unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal,
unsigned long *nr_to_scan)
{
transaction_t *transaction, *last_transaction, *next_transaction;
- bool released;
+ bool __maybe_unused released;
tid_t first_tid = 0, last_tid = 0, next_tid = 0;
tid_t tid = 0;
unsigned long nr_freed = 0;
- unsigned long nr_scanned = *nr_to_scan;
+ unsigned long freed;
again:
spin_lock(&journal->j_list_lock);
@@ -539,19 +443,11 @@ again:
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
tid = transaction->t_tid;
- released = false;
-
- nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_list,
- nr_to_scan, &released);
- if (*nr_to_scan == 0)
- break;
- if (need_resched() || spin_needbreak(&journal->j_list_lock))
- break;
- if (released)
- continue;
- nr_freed += journal_shrink_one_cp_list(transaction->t_checkpoint_io_list,
- nr_to_scan, &released);
+ freed = journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+ false, &released);
+ nr_freed += freed;
+ (*nr_to_scan) -= min(*nr_to_scan, freed);
if (*nr_to_scan == 0)
break;
if (need_resched() || spin_needbreak(&journal->j_list_lock))
@@ -572,9 +468,8 @@ again:
if (*nr_to_scan && next_tid)
goto again;
out:
- nr_scanned -= *nr_to_scan;
trace_jbd2_shrink_checkpoint_list(journal, first_tid, tid, last_tid,
- nr_freed, nr_scanned, next_tid);
+ nr_freed, next_tid);
return nr_freed;
}
@@ -590,7 +485,7 @@ out:
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
transaction_t *transaction, *last_transaction, *next_transaction;
- int ret;
+ bool released;
transaction = journal->j_checkpoint_transactions;
if (!transaction)
@@ -601,8 +496,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
do {
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
- ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
- destroy);
+ journal_shrink_one_cp_list(transaction->t_checkpoint_list,
+ destroy, &released);
/*
* This function only frees up some memory if possible so we
* dont have an obligation to finish processing. Bail out if
@@ -610,23 +505,12 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
*/
if (need_resched())
return;
- if (ret)
- continue;
- /*
- * It is essential that we are as careful as in the case of
- * t_checkpoint_list with removing the buffer from the list as
- * we can possibly see not yet submitted buffers on io_list
- */
- ret = journal_clean_one_cp_list(transaction->
- t_checkpoint_io_list, destroy);
- if (need_resched())
- return;
/*
* Stop scanning if we couldn't free the transaction. This
* avoids pointless scanning of transactions which still
* weren't checkpointed.
*/
- if (!ret)
+ if (!released)
return;
} while (transaction != last_transaction);
}
@@ -705,7 +589,7 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
jbd2_journal_put_journal_head(jh);
/* Is this transaction empty? */
- if (transaction->t_checkpoint_list || transaction->t_checkpoint_io_list)
+ if (transaction->t_checkpoint_list)
return 0;
/*
@@ -737,6 +621,34 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
}
/*
+ * Check the checkpoint buffer and try to remove it from the checkpoint
+ * list if it's clean. Returns -EBUSY if it is not clean, returns 1 if
+ * it frees the transaction, 0 otherwise.
+ *
+ * This function is called with j_list_lock held.
+ */
+int jbd2_journal_try_remove_checkpoint(struct journal_head *jh)
+{
+ struct buffer_head *bh = jh2bh(jh);
+
+ if (!trylock_buffer(bh))
+ return -EBUSY;
+ if (buffer_dirty(bh)) {
+ unlock_buffer(bh);
+ return -EBUSY;
+ }
+ unlock_buffer(bh);
+
+ /*
+ * Buffer is clean and the IO has finished (we held the buffer
+ * lock) so the checkpoint is done. We can safely remove the
+ * buffer from this transaction.
+ */
+ JBUFFER_TRACE(jh, "remove from checkpoint list");
+ return __jbd2_journal_remove_checkpoint(jh);
+}
+
+/*
* journal_insert_checkpoint: put a committed buffer onto a checkpoint
* list so that we know when it is safe to clean the transaction out of
* the log.
@@ -797,7 +709,6 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact
J_ASSERT(transaction->t_forget == NULL);
J_ASSERT(transaction->t_shadow_list == NULL);
J_ASSERT(transaction->t_checkpoint_list == NULL);
- J_ASSERT(transaction->t_checkpoint_io_list == NULL);
J_ASSERT(atomic_read(&transaction->t_updates) == 0);
J_ASSERT(journal->j_committing_transaction != transaction);
J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b33155dd7001..1073259902a6 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1141,8 +1141,7 @@ restart_loop:
spin_lock(&journal->j_list_lock);
commit_transaction->t_state = T_FINISHED;
/* Check if the transaction can be dropped now that we are finished */
- if (commit_transaction->t_checkpoint_list == NULL &&
- commit_transaction->t_checkpoint_io_list == NULL) {
+ if (commit_transaction->t_checkpoint_list == NULL) {
__jbd2_journal_drop_transaction(journal, commit_transaction);
jbd2_journal_free_transaction(commit_transaction);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 18611241f451..4d1fda1f7143 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1784,8 +1784,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
* Otherwise, if the buffer has been written to disk,
* it is safe to remove the checkpoint and drop it.
*/
- if (!buffer_dirty(bh)) {
- __jbd2_journal_remove_checkpoint(jh);
+ if (jbd2_journal_try_remove_checkpoint(jh) >= 0) {
spin_unlock(&journal->j_list_lock);
goto drop;
}
@@ -2100,35 +2099,6 @@ void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
__brelse(bh);
}
-/*
- * Called from jbd2_journal_try_to_free_buffers().
- *
- * Called under jh->b_state_lock
- */
-static void
-__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
-{
- struct journal_head *jh;
-
- jh = bh2jh(bh);
-
- if (buffer_locked(bh) || buffer_dirty(bh))
- goto out;
-
- if (jh->b_next_transaction != NULL || jh->b_transaction != NULL)
- goto out;
-
- spin_lock(&journal->j_list_lock);
- if (jh->b_cp_transaction != NULL) {
- /* written-back checkpointed metadata buffer */
- JBUFFER_TRACE(jh, "remove from checkpoint list");
- __jbd2_journal_remove_checkpoint(jh);
- }
- spin_unlock(&journal->j_list_lock);
-out:
- return;
-}
-
/**
* jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
@@ -2186,7 +2156,13 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
continue;
spin_lock(&jh->b_state_lock);
- __journal_try_to_free_buffer(journal, bh);
+ if (!jh->b_transaction && !jh->b_next_transaction) {
+ spin_lock(&journal->j_list_lock);
+ /* Remove written-back checkpointed metadata buffer */
+ if (jh->b_cp_transaction != NULL)
+ jbd2_journal_try_remove_checkpoint(jh);
+ spin_unlock(&journal->j_list_lock);
+ }
spin_unlock(&jh->b_state_lock);
jbd2_journal_put_journal_head(jh);
if (buffer_jbd(bh))
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 5075a0a6d594..091ab0eaabbe 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -204,7 +204,8 @@ static int jffs2_create(struct mnt_idmap *idmap, struct inode *dir_i,
if (ret)
goto fail;
- dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(ri->ctime));
+ dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+ ITIME(je32_to_cpu(ri->ctime)));
jffs2_free_raw_inode(ri);
@@ -237,7 +238,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
if (dead_f->inocache)
set_nlink(d_inode(dentry), dead_f->inocache->pino_nlink);
if (!ret)
- dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+ dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
return ret;
}
/***********************************************************************/
@@ -271,7 +272,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
set_nlink(d_inode(old_dentry), ++f->inocache->pino_nlink);
mutex_unlock(&f->sem);
d_instantiate(dentry, d_inode(old_dentry));
- dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+ dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
ihold(d_inode(old_dentry));
}
return ret;
@@ -422,7 +423,8 @@ static int jffs2_symlink (struct mnt_idmap *idmap, struct inode *dir_i,
goto fail;
}
- dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+ dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+ ITIME(je32_to_cpu(rd->mctime)));
jffs2_free_raw_dirent(rd);
@@ -566,7 +568,8 @@ static int jffs2_mkdir (struct mnt_idmap *idmap, struct inode *dir_i,
goto fail;
}
- dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+ dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+ ITIME(je32_to_cpu(rd->mctime)));
inc_nlink(dir_i);
jffs2_free_raw_dirent(rd);
@@ -607,7 +610,7 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
dentry->d_name.len, f, now);
if (!ret) {
- dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
+ dir_i->i_mtime = inode_set_ctime_to_ts(dir_i, ITIME(now));
clear_nlink(d_inode(dentry));
drop_nlink(dir_i);
}
@@ -743,7 +746,8 @@ static int jffs2_mknod (struct mnt_idmap *idmap, struct inode *dir_i,
goto fail;
}
- dir_i->i_mtime = dir_i->i_ctime = ITIME(je32_to_cpu(rd->mctime));
+ dir_i->i_mtime = inode_set_ctime_to_ts(dir_i,
+ ITIME(je32_to_cpu(rd->mctime)));
jffs2_free_raw_dirent(rd);
@@ -864,14 +868,16 @@ static int jffs2_rename (struct mnt_idmap *idmap,
* caller won't do it on its own since we are returning an error.
*/
d_invalidate(new_dentry);
- new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
+ new_dir_i->i_mtime = inode_set_ctime_to_ts(new_dir_i,
+ ITIME(now));
return ret;
}
if (d_is_dir(old_dentry))
drop_nlink(old_dir_i);
- new_dir_i->i_mtime = new_dir_i->i_ctime = old_dir_i->i_mtime = old_dir_i->i_ctime = ITIME(now);
+ old_dir_i->i_mtime = inode_set_ctime_to_ts(old_dir_i, ITIME(now));
+ new_dir_i->i_mtime = inode_set_ctime_to_ts(new_dir_i, ITIME(now));
return 0;
}
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 2345ca3f09ee..11c66793960e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -317,7 +317,8 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
inode->i_size = pos + writtenlen;
inode->i_blocks = (inode->i_size + 511) >> 9;
- inode->i_ctime = inode->i_mtime = ITIME(je32_to_cpu(ri->ctime));
+ inode->i_mtime = inode_set_ctime_to_ts(inode,
+ ITIME(je32_to_cpu(ri->ctime)));
}
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 038516bee1ab..0403efab4089 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -115,7 +115,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
ri->isize = cpu_to_je32((ivalid & ATTR_SIZE)?iattr->ia_size:inode->i_size);
ri->atime = cpu_to_je32(I_SEC((ivalid & ATTR_ATIME)?iattr->ia_atime:inode->i_atime));
ri->mtime = cpu_to_je32(I_SEC((ivalid & ATTR_MTIME)?iattr->ia_mtime:inode->i_mtime));
- ri->ctime = cpu_to_je32(I_SEC((ivalid & ATTR_CTIME)?iattr->ia_ctime:inode->i_ctime));
+ ri->ctime = cpu_to_je32(I_SEC((ivalid & ATTR_CTIME)?iattr->ia_ctime:inode_get_ctime(inode)));
ri->offset = cpu_to_je32(0);
ri->csize = ri->dsize = cpu_to_je32(mdatalen);
@@ -148,7 +148,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
}
/* It worked. Update the inode */
inode->i_atime = ITIME(je32_to_cpu(ri->atime));
- inode->i_ctime = ITIME(je32_to_cpu(ri->ctime));
+ inode_set_ctime_to_ts(inode, ITIME(je32_to_cpu(ri->ctime)));
inode->i_mtime = ITIME(je32_to_cpu(ri->mtime));
inode->i_mode = jemode_to_cpu(ri->mode);
i_uid_write(inode, je16_to_cpu(ri->uid));
@@ -284,7 +284,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
inode->i_size = je32_to_cpu(latest_node.isize);
inode->i_atime = ITIME(je32_to_cpu(latest_node.atime));
inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
- inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
+ inode_set_ctime_to_ts(inode, ITIME(je32_to_cpu(latest_node.ctime)));
set_nlink(inode, f->inocache->pino_nlink);
@@ -388,7 +388,7 @@ void jffs2_dirty_inode(struct inode *inode, int flags)
iattr.ia_gid = inode->i_gid;
iattr.ia_atime = inode->i_atime;
iattr.ia_mtime = inode->i_mtime;
- iattr.ia_ctime = inode->i_ctime;
+ iattr.ia_ctime = inode_get_ctime(inode);
jffs2_do_setattr(inode, &iattr);
}
@@ -475,7 +475,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r
inode->i_mode = jemode_to_cpu(ri->mode);
i_gid_write(inode, je16_to_cpu(ri->gid));
i_uid_write(inode, je16_to_cpu(ri->uid));
- inode->i_atime = inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
ri->atime = ri->mtime = ri->ctime = cpu_to_je32(I_SEC(inode->i_mtime));
inode->i_blocks = 0;
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 8da19766c101..50727a1ff931 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -35,7 +35,7 @@ struct kvec;
#define ITIME(sec) ((struct timespec64){sec, 0})
#define JFFS2_NOW() JFFS2_CLAMP_TIME(ktime_get_real_seconds())
#define I_SEC(tv) JFFS2_CLAMP_TIME((tv).tv_sec)
-#define JFFS2_F_I_CTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_ctime)
+#define JFFS2_F_I_CTIME(f) I_SEC(inode_get_ctime(OFNI_EDONI_2SFFJ(f)))
#define JFFS2_F_I_MTIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_mtime)
#define JFFS2_F_I_ATIME(f) I_SEC(OFNI_EDONI_2SFFJ(f)->i_atime)
#define sleep_on_spinunlock(wq, s) \
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index fb96f872d207..1de3602c98de 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -116,7 +116,7 @@ int jfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
if (!rc) {
if (update_mode) {
inode->i_mode = mode;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
}
rc = txCommit(tid, 1, &inode, 0);
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 8ac10e396050..920d58a1566b 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -393,7 +393,7 @@ void jfs_truncate_nolock(struct inode *ip, loff_t length)
break;
}
- ip->i_mtime = ip->i_ctime = current_time(ip);
+ ip->i_mtime = inode_set_ctime_current(ip);
mark_inode_dirty(ip);
txCommit(tid, 1, &ip, 0);
diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c
index ed7989bc2db1..f7bd7e8f5be4 100644
--- a/fs/jfs/ioctl.c
+++ b/fs/jfs/ioctl.c
@@ -96,7 +96,7 @@ int jfs_fileattr_set(struct mnt_idmap *idmap,
jfs_inode->mode2 = flags;
jfs_set_inode_flags(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
return 0;
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 390cbfce391f..a40383aa6c84 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -3064,8 +3064,8 @@ static int copy_from_dinode(struct dinode * dip, struct inode *ip)
ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
- ip->i_ctime.tv_sec = le32_to_cpu(dip->di_ctime.tv_sec);
- ip->i_ctime.tv_nsec = le32_to_cpu(dip->di_ctime.tv_nsec);
+ inode_set_ctime(ip, le32_to_cpu(dip->di_ctime.tv_sec),
+ le32_to_cpu(dip->di_ctime.tv_nsec));
ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
ip->i_generation = le32_to_cpu(dip->di_gen);
@@ -3139,8 +3139,8 @@ static void copy_to_dinode(struct dinode * dip, struct inode *ip)
dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
- dip->di_ctime.tv_sec = cpu_to_le32(ip->i_ctime.tv_sec);
- dip->di_ctime.tv_nsec = cpu_to_le32(ip->i_ctime.tv_nsec);
+ dip->di_ctime.tv_sec = cpu_to_le32(inode_get_ctime(ip).tv_sec);
+ dip->di_ctime.tv_nsec = cpu_to_le32(inode_get_ctime(ip).tv_nsec);
dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
dip->di_ixpxd = jfs_ip->ixpxd; /* in-memory pxd's are little-endian */
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index 9e1f02767201..87594efa7f7c 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -97,8 +97,8 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
jfs_inode->mode2 |= inode->i_mode;
inode->i_blocks = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
- jfs_inode->otime = inode->i_ctime.tv_sec;
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
+ jfs_inode->otime = inode_get_ctime(inode).tv_sec;
inode->i_generation = JFS_SBI(sb)->gengen++;
jfs_inode->cflag = 0;
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index 9b030297aa64..029d47065600 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -149,7 +149,7 @@ static int jfs_create(struct mnt_idmap *idmap, struct inode *dip,
mark_inode_dirty(ip);
- dip->i_ctime = dip->i_mtime = current_time(dip);
+ dip->i_mtime = inode_set_ctime_current(dip);
mark_inode_dirty(dip);
@@ -284,7 +284,7 @@ static int jfs_mkdir(struct mnt_idmap *idmap, struct inode *dip,
/* update parent directory inode */
inc_nlink(dip); /* for '..' from child directory */
- dip->i_ctime = dip->i_mtime = current_time(dip);
+ dip->i_mtime = inode_set_ctime_current(dip);
mark_inode_dirty(dip);
rc = txCommit(tid, 2, &iplist[0], 0);
@@ -390,7 +390,7 @@ static int jfs_rmdir(struct inode *dip, struct dentry *dentry)
/* update parent directory's link count corresponding
* to ".." entry of the target directory deleted
*/
- dip->i_ctime = dip->i_mtime = current_time(dip);
+ dip->i_mtime = inode_set_ctime_current(dip);
inode_dec_link_count(dip);
/*
@@ -512,7 +512,7 @@ static int jfs_unlink(struct inode *dip, struct dentry *dentry)
ASSERT(ip->i_nlink);
- ip->i_ctime = dip->i_ctime = dip->i_mtime = current_time(ip);
+ dip->i_mtime = inode_set_ctime_to_ts(dip, inode_set_ctime_current(ip));
mark_inode_dirty(dip);
/* update target's inode */
@@ -827,8 +827,8 @@ static int jfs_link(struct dentry *old_dentry,
/* update object inode */
inc_nlink(ip); /* for new link */
- ip->i_ctime = current_time(ip);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ inode_set_ctime_current(ip);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
ihold(ip);
@@ -1028,7 +1028,7 @@ static int jfs_symlink(struct mnt_idmap *idmap, struct inode *dip,
mark_inode_dirty(ip);
- dip->i_ctime = dip->i_mtime = current_time(dip);
+ dip->i_mtime = inode_set_ctime_current(dip);
mark_inode_dirty(dip);
/*
* commit update of parent directory and link object
@@ -1205,7 +1205,7 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
tblk->xflag |= COMMIT_DELETE;
tblk->u.ip = new_ip;
} else {
- new_ip->i_ctime = current_time(new_ip);
+ inode_set_ctime_current(new_ip);
mark_inode_dirty(new_ip);
}
} else {
@@ -1268,10 +1268,10 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
/*
* Update ctime on changed/moved inodes & mark dirty
*/
- old_ip->i_ctime = current_time(old_ip);
+ inode_set_ctime_current(old_ip);
mark_inode_dirty(old_ip);
- new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir);
+ new_dir->i_mtime = inode_set_ctime_current(new_dir);
mark_inode_dirty(new_dir);
/* Build list of inodes modified by this transaction */
@@ -1283,7 +1283,7 @@ static int jfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (old_dir != new_dir) {
iplist[ipcount++] = new_dir;
- old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+ old_dir->i_mtime = inode_set_ctime_current(old_dir);
mark_inode_dirty(old_dir);
}
@@ -1416,7 +1416,7 @@ static int jfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
mark_inode_dirty(ip);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
@@ -1535,9 +1535,10 @@ const struct inode_operations jfs_dir_inode_operations = {
#endif
};
+WRAP_DIR_ITER(jfs_readdir) // FIXME!
const struct file_operations jfs_dir_operations = {
.read = generic_read_dir,
- .iterate = jfs_readdir,
+ .iterate_shared = shared_jfs_readdir,
.fsync = jfs_fsync,
.unlocked_ioctl = jfs_ioctl,
.compat_ioctl = compat_ptr_ioctl,
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index d2f82cb7db1b..2e2f7f6d36a0 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -818,7 +818,7 @@ out:
}
if (inode->i_size < off+len-towrite)
i_size_write(inode, off+len-towrite);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
inode_unlock(inode);
return len - towrite;
diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
index 931e50018f88..8577ad494e05 100644
--- a/fs/jfs/xattr.c
+++ b/fs/jfs/xattr.c
@@ -647,7 +647,7 @@ static int ea_put(tid_t tid, struct inode *inode, struct ea_buffer *ea_buf,
if (old_blocks)
dquot_free_block(inode, old_blocks);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
return 0;
}
diff --git a/fs/kernel_read_file.c b/fs/kernel_read_file.c
index 5d826274570c..c429c42a6867 100644
--- a/fs/kernel_read_file.c
+++ b/fs/kernel_read_file.c
@@ -8,16 +8,16 @@
/**
* kernel_read_file() - read file contents into a kernel buffer
*
- * @file file to read from
- * @offset where to start reading from (see below).
- * @buf pointer to a "void *" buffer for reading into (if
+ * @file: file to read from
+ * @offset: where to start reading from (see below).
+ * @buf: pointer to a "void *" buffer for reading into (if
* *@buf is NULL, a buffer will be allocated, and
* @buf_size will be ignored)
- * @buf_size size of buf, if already allocated. If @buf not
+ * @buf_size: size of buf, if already allocated. If @buf not
* allocated, this is the largest size to allocate.
- * @file_size if non-NULL, the full size of @file will be
+ * @file_size: if non-NULL, the full size of @file will be
* written here.
- * @id the kernel_read_file_id identifying the type of
+ * @id: the kernel_read_file_id identifying the type of
* file contents being read (for LSMs to examine)
*
* @offset must be 0 unless both @buf and @file_size are non-NULL
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 5a1a4af9d3d2..660995856a04 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -556,7 +556,7 @@ void kernfs_put(struct kernfs_node *kn)
kfree_const(kn->name);
if (kn->iattr) {
- simple_xattrs_free(&kn->iattr->xattrs);
+ simple_xattrs_free(&kn->iattr->xattrs, NULL);
kmem_cache_free(kernfs_iattrs_cache, kn->iattr);
}
spin_lock(&kernfs_idr_lock);
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index b22b74d1a115..922719a343a7 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -151,8 +151,7 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size)
static inline void set_default_inode_attr(struct inode *inode, umode_t mode)
{
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime =
- inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
static inline void set_inode_attr(struct inode *inode,
@@ -162,7 +161,7 @@ static inline void set_inode_attr(struct inode *inode,
inode->i_gid = attrs->ia_gid;
inode->i_atime = attrs->ia_atime;
inode->i_mtime = attrs->ia_mtime;
- inode->i_ctime = attrs->ia_ctime;
+ inode_set_ctime_to_ts(inode, attrs->ia_ctime);
}
static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
@@ -191,7 +190,7 @@ int kernfs_iop_getattr(struct mnt_idmap *idmap,
down_read(&root->kernfs_iattr_rwsem);
kernfs_refresh_inode(kn, inode);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
up_read(&root->kernfs_iattr_rwsem);
return 0;
@@ -306,11 +305,17 @@ int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
int kernfs_xattr_set(struct kernfs_node *kn, const char *name,
const void *value, size_t size, int flags)
{
+ struct simple_xattr *old_xattr;
struct kernfs_iattrs *attrs = kernfs_iattrs(kn);
if (!attrs)
return -ENOMEM;
- return simple_xattr_set(&attrs->xattrs, name, value, size, flags, NULL);
+ old_xattr = simple_xattr_set(&attrs->xattrs, name, value, size, flags);
+ if (IS_ERR(old_xattr))
+ return PTR_ERR(old_xattr);
+
+ simple_xattr_free(old_xattr);
+ return 0;
}
static int kernfs_vfs_xattr_get(const struct xattr_handler *handler,
@@ -342,7 +347,7 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
{
atomic_t *sz = &kn->iattr->user_xattr_size;
atomic_t *nr = &kn->iattr->nr_user_xattrs;
- ssize_t removed_size;
+ struct simple_xattr *old_xattr;
int ret;
if (atomic_inc_return(nr) > KERNFS_MAX_USER_XATTRS) {
@@ -355,13 +360,18 @@ static int kernfs_vfs_user_xattr_add(struct kernfs_node *kn,
goto dec_size_out;
}
- ret = simple_xattr_set(xattrs, full_name, value, size, flags,
- &removed_size);
-
- if (!ret && removed_size >= 0)
- size = removed_size;
- else if (!ret)
+ old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
+ if (!old_xattr)
return 0;
+
+ if (IS_ERR(old_xattr)) {
+ ret = PTR_ERR(old_xattr);
+ goto dec_size_out;
+ }
+
+ ret = 0;
+ size = old_xattr->size;
+ simple_xattr_free(old_xattr);
dec_size_out:
atomic_sub(size, sz);
dec_count_out:
@@ -376,18 +386,19 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
{
atomic_t *sz = &kn->iattr->user_xattr_size;
atomic_t *nr = &kn->iattr->nr_user_xattrs;
- ssize_t removed_size;
- int ret;
+ struct simple_xattr *old_xattr;
- ret = simple_xattr_set(xattrs, full_name, value, size, flags,
- &removed_size);
+ old_xattr = simple_xattr_set(xattrs, full_name, value, size, flags);
+ if (!old_xattr)
+ return 0;
- if (removed_size >= 0) {
- atomic_sub(removed_size, sz);
- atomic_dec(nr);
- }
+ if (IS_ERR(old_xattr))
+ return PTR_ERR(old_xattr);
- return ret;
+ atomic_sub(old_xattr->size, sz);
+ atomic_dec(nr);
+ simple_xattr_free(old_xattr);
+ return 0;
}
static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
diff --git a/fs/libfs.c b/fs/libfs.c
index 5b851315eeed..da78eb64831e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -33,7 +33,7 @@ int simple_getattr(struct mnt_idmap *idmap, const struct path *path,
unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
return 0;
}
@@ -239,6 +239,254 @@ const struct inode_operations simple_dir_inode_operations = {
};
EXPORT_SYMBOL(simple_dir_inode_operations);
+static void offset_set(struct dentry *dentry, u32 offset)
+{
+ dentry->d_fsdata = (void *)((uintptr_t)(offset));
+}
+
+static u32 dentry2offset(struct dentry *dentry)
+{
+ return (u32)((uintptr_t)(dentry->d_fsdata));
+}
+
+static struct lock_class_key simple_offset_xa_lock;
+
+/**
+ * simple_offset_init - initialize an offset_ctx
+ * @octx: directory offset map to be initialized
+ *
+ */
+void simple_offset_init(struct offset_ctx *octx)
+{
+ xa_init_flags(&octx->xa, XA_FLAGS_ALLOC1);
+ lockdep_set_class(&octx->xa.xa_lock, &simple_offset_xa_lock);
+
+ /* 0 is '.', 1 is '..', so always start with offset 2 */
+ octx->next_offset = 2;
+}
+
+/**
+ * simple_offset_add - Add an entry to a directory's offset map
+ * @octx: directory offset ctx to be updated
+ * @dentry: new dentry being added
+ *
+ * Returns zero on success. @so_ctx and the dentry offset are updated.
+ * Otherwise, a negative errno value is returned.
+ */
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
+{
+ static const struct xa_limit limit = XA_LIMIT(2, U32_MAX);
+ u32 offset;
+ int ret;
+
+ if (dentry2offset(dentry) != 0)
+ return -EBUSY;
+
+ ret = xa_alloc_cyclic(&octx->xa, &offset, dentry, limit,
+ &octx->next_offset, GFP_KERNEL);
+ if (ret < 0)
+ return ret;
+
+ offset_set(dentry, offset);
+ return 0;
+}
+
+/**
+ * simple_offset_remove - Remove an entry to a directory's offset map
+ * @octx: directory offset ctx to be updated
+ * @dentry: dentry being removed
+ *
+ */
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
+{
+ u32 offset;
+
+ offset = dentry2offset(dentry);
+ if (offset == 0)
+ return;
+
+ xa_erase(&octx->xa, offset);
+ offset_set(dentry, 0);
+}
+
+/**
+ * simple_offset_rename_exchange - exchange rename with directory offsets
+ * @old_dir: parent of dentry being moved
+ * @old_dentry: dentry being moved
+ * @new_dir: destination parent
+ * @new_dentry: destination dentry
+ *
+ * Returns zero on success. Otherwise a negative errno is returned and the
+ * rename is rolled back.
+ */
+int simple_offset_rename_exchange(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry)
+{
+ struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
+ struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
+ u32 old_index = dentry2offset(old_dentry);
+ u32 new_index = dentry2offset(new_dentry);
+ int ret;
+
+ simple_offset_remove(old_ctx, old_dentry);
+ simple_offset_remove(new_ctx, new_dentry);
+
+ ret = simple_offset_add(new_ctx, old_dentry);
+ if (ret)
+ goto out_restore;
+
+ ret = simple_offset_add(old_ctx, new_dentry);
+ if (ret) {
+ simple_offset_remove(new_ctx, old_dentry);
+ goto out_restore;
+ }
+
+ ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+ if (ret) {
+ simple_offset_remove(new_ctx, old_dentry);
+ simple_offset_remove(old_ctx, new_dentry);
+ goto out_restore;
+ }
+ return 0;
+
+out_restore:
+ offset_set(old_dentry, old_index);
+ xa_store(&old_ctx->xa, old_index, old_dentry, GFP_KERNEL);
+ offset_set(new_dentry, new_index);
+ xa_store(&new_ctx->xa, new_index, new_dentry, GFP_KERNEL);
+ return ret;
+}
+
+/**
+ * simple_offset_destroy - Release offset map
+ * @octx: directory offset ctx that is about to be destroyed
+ *
+ * During fs teardown (eg. umount), a directory's offset map might still
+ * contain entries. xa_destroy() cleans out anything that remains.
+ */
+void simple_offset_destroy(struct offset_ctx *octx)
+{
+ xa_destroy(&octx->xa);
+}
+
+/**
+ * offset_dir_llseek - Advance the read position of a directory descriptor
+ * @file: an open directory whose position is to be updated
+ * @offset: a byte offset
+ * @whence: enumerator describing the starting position for this update
+ *
+ * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
+ *
+ * Returns the updated read position if successful; otherwise a
+ * negative errno is returned and the read position remains unchanged.
+ */
+static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ switch (whence) {
+ case SEEK_CUR:
+ offset += file->f_pos;
+ fallthrough;
+ case SEEK_SET:
+ if (offset >= 0)
+ break;
+ fallthrough;
+ default:
+ return -EINVAL;
+ }
+
+ return vfs_setpos(file, offset, U32_MAX);
+}
+
+static struct dentry *offset_find_next(struct xa_state *xas)
+{
+ struct dentry *child, *found = NULL;
+
+ rcu_read_lock();
+ child = xas_next_entry(xas, U32_MAX);
+ if (!child)
+ goto out;
+ spin_lock(&child->d_lock);
+ if (simple_positive(child))
+ found = dget_dlock(child);
+ spin_unlock(&child->d_lock);
+out:
+ rcu_read_unlock();
+ return found;
+}
+
+static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
+{
+ u32 offset = dentry2offset(dentry);
+ struct inode *inode = d_inode(dentry);
+
+ return ctx->actor(ctx, dentry->d_name.name, dentry->d_name.len, offset,
+ inode->i_ino, fs_umode_to_dtype(inode->i_mode));
+}
+
+static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
+{
+ struct offset_ctx *so_ctx = inode->i_op->get_offset_ctx(inode);
+ XA_STATE(xas, &so_ctx->xa, ctx->pos);
+ struct dentry *dentry;
+
+ while (true) {
+ dentry = offset_find_next(&xas);
+ if (!dentry)
+ break;
+
+ if (!offset_dir_emit(ctx, dentry)) {
+ dput(dentry);
+ break;
+ }
+
+ dput(dentry);
+ ctx->pos = xas.xa_index + 1;
+ }
+}
+
+/**
+ * offset_readdir - Emit entries starting at offset @ctx->pos
+ * @file: an open directory to iterate over
+ * @ctx: directory iteration context
+ *
+ * Caller must hold @file's i_rwsem to prevent insertion or removal of
+ * entries during this call.
+ *
+ * On entry, @ctx->pos contains an offset that represents the first entry
+ * to be read from the directory.
+ *
+ * The operation continues until there are no more entries to read, or
+ * until the ctx->actor indicates there is no more space in the caller's
+ * output buffer.
+ *
+ * On return, @ctx->pos contains an offset that will read the next entry
+ * in this directory when offset_readdir() is called again with @ctx.
+ *
+ * Return values:
+ * %0 - Complete
+ */
+static int offset_readdir(struct file *file, struct dir_context *ctx)
+{
+ struct dentry *dir = file->f_path.dentry;
+
+ lockdep_assert_held(&d_inode(dir)->i_rwsem);
+
+ if (!dir_emit_dots(file, ctx))
+ return 0;
+
+ offset_iterate_dir(d_inode(dir), ctx);
+ return 0;
+}
+
+const struct file_operations simple_offset_dir_operations = {
+ .llseek = offset_dir_llseek,
+ .iterate_shared = offset_readdir,
+ .read = generic_read_dir,
+ .fsync = noop_fsync,
+};
+
static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{
struct dentry *child = NULL;
@@ -275,7 +523,7 @@ void simple_recursive_removal(struct dentry *dentry,
while ((child = find_next_child(this, victim)) == NULL) {
// kill and ascend
// update metadata while it's still locked
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
clear_nlink(inode);
inode_unlock(inode);
victim = this;
@@ -293,8 +541,7 @@ void simple_recursive_removal(struct dentry *dentry,
dput(victim); // unpin it
}
if (victim == dentry) {
- inode->i_ctime = inode->i_mtime =
- current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
if (d_is_dir(dentry))
drop_nlink(inode);
inode_unlock(inode);
@@ -335,7 +582,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
*/
root->i_ino = 1;
root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
- root->i_atime = root->i_mtime = root->i_ctime = current_time(root);
+ root->i_atime = root->i_mtime = inode_set_ctime_current(root);
s->s_root = d_make_root(root);
if (!s->s_root)
return -ENOMEM;
@@ -391,7 +638,8 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
{
struct inode *inode = d_inode(old_dentry);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ dir->i_mtime = inode_set_ctime_to_ts(dir,
+ inode_set_ctime_current(inode));
inc_nlink(inode);
ihold(inode);
dget(dentry);
@@ -425,7 +673,8 @@ int simple_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ dir->i_mtime = inode_set_ctime_to_ts(dir,
+ inode_set_ctime_current(inode));
drop_nlink(inode);
dput(dentry);
return 0;
@@ -444,6 +693,31 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
}
EXPORT_SYMBOL(simple_rmdir);
+/**
+ * simple_rename_timestamp - update the various inode timestamps for rename
+ * @old_dir: old parent directory
+ * @old_dentry: dentry that is being renamed
+ * @new_dir: new parent directory
+ * @new_dentry: target for rename
+ *
+ * POSIX mandates that the old and new parent directories have their ctime and
+ * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have
+ * their ctime updated.
+ */
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry)
+{
+ struct inode *newino = d_inode(new_dentry);
+
+ old_dir->i_mtime = inode_set_ctime_current(old_dir);
+ if (new_dir != old_dir)
+ new_dir->i_mtime = inode_set_ctime_current(new_dir);
+ inode_set_ctime_current(d_inode(old_dentry));
+ if (newino)
+ inode_set_ctime_current(newino);
+}
+EXPORT_SYMBOL_GPL(simple_rename_timestamp);
+
int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
@@ -459,11 +733,7 @@ int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
inc_nlink(old_dir);
}
}
- old_dir->i_ctime = old_dir->i_mtime =
- new_dir->i_ctime = new_dir->i_mtime =
- d_inode(old_dentry)->i_ctime =
- d_inode(new_dentry)->i_ctime = current_time(old_dir);
-
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
return 0;
}
EXPORT_SYMBOL_GPL(simple_rename_exchange);
@@ -472,7 +742,6 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
- struct inode *inode = d_inode(old_dentry);
int they_are_dirs = d_is_dir(old_dentry);
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
@@ -495,9 +764,7 @@ int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
inc_nlink(new_dir);
}
- old_dir->i_ctime = old_dir->i_mtime = new_dir->i_ctime =
- new_dir->i_mtime = inode->i_ctime = current_time(old_dir);
-
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
return 0;
}
EXPORT_SYMBOL(simple_rename);
@@ -548,21 +815,20 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
struct page **pagep, void **fsdata)
{
- struct page *page;
- pgoff_t index;
-
- index = pos >> PAGE_SHIFT;
+ struct folio *folio;
- page = grab_cache_page_write_begin(mapping, index);
- if (!page)
- return -ENOMEM;
+ folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
+ mapping_gfp_mask(mapping));
+ if (IS_ERR(folio))
+ return PTR_ERR(folio);
- *pagep = page;
+ *pagep = &folio->page;
- if (!PageUptodate(page) && (len != PAGE_SIZE)) {
- unsigned from = pos & (PAGE_SIZE - 1);
+ if (!folio_test_uptodate(folio) && (len != folio_size(folio))) {
+ size_t from = offset_in_folio(folio, pos);
- zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
+ folio_zero_segments(folio, 0, from,
+ from + len, folio_size(folio));
}
return 0;
}
@@ -594,17 +860,18 @@ static int simple_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- struct inode *inode = page->mapping->host;
+ struct folio *folio = page_folio(page);
+ struct inode *inode = folio->mapping->host;
loff_t last_pos = pos + copied;
- /* zero the stale part of the page if we did a short copy */
- if (!PageUptodate(page)) {
+ /* zero the stale part of the folio if we did a short copy */
+ if (!folio_test_uptodate(folio)) {
if (copied < len) {
- unsigned from = pos & (PAGE_SIZE - 1);
+ size_t from = offset_in_folio(folio, pos);
- zero_user(page, from + copied, len - copied);
+ folio_zero_range(folio, from + copied, len - copied);
}
- SetPageUptodate(page);
+ folio_mark_uptodate(folio);
}
/*
* No need to use i_size_read() here, the i_size
@@ -613,9 +880,9 @@ static int simple_write_end(struct file *file, struct address_space *mapping,
if (last_pos > inode->i_size)
i_size_write(inode, last_pos);
- set_page_dirty(page);
- unlock_page(page);
- put_page(page);
+ folio_mark_dirty(folio);
+ folio_unlock(folio);
+ folio_put(folio);
return copied;
}
@@ -659,7 +926,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
*/
inode->i_ino = 1;
inode->i_mode = S_IFDIR | 0755;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_op = &simple_dir_inode_operations;
inode->i_fop = &simple_dir_operations;
set_nlink(inode, 2);
@@ -685,7 +952,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
goto out;
}
inode->i_mode = S_IFREG | files->mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_fop = files->ops;
inode->i_ino = i;
d_add(dentry, inode);
@@ -1253,7 +1520,7 @@ struct inode *alloc_anon_inode(struct super_block *s)
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_flags |= S_PRIVATE;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
return inode;
}
EXPORT_SYMBOL(alloc_anon_inode);
@@ -1269,7 +1536,7 @@ EXPORT_SYMBOL(alloc_anon_inode);
* All arguments are ignored and it just returns -EINVAL.
*/
int
-simple_nosetlease(struct file *filp, long arg, struct file_lock **flp,
+simple_nosetlease(struct file *filp, int arg, struct file_lock **flp,
void **priv)
{
return -EINVAL;
@@ -1315,7 +1582,7 @@ static int empty_dir_getattr(struct mnt_idmap *idmap,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
return 0;
}
diff --git a/fs/locks.c b/fs/locks.c
index df8b26a42524..a45efc16945d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -438,7 +438,7 @@ static void flock_make_lock(struct file *filp, struct file_lock *fl, int type)
fl->fl_end = OFFSET_MAX;
}
-static int assign_type(struct file_lock *fl, long type)
+static int assign_type(struct file_lock *fl, int type)
{
switch (type) {
case F_RDLCK:
@@ -549,7 +549,7 @@ static const struct lock_manager_operations lease_manager_ops = {
/*
* Initialize a lease, use the default lock manager operations
*/
-static int lease_init(struct file *filp, long type, struct file_lock *fl)
+static int lease_init(struct file *filp, int type, struct file_lock *fl)
{
if (assign_type(fl, type) != 0)
return -EINVAL;
@@ -567,7 +567,7 @@ static int lease_init(struct file *filp, long type, struct file_lock *fl)
}
/* Allocate a file_lock initialised to this type of lease */
-static struct file_lock *lease_alloc(struct file *filp, long type)
+static struct file_lock *lease_alloc(struct file *filp, int type)
{
struct file_lock *fl = locks_alloc_lock();
int error = -ENOMEM;
@@ -868,6 +868,21 @@ static bool posix_locks_conflict(struct file_lock *caller_fl,
return locks_conflict(caller_fl, sys_fl);
}
+/* Determine if lock sys_fl blocks lock caller_fl. Used on xx_GETLK
+ * path so checks for additional GETLK-specific things like F_UNLCK.
+ */
+static bool posix_test_locks_conflict(struct file_lock *caller_fl,
+ struct file_lock *sys_fl)
+{
+ /* F_UNLCK checks any locks on the same fd. */
+ if (caller_fl->fl_type == F_UNLCK) {
+ if (!posix_same_owner(caller_fl, sys_fl))
+ return false;
+ return locks_overlap(caller_fl, sys_fl);
+ }
+ return posix_locks_conflict(caller_fl, sys_fl);
+}
+
/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
* checking before calling the locks_conflict().
*/
@@ -901,7 +916,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl)
retry:
spin_lock(&ctx->flc_lock);
list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
- if (!posix_locks_conflict(fl, cfl))
+ if (!posix_test_locks_conflict(fl, cfl))
continue;
if (cfl->fl_lmops && cfl->fl_lmops->lm_lock_expirable
&& (*cfl->fl_lmops->lm_lock_expirable)(cfl)) {
@@ -1301,6 +1316,7 @@ retry:
out:
spin_unlock(&ctx->flc_lock);
percpu_up_read(&file_rwsem);
+ trace_posix_lock_inode(inode, request, error);
/*
* Free any unused locks.
*/
@@ -1309,7 +1325,6 @@ retry:
if (new_fl2)
locks_free_lock(new_fl2);
locks_dispose_list(&dispose);
- trace_posix_lock_inode(inode, request, error);
return error;
}
@@ -1666,7 +1681,7 @@ int fcntl_getlease(struct file *filp)
* conflict with the lease we're trying to set.
*/
static int
-check_conflicting_open(struct file *filp, const long arg, int flags)
+check_conflicting_open(struct file *filp, const int arg, int flags)
{
struct inode *inode = file_inode(filp);
int self_wcount = 0, self_rcount = 0;
@@ -1701,7 +1716,7 @@ check_conflicting_open(struct file *filp, const long arg, int flags)
}
static int
-generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
+generic_add_lease(struct file *filp, int arg, struct file_lock **flp, void **priv)
{
struct file_lock *fl, *my_fl = NULL, *lease;
struct inode *inode = file_inode(filp);
@@ -1859,7 +1874,7 @@ static int generic_delete_lease(struct file *filp, void *owner)
* The (input) flp->fl_lmops->lm_break function is required
* by break_lease().
*/
-int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
+int generic_setlease(struct file *filp, int arg, struct file_lock **flp,
void **priv)
{
struct inode *inode = file_inode(filp);
@@ -1906,7 +1921,7 @@ lease_notifier_chain_init(void)
}
static inline void
-setlease_notifier(long arg, struct file_lock *lease)
+setlease_notifier(int arg, struct file_lock *lease)
{
if (arg != F_UNLCK)
srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
@@ -1942,7 +1957,7 @@ EXPORT_SYMBOL_GPL(lease_unregister_notifier);
* may be NULL if the lm_setup operation doesn't require it.
*/
int
-vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
+vfs_setlease(struct file *filp, int arg, struct file_lock **lease, void **priv)
{
if (lease)
setlease_notifier(arg, *lease);
@@ -1953,7 +1968,7 @@ vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
}
EXPORT_SYMBOL_GPL(vfs_setlease);
-static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
+static int do_fcntl_add_lease(unsigned int fd, struct file *filp, int arg)
{
struct file_lock *fl;
struct fasync_struct *new;
@@ -1988,7 +2003,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
* Note that you also need to call %F_SETSIG to
* receive a signal when the lease is broken.
*/
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
{
if (arg == F_UNLCK)
return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
@@ -2136,7 +2151,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock);
* @fl: The file_lock who's fl_pid should be translated
* @ns: The namespace into which the pid should be translated
*
- * Used to tranlate a fl_pid into a namespace virtual pid number
+ * Used to translate a fl_pid into a namespace virtual pid number
*/
static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
{
@@ -2207,7 +2222,8 @@ int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
if (fl == NULL)
return -ENOMEM;
error = -EINVAL;
- if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
+ if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
+ && flock->l_type != F_WRLCK)
goto out;
error = flock_to_posix_lock(filp, fl, flock);
@@ -2414,7 +2430,8 @@ int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
return -ENOMEM;
error = -EINVAL;
- if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
+ if (cmd != F_OFD_GETLK && flock->l_type != F_RDLCK
+ && flock->l_type != F_WRLCK)
goto out;
error = flock64_to_posix_lock(filp, fl, flock);
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 870207ba23f1..25c08fbfcb9d 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -251,7 +251,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode)
}
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = j;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_blocks = 0;
memset(&minix_i(inode)->u, 0, sizeof(minix_i(inode)->u));
insert_inode_hash(inode);
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index bf9858f76b6a..20f23e6e58ad 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -281,7 +281,7 @@ got_it:
de->inode = inode->i_ino;
}
dir_commit_chunk(page, pos, sbi->s_dirsize);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
err = minix_handle_dirsync(dir);
out_put:
@@ -313,7 +313,7 @@ int minix_delete_entry(struct minix_dir_entry *de, struct page *page)
else
de->inode = 0;
dir_commit_chunk(page, pos, len);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
return minix_handle_dirsync(inode);
}
@@ -436,7 +436,7 @@ int minix_set_link(struct minix_dir_entry *de, struct page *page,
else
de->inode = inode->i_ino;
dir_commit_chunk(page, pos, sbi->s_dirsize);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
return minix_handle_dirsync(dir);
}
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e9fbb5303a22..df575473c1cc 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -501,10 +501,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
i_gid_write(inode, raw_inode->i_gid);
set_nlink(inode, raw_inode->i_nlinks);
inode->i_size = raw_inode->i_size;
- inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
- inode->i_mtime.tv_nsec = 0;
- inode->i_atime.tv_nsec = 0;
- inode->i_ctime.tv_nsec = 0;
+ inode->i_mtime = inode->i_atime = inode_set_ctime(inode, raw_inode->i_time, 0);
inode->i_blocks = 0;
for (i = 0; i < 9; i++)
minix_inode->u.i1_data[i] = raw_inode->i_zone[i];
@@ -543,10 +540,9 @@ static struct inode *V2_minix_iget(struct inode *inode)
inode->i_size = raw_inode->i_size;
inode->i_mtime.tv_sec = raw_inode->i_mtime;
inode->i_atime.tv_sec = raw_inode->i_atime;
- inode->i_ctime.tv_sec = raw_inode->i_ctime;
+ inode_set_ctime(inode, raw_inode->i_ctime, 0);
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
- inode->i_ctime.tv_nsec = 0;
inode->i_blocks = 0;
for (i = 0; i < 10; i++)
minix_inode->u.i2_data[i] = raw_inode->i_zone[i];
@@ -622,7 +618,7 @@ static struct buffer_head * V2_minix_update_inode(struct inode * inode)
raw_inode->i_size = inode->i_size;
raw_inode->i_mtime = inode->i_mtime.tv_sec;
raw_inode->i_atime = inode->i_atime.tv_sec;
- raw_inode->i_ctime = inode->i_ctime.tv_sec;
+ raw_inode->i_ctime = inode_get_ctime(inode).tv_sec;
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
raw_inode->i_zone[0] = old_encode_dev(inode->i_rdev);
else for (i = 0; i < 10; i++)
@@ -660,7 +656,7 @@ int minix_getattr(struct mnt_idmap *idmap, const struct path *path,
struct super_block *sb = path->dentry->d_sb;
struct inode *inode = d_inode(path->dentry);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (INODE_VERSION(inode) == MINIX_V1)
stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
else
diff --git a/fs/minix/itree_common.c b/fs/minix/itree_common.c
index 446148792f41..ce18ae37c29d 100644
--- a/fs/minix/itree_common.c
+++ b/fs/minix/itree_common.c
@@ -131,7 +131,7 @@ static inline int splice_branch(struct inode *inode,
/* We are done with atomic stuff, now do the rest of housekeeping */
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
/* had we spliced it onto indirect block? */
if (where->bh)
@@ -350,7 +350,7 @@ do_indirects:
}
first_whole++;
}
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
}
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index 956d5183828d..114084d5636a 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -98,7 +98,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
{
struct inode *inode = d_inode(old_dentry);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_link_count(inode);
ihold(inode);
return add_nondir(dentry, inode);
@@ -154,7 +154,7 @@ static int minix_unlink(struct inode * dir, struct dentry *dentry)
if (err)
return err;
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
inode_dec_link_count(inode);
return 0;
}
@@ -218,7 +218,7 @@ static int minix_rename(struct mnt_idmap *idmap,
put_page(new_page);
if (err)
goto out_dir;
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
if (dir_de)
drop_nlink(new_inode);
inode_dec_link_count(new_inode);
diff --git a/fs/namei.c b/fs/namei.c
index e56ff39a79bc..567ee547492b 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -643,6 +643,8 @@ static bool nd_alloc_stack(struct nameidata *nd)
/**
* path_connected - Verify that a dentry is below mnt.mnt_root
+ * @mnt: The mountpoint to check.
+ * @dentry: The dentry to check.
*
* Rename can sometimes move a file or directory outside of a bind
* mount, path_connected allows those cases to be detected.
@@ -1083,6 +1085,7 @@ fs_initcall(init_fs_namei_sysctls);
/**
* may_follow_link - Check symlink following for unsafe situations
* @nd: nameidata pathwalk data
+ * @inode: Used for idmapping.
*
* In the case of the sysctl_protected_symlinks sysctl being enabled,
* CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
@@ -2890,7 +2893,7 @@ int path_pts(struct path *path)
dput(path->dentry);
path->dentry = parent;
child = d_hash_and_lookup(parent, &this);
- if (!child)
+ if (IS_ERR_OR_NULL(child))
return -ENOENT;
path->dentry = child;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index c1eda73254e1..6bed1394d748 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -59,7 +59,7 @@ __be32 nfs4_callback_getattr(void *argp, void *resp,
res->change_attr = delegation->change_attr;
if (nfs_have_writebacks(inode))
res->change_attr++;
- res->ctime = inode->i_ctime;
+ res->ctime = inode_get_ctime(inode);
res->mtime = inode->i_mtime;
res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) &
args->bitmap[0];
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9a18c5a69ace..aaffaaa336cc 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -472,20 +472,26 @@ out:
return result;
}
-static void
-nfs_direct_join_group(struct list_head *list, struct inode *inode)
+static void nfs_direct_join_group(struct list_head *list, struct inode *inode)
{
- struct nfs_page *req, *next;
+ struct nfs_page *req, *subreq;
list_for_each_entry(req, list, wb_list) {
- if (req->wb_head != req || req->wb_this_page == req)
+ if (req->wb_head != req)
continue;
- for (next = req->wb_this_page;
- next != req->wb_head;
- next = next->wb_this_page) {
- nfs_list_remove_request(next);
- nfs_release_request(next);
- }
+ subreq = req->wb_this_page;
+ if (subreq == req)
+ continue;
+ do {
+ /*
+ * Remove subrequests from this list before freeing
+ * them in the call to nfs_join_page_group().
+ */
+ if (!list_empty(&subreq->wb_list)) {
+ nfs_list_remove_request(subreq);
+ nfs_release_request(subreq);
+ }
+ } while ((subreq = subreq->wb_this_page) != req);
nfs_join_page_group(req, inode);
}
}
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index e1706e736c64..2dc64454492b 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -116,8 +116,8 @@ static inline void nfs_fscache_update_auxdata(struct nfs_fscache_inode_auxdata *
memset(auxdata, 0, sizeof(*auxdata));
auxdata->mtime_sec = inode->i_mtime.tv_sec;
auxdata->mtime_nsec = inode->i_mtime.tv_nsec;
- auxdata->ctime_sec = inode->i_ctime.tv_sec;
- auxdata->ctime_nsec = inode->i_ctime.tv_nsec;
+ auxdata->ctime_sec = inode_get_ctime(inode).tv_sec;
+ auxdata->ctime_nsec = inode_get_ctime(inode).tv_nsec;
if (NFS_SERVER(inode)->nfs_client->rpc_ops->version == 4)
auxdata->change_attr = inode_peek_iversion_raw(inode);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 8172dd4135a1..e21c073158e5 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -514,7 +514,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
memset(&inode->i_atime, 0, sizeof(inode->i_atime));
memset(&inode->i_mtime, 0, sizeof(inode->i_mtime));
- memset(&inode->i_ctime, 0, sizeof(inode->i_ctime));
+ inode_set_ctime(inode, 0, 0);
inode_set_iversion_raw(inode, 0);
inode->i_size = 0;
clear_nlink(inode);
@@ -535,7 +535,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
@@ -731,7 +731,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
if ((attr->ia_valid & ATTR_GID) != 0)
inode->i_gid = attr->ia_gid;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -749,7 +749,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -765,7 +765,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr,
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE
| NFS_INO_INVALID_CTIME);
@@ -912,7 +912,7 @@ out_no_revalidate:
/* Only return attributes that were revalidated. */
stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
stat->change_cookie = inode_peek_iversion_raw(inode);
stat->attributes_mask |= STATX_ATTR_CHANGE_MONOTONIC;
@@ -1444,11 +1444,11 @@ static void nfs_wcc_update_inode(struct inode *inode, struct nfs_fattr *fattr)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
}
/* If we have atomic WCC data, we may update some attributes */
- ts = inode->i_ctime;
+ ts = inode_get_ctime(inode);
if ((fattr->valid & NFS_ATTR_FATTR_PRECTIME)
&& (fattr->valid & NFS_ATTR_FATTR_CTIME)
&& timespec64_equal(&ts, &fattr->pre_ctime)) {
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
}
ts = inode->i_mtime;
@@ -1510,7 +1510,7 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
invalid |= NFS_INO_INVALID_MTIME;
- ts = inode->i_ctime;
+ ts = inode_get_ctime(inode);
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) && !timespec64_equal(&ts, &fattr->ctime))
invalid |= NFS_INO_INVALID_CTIME;
@@ -1997,7 +1997,7 @@ int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fa
}
if ((fattr->valid & NFS_ATTR_FATTR_CTIME) != 0 &&
(fattr->valid & NFS_ATTR_FATTR_PRECTIME) == 0) {
- fattr->pre_ctime = inode->i_ctime;
+ fattr->pre_ctime = inode_get_ctime(inode);
fattr->valid |= NFS_ATTR_FATTR_PRECTIME;
}
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) != 0 &&
@@ -2190,7 +2190,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
save_cache_validity & NFS_INO_INVALID_MTIME;
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
- inode->i_ctime = fattr->ctime;
+ inode_set_ctime_to_ts(inode, fattr->ctime);
else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
nfsi->cache_validity |=
save_cache_validity & NFS_INO_INVALID_CTIME;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 19d51ebf842c..e7494cdd957e 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -215,7 +215,8 @@ nfs_namespace_getattr(struct mnt_idmap *idmap,
if (NFS_FH(d_inode(path->dentry))->size != 0)
return nfs_getattr(idmap, path, stat, request_mask,
query_flags);
- generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+ stat);
return 0;
}
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 63802d195556..49f78e23b34c 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1377,7 +1377,6 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
for (i = 0; i < np; i++) {
pages[i] = alloc_page(GFP_KERNEL);
if (!pages[i]) {
- np = i + 1;
err = -ENOMEM;
goto out;
}
@@ -1401,8 +1400,8 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
} while (exception.retry);
out:
- while (--np >= 0)
- __free_page(pages[np]);
+ while (--i >= 0)
+ __free_page(pages[i]);
kfree(pages);
return err;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 4c9f8bd866ab..47c5c1f86d66 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -328,7 +328,7 @@ extern int update_open_stateid(struct nfs4_state *state,
const nfs4_stateid *open_stateid,
const nfs4_stateid *deleg_stateid,
fmode_t fmode);
-extern int nfs4_proc_setlease(struct file *file, long arg,
+extern int nfs4_proc_setlease(struct file *file, int arg,
struct file_lock **lease, void **priv);
extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
struct nfs_fsinfo *fsinfo);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 4aeadd6e1a6d..02788c3c85e5 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -438,7 +438,7 @@ void nfs42_ssc_unregister_ops(void)
}
#endif /* CONFIG_NFS_V4_2 */
-static int nfs4_setlease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_setlease(struct file *file, int arg, struct file_lock **lease,
void **priv)
{
return nfs4_proc_setlease(file, arg, lease, priv);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e1a886b58354..d57aaf0cc577 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -6004,9 +6004,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf,
out_ok:
ret = res.acl_len;
out_free:
- for (i = 0; i < npages; i++)
- if (pages[i])
- __free_page(pages[i]);
+ while (--i >= 0)
+ __free_page(pages[i]);
if (res.acl_scratch)
__free_page(res.acl_scratch);
kfree(pages);
@@ -7181,8 +7180,15 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
} else if (!nfs4_update_lock_stateid(lsp, &data->res.stateid))
goto out_restart;
break;
- case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OLD_STATEID:
+ if (data->arg.new_lock_owner != 0 &&
+ nfs4_refresh_open_old_stateid(&data->arg.open_stateid,
+ lsp->ls_state))
+ goto out_restart;
+ if (nfs4_refresh_lock_old_stateid(&data->arg.lock_stateid, lsp))
+ goto out_restart;
+ fallthrough;
+ case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_STALE_STATEID:
case -NFS4ERR_EXPIRED:
if (data->arg.new_lock_owner != 0) {
@@ -7573,7 +7579,7 @@ static int nfs4_delete_lease(struct file *file, void **priv)
return generic_setlease(file, F_UNLCK, NULL, priv);
}
-static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
+static int nfs4_add_lease(struct file *file, int arg, struct file_lock **lease,
void **priv)
{
struct inode *inode = file_inode(file);
@@ -7591,7 +7597,7 @@ static int nfs4_add_lease(struct file *file, long arg, struct file_lock **lease,
return -EAGAIN;
}
-int nfs4_proc_setlease(struct file *file, long arg, struct file_lock **lease,
+int nfs4_proc_setlease(struct file *file, int arg, struct file_lock **lease,
void **priv)
{
switch (arg) {
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index acda8f033d30..bf378ecd5d9f 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -345,8 +345,10 @@ void nfs_sysfs_move_sb_to_server(struct nfs_server *server)
int ret = -ENOMEM;
s = kasprintf(GFP_KERNEL, "server-%d", server->s_sysfs_id);
- if (s)
+ if (s) {
ret = kobject_rename(&server->kobj, s);
+ kfree(s);
+ }
if (ret < 0)
pr_warn("NFS: rename sysfs %s failed (%d)\n",
server->kobj.name, ret);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6e61fa3acaf1..daf305daa751 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1354,9 +1354,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)
trace_nfsd_stid_revoke(&dp->dl_stid);
if (clp->cl_minorversion) {
+ spin_lock(&clp->cl_lock);
dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
refcount_inc(&dp->dl_stid.sc_count);
- spin_lock(&clp->cl_lock);
list_add(&dp->dl_recall_lru, &clp->cl_revoked);
spin_unlock(&clp->cl_lock);
}
@@ -6341,8 +6341,6 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
return status;
- if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid))
- return status;
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid);
if (!s)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 1b8b1aab9a15..3709830f90a6 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1105,6 +1105,7 @@ static ssize_t write_v4_end_grace(struct file *file, char *buf, size_t size)
if (!nn->nfsd_serv)
return -EBUSY;
trace_nfsd_end_grace(netns(file));
+ nfsd4_end_grace(nn);
break;
default:
return -EINVAL;
@@ -1131,7 +1132,7 @@ static struct inode *nfsd_get_inode(struct super_block *sb, umode_t mode)
/* Following advice from simple_fill_super documentation: */
inode->i_ino = iunique(sb, NFSD_MaxReserved);
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 8a2321d19194..9b7acba382fe 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -520,7 +520,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_sanitize_attrs(inode, iap);
- if (check_guard && guardtime != inode->i_ctime.tv_sec)
+ if (check_guard && guardtime != inode_get_ctime(inode).tv_sec)
return nfserr_notsync;
/*
@@ -956,10 +956,13 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
last_page = page + (offset + sd->len - 1) / PAGE_SIZE;
for (page += offset / PAGE_SIZE; page <= last_page; page++) {
/*
- * Skip page replacement when extending the contents
- * of the current page.
+ * Skip page replacement when extending the contents of the
+ * current page. But note that we may get two zero_pages in a
+ * row from shmem.
*/
- if (page == *(rqstp->rq_next_page - 1))
+ if (page == *(rqstp->rq_next_page - 1) &&
+ offset_in_page(rqstp->rq_res.page_base +
+ rqstp->rq_res.page_len))
continue;
if (unlikely(!svc_rqst_replace_page(rqstp, page)))
return -EIO;
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index decd6471300b..bce734b68f08 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -429,7 +429,7 @@ void nilfs_set_link(struct inode *dir, struct nilfs_dir_entry *de,
nilfs_set_de_type(de, inode);
nilfs_commit_chunk(page, mapping, from, to);
nilfs_put_page(page);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
}
/*
@@ -519,7 +519,7 @@ got_it:
de->inode = cpu_to_le64(inode->i_ino);
nilfs_set_de_type(de, inode);
nilfs_commit_chunk(page, page->mapping, from, to);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
nilfs_mark_inode_dirty(dir);
/* OFFSET_CACHE */
out_put:
@@ -567,7 +567,7 @@ int nilfs_delete_entry(struct nilfs_dir_entry *dir, struct page *page)
pde->rec_len = nilfs_rec_len_to_disk(to - from);
dir->inode = 0;
nilfs_commit_chunk(page, mapping, from, to);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
out:
nilfs_put_page(page);
return err;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index a8ce522ac747..d588c719d743 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -366,7 +366,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
atomic64_inc(&root->inodes_count);
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = ino;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
err = nilfs_bmap_read(ii->i_bmap, NULL);
@@ -450,10 +450,10 @@ int nilfs_read_inode_common(struct inode *inode,
set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
inode->i_size = le64_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
- inode->i_ctime.tv_sec = le64_to_cpu(raw_inode->i_ctime);
+ inode_set_ctime(inode, le64_to_cpu(raw_inode->i_ctime),
+ le32_to_cpu(raw_inode->i_ctime_nsec));
inode->i_mtime.tv_sec = le64_to_cpu(raw_inode->i_mtime);
inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
- inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
if (nilfs_is_metadata_file_inode(inode) && !S_ISREG(inode->i_mode))
return -EIO; /* this inode is for metadata and corrupted */
@@ -768,9 +768,9 @@ void nilfs_write_inode_common(struct inode *inode,
raw_inode->i_gid = cpu_to_le32(i_gid_read(inode));
raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
raw_inode->i_size = cpu_to_le64(inode->i_size);
- raw_inode->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
+ raw_inode->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
raw_inode->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
- raw_inode->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ raw_inode->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
raw_inode->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
raw_inode->i_blocks = cpu_to_le64(inode->i_blocks);
@@ -875,7 +875,7 @@ void nilfs_truncate(struct inode *inode)
nilfs_truncate_bmap(ii, blkoff);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
if (IS_SYNC(inode))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
@@ -1101,9 +1101,17 @@ int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty)
int __nilfs_mark_inode_dirty(struct inode *inode, int flags)
{
+ struct the_nilfs *nilfs = inode->i_sb->s_fs_info;
struct buffer_head *ibh;
int err;
+ /*
+ * Do not dirty inodes after the log writer has been detached
+ * and its nilfs_root struct has been freed.
+ */
+ if (unlikely(nilfs_purging(nilfs)))
+ return 0;
+
err = nilfs_load_inode_block(inode, &ibh);
if (unlikely(err)) {
nilfs_warn(inode->i_sb,
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 1dfbc0c34513..40ffade49f38 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -149,7 +149,7 @@ int nilfs_fileattr_set(struct mnt_idmap *idmap,
NILFS_I(inode)->i_flags = oldflags | (flags & FS_FL_USER_MODIFIABLE);
nilfs_set_inode_flags(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (IS_SYNC(inode))
nilfs_set_transaction_flag(NILFS_TI_SYNC);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index c7024da8f1e2..2a4e7f4a8102 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -185,7 +185,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
if (err)
return err;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_link_count(inode);
ihold(inode);
@@ -283,7 +283,7 @@ static int nilfs_do_unlink(struct inode *dir, struct dentry *dentry)
if (err)
goto out;
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
drop_nlink(inode);
err = 0;
out:
@@ -387,7 +387,7 @@ static int nilfs_rename(struct mnt_idmap *idmap,
goto out_dir;
nilfs_set_link(new_dir, new_de, new_page, old_inode);
nilfs_mark_inode_dirty(new_dir);
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
if (dir_de)
drop_nlink(new_inode);
drop_nlink(new_inode);
@@ -406,7 +406,7 @@ static int nilfs_rename(struct mnt_idmap *idmap,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = current_time(old_inode);
+ inode_set_ctime_current(old_inode);
nilfs_delete_entry(old_de, old_page);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index c2553024bd25..7ec16879756e 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -725,6 +725,11 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
struct folio *folio = fbatch.folios[i];
folio_lock(folio);
+ if (unlikely(folio->mapping != mapping)) {
+ /* Exclude folios removed from the address space */
+ folio_unlock(folio);
+ continue;
+ }
head = folio_buffers(folio);
if (!head) {
create_empty_buffers(&folio->page, i_blocksize(inode), 0);
@@ -2845,6 +2850,7 @@ void nilfs_detach_log_writer(struct super_block *sb)
nilfs_segctor_destroy(nilfs->ns_writer);
nilfs->ns_writer = NULL;
}
+ set_nilfs_purging(nilfs);
/* Force to free the list of dirty files */
spin_lock(&nilfs->ns_inode_lock);
@@ -2857,4 +2863,5 @@ void nilfs_detach_log_writer(struct super_block *sb)
up_write(&nilfs->ns_segctor_sem);
nilfs_dispose_list(nilfs, &garbage_list, 1);
+ clear_nilfs_purging(nilfs);
}
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 0ef8c71bde8e..a5d1fa4e7552 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -35,6 +35,7 @@
#include <linux/writeback.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
+#include <linux/fs_context.h>
#include "nilfs.h"
#include "export.h"
#include "mdt.h"
@@ -1216,7 +1217,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data)
}
struct nilfs_super_data {
- struct block_device *bdev;
__u64 cno;
int flags;
};
@@ -1283,64 +1283,49 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd)
static int nilfs_set_bdev_super(struct super_block *s, void *data)
{
- s->s_bdev = data;
- s->s_dev = s->s_bdev->bd_dev;
+ s->s_dev = *(dev_t *)data;
return 0;
}
static int nilfs_test_bdev_super(struct super_block *s, void *data)
{
- return (void *)s->s_bdev == data;
+ return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
}
static struct dentry *
nilfs_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data)
{
- struct nilfs_super_data sd;
+ struct nilfs_super_data sd = { .flags = flags };
struct super_block *s;
- struct dentry *root_dentry;
- int err, s_new = false;
+ dev_t dev;
+ int err;
- sd.bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
- NULL);
- if (IS_ERR(sd.bdev))
- return ERR_CAST(sd.bdev);
+ if (nilfs_identify(data, &sd))
+ return ERR_PTR(-EINVAL);
- sd.cno = 0;
- sd.flags = flags;
- if (nilfs_identify((char *)data, &sd)) {
- err = -EINVAL;
- goto failed;
- }
+ err = lookup_bdev(dev_name, &dev);
+ if (err)
+ return ERR_PTR(err);
- /*
- * once the super is inserted into the list by sget, s_umount
- * will protect the lockfs code from trying to start a snapshot
- * while we are mounting
- */
- mutex_lock(&sd.bdev->bd_fsfreeze_mutex);
- if (sd.bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
- err = -EBUSY;
- goto failed;
- }
s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, flags,
- sd.bdev);
- mutex_unlock(&sd.bdev->bd_fsfreeze_mutex);
- if (IS_ERR(s)) {
- err = PTR_ERR(s);
- goto failed;
- }
+ &dev);
+ if (IS_ERR(s))
+ return ERR_CAST(s);
if (!s->s_root) {
- s_new = true;
-
- /* New superblock instance created */
- snprintf(s->s_id, sizeof(s->s_id), "%pg", sd.bdev);
- sb_set_blocksize(s, block_size(sd.bdev));
-
- err = nilfs_fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+ /*
+ * We drop s_umount here because we need to open the bdev and
+ * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+ * __invalidate_device()). It is safe because we have active sb
+ * reference and SB_BORN is not set yet.
+ */
+ up_write(&s->s_umount);
+ err = setup_bdev_super(s, flags, NULL);
+ down_write(&s->s_umount);
+ if (!err)
+ err = nilfs_fill_super(s, data,
+ flags & SB_SILENT ? 1 : 0);
if (err)
goto failed_super;
@@ -1366,24 +1351,18 @@ nilfs_mount(struct file_system_type *fs_type, int flags,
}
if (sd.cno) {
+ struct dentry *root_dentry;
+
err = nilfs_attach_snapshot(s, sd.cno, &root_dentry);
if (err)
goto failed_super;
- } else {
- root_dentry = dget(s->s_root);
+ return root_dentry;
}
- if (!s_new)
- blkdev_put(sd.bdev, fs_type);
-
- return root_dentry;
+ return dget(s->s_root);
failed_super:
deactivate_locked_super(s);
-
- failed:
- if (!s_new)
- blkdev_put(sd.bdev, fs_type);
return ERR_PTR(err);
}
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 47c7dfbb7ea5..cd4ae1b8ae16 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -29,6 +29,7 @@ enum {
THE_NILFS_DISCONTINUED, /* 'next' pointer chain has broken */
THE_NILFS_GC_RUNNING, /* gc process is running */
THE_NILFS_SB_DIRTY, /* super block is dirty */
+ THE_NILFS_PURGING, /* disposing dirty files for cleanup */
};
/**
@@ -208,6 +209,7 @@ THE_NILFS_FNS(INIT, init)
THE_NILFS_FNS(DISCONTINUED, discontinued)
THE_NILFS_FNS(GC_RUNNING, gc_running)
THE_NILFS_FNS(SB_DIRTY, sb_dirty)
+THE_NILFS_FNS(PURGING, purging)
/*
* Mount option operations
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 52ccd34b1e79..a026dbd3593f 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -272,7 +272,7 @@ int unregister_nls(struct nls_table * nls)
return -EINVAL;
}
-static struct nls_table *find_nls(char *charset)
+static struct nls_table *find_nls(const char *charset)
{
struct nls_table *nls;
spin_lock(&nls_lock);
@@ -288,7 +288,7 @@ static struct nls_table *find_nls(char *charset)
return nls;
}
-struct nls_table *load_nls(char *charset)
+struct nls_table *load_nls(const char *charset)
{
return try_then_request_module(find_nls(charset), "nls_%s", charset);
}
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 190aa717fa32..ebdcc25df0f7 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -199,7 +199,7 @@ void dnotify_flush(struct file *filp, fl_owner_t id)
}
/* this conversion is done only at watch creation */
-static __u32 convert_arg(unsigned long arg)
+static __u32 convert_arg(unsigned int arg)
{
__u32 new_mask = FS_EVENT_ON_CHILD;
@@ -258,7 +258,7 @@ static int attach_dn(struct dnotify_struct *dn, struct dnotify_mark *dn_mark,
* up here. Allocate both a mark for fsnotify to add and a dnotify_struct to be
* attached to the fsnotify_mark.
*/
-int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
{
struct dnotify_mark *new_dn_mark, *dn_mark;
struct fsnotify_mark *new_fsn_mark, *fsn_mark;
diff --git a/fs/nsfs.c b/fs/nsfs.c
index f602a96a1afe..647a22433bd8 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -84,7 +84,7 @@ slow:
return -ENOMEM;
}
inode->i_ino = ns->inum;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_flags |= S_IMMUTABLE;
inode->i_mode = S_IFREG | S_IRUGO;
inode->i_fop = &ns_file_operations;
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 518c3a21a556..4596c90e7b7c 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1525,10 +1525,11 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
#endif /* NTFS_RW */
+WRAP_DIR_ITER(ntfs_readdir) // FIXME!
const struct file_operations ntfs_dir_ops = {
.llseek = generic_file_llseek, /* Seek inside directory. */
.read = generic_read_dir, /* Return -EISDIR. */
- .iterate = ntfs_readdir, /* Read directory contents. */
+ .iterate_shared = shared_ntfs_readdir, /* Read directory contents. */
#ifdef NTFS_RW
.fsync = ntfs_dir_fsync, /* Sync a directory to disk. */
#endif /* NTFS_RW */
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 6c3f38d66579..99ac6ea277c4 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -654,7 +654,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
* always changes, when mtime is changed. ctime can be changed on its
* own, mtime is then not changed, e.g. when a file is renamed.
*/
- vi->i_ctime = ntfs2utc(si->last_mft_change_time);
+ inode_set_ctime_to_ts(vi, ntfs2utc(si->last_mft_change_time));
/*
* Last access to the data within the file. Not changed during a rename
* for example but changed whenever the file is written to.
@@ -1218,7 +1218,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
vi->i_gid = base_vi->i_gid;
set_nlink(vi, base_vi->i_nlink);
vi->i_mtime = base_vi->i_mtime;
- vi->i_ctime = base_vi->i_ctime;
+ inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
vi->i_atime = base_vi->i_atime;
vi->i_generation = ni->seq_no = base_ni->seq_no;
@@ -1484,7 +1484,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
vi->i_gid = base_vi->i_gid;
set_nlink(vi, base_vi->i_nlink);
vi->i_mtime = base_vi->i_mtime;
- vi->i_ctime = base_vi->i_ctime;
+ inode_set_ctime_to_ts(vi, inode_get_ctime(base_vi));
vi->i_atime = base_vi->i_atime;
vi->i_generation = ni->seq_no = base_ni->seq_no;
/* Set inode type to zero but preserve permissions. */
@@ -2804,13 +2804,14 @@ done:
*/
if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
struct timespec64 now = current_time(VFS_I(base_ni));
+ struct timespec64 ctime = inode_get_ctime(VFS_I(base_ni));
int sync_it = 0;
if (!timespec64_equal(&VFS_I(base_ni)->i_mtime, &now) ||
- !timespec64_equal(&VFS_I(base_ni)->i_ctime, &now))
+ !timespec64_equal(&ctime, &now))
sync_it = 1;
+ inode_set_ctime_to_ts(VFS_I(base_ni), now);
VFS_I(base_ni)->i_mtime = now;
- VFS_I(base_ni)->i_ctime = now;
if (sync_it)
mark_inode_dirty_sync(VFS_I(base_ni));
@@ -2928,7 +2929,7 @@ int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (ia_valid & ATTR_MTIME)
vi->i_mtime = attr->ia_mtime;
if (ia_valid & ATTR_CTIME)
- vi->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(vi, attr->ia_ctime);
mark_inode_dirty(vi);
out:
return err;
@@ -3004,7 +3005,7 @@ int __ntfs_write_inode(struct inode *vi, int sync)
si->last_data_change_time = nt;
modified = true;
}
- nt = utc2ntfs(vi->i_ctime);
+ nt = utc2ntfs(inode_get_ctime(vi));
if (si->last_mft_change_time != nt) {
ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
"new = 0x%llx", vi->i_ino, (long long)
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 0155f106ec34..ad1a8f72da22 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -2682,8 +2682,7 @@ mft_rec_already_initialized:
vi->i_mode &= ~S_IWUGO;
/* Set the inode times to the current time. */
- vi->i_atime = vi->i_mtime = vi->i_ctime =
- current_time(vi);
+ vi->i_atime = vi->i_mtime = inode_set_ctime_current(vi);
/*
* Set the file size to 0, the ntfs inode sizes are set to 0 by
* the call to ntfs_init_big_inode() below.
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 1d6c824246c4..962f12ce6c0a 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -85,7 +85,7 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED;
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
stat->result_mask |= STATX_BTIME;
stat->btime = ni->i_crtime;
@@ -342,7 +342,7 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count,
err = 0;
}
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
if (IS_SYNC(inode)) {
@@ -400,7 +400,7 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
ni_unlock(ni);
ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
if (!IS_DIRSYNC(inode)) {
dirty = 1;
} else {
@@ -642,7 +642,7 @@ out:
filemap_invalidate_unlock(mapping);
if (!err) {
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
}
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 16bd9faa2d28..2b85cb10f0be 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -3265,6 +3265,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
if (is_rec_inuse(ni->mi.mrec) &&
!(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) {
bool modified = false;
+ struct timespec64 ctime = inode_get_ctime(inode);
/* Update times in standard attribute. */
std = ni_std(ni);
@@ -3280,7 +3281,7 @@ int ni_write_inode(struct inode *inode, int sync, const char *hint)
modified = true;
}
- dup.c_time = kernel2nt(&inode->i_ctime);
+ dup.c_time = kernel2nt(&ctime);
if (std->c_time != dup.c_time) {
std->c_time = dup.c_time;
modified = true;
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index dc7e7ab701c6..4123e126c4d0 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -44,6 +44,7 @@ static struct inode *ntfs_read_mft(struct inode *inode,
u64 t64;
struct MFT_REC *rec;
struct runs_tree *run;
+ struct timespec64 ctime;
inode->i_op = NULL;
/* Setup 'uid' and 'gid' */
@@ -169,7 +170,8 @@ next_attr:
nt2kernel(std5->cr_time, &ni->i_crtime);
#endif
nt2kernel(std5->a_time, &inode->i_atime);
- nt2kernel(std5->c_time, &inode->i_ctime);
+ ctime = inode_get_ctime(inode);
+ nt2kernel(std5->c_time, &ctime);
nt2kernel(std5->m_time, &inode->i_mtime);
ni->std_fa = std5->fa;
@@ -958,7 +960,7 @@ int ntfs_write_end(struct file *file, struct address_space *mapping, loff_t pos,
if (err >= 0) {
if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) {
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE;
dirty = true;
}
@@ -1658,8 +1660,8 @@ struct inode *ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
d_instantiate(dentry, inode);
/* Set original time. inode times (i_ctime) may be changed in ntfs_init_acl. */
- inode->i_atime = inode->i_mtime = inode->i_ctime = dir->i_mtime =
- dir->i_ctime = ni->i_crtime;
+ inode->i_atime = inode->i_mtime = inode_set_ctime_to_ts(inode, ni->i_crtime);
+ dir->i_mtime = inode_set_ctime_to_ts(dir, ni->i_crtime);
mark_inode_dirty(dir);
mark_inode_dirty(inode);
@@ -1765,9 +1767,9 @@ int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry)
if (!err) {
drop_nlink(inode);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
if (inode->i_nlink)
mark_inode_dirty(inode);
} else if (!ni_remove_name_undo(dir_ni, ni, de, de2, undo_remove)) {
diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c
index 70f8c859e0ad..ad430d50bd79 100644
--- a/fs/ntfs3/namei.c
+++ b/fs/ntfs3/namei.c
@@ -156,8 +156,8 @@ static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de)
err = ntfs_link_inode(inode, de);
if (!err) {
- dir->i_ctime = dir->i_mtime = inode->i_ctime =
- current_time(dir);
+ dir->i_mtime = inode_set_ctime_to_ts(inode,
+ inode_set_ctime_current(dir));
mark_inode_dirty(inode);
mark_inode_dirty(dir);
d_instantiate(de, inode);
@@ -324,14 +324,11 @@ static int ntfs_rename(struct mnt_idmap *idmap, struct inode *dir,
/* Restore after failed rename failed too. */
_ntfs_bad_inode(inode);
} else if (!err) {
- inode->i_ctime = dir->i_ctime = dir->i_mtime =
- current_time(dir);
+ simple_rename_timestamp(dir, dentry, new_dir, new_dentry);
mark_inode_dirty(inode);
mark_inode_dirty(dir);
- if (dir != new_dir) {
- new_dir->i_mtime = new_dir->i_ctime = dir->i_ctime;
+ if (dir != new_dir)
mark_inode_dirty(new_dir);
- }
if (IS_DIRSYNC(dir))
ntfs_sync_inode(dir);
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index 1a02072b6b0e..5fffddea554f 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -569,9 +569,9 @@ static void init_once(void *foo)
}
/*
- * put_ntfs - Noinline to reduce binary size.
+ * Noinline to reduce binary size.
*/
-static noinline void put_ntfs(struct ntfs_sb_info *sbi)
+static noinline void ntfs3_free_sbi(struct ntfs_sb_info *sbi)
{
kfree(sbi->new_rec);
kvfree(ntfs_put_shared(sbi->upcase));
@@ -625,12 +625,6 @@ static void ntfs_put_super(struct super_block *sb)
/* Mark rw ntfs as clear, if possible. */
ntfs_set_state(sbi, NTFS_DIRTY_CLEAR);
-
- put_mount_options(sbi->options);
- put_ntfs(sbi);
- sb->s_fs_info = NULL;
-
- sync_blockdev(sb->s_bdev);
}
static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1564,15 +1558,7 @@ load_root:
put_inode_out:
iput(inode);
out:
- /*
- * Free resources here.
- * ntfs_fs_free will be called with fc->s_fs_info = NULL
- */
- put_mount_options(sbi->options);
- put_ntfs(sbi);
- sb->s_fs_info = NULL;
kfree(boot2);
-
return err;
}
@@ -1659,7 +1645,7 @@ static void ntfs_fs_free(struct fs_context *fc)
struct ntfs_sb_info *sbi = fc->s_fs_info;
if (sbi)
- put_ntfs(sbi);
+ ntfs3_free_sbi(sbi);
if (opts)
put_mount_options(opts);
@@ -1728,13 +1714,24 @@ free_opts:
return -ENOMEM;
}
+static void ntfs3_kill_sb(struct super_block *sb)
+{
+ struct ntfs_sb_info *sbi = sb->s_fs_info;
+
+ kill_block_super(sb);
+
+ if (sbi->options)
+ put_mount_options(sbi->options);
+ ntfs3_free_sbi(sbi);
+}
+
// clang-format off
static struct file_system_type ntfs_fs_type = {
.owner = THIS_MODULE,
.name = "ntfs3",
.init_fs_context = ntfs_init_fs_context,
.parameters = ntfs_fs_parameters,
- .kill_sb = kill_block_super,
+ .kill_sb = ntfs3_kill_sb,
.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
// clang-format on
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index 023f314e8950..29fd391899e5 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -637,7 +637,7 @@ static noinline int ntfs_set_acl_ex(struct mnt_idmap *idmap,
if (!err) {
set_cached_acl(inode, type, acl);
inode->i_mode = mode;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
}
@@ -924,7 +924,7 @@ set_new_fa:
NULL);
out:
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
return err;
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 9fd03eaf15f8..e75137a8e7cb 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -191,10 +191,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh,
}
inode->i_mode = new_mode;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
di->i_mode = cpu_to_le16(inode->i_mode);
- di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 51c93929a146..aef58f1395c8 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -7436,10 +7436,10 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
}
inode->i_blocks = ocfs2_inode_sector_count(inode);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
- di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
- di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8dfc284e85f0..0fdba30740ab 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2048,7 +2048,7 @@ out_write_size:
}
inode->i_blocks = ocfs2_inode_sector_count(inode);
di->i_size = cpu_to_le64((u64)i_size_read(inode));
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
if (handle)
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 694471fc46b8..8b123d543e6e 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1658,7 +1658,7 @@ int __ocfs2_add_entry(handle_t *handle,
offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
if (ocfs2_dirent_would_fit(de, rec_len)) {
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
if (retval < 0) {
mlog_errno(retval);
@@ -2962,11 +2962,11 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
ocfs2_dinode_new_extent_list(dir, di);
i_size_write(dir, sb->s_blocksize);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
di->i_size = cpu_to_le64(sb->s_blocksize);
- di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
- di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
+ di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(dir).tv_sec);
+ di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(dir).tv_nsec);
ocfs2_update_inode_fsync_trans(handle, dir, 1);
/*
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index ba26c5567cff..81265123ce6c 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -337,7 +337,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
if (inode) {
inode->i_ino = get_next_ino();
inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inc_nlink(inode);
inode->i_fop = &simple_dir_operations;
@@ -360,7 +360,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
inode->i_ino = get_next_ino();
inode_init_owner(&nop_mnt_idmap, inode, parent, mode);
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
ip = DLMFS_I(inode);
ip->ip_conn = DLMFS_I(parent)->ip_conn;
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c28bc983a7b1..c3e2961ee5db 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2162,6 +2162,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
struct ocfs2_meta_lvb *lvb;
+ struct timespec64 ctime = inode_get_ctime(inode);
lvb = ocfs2_dlm_lvb(&lockres->l_lksb);
@@ -2185,7 +2186,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
lvb->lvb_iatime_packed =
cpu_to_be64(ocfs2_pack_timespec(&inode->i_atime));
lvb->lvb_ictime_packed =
- cpu_to_be64(ocfs2_pack_timespec(&inode->i_ctime));
+ cpu_to_be64(ocfs2_pack_timespec(&ctime));
lvb->lvb_imtime_packed =
cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
@@ -2208,6 +2209,7 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
struct ocfs2_inode_info *oi = OCFS2_I(inode);
struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres;
struct ocfs2_meta_lvb *lvb;
+ struct timespec64 ctime;
mlog_meta_lvb(0, lockres);
@@ -2238,8 +2240,9 @@ static int ocfs2_refresh_inode_from_lvb(struct inode *inode)
be64_to_cpu(lvb->lvb_iatime_packed));
ocfs2_unpack_timespec(&inode->i_mtime,
be64_to_cpu(lvb->lvb_imtime_packed));
- ocfs2_unpack_timespec(&inode->i_ctime,
+ ocfs2_unpack_timespec(&ctime,
be64_to_cpu(lvb->lvb_ictime_packed));
+ inode_set_ctime_to_ts(inode, ctime);
spin_unlock(&oi->ip_lock);
return 0;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 91a194596552..3b91b4cc7c6a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -232,8 +232,10 @@ int ocfs2_should_update_atime(struct inode *inode,
return 0;
if (vfsmnt->mnt_flags & MNT_RELATIME) {
+ struct timespec64 ctime = inode_get_ctime(inode);
+
if ((timespec64_compare(&inode->i_atime, &inode->i_mtime) <= 0) ||
- (timespec64_compare(&inode->i_atime, &inode->i_ctime) <= 0))
+ (timespec64_compare(&inode->i_atime, &ctime) <= 0))
return 1;
return 0;
@@ -294,7 +296,7 @@ int ocfs2_set_inode_size(handle_t *handle,
i_size_write(inode, new_i_size);
inode->i_blocks = ocfs2_inode_sector_count(inode);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
if (status < 0) {
@@ -415,12 +417,12 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
}
i_size_write(inode, new_i_size);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
di = (struct ocfs2_dinode *) fe_bh->b_data;
di->i_size = cpu_to_le64(new_i_size);
- di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
- di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, fe_bh);
@@ -824,7 +826,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
i_size_write(inode, abs_to);
inode->i_blocks = ocfs2_inode_sector_count(inode);
di->i_size = cpu_to_le64((u64)i_size_read(inode));
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
di->i_mtime_nsec = di->i_ctime_nsec;
@@ -1317,7 +1319,7 @@ int ocfs2_getattr(struct mnt_idmap *idmap, const struct path *path,
goto bail;
}
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
/*
* If there is inline data in the inode, the inode will normally not
* have data blocks allocated (it may have an external xattr block).
@@ -2043,7 +2045,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode,
goto out_inode_unlock;
}
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
if (ret < 0)
mlog_errno(ret);
@@ -2793,10 +2795,11 @@ const struct file_operations ocfs2_fops = {
.remap_file_range = ocfs2_remap_file_range,
};
+WRAP_DIR_ITER(ocfs2_readdir) // FIXME!
const struct file_operations ocfs2_dops = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = ocfs2_readdir,
+ .iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
.release = ocfs2_dir_release,
.open = ocfs2_dir_open,
@@ -2842,7 +2845,7 @@ const struct file_operations ocfs2_fops_no_plocks = {
const struct file_operations ocfs2_dops_no_plocks = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .iterate = ocfs2_readdir,
+ .iterate_shared = shared_ocfs2_readdir,
.fsync = ocfs2_sync_file,
.release = ocfs2_dir_release,
.open = ocfs2_dir_open,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index bb116c39b581..e8771600b930 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -306,8 +306,8 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
- inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime);
- inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
+ inode_set_ctime(inode, le64_to_cpu(fe->i_ctime),
+ le32_to_cpu(fe->i_ctime_nsec));
if (OCFS2_I(inode)->ip_blkno != le64_to_cpu(fe->i_blkno))
mlog(ML_ERROR,
@@ -1314,8 +1314,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
fe->i_mode = cpu_to_le16(inode->i_mode);
fe->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
fe->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
- fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
@@ -1352,8 +1352,8 @@ void ocfs2_refresh_inode(struct inode *inode,
inode->i_atime.tv_nsec = le32_to_cpu(fe->i_atime_nsec);
inode->i_mtime.tv_sec = le64_to_cpu(fe->i_mtime);
inode->i_mtime.tv_nsec = le32_to_cpu(fe->i_mtime_nsec);
- inode->i_ctime.tv_sec = le64_to_cpu(fe->i_ctime);
- inode->i_ctime.tv_nsec = le32_to_cpu(fe->i_ctime_nsec);
+ inode_set_ctime(inode, le64_to_cpu(fe->i_ctime),
+ le32_to_cpu(fe->i_ctime_nsec));
spin_unlock(&OCFS2_I(inode)->ip_lock);
}
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 25d8072ccfce..c19c730c26e2 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -557,7 +557,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers,
(unsigned long)bh,
(unsigned long long)bh->b_blocknr);
- ocfs2_error(bh->b_bdev->bd_super,
+ ocfs2_error(bh->b_assoc_map->host->i_sb,
"JBD2 has aborted our journal, ocfs2 cannot continue\n");
}
@@ -780,14 +780,14 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
mlog_errno(status);
if (!is_handle_aborted(handle)) {
journal_t *journal = handle->h_transaction->t_journal;
- struct super_block *sb = bh->b_bdev->bd_super;
mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. "
"Aborting transaction and journal.\n");
handle->h_err = status;
jbd2_journal_abort_handle(handle);
jbd2_journal_abort(journal, status);
- ocfs2_abort(sb, "Journal already aborted.\n");
+ ocfs2_abort(bh->b_assoc_map->host->i_sb,
+ "Journal already aborted.\n");
}
}
}
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index b1e32ec4a9d4..05d67968a3a9 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -950,9 +950,9 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context)
}
di = (struct ocfs2_dinode *)di_bh->b_data;
- inode->i_ctime = current_time(inode);
- di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ inode_set_ctime_current(inode);
+ di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ocfs2_update_inode_fsync_trans(handle, inode, 0);
ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 17c52225b87d..e4a684d45308 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -793,10 +793,10 @@ static int ocfs2_link(struct dentry *old_dentry,
}
inc_nlink(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
ocfs2_set_links_count(fe, inode->i_nlink);
- fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ fe->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ocfs2_journal_dirty(handle, fe_bh);
err = ocfs2_add_entry(handle, dentry, inode,
@@ -995,7 +995,7 @@ static int ocfs2_unlink(struct inode *dir,
ocfs2_set_links_count(fe, inode->i_nlink);
ocfs2_journal_dirty(handle, fe_bh);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
if (S_ISDIR(inode->i_mode))
drop_nlink(dir);
@@ -1537,7 +1537,7 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
new_dir_bh, &target_insert);
}
- old_inode->i_ctime = current_time(old_inode);
+ inode_set_ctime_current(old_inode);
mark_inode_dirty(old_inode);
status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode),
@@ -1546,8 +1546,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
if (status >= 0) {
old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
- old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
- old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
+ old_di->i_ctime = cpu_to_le64(inode_get_ctime(old_inode).tv_sec);
+ old_di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(old_inode).tv_nsec);
ocfs2_journal_dirty(handle, old_inode_bh);
} else
mlog_errno(status);
@@ -1586,9 +1586,9 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
if (new_inode) {
drop_nlink(new_inode);
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
}
- old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
+ old_dir->i_mtime = inode_set_ctime_current(old_dir);
if (update_dot_dot) {
status = ocfs2_update_entry(old_inode, handle,
@@ -1610,7 +1610,8 @@ static int ocfs2_rename(struct mnt_idmap *idmap,
if (old_dir != new_dir) {
/* Keep the same times on both directories.*/
- new_dir->i_ctime = new_dir->i_mtime = old_dir->i_ctime;
+ new_dir->i_mtime = inode_set_ctime_to_ts(new_dir,
+ inode_get_ctime(old_dir));
/*
* This will also pick up the i_nlink change from the
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 564ab48d03ef..25c8ec3c8c3a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -3750,9 +3750,9 @@ static int ocfs2_change_ctime(struct inode *inode,
goto out_commit;
}
- inode->i_ctime = current_time(inode);
- di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ inode_set_ctime_current(inode);
+ di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ocfs2_journal_dirty(handle, di_bh);
@@ -4073,10 +4073,10 @@ static int ocfs2_complete_reflink(struct inode *s_inode,
* we want mtime to appear identical to the source and
* update ctime.
*/
- t_inode->i_ctime = current_time(t_inode);
+ inode_set_ctime_current(t_inode);
- di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
- di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
+ di->i_ctime = cpu_to_le64(inode_get_ctime(t_inode).tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(t_inode).tv_nsec);
t_inode->i_mtime = s_inode->i_mtime;
di->i_mtime = s_di->i_mtime;
@@ -4456,7 +4456,7 @@ int ocfs2_reflink_update_dest(struct inode *dest,
if (newlen > i_size_read(dest))
i_size_write(dest, newlen);
spin_unlock(&OCFS2_I(dest)->ip_lock);
- dest->i_ctime = dest->i_mtime = current_time(dest);
+ dest->i_mtime = inode_set_ctime_current(dest);
ret = ocfs2_mark_inode_dirty(handle, dest, d_bh);
if (ret) {
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 4ac77ff6e676..6510ad783c91 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3421,9 +3421,9 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
goto out;
}
- inode->i_ctime = current_time(inode);
- di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
- di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ inode_set_ctime_current(inode);
+ di->i_ctime = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ di->i_ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
}
out:
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 82cf7e9a665f..6bda275826d6 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -143,7 +143,7 @@ static int omfs_add_link(struct dentry *dentry, struct inode *inode)
mark_buffer_dirty(bh);
brelse(bh);
- dir->i_ctime = current_time(dir);
+ inode_set_ctime_current(dir);
/* mark affected inodes dirty to rebuild checksums */
mark_inode_dirty(dir);
@@ -399,7 +399,7 @@ static int omfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (err)
goto out;
- old_inode->i_ctime = current_time(old_inode);
+ inode_set_ctime_current(old_inode);
mark_inode_dirty(old_inode);
out:
return err;
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index c4c79e07efc7..2f8c1882f45c 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -51,7 +51,7 @@ struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
inode_init_owner(&nop_mnt_idmap, inode, NULL, mode);
inode->i_mapping->a_ops = &omfs_aops;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_op = &omfs_dir_inops;
@@ -134,8 +134,8 @@ static int __omfs_write_inode(struct inode *inode, int wait)
oi->i_head.h_magic = OMFS_IMAGIC;
oi->i_size = cpu_to_be64(inode->i_size);
- ctime = inode->i_ctime.tv_sec * 1000LL +
- ((inode->i_ctime.tv_nsec + 999)/1000);
+ ctime = inode_get_ctime(inode).tv_sec * 1000LL +
+ ((inode_get_ctime(inode).tv_nsec + 999)/1000);
oi->i_ctime = cpu_to_be64(ctime);
omfs_update_checksums(oi);
@@ -232,10 +232,9 @@ struct inode *omfs_iget(struct super_block *sb, ino_t ino)
inode->i_atime.tv_sec = ctime;
inode->i_mtime.tv_sec = ctime;
- inode->i_ctime.tv_sec = ctime;
+ inode_set_ctime(inode, ctime, nsecs);
inode->i_atime.tv_nsec = nsecs;
inode->i_mtime.tv_nsec = nsecs;
- inode->i_ctime.tv_nsec = nsecs;
inode->i_mapping->a_ops = &omfs_aops;
diff --git a/fs/open.c b/fs/open.c
index 0c55c8e7f837..98f6601fbac6 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -671,11 +671,20 @@ SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
return err;
}
-static int do_fchmodat(int dfd, const char __user *filename, umode_t mode)
+static int do_fchmodat(int dfd, const char __user *filename, umode_t mode,
+ unsigned int flags)
{
struct path path;
int error;
- unsigned int lookup_flags = LOOKUP_FOLLOW;
+ unsigned int lookup_flags;
+
+ if (unlikely(flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)))
+ return -EINVAL;
+
+ lookup_flags = (flags & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
+ if (flags & AT_EMPTY_PATH)
+ lookup_flags |= LOOKUP_EMPTY;
+
retry:
error = user_path_at(dfd, filename, lookup_flags, &path);
if (!error) {
@@ -689,15 +698,21 @@ retry:
return error;
}
+SYSCALL_DEFINE4(fchmodat2, int, dfd, const char __user *, filename,
+ umode_t, mode, unsigned int, flags)
+{
+ return do_fchmodat(dfd, filename, mode, flags);
+}
+
SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename,
umode_t, mode)
{
- return do_fchmodat(dfd, filename, mode);
+ return do_fchmodat(dfd, filename, mode, 0);
}
SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
{
- return do_fchmodat(AT_FDCWD, filename, mode);
+ return do_fchmodat(AT_FDCWD, filename, mode, 0);
}
/*
@@ -1150,7 +1165,7 @@ EXPORT_SYMBOL_GPL(kernel_file_open);
* backing_file_open - open a backing file for kernel internal use
* @path: path of the file to open
* @flags: open flags
- * @path: path of the backing file
+ * @real_path: path of the backing file
* @cred: credentials for open
*
* Open a backing file for a stackable filesystem (e.g., overlayfs).
@@ -1322,7 +1337,7 @@ inline int build_open_flags(const struct open_how *how, struct open_flags *op)
lookup_flags |= LOOKUP_IN_ROOT;
if (how->resolve & RESOLVE_CACHED) {
/* Don't bother even trying for create/truncate/tmpfile open */
- if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
+ if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE))
return -EAGAIN;
lookup_flags |= LOOKUP_CACHED;
}
@@ -1503,7 +1518,7 @@ SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
* "id" is the POSIX thread ID. We use the
* files pointer for this..
*/
-int filp_close(struct file *filp, fl_owner_t id)
+static int filp_flush(struct file *filp, fl_owner_t id)
{
int retval = 0;
@@ -1520,10 +1535,18 @@ int filp_close(struct file *filp, fl_owner_t id)
dnotify_flush(filp, id);
locks_remove_posix(filp, id);
}
- fput(filp);
return retval;
}
+int filp_close(struct file *filp, fl_owner_t id)
+{
+ int retval;
+
+ retval = filp_flush(filp, id);
+ fput(filp);
+
+ return retval;
+}
EXPORT_SYMBOL(filp_close);
/*
@@ -1533,7 +1556,20 @@ EXPORT_SYMBOL(filp_close);
*/
SYSCALL_DEFINE1(close, unsigned int, fd)
{
- int retval = close_fd(fd);
+ int retval;
+ struct file *file;
+
+ file = close_fd_get_file(fd);
+ if (!file)
+ return -EBADF;
+
+ retval = filp_flush(file, current->files);
+
+ /*
+ * We're returning to user space. Don't bother
+ * with any delayed fput() cases.
+ */
+ __fput_sync(file);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
@@ -1546,7 +1582,7 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
}
/**
- * close_range() - Close all file descriptors in a given range.
+ * sys_close_range() - Close all file descriptors in a given range.
*
* @fd: starting file descriptor to close
* @max_fd: last file descriptor to close
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index f0b7f4d51a17..b2457cb97fa0 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -237,7 +237,7 @@ found:
if (IS_ERR(inode))
return ERR_CAST(inode);
if (inode->i_state & I_NEW) {
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
ent_oi = OP_I(inode);
ent_oi->type = ent_type;
ent_oi->u = ent_data;
@@ -387,8 +387,7 @@ static int openprom_fill_super(struct super_block *s, struct fs_context *fc)
goto out_no_root;
}
- root_inode->i_mtime = root_inode->i_atime =
- root_inode->i_ctime = current_time(root_inode);
+ root_inode->i_mtime = root_inode->i_atime = inode_set_ctime_current(root_inode);
root_inode->i_op = &openprom_inode_operations;
root_inode->i_fop = &openprom_operations;
root_inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 9014bbcc8031..085912268442 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -871,7 +871,7 @@ int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
ret = orangefs_inode_getattr(inode,
request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
if (ret == 0) {
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
/* override block size reported to stat */
if (!(request_mask & STATX_SIZE))
@@ -900,12 +900,13 @@ int orangefs_permission(struct mnt_idmap *idmap,
return generic_permission(&nop_mnt_idmap, inode, mask);
}
-int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
+int orangefs_update_time(struct inode *inode, int flags)
{
struct iattr iattr;
+
gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_update_time: %pU\n",
get_khandle_from_ino(inode));
- generic_update_time(inode, time, flags);
+ flags = generic_update_time(inode, flags);
memset(&iattr, 0, sizeof iattr);
if (flags & S_ATIME)
iattr.ia_valid |= ATTR_ATIME;
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
index 77518e248cf7..c9dfd5c6a097 100644
--- a/fs/orangefs/namei.c
+++ b/fs/orangefs/namei.c
@@ -421,7 +421,7 @@ static int orangefs_rename(struct mnt_idmap *idmap,
ret);
if (new_dentry->d_inode)
- new_dentry->d_inode->i_ctime = current_time(new_dentry->d_inode);
+ inode_set_ctime_current(d_inode(new_dentry));
op_release(new_op);
return ret;
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index ce20d3443869..b711654ca18a 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -370,7 +370,7 @@ int orangefs_getattr(struct mnt_idmap *idmap, const struct path *path,
int orangefs_permission(struct mnt_idmap *idmap,
struct inode *inode, int mask);
-int orangefs_update_time(struct inode *, struct timespec64 *, int);
+int orangefs_update_time(struct inode *, int);
/*
* defined in xattr.c
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
index 46b7dcff18ac..0a9fcfdf552f 100644
--- a/fs/orangefs/orangefs-utils.c
+++ b/fs/orangefs/orangefs-utils.c
@@ -361,11 +361,11 @@ again2:
downcall.resp.getattr.attributes.atime;
inode->i_mtime.tv_sec = (time64_t)new_op->
downcall.resp.getattr.attributes.mtime;
- inode->i_ctime.tv_sec = (time64_t)new_op->
- downcall.resp.getattr.attributes.ctime;
+ inode_set_ctime(inode,
+ (time64_t)new_op->downcall.resp.getattr.attributes.ctime,
+ 0);
inode->i_atime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
- inode->i_ctime.tv_nsec = 0;
/* special case: mark the root inode as sticky */
inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index 21245b00722a..eaa1e6b3e04a 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -239,6 +239,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
static void ovl_file_accessed(struct file *file)
{
struct inode *inode, *upperinode;
+ struct timespec64 ctime, uctime;
if (file->f_flags & O_NOATIME)
return;
@@ -249,10 +250,12 @@ static void ovl_file_accessed(struct file *file)
if (!upperinode)
return;
+ ctime = inode_get_ctime(inode);
+ uctime = inode_get_ctime(upperinode);
if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
- !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
+ !timespec64_equal(&ctime, &uctime))) {
inode->i_mtime = upperinode->i_mtime;
- inode->i_ctime = upperinode->i_ctime;
+ inode_set_ctime_to_ts(inode, uctime);
}
touch_atime(&file->f_path);
@@ -290,10 +293,7 @@ static void ovl_aio_cleanup_handler(struct ovl_aio_req *aio_req)
if (iocb->ki_flags & IOCB_WRITE) {
struct inode *inode = file_inode(orig_iocb->ki_filp);
- /* Actually acquired in ovl_write_iter() */
- __sb_writers_acquired(file_inode(iocb->ki_filp)->i_sb,
- SB_FREEZE_WRITE);
- file_end_write(iocb->ki_filp);
+ kiocb_end_write(iocb);
ovl_copyattr(inode);
}
@@ -409,10 +409,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (!aio_req)
goto out;
- file_start_write(real.file);
- /* Pacify lockdep, same trick as done in aio_write() */
- __sb_writers_release(file_inode(real.file)->i_sb,
- SB_FREEZE_WRITE);
aio_req->fd = real;
real.flags = 0;
aio_req->orig_iocb = iocb;
@@ -420,6 +416,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
aio_req->iocb.ki_flags = ifl;
aio_req->iocb.ki_complete = ovl_aio_rw_complete;
refcount_set(&aio_req->ref, 2);
+ kiocb_start_write(&aio_req->iocb);
ret = vfs_iocb_iter_write(real.file, &aio_req->iocb, iter);
ovl_aio_put(aio_req);
if (ret != -EIOCBQUEUED)
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index a63e57447be9..f22e27b78025 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -693,7 +693,7 @@ int ovl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
}
#endif
-int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags)
+int ovl_update_time(struct inode *inode, int flags)
{
if (flags & S_ATIME) {
struct ovl_fs *ofs = inode->i_sb->s_fs_info;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 9402591f12aa..8bbe6173bef4 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -665,7 +665,7 @@ static inline struct posix_acl *ovl_get_acl_path(const struct path *path,
}
#endif
-int ovl_update_time(struct inode *inode, struct timespec64 *ts, int flags);
+int ovl_update_time(struct inode *inode, int flags);
bool ovl_is_private_xattr(struct super_block *sb, const char *name);
struct ovl_inode_params {
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index ee5c4736480f..de39e067ae65 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -954,10 +954,11 @@ static int ovl_dir_open(struct inode *inode, struct file *file)
return 0;
}
+WRAP_DIR_ITER(ovl_iterate) // FIXME!
const struct file_operations ovl_dir_operations = {
.read = generic_read_dir,
.open = ovl_dir_open,
- .iterate = ovl_iterate,
+ .iterate_shared = shared_ovl_iterate,
.llseek = ovl_dir_llseek,
.fsync = ovl_dir_fsync,
.release = ovl_dir_release,
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 5b069f1a1e44..cc8977498c48 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -1460,7 +1460,7 @@ int ovl_fill_super(struct super_block *sb, struct fs_context *fc)
ovl_trusted_xattr_handlers;
sb->s_fs_info = ofs;
sb->s_flags |= SB_POSIXACL;
- sb->s_iflags |= SB_I_SKIP_SYNC;
+ sb->s_iflags |= SB_I_SKIP_SYNC | SB_I_IMA_UNVERIFIABLE_SIGNATURE;
err = -ENOMEM;
root_dentry = ovl_get_root(sb, ctx->upper.dentry, oe);
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 7ef9e13c404a..c210b5d496a8 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -1202,6 +1202,6 @@ void ovl_copyattr(struct inode *inode)
inode->i_mode = realinode->i_mode;
inode->i_atime = realinode->i_atime;
inode->i_mtime = realinode->i_mtime;
- inode->i_ctime = realinode->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(realinode));
i_size_write(inode, i_size_read(realinode));
}
diff --git a/fs/pipe.c b/fs/pipe.c
index 2d88f73f585a..6c1a9b1db907 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -489,7 +489,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from)
head = pipe->head;
if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
unsigned int mask = pipe->ring_size - 1;
- struct pipe_buffer *buf = &pipe->bufs[head & mask];
+ struct pipe_buffer *buf;
struct page *page = pipe->tmp_page;
int copied;
@@ -899,7 +899,7 @@ static struct inode * get_pipe_inode(void)
inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
return inode;
@@ -1236,7 +1236,7 @@ const struct file_operations pipefifo_fops = {
* Currently we rely on the pipe array holding a power-of-2 number
* of pages. Returns 0 on error.
*/
-unsigned int round_pipe_size(unsigned long size)
+unsigned int round_pipe_size(unsigned int size)
{
if (size > (1U << 31))
return 0;
@@ -1319,7 +1319,7 @@ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
* Allocate a new array of pipe buffers and copy the info over. Returns the
* pipe size if successful, or return -ERROR on error.
*/
-static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
+static long pipe_set_size(struct pipe_inode_info *pipe, unsigned int arg)
{
unsigned long user_bufs;
unsigned int nr_slots, size;
@@ -1387,7 +1387,7 @@ struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
return pipe;
}
-long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
+long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg)
{
struct pipe_inode_info *pipe;
long ret;
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 7fa1b738bbab..a05fe94970ce 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -1027,7 +1027,7 @@ int simple_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
return error;
}
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (IS_I_VERSION(inode))
inode_inc_iversion(inode);
set_cached_acl(inode, type, acl);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 05452c3b9872..7576effe8d52 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1902,7 +1902,7 @@ struct inode *proc_pid_make_inode(struct super_block *sb,
ei = PROC_I(inode);
inode->i_mode = mode;
inode->i_ino = get_next_ino();
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_op = &proc_def_inode_operations;
/*
@@ -1966,7 +1966,7 @@ int pid_getattr(struct mnt_idmap *idmap, const struct path *path,
struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct task_struct *task;
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->uid = GLOBAL_ROOT_UID;
stat->gid = GLOBAL_ROOT_GID;
@@ -2817,7 +2817,7 @@ static int proc_##LSM##_attr_dir_iterate(struct file *filp, \
\
static const struct file_operations proc_##LSM##_attr_dir_ops = { \
.read = generic_read_dir, \
- .iterate = proc_##LSM##_attr_dir_iterate, \
+ .iterate_shared = proc_##LSM##_attr_dir_iterate, \
.llseek = default_llseek, \
}; \
\
@@ -3583,7 +3583,8 @@ static int proc_tid_comm_permission(struct mnt_idmap *idmap,
}
static const struct inode_operations proc_tid_comm_inode_operations = {
- .permission = proc_tid_comm_permission,
+ .setattr = proc_setattr,
+ .permission = proc_tid_comm_permission,
};
/*
@@ -3899,7 +3900,7 @@ static int proc_task_getattr(struct mnt_idmap *idmap,
{
struct inode *inode = d_inode(path->dentry);
struct task_struct *p = get_proc_task(inode);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (p) {
stat->nlink += get_nr_threads(p);
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index b3140deebbbf..6276b3938842 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -352,7 +352,7 @@ static int proc_fd_getattr(struct mnt_idmap *idmap,
struct inode *inode = d_inode(path->dentry);
int rv = 0;
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
/* If it's a directory, put the number of open fds there */
if (S_ISDIR(inode->i_mode)) {
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 42ae38ff6e7e..775ce0bcf08c 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -146,7 +146,7 @@ static int proc_getattr(struct mnt_idmap *idmap,
}
}
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
return 0;
}
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 67b09a1d9433..532dc9d240f7 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -660,7 +660,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
inode->i_private = de->data;
inode->i_ino = de->low_ino;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
PROC_I(inode)->pde = de;
if (is_empty_pde(de)) {
make_empty_dir_inode(inode);
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 9cb32e1a78a0..23fc24d16b31 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -309,6 +309,8 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
{
+ struct file *file = iocb->ki_filp;
+ char *buf = file->private_data;
loff_t *fpos = &iocb->ki_pos;
size_t phdrs_offset, notes_offset, data_offset;
size_t page_offline_frozen = 1;
@@ -555,10 +557,21 @@ static ssize_t read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
case KCORE_VMEMMAP:
case KCORE_TEXT:
/*
- * We use _copy_to_iter() to bypass usermode hardening
- * which would otherwise prevent this operation.
+ * Sadly we must use a bounce buffer here to be able to
+ * make use of copy_from_kernel_nofault(), as these
+ * memory regions might not always be mapped on all
+ * architectures.
*/
- if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
+ if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ /*
+ * We know the bounce buffer is safe to copy from, so
+ * use _copy_to_iter() directly.
+ */
+ } else if (_copy_to_iter(buf, tsz, iter) != tsz) {
ret = -EFAULT;
goto out;
}
@@ -595,6 +608,10 @@ static int open_kcore(struct inode *inode, struct file *filp)
if (ret)
return ret;
+ filp->private_data = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!filp->private_data)
+ return -ENOMEM;
+
if (kcore_need_update)
kcore_update_ram();
if (i_size_read(inode) != proc_root_kcore->size) {
@@ -605,9 +622,16 @@ static int open_kcore(struct inode *inode, struct file *filp)
return 0;
}
+static int release_kcore(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
static const struct proc_ops kcore_proc_ops = {
.proc_read_iter = read_kcore_iter,
.proc_open = open_kcore,
+ .proc_release = release_kcore,
.proc_lseek = default_llseek,
};
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index a0c0419872e3..2ba31b6d68c0 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -308,7 +308,7 @@ static int proc_tgid_net_getattr(struct mnt_idmap *idmap,
net = get_proc_task_net(inode);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (net != NULL) {
stat->nlink = net->proc_net->nlink;
@@ -321,6 +321,7 @@ static int proc_tgid_net_getattr(struct mnt_idmap *idmap,
const struct inode_operations proc_net_inode_operations = {
.lookup = proc_tgid_net_lookup,
.getattr = proc_tgid_net_getattr,
+ .setattr = proc_setattr,
};
static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 5ea42653126e..bf06344a42cc 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -463,7 +463,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
head->count++;
spin_unlock(&sysctl_lock);
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_mode = table->mode;
if (!S_ISDIR(table->mode)) {
inode->i_mode |= S_IFREG;
@@ -849,7 +849,7 @@ static int proc_sys_getattr(struct mnt_idmap *idmap,
if (IS_ERR(head))
return PTR_ERR(head);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
if (table)
stat->mode = (stat->mode & S_IFMT) | table->mode;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index a86e65a608da..9191248f2dac 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -314,7 +314,8 @@ static int proc_root_getattr(struct mnt_idmap *idmap,
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
- generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+ stat);
stat->nlink = proc_root.nlink + nr_processes();
return 0;
}
diff --git a/fs/proc/self.c b/fs/proc/self.c
index 72cd69bcaf4a..ecc4da8d265e 100644
--- a/fs/proc/self.c
+++ b/fs/proc/self.c
@@ -46,7 +46,7 @@ int proc_setup_self(struct super_block *s)
struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = self_inum;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_mode = S_IFLNK | S_IRWXUGO;
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 507cd4e59d07..fafff1bd34cd 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -587,8 +587,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
bool migration = false;
if (pmd_present(*pmd)) {
- /* FOLL_DUMP will return -EFAULT on huge zero page */
- page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP);
+ page = vm_normal_page_pmd(vma, addr, *pmd);
} else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) {
swp_entry_t entry = pmd_to_swp_entry(*pmd);
@@ -758,12 +757,14 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
static const struct mm_walk_ops smaps_walk_ops = {
.pmd_entry = smaps_pte_range,
.hugetlb_entry = smaps_hugetlb_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static const struct mm_walk_ops smaps_shmem_walk_ops = {
.pmd_entry = smaps_pte_range,
.hugetlb_entry = smaps_hugetlb_range,
.pte_hole = smaps_pte_hole,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
@@ -1245,6 +1246,7 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
static const struct mm_walk_ops clear_refs_walk_ops = {
.pmd_entry = clear_refs_pte_range,
.test_walk = clear_refs_test_walk,
+ .walk_lock = PGWALK_WRLOCK,
};
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
@@ -1622,6 +1624,7 @@ static const struct mm_walk_ops pagemap_ops = {
.pmd_entry = pagemap_pmd_range,
.pte_hole = pagemap_pte_hole,
.hugetlb_entry = pagemap_hugetlb_range,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
@@ -1935,6 +1938,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
static const struct mm_walk_ops show_numa_ops = {
.hugetlb_entry = gather_hugetlb_stats,
.pmd_entry = gather_pte_stats,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
index a553273fbd41..63ac1f93289f 100644
--- a/fs/proc/thread_self.c
+++ b/fs/proc/thread_self.c
@@ -46,7 +46,7 @@ int proc_setup_thread_self(struct super_block *s)
struct inode *inode = new_inode(s);
if (inode) {
inode->i_ino = thread_self_inum;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_mode = S_IFLNK | S_IRWXUGO;
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index cb80a7703d58..1fb213f379a5 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -132,7 +132,7 @@ ssize_t read_from_oldmem(struct iov_iter *iter, size_t count,
u64 *ppos, bool encrypted)
{
unsigned long pfn, offset;
- size_t nr_bytes;
+ ssize_t nr_bytes;
ssize_t read = 0, tmp;
int idx;
diff --git a/fs/pstore/Kconfig b/fs/pstore/Kconfig
index c49d554cc9ae..3acc38600cd1 100644
--- a/fs/pstore/Kconfig
+++ b/fs/pstore/Kconfig
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
config PSTORE
tristate "Persistent store support"
- select CRYPTO if PSTORE_COMPRESS
default n
help
This option enables generic access to platform level
@@ -22,99 +21,18 @@ config PSTORE_DEFAULT_KMSG_BYTES
Defines default size of pstore kernel log storage.
Can be enlarged if needed, not recommended to shrink it.
-config PSTORE_DEFLATE_COMPRESS
- tristate "DEFLATE (ZLIB) compression"
- default y
- depends on PSTORE
- select CRYPTO_DEFLATE
- help
- This option enables DEFLATE (also known as ZLIB) compression
- algorithm support.
-
-config PSTORE_LZO_COMPRESS
- tristate "LZO compression"
- depends on PSTORE
- select CRYPTO_LZO
- help
- This option enables LZO compression algorithm support.
-
-config PSTORE_LZ4_COMPRESS
- tristate "LZ4 compression"
- depends on PSTORE
- select CRYPTO_LZ4
- help
- This option enables LZ4 compression algorithm support.
-
-config PSTORE_LZ4HC_COMPRESS
- tristate "LZ4HC compression"
- depends on PSTORE
- select CRYPTO_LZ4HC
- help
- This option enables LZ4HC (high compression) mode algorithm.
-
-config PSTORE_842_COMPRESS
- bool "842 compression"
- depends on PSTORE
- select CRYPTO_842
- help
- This option enables 842 compression algorithm support.
-
-config PSTORE_ZSTD_COMPRESS
- bool "zstd compression"
- depends on PSTORE
- select CRYPTO_ZSTD
- help
- This option enables zstd compression algorithm support.
-
config PSTORE_COMPRESS
- def_bool y
+ bool "Pstore compression (deflate)"
depends on PSTORE
- depends on PSTORE_DEFLATE_COMPRESS || PSTORE_LZO_COMPRESS || \
- PSTORE_LZ4_COMPRESS || PSTORE_LZ4HC_COMPRESS || \
- PSTORE_842_COMPRESS || PSTORE_ZSTD_COMPRESS
-
-choice
- prompt "Default pstore compression algorithm"
- depends on PSTORE_COMPRESS
+ select ZLIB_INFLATE
+ select ZLIB_DEFLATE
+ default y
help
- This option chooses the default active compression algorithm.
- This change be changed at boot with "pstore.compress=..." on
- the kernel command line.
-
- Currently, pstore has support for 6 compression algorithms:
- deflate, lzo, lz4, lz4hc, 842 and zstd.
-
- The default compression algorithm is deflate.
-
- config PSTORE_DEFLATE_COMPRESS_DEFAULT
- bool "deflate" if PSTORE_DEFLATE_COMPRESS
-
- config PSTORE_LZO_COMPRESS_DEFAULT
- bool "lzo" if PSTORE_LZO_COMPRESS
-
- config PSTORE_LZ4_COMPRESS_DEFAULT
- bool "lz4" if PSTORE_LZ4_COMPRESS
-
- config PSTORE_LZ4HC_COMPRESS_DEFAULT
- bool "lz4hc" if PSTORE_LZ4HC_COMPRESS
-
- config PSTORE_842_COMPRESS_DEFAULT
- bool "842" if PSTORE_842_COMPRESS
-
- config PSTORE_ZSTD_COMPRESS_DEFAULT
- bool "zstd" if PSTORE_ZSTD_COMPRESS
-
-endchoice
-
-config PSTORE_COMPRESS_DEFAULT
- string
- depends on PSTORE_COMPRESS
- default "deflate" if PSTORE_DEFLATE_COMPRESS_DEFAULT
- default "lzo" if PSTORE_LZO_COMPRESS_DEFAULT
- default "lz4" if PSTORE_LZ4_COMPRESS_DEFAULT
- default "lz4hc" if PSTORE_LZ4HC_COMPRESS_DEFAULT
- default "842" if PSTORE_842_COMPRESS_DEFAULT
- default "zstd" if PSTORE_ZSTD_COMPRESS_DEFAULT
+ Whether pstore records should be compressed before being written to
+ the backing store. This is implemented using the zlib 'deflate'
+ algorithm, using the library implementation instead of using the full
+ blown crypto API. This reduces the risk of secondary oopses or other
+ problems while pstore is recording panic metadata.
config PSTORE_CONSOLE
bool "Log kernel console messages"
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index ffbadb8b3032..585360706b33 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -54,7 +54,7 @@ static void free_pstore_private(struct pstore_private *private)
if (!private)
return;
if (private->record) {
- kfree(private->record->buf);
+ kvfree(private->record->buf);
kfree(private->record->priv);
kfree(private->record);
}
@@ -223,7 +223,7 @@ static struct inode *pstore_get_inode(struct super_block *sb)
struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
return inode;
}
@@ -390,7 +390,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record)
inode->i_private = private;
if (record->time.tv_sec)
- inode->i_mtime = inode->i_ctime = record->time;
+ inode->i_mtime = inode_set_ctime_to_ts(inode, record->time);
d_add(dentry, inode);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index cbc0b468c1ab..62356d542ef6 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -14,24 +14,17 @@
#include <linux/init.h>
#include <linux/kmsg_dump.h>
#include <linux/console.h>
+#include <linux/mm.h>
#include <linux/module.h>
#include <linux/pstore.h>
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
-#include <linux/lzo.h>
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
-#include <linux/lz4.h>
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
-#include <linux/zstd.h>
-#endif
-#include <linux/crypto.h>
#include <linux/string.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/jiffies.h>
+#include <linux/vmalloc.h>
#include <linux/workqueue.h>
+#include <linux/zlib.h>
#include "internal.h"
@@ -80,12 +73,21 @@ static char *backend;
module_param(backend, charp, 0444);
MODULE_PARM_DESC(backend, "specific backend to use");
-static char *compress =
-#ifdef CONFIG_PSTORE_COMPRESS_DEFAULT
- CONFIG_PSTORE_COMPRESS_DEFAULT;
-#else
- NULL;
-#endif
+/*
+ * pstore no longer implements compression via the crypto API, and only
+ * supports zlib deflate compression implemented using the zlib library
+ * interface. This removes additional complexity which is hard to justify for a
+ * diagnostic facility that has to operate in conditions where the system may
+ * have become unstable. Zlib deflate is comparatively small in terms of code
+ * size, and compresses ASCII text comparatively well. In terms of compression
+ * speed, deflate is not the best performer but for recording the log output on
+ * a kernel panic, this is not considered critical.
+ *
+ * The only remaining arguments supported by the compress= module parameter are
+ * 'deflate' and 'none'. To retain compatibility with existing installations,
+ * all other values are logged and replaced with 'deflate'.
+ */
+static char *compress = "deflate";
module_param(compress, charp, 0444);
MODULE_PARM_DESC(compress, "compression to use");
@@ -94,16 +96,9 @@ unsigned long kmsg_bytes = CONFIG_PSTORE_DEFAULT_KMSG_BYTES;
module_param(kmsg_bytes, ulong, 0444);
MODULE_PARM_DESC(kmsg_bytes, "amount of kernel log to snapshot (in bytes)");
-/* Compression parameters */
-static struct crypto_comp *tfm;
-
-struct pstore_zbackend {
- int (*zbufsize)(size_t size);
- const char *name;
-};
+static void *compress_workspace;
static char *big_oops_buf;
-static size_t big_oops_buf_sz;
void pstore_set_kmsg_bytes(int bytes)
{
@@ -168,206 +163,89 @@ static bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
}
}
-#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
-static int zbufsize_deflate(size_t size)
-{
- size_t cmpr;
-
- switch (size) {
- /* buffer range for efivars */
- case 1000 ... 2000:
- cmpr = 56;
- break;
- case 2001 ... 3000:
- cmpr = 54;
- break;
- case 3001 ... 3999:
- cmpr = 52;
- break;
- /* buffer range for nvram, erst */
- case 4000 ... 10000:
- cmpr = 45;
- break;
- default:
- cmpr = 60;
- break;
- }
-
- return (size * 100) / cmpr;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
-static int zbufsize_lzo(size_t size)
-{
- return lzo1x_worst_compress(size);
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS) || IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
-static int zbufsize_lz4(size_t size)
-{
- return LZ4_compressBound(size);
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS)
-static int zbufsize_842(size_t size)
-{
- return size;
-}
-#endif
-
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
-static int zbufsize_zstd(size_t size)
-{
- return zstd_compress_bound(size);
-}
-#endif
-
-static const struct pstore_zbackend *zbackend __ro_after_init;
-
-static const struct pstore_zbackend zbackends[] = {
-#if IS_ENABLED(CONFIG_PSTORE_DEFLATE_COMPRESS)
- {
- .zbufsize = zbufsize_deflate,
- .name = "deflate",
- },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZO_COMPRESS)
- {
- .zbufsize = zbufsize_lzo,
- .name = "lzo",
- },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4_COMPRESS)
- {
- .zbufsize = zbufsize_lz4,
- .name = "lz4",
- },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_LZ4HC_COMPRESS)
- {
- .zbufsize = zbufsize_lz4,
- .name = "lz4hc",
- },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_842_COMPRESS)
- {
- .zbufsize = zbufsize_842,
- .name = "842",
- },
-#endif
-#if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS)
- {
- .zbufsize = zbufsize_zstd,
- .name = "zstd",
- },
-#endif
- { }
-};
-
static int pstore_compress(const void *in, void *out,
unsigned int inlen, unsigned int outlen)
{
+ struct z_stream_s zstream = {
+ .next_in = in,
+ .avail_in = inlen,
+ .next_out = out,
+ .avail_out = outlen,
+ .workspace = compress_workspace,
+ };
int ret;
if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS))
return -EINVAL;
- ret = crypto_comp_compress(tfm, in, inlen, out, &outlen);
- if (ret) {
- pr_err("crypto_comp_compress failed, ret = %d!\n", ret);
- return ret;
- }
+ ret = zlib_deflateInit2(&zstream, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+ -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+ if (ret != Z_OK)
+ return -EINVAL;
+
+ ret = zlib_deflate(&zstream, Z_FINISH);
+ if (ret != Z_STREAM_END)
+ return -EINVAL;
+
+ ret = zlib_deflateEnd(&zstream);
+ if (ret != Z_OK)
+ pr_warn_once("zlib_deflateEnd() failed: %d\n", ret);
- return outlen;
+ return zstream.total_out;
}
static void allocate_buf_for_compression(void)
{
- struct crypto_comp *ctx;
- int size;
char *buf;
- /* Skip if not built-in or compression backend not selected yet. */
- if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !zbackend)
- return;
-
- /* Skip if no pstore backend yet or compression init already done. */
- if (!psinfo || tfm)
- return;
-
- if (!crypto_has_comp(zbackend->name, 0, 0)) {
- pr_err("Unknown compression: %s\n", zbackend->name);
+ /* Skip if not built-in or compression disabled. */
+ if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !compress ||
+ !strcmp(compress, "none")) {
+ compress = NULL;
return;
}
- size = zbackend->zbufsize(psinfo->bufsize);
- if (size <= 0) {
- pr_err("Invalid compression size for %s: %d\n",
- zbackend->name, size);
- return;
+ if (strcmp(compress, "deflate")) {
+ pr_err("Unsupported compression '%s', falling back to deflate\n",
+ compress);
+ compress = "deflate";
}
- buf = kmalloc(size, GFP_KERNEL);
+ /*
+ * The compression buffer only needs to be as large as the maximum
+ * uncompressed record size, since any record that would be expanded by
+ * compression is just stored uncompressed.
+ */
+ buf = kvzalloc(psinfo->bufsize, GFP_KERNEL);
if (!buf) {
- pr_err("Failed %d byte compression buffer allocation for: %s\n",
- size, zbackend->name);
+ pr_err("Failed %zu byte compression buffer allocation for: %s\n",
+ psinfo->bufsize, compress);
return;
}
- ctx = crypto_alloc_comp(zbackend->name, 0, 0);
- if (IS_ERR_OR_NULL(ctx)) {
- kfree(buf);
- pr_err("crypto_alloc_comp('%s') failed: %ld\n", zbackend->name,
- PTR_ERR(ctx));
+ compress_workspace =
+ vmalloc(zlib_deflate_workspacesize(MAX_WBITS, DEF_MEM_LEVEL));
+ if (!compress_workspace) {
+ pr_err("Failed to allocate zlib deflate workspace\n");
+ kvfree(buf);
return;
}
/* A non-NULL big_oops_buf indicates compression is available. */
- tfm = ctx;
- big_oops_buf_sz = size;
big_oops_buf = buf;
- pr_info("Using crash dump compression: %s\n", zbackend->name);
+ pr_info("Using crash dump compression: %s\n", compress);
}
static void free_buf_for_compression(void)
{
- if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && tfm) {
- crypto_free_comp(tfm);
- tfm = NULL;
+ if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress_workspace) {
+ vfree(compress_workspace);
+ compress_workspace = NULL;
}
- kfree(big_oops_buf);
- big_oops_buf = NULL;
- big_oops_buf_sz = 0;
-}
-/*
- * Called when compression fails, since the printk buffer
- * would be fetched for compression calling it again when
- * compression fails would have moved the iterator of
- * printk buffer which results in fetching old contents.
- * Copy the recent messages from big_oops_buf to psinfo->buf
- */
-static size_t copy_kmsg_to_buffer(int hsize, size_t len)
-{
- size_t total_len;
- size_t diff;
-
- total_len = hsize + len;
-
- if (total_len > psinfo->bufsize) {
- diff = total_len - psinfo->bufsize + hsize;
- memcpy(psinfo->buf, big_oops_buf, hsize);
- memcpy(psinfo->buf + hsize, big_oops_buf + diff,
- psinfo->bufsize - hsize);
- total_len = psinfo->bufsize;
- } else
- memcpy(psinfo->buf, big_oops_buf, total_len);
-
- return total_len;
+ kvfree(big_oops_buf);
+ big_oops_buf = NULL;
}
void pstore_record_init(struct pstore_record *record,
@@ -426,13 +304,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
record.part = part;
record.buf = psinfo->buf;
- if (big_oops_buf) {
- dst = big_oops_buf;
- dst_size = big_oops_buf_sz;
- } else {
- dst = psinfo->buf;
- dst_size = psinfo->bufsize;
- }
+ dst = big_oops_buf ?: psinfo->buf;
+ dst_size = psinfo->bufsize;
/* Write dump header. */
header_size = snprintf(dst, dst_size, "%s#%d Part%u\n", why,
@@ -453,8 +326,8 @@ static void pstore_dump(struct kmsg_dumper *dumper,
record.compressed = true;
record.size = zipped_len;
} else {
- record.size = copy_kmsg_to_buffer(header_size,
- dump_size);
+ record.size = header_size + dump_size;
+ memcpy(psinfo->buf, dst, record.size);
}
} else {
record.size = header_size + dump_size;
@@ -549,7 +422,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
if (record->buf)
return -EINVAL;
- record->buf = memdup_user(buf, record->size);
+ record->buf = vmemdup_user(buf, record->size);
if (IS_ERR(record->buf)) {
ret = PTR_ERR(record->buf);
goto out;
@@ -557,7 +430,7 @@ static int pstore_write_user_compat(struct pstore_record *record,
ret = record->psi->write(record);
- kfree(record->buf);
+ kvfree(record->buf);
out:
record->buf = NULL;
@@ -681,7 +554,8 @@ void pstore_unregister(struct pstore_info *psi)
}
EXPORT_SYMBOL_GPL(pstore_unregister);
-static void decompress_record(struct pstore_record *record)
+static void decompress_record(struct pstore_record *record,
+ struct z_stream_s *zstream)
{
int ret;
int unzipped_len;
@@ -697,40 +571,50 @@ static void decompress_record(struct pstore_record *record)
}
/* Missing compression buffer means compression was not initialized. */
- if (!big_oops_buf) {
+ if (!zstream->workspace) {
pr_warn("no decompression method initialized!\n");
return;
}
+ ret = zlib_inflateReset(zstream);
+ if (ret != Z_OK) {
+ pr_err("zlib_inflateReset() failed, ret = %d!\n", ret);
+ return;
+ }
+
/* Allocate enough space to hold max decompression and ECC. */
- unzipped_len = big_oops_buf_sz;
- workspace = kmalloc(unzipped_len + record->ecc_notice_size,
- GFP_KERNEL);
+ workspace = kvzalloc(psinfo->bufsize + record->ecc_notice_size,
+ GFP_KERNEL);
if (!workspace)
return;
- /* After decompression "unzipped_len" is almost certainly smaller. */
- ret = crypto_comp_decompress(tfm, record->buf, record->size,
- workspace, &unzipped_len);
- if (ret) {
- pr_err("crypto_comp_decompress failed, ret = %d!\n", ret);
- kfree(workspace);
+ zstream->next_in = record->buf;
+ zstream->avail_in = record->size;
+ zstream->next_out = workspace;
+ zstream->avail_out = psinfo->bufsize;
+
+ ret = zlib_inflate(zstream, Z_FINISH);
+ if (ret != Z_STREAM_END) {
+ pr_err("zlib_inflate() failed, ret = %d!\n", ret);
+ kvfree(workspace);
return;
}
+ unzipped_len = zstream->total_out;
+
/* Append ECC notice to decompressed buffer. */
memcpy(workspace + unzipped_len, record->buf + record->size,
record->ecc_notice_size);
/* Copy decompressed contents into an minimum-sized allocation. */
- unzipped = kmemdup(workspace, unzipped_len + record->ecc_notice_size,
- GFP_KERNEL);
- kfree(workspace);
+ unzipped = kvmemdup(workspace, unzipped_len + record->ecc_notice_size,
+ GFP_KERNEL);
+ kvfree(workspace);
if (!unzipped)
return;
/* Swap out compressed contents with decompressed contents. */
- kfree(record->buf);
+ kvfree(record->buf);
record->buf = unzipped;
record->size = unzipped_len;
record->compressed = false;
@@ -747,10 +631,17 @@ void pstore_get_backend_records(struct pstore_info *psi,
{
int failed = 0;
unsigned int stop_loop = 65536;
+ struct z_stream_s zstream = {};
if (!psi || !root)
return;
+ if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress) {
+ zstream.workspace = kvmalloc(zlib_inflate_workspacesize(),
+ GFP_KERNEL);
+ zlib_inflateInit2(&zstream, -DEF_WBITS);
+ }
+
mutex_lock(&psi->read_mutex);
if (psi->open && psi->open(psi))
goto out;
@@ -779,11 +670,11 @@ void pstore_get_backend_records(struct pstore_info *psi,
break;
}
- decompress_record(record);
+ decompress_record(record, &zstream);
rc = pstore_mkfile(root, record);
if (rc) {
/* pstore_mkfile() did not take record, so free it. */
- kfree(record->buf);
+ kvfree(record->buf);
kfree(record->priv);
kfree(record);
if (rc != -EEXIST || !quiet)
@@ -795,6 +686,12 @@ void pstore_get_backend_records(struct pstore_info *psi,
out:
mutex_unlock(&psi->read_mutex);
+ if (IS_ENABLED(CONFIG_PSTORE_COMPRESS) && compress) {
+ if (zlib_inflateEnd(&zstream) != Z_OK)
+ pr_warn("zlib_inflateEnd() failed\n");
+ kvfree(zstream.workspace);
+ }
+
if (failed)
pr_warn("failed to create %d record(s) from '%s'\n",
failed, psi->name);
@@ -818,34 +715,10 @@ static void pstore_timefunc(struct timer_list *unused)
pstore_timer_kick();
}
-static void __init pstore_choose_compression(void)
-{
- const struct pstore_zbackend *step;
-
- if (!compress)
- return;
-
- for (step = zbackends; step->name; step++) {
- if (!strcmp(compress, step->name)) {
- zbackend = step;
- return;
- }
- }
-}
-
static int __init pstore_init(void)
{
int ret;
- pstore_choose_compression();
-
- /*
- * Check if any pstore backends registered earlier but did not
- * initialize compression because crypto was not ready. If so,
- * initialize compression now.
- */
- allocate_buf_for_compression();
-
ret = pstore_init_fs();
if (ret)
free_buf_for_compression();
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 2f625e1fa8d8..d36702c7ab3c 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -20,6 +20,7 @@
#include <linux/compiler.h>
#include <linux/of.h>
#include <linux/of_address.h>
+#include <linux/mm.h>
#include "internal.h"
#include "ram_internal.h"
@@ -268,7 +269,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
/* ECC correction notice */
record->ecc_notice_size = persistent_ram_ecc_string(prz, NULL, 0);
- record->buf = kmalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
+ record->buf = kvzalloc(size + record->ecc_notice_size + 1, GFP_KERNEL);
if (record->buf == NULL) {
size = -ENOMEM;
goto out;
@@ -282,7 +283,7 @@ static ssize_t ramoops_pstore_read(struct pstore_record *record)
out:
if (free_prz) {
- kfree(prz->old_log);
+ kvfree(prz->old_log);
kfree(prz);
}
@@ -833,7 +834,7 @@ static int ramoops_probe(struct platform_device *pdev)
*/
if (cxt->pstore.flags & PSTORE_FLAGS_DMESG) {
cxt->pstore.bufsize = cxt->dprzs[0]->buffer_size;
- cxt->pstore.buf = kzalloc(cxt->pstore.bufsize, GFP_KERNEL);
+ cxt->pstore.buf = kvzalloc(cxt->pstore.bufsize, GFP_KERNEL);
if (!cxt->pstore.buf) {
pr_err("cannot allocate pstore crash dump buffer\n");
err = -ENOMEM;
@@ -866,7 +867,7 @@ static int ramoops_probe(struct platform_device *pdev)
return 0;
fail_buf:
- kfree(cxt->pstore.buf);
+ kvfree(cxt->pstore.buf);
fail_clear:
cxt->pstore.bufsize = 0;
fail_init:
@@ -881,7 +882,7 @@ static void ramoops_remove(struct platform_device *pdev)
pstore_unregister(&cxt->pstore);
- kfree(cxt->pstore.buf);
+ kvfree(cxt->pstore.buf);
cxt->pstore.bufsize = 0;
ramoops_free_przs(cxt);
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 85aaf0fc6d7d..650e437b55e6 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -17,6 +17,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <asm/page.h>
#include "ram_internal.h"
@@ -24,12 +25,10 @@
/**
* struct persistent_ram_buffer - persistent circular RAM buffer
*
- * @sig:
- * signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value)
- * @start:
- * offset into @data where the beginning of the stored bytes begin
- * @size:
- * number of valid bytes stored in @data
+ * @sig: Signature to indicate header (PERSISTENT_RAM_SIG xor PRZ-type value)
+ * @start: First valid byte in the buffer.
+ * @size: Number of valid bytes in the buffer.
+ * @data: The contents of the buffer.
*/
struct persistent_ram_buffer {
uint32_t sig;
@@ -301,7 +300,7 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
if (!prz->old_log) {
persistent_ram_ecc_old(prz);
- prz->old_log = kmalloc(size, GFP_KERNEL);
+ prz->old_log = kvzalloc(size, GFP_KERNEL);
}
if (!prz->old_log) {
pr_err("failed to allocate buffer\n");
@@ -385,7 +384,7 @@ void *persistent_ram_old(struct persistent_ram_zone *prz)
void persistent_ram_free_old(struct persistent_ram_zone *prz)
{
- kfree(prz->old_log);
+ kvfree(prz->old_log);
prz->old_log = NULL;
prz->old_log_size = 0;
}
@@ -519,7 +518,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
sig ^= PERSISTENT_RAM_SIG;
if (prz->buffer->sig == sig) {
- if (buffer_size(prz) == 0) {
+ if (buffer_size(prz) == 0 && buffer_start(prz) == 0) {
pr_debug("found existing empty buffer\n");
return 0;
}
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 391ea402920d..a7171f5532a1 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -305,8 +305,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_sec = le32_to_cpu(raw_inode->di_atime);
inode->i_atime.tv_nsec = 0;
- inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->di_ctime);
- inode->i_ctime.tv_nsec = 0;
+ inode_set_ctime(inode, le32_to_cpu(raw_inode->di_ctime), 0);
inode->i_blocks = le32_to_cpu(raw_inode->di_first_xtnt.xtnt_size);
memcpy(qnx4_inode, raw_inode, QNX4_DIR_ENTRY_SIZE);
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 85b2fa3b211c..21f90d519f1a 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -562,8 +562,7 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_atime);
inode->i_atime.tv_nsec = 0;
- inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->di_ctime);
- inode->i_ctime.tv_nsec = 0;
+ inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->di_ctime), 0);
/* calc blocks based on 512 byte blocksize */
inode->i_blocks = (inode->i_size + 511) >> 9;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index e3e4f4047657..4d826c369da2 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2367,7 +2367,7 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id,
if (!fmt)
return -ESRCH;
- if (!sb->s_op->quota_write || !sb->s_op->quota_read ||
+ if (!sb->dq_op || !sb->s_qcop ||
(type == PRJQUOTA && sb->dq_op->get_projid == NULL)) {
error = -EINVAL;
goto out_fmt;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index fef477c78107..18e8387cab41 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -65,7 +65,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
inode->i_mapping->a_ops = &ram_aops;
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
mapping_set_unevictable(inode->i_mapping);
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
switch (mode & S_IFMT) {
default:
init_special_inode(inode, mode, dev);
@@ -105,7 +105,7 @@ ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
d_instantiate(dentry, inode);
dget(dentry); /* Extra count - pin the dentry in core */
error = 0;
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
}
return error;
}
@@ -138,7 +138,7 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
if (!error) {
d_instantiate(dentry, inode);
dget(dentry);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
} else
iput(inode);
}
diff --git a/fs/read_write.c b/fs/read_write.c
index b07de77ef126..4771701c896b 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -71,7 +71,7 @@ EXPORT_SYMBOL(vfs_setpos);
* @file: file structure to seek on
* @offset: file offset to seek to
* @whence: type of seek
- * @size: max size of this file in file system
+ * @maxsize: max size of this file in file system
* @eof: offset used for SEEK_END position
*
* This is a variant of generic_file_llseek that allows passing in a custom
diff --git a/fs/readdir.c b/fs/readdir.c
index b264ce60114d..c8c46e294431 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -25,6 +25,53 @@
#include <asm/unaligned.h>
/*
+ * Some filesystems were never converted to '->iterate_shared()'
+ * and their directory iterators want the inode lock held for
+ * writing. This wrapper allows for converting from the shared
+ * semantics to the exclusive inode use.
+ */
+int wrap_directory_iterator(struct file *file,
+ struct dir_context *ctx,
+ int (*iter)(struct file *, struct dir_context *))
+{
+ struct inode *inode = file_inode(file);
+ int ret;
+
+ /*
+ * We'd love to have an 'inode_upgrade_trylock()' operation,
+ * see the comment in mmap_upgrade_trylock() in mm/memory.c.
+ *
+ * But considering this is for "filesystems that never got
+ * converted", it really doesn't matter.
+ *
+ * Also note that since we have to return with the lock held
+ * for reading, we can't use the "killable()" locking here,
+ * since we do need to get the lock even if we're dying.
+ *
+ * We could do the write part killably and then get the read
+ * lock unconditionally if it mattered, but see above on why
+ * this does the very simplistic conversion.
+ */
+ up_read(&inode->i_rwsem);
+ down_write(&inode->i_rwsem);
+
+ /*
+ * Since we dropped the inode lock, we should do the
+ * DEADDIR test again. See 'iterate_dir()' below.
+ *
+ * Note that we don't need to re-do the f_pos games,
+ * since the file must be locked wrt f_pos anyway.
+ */
+ ret = -ENOENT;
+ if (!IS_DEADDIR(inode))
+ ret = iter(file, ctx);
+
+ downgrade_write(&inode->i_rwsem);
+ return ret;
+}
+EXPORT_SYMBOL(wrap_directory_iterator);
+
+/*
* Note the "unsafe_put_user() semantics: we goto a
* label for errors.
*/
@@ -40,39 +87,28 @@
int iterate_dir(struct file *file, struct dir_context *ctx)
{
struct inode *inode = file_inode(file);
- bool shared = false;
int res = -ENOTDIR;
- if (file->f_op->iterate_shared)
- shared = true;
- else if (!file->f_op->iterate)
+
+ if (!file->f_op->iterate_shared)
goto out;
res = security_file_permission(file, MAY_READ);
if (res)
goto out;
- if (shared)
- res = down_read_killable(&inode->i_rwsem);
- else
- res = down_write_killable(&inode->i_rwsem);
+ res = down_read_killable(&inode->i_rwsem);
if (res)
goto out;
res = -ENOENT;
if (!IS_DEADDIR(inode)) {
ctx->pos = file->f_pos;
- if (shared)
- res = file->f_op->iterate_shared(file, ctx);
- else
- res = file->f_op->iterate(file, ctx);
+ res = file->f_op->iterate_shared(file, ctx);
file->f_pos = ctx->pos;
fsnotify_access(file);
file_accessed(file);
}
- if (shared)
- inode_unlock_shared(inode);
- else
- inode_unlock(inode);
+ inode_unlock_shared(inode);
out:
return res;
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 77bd3b27059f..86e55d4bb10d 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1259,9 +1259,8 @@ static void init_inode(struct inode *inode, struct treepath *path)
inode->i_size = sd_v1_size(sd);
inode->i_atime.tv_sec = sd_v1_atime(sd);
inode->i_mtime.tv_sec = sd_v1_mtime(sd);
- inode->i_ctime.tv_sec = sd_v1_ctime(sd);
+ inode_set_ctime(inode, sd_v1_ctime(sd), 0);
inode->i_atime.tv_nsec = 0;
- inode->i_ctime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
inode->i_blocks = sd_v1_blocks(sd);
@@ -1314,8 +1313,7 @@ static void init_inode(struct inode *inode, struct treepath *path)
i_gid_write(inode, sd_v2_gid(sd));
inode->i_mtime.tv_sec = sd_v2_mtime(sd);
inode->i_atime.tv_sec = sd_v2_atime(sd);
- inode->i_ctime.tv_sec = sd_v2_ctime(sd);
- inode->i_ctime.tv_nsec = 0;
+ inode_set_ctime(inode, sd_v2_ctime(sd), 0);
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
inode->i_blocks = sd_v2_blocks(sd);
@@ -1374,7 +1372,7 @@ static void inode2sd(void *sd, struct inode *inode, loff_t size)
set_sd_v2_gid(sd_v2, i_gid_read(inode));
set_sd_v2_mtime(sd_v2, inode->i_mtime.tv_sec);
set_sd_v2_atime(sd_v2, inode->i_atime.tv_sec);
- set_sd_v2_ctime(sd_v2, inode->i_ctime.tv_sec);
+ set_sd_v2_ctime(sd_v2, inode_get_ctime(inode).tv_sec);
set_sd_v2_blocks(sd_v2, to_fake_used_blocks(inode, SD_V2_SIZE));
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
set_sd_v2_rdev(sd_v2, new_encode_dev(inode->i_rdev));
@@ -1394,7 +1392,7 @@ static void inode2sd_v1(void *sd, struct inode *inode, loff_t size)
set_sd_v1_nlink(sd_v1, inode->i_nlink);
set_sd_v1_size(sd_v1, size);
set_sd_v1_atime(sd_v1, inode->i_atime.tv_sec);
- set_sd_v1_ctime(sd_v1, inode->i_ctime.tv_sec);
+ set_sd_v1_ctime(sd_v1, inode_get_ctime(inode).tv_sec);
set_sd_v1_mtime(sd_v1, inode->i_mtime.tv_sec);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
@@ -1986,7 +1984,7 @@ int reiserfs_new_inode(struct reiserfs_transaction_handle *th,
/* uid and gid must already be set by the caller for quota init */
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_size = i_size;
inode->i_blocks = 0;
inode->i_bytes = 0;
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 6bf9b54e58ca..dd33f8cc6eda 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -55,7 +55,7 @@ int reiserfs_fileattr_set(struct mnt_idmap *idmap,
}
sd_attrs_to_i_attrs(flags, inode);
REISERFS_I(inode)->i_attrs = flags;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
err = 0;
unlock:
@@ -107,7 +107,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
err = -EFAULT;
goto setversion_out;
}
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
setversion_out:
mnt_drop_write_file(filp);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 479aa4a57602..015bfe4e4524 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2326,7 +2326,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
int i, j;
bh = __getblk(dev, block, bufsize);
- if (buffer_uptodate(bh))
+ if (!bh || buffer_uptodate(bh))
return (bh);
if (block + BUFNR > max_block) {
@@ -2336,6 +2336,8 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev,
j = 1;
for (i = 1; i < blocks; i++) {
bh = __getblk(dev, block + i, bufsize);
+ if (!bh)
+ break;
if (buffer_uptodate(bh)) {
brelse(bh);
break;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 52240cc891cf..9c5704be2435 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -572,7 +572,7 @@ static int reiserfs_add_entry(struct reiserfs_transaction_handle *th,
}
dir->i_size += paste_size;
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
if (!S_ISDIR(inode->i_mode) && visible)
/* reiserfs_mkdir or reiserfs_rename will do that by itself */
reiserfs_update_sd(th, dir);
@@ -966,7 +966,8 @@ static int reiserfs_rmdir(struct inode *dir, struct dentry *dentry)
inode->i_nlink);
clear_nlink(inode);
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_to_ts(dir,
+ inode_set_ctime_current(inode));
reiserfs_update_sd(&th, inode);
DEC_DIR_INODE_NLINK(dir)
@@ -1070,11 +1071,11 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
inc_nlink(inode);
goto end_unlink;
}
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
reiserfs_update_sd(&th, inode);
dir->i_size -= (de.de_entrylen + DEH_SIZE);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
reiserfs_update_sd(&th, dir);
if (!savelink)
@@ -1250,7 +1251,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
return err ? err : retval;
}
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
reiserfs_update_sd(&th, inode);
ihold(inode);
@@ -1325,7 +1326,6 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
int jbegin_count;
umode_t old_inode_mode;
unsigned long savelink = 1;
- struct timespec64 ctime;
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
@@ -1576,14 +1576,11 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
mark_de_hidden(old_de.de_deh + old_de.de_entry_num);
journal_mark_dirty(&th, old_de.de_bh);
- ctime = current_time(old_dir);
- old_dir->i_ctime = old_dir->i_mtime = ctime;
- new_dir->i_ctime = new_dir->i_mtime = ctime;
/*
* thanks to Alex Adriaanse <alex_a@caltech.edu> for patch
* which adds ctime update of renamed object
*/
- old_inode->i_ctime = ctime;
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
if (new_dentry_inode) {
/* adjust link number of the victim */
@@ -1592,7 +1589,6 @@ static int reiserfs_rename(struct mnt_idmap *idmap,
} else {
drop_nlink(new_dentry_inode);
}
- new_dentry_inode->i_ctime = ctime;
savelink = new_dentry_inode->i_nlink;
}
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index ce5003986789..3676e02a0232 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -2004,7 +2004,7 @@ int reiserfs_do_truncate(struct reiserfs_transaction_handle *th,
if (update_timestamps) {
inode->i_mtime = current_time(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
}
reiserfs_update_sd(th, inode);
@@ -2029,7 +2029,7 @@ update_and_out:
if (update_timestamps) {
/* this is truncate, not file closing */
inode->i_mtime = current_time(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
}
reiserfs_update_sd(th, inode);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 929acce6e731..7eaf36b3de12 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -2587,7 +2587,7 @@ out:
return err;
if (inode->i_size < off + len - towrite)
i_size_write(inode, off + len - towrite);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
return len - towrite;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 651027967159..6000964c2b80 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -466,12 +466,13 @@ int reiserfs_commit_write(struct file *f, struct page *page,
static void update_ctime(struct inode *inode)
{
struct timespec64 now = current_time(inode);
+ struct timespec64 ctime = inode_get_ctime(inode);
if (inode_unhashed(inode) || !inode->i_nlink ||
- timespec64_equal(&inode->i_ctime, &now))
+ timespec64_equal(&ctime, &now))
return;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_to_ts(inode, now);
mark_inode_dirty(inode);
}
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 138060452678..064264992b49 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -285,7 +285,7 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
if (error == -ENODATA) {
error = 0;
if (type == ACL_TYPE_ACCESS) {
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
}
}
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index c59b230d55b4..5c35f6c76037 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -322,8 +322,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
set_nlink(i, 1); /* Hard to decide.. */
i->i_size = be32_to_cpu(ri.size);
- i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
- i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
+ i->i_mtime = i->i_atime = inode_set_ctime(i, 0, 0);
/* set up mode and ops */
mode = romfs_modemap[nextfh & ROMFH_TYPE];
@@ -583,16 +582,18 @@ static int romfs_init_fs_context(struct fs_context *fc)
*/
static void romfs_kill_sb(struct super_block *sb)
{
+ generic_shutdown_super(sb);
+
#ifdef CONFIG_ROMFS_ON_MTD
if (sb->s_mtd) {
- kill_mtd_super(sb);
- return;
+ put_mtd_device(sb->s_mtd);
+ sb->s_mtd = NULL;
}
#endif
#ifdef CONFIG_ROMFS_ON_BLOCK
if (sb->s_bdev) {
- kill_block_super(sb);
- return;
+ sync_blockdev(sb->s_bdev);
+ blkdev_put(sb->s_bdev, sb);
}
#endif
}
diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c
index fb4162a52844..aec6e9137474 100644
--- a/fs/smb/client/cifs_debug.c
+++ b/fs/smb/client/cifs_debug.c
@@ -153,6 +153,11 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
in_flight(server),
atomic_read(&server->in_send),
atomic_read(&server->num_waiters));
+#ifdef CONFIG_NET_NS
+ if (server->net)
+ seq_printf(m, " Net namespace: %u ", server->net->ns.inum);
+#endif /* NET_NS */
+
}
static inline const char *smb_speed_to_str(size_t bps)
@@ -430,10 +435,15 @@ skip_rdma:
server->reconnect_instance,
server->srv_count,
server->sec_mode, in_flight(server));
+#ifdef CONFIG_NET_NS
+ if (server->net)
+ seq_printf(m, " Net namespace: %u ", server->net->ns.inum);
+#endif /* NET_NS */
seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d",
atomic_read(&server->in_send),
atomic_read(&server->num_waiters));
+
if (server->leaf_fullpath) {
seq_printf(m, "\nDFS leaf full path: %s",
server->leaf_fullpath);
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index a4d8b0ea1c8c..6fc8f43b1c9d 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -1077,7 +1077,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
}
static int
-cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv)
+cifs_setlease(struct file *file, int arg, struct file_lock **lease, void **priv)
{
/*
* Note that this is called by vfs setlease with i_lock held to
diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h
index d7274eefc666..15c8cc4b6680 100644
--- a/fs/smb/client/cifsfs.h
+++ b/fs/smb/client/cifsfs.h
@@ -159,6 +159,6 @@ extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
/* when changing internal version - update following two lines at same time */
-#define SMB3_PRODUCT_BUILD 43
-#define CIFS_VERSION "2.43"
+#define SMB3_PRODUCT_BUILD 44
+#define CIFS_VERSION "2.44"
#endif /* _CIFSFS_H */
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index e5eec6d38d02..657dee4b2c8c 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -1062,6 +1062,7 @@ struct cifs_ses {
unsigned long chans_need_reconnect;
/* ========= end: protected by chan_lock ======== */
struct cifs_ses *dfs_root_ses;
+ struct nls_table *local_nls;
};
static inline bool
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 9dee267f1893..25503f1a4fd2 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -129,7 +129,7 @@ again:
}
spin_unlock(&server->srv_lock);
- nls_codepage = load_nls_default();
+ nls_codepage = ses->local_nls;
/*
* need to prevent multiple threads trying to simultaneously
@@ -200,7 +200,6 @@ out:
rc = -EAGAIN;
}
- unload_nls(nls_codepage);
return rc;
}
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index 9280e253bf09..238538dde4e3 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -1842,6 +1842,10 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
CIFS_MAX_PASSWORD_LEN))
return 0;
}
+
+ if (strcmp(ctx->local_nls->charset, ses->local_nls->charset))
+ return 0;
+
return 1;
}
@@ -2286,6 +2290,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
ses->sectype = ctx->sectype;
ses->sign = ctx->sign;
+ ses->local_nls = load_nls(ctx->local_nls->charset);
/* add server as first channel */
spin_lock(&ses->chan_lock);
diff --git a/fs/smb/client/dfs.c b/fs/smb/client/dfs.c
index df3fd3b720da..ee772c3d9f00 100644
--- a/fs/smb/client/dfs.c
+++ b/fs/smb/client/dfs.c
@@ -177,8 +177,12 @@ static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx)
struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl);
rc = dfs_get_referral(mnt_ctx, ref_path + 1, NULL, &tl);
- if (rc)
+ if (rc) {
+ rc = cifs_mount_get_tcon(mnt_ctx);
+ if (!rc)
+ rc = cifs_is_path_remote(mnt_ctx);
break;
+ }
tit = dfs_cache_get_tgt_iterator(&tl);
if (!tit) {
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index fc5acc95cd13..2108b3b40ce9 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -1085,7 +1085,7 @@ int cifs_close(struct inode *inode, struct file *file)
!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags) &&
dclose) {
if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
}
spin_lock(&cinode->deferred_lock);
cifs_add_deferred_close(cfile, dclose);
@@ -2596,7 +2596,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
write_data, to - from, &offset);
cifsFileInfo_put(open_file);
/* Does mm or vfs already set times? */
- inode->i_atime = inode->i_mtime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
if ((bytes_written > 0) && (offset))
rc = 0;
else if (bytes_written < 0)
@@ -4681,9 +4681,9 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
io_error:
kunmap(page);
- unlock_page(page);
read_complete:
+ unlock_page(page);
return rc;
}
@@ -4878,9 +4878,11 @@ void cifs_oplock_break(struct work_struct *work)
struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
oplock_break);
struct inode *inode = d_inode(cfile->dentry);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifsInodeInfo *cinode = CIFS_I(inode);
- struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
- struct TCP_Server_Info *server = tcon->ses->server;
+ struct cifs_tcon *tcon;
+ struct TCP_Server_Info *server;
+ struct tcon_link *tlink;
int rc = 0;
bool purge_cache = false, oplock_break_cancelled;
__u64 persistent_fid, volatile_fid;
@@ -4889,6 +4891,12 @@ void cifs_oplock_break(struct work_struct *work)
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink))
+ goto out;
+ tcon = tlink_tcon(tlink);
+ server = tcon->ses->server;
+
server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
cfile->oplock_epoch, &purge_cache);
@@ -4938,18 +4946,19 @@ oplock_break_ack:
/*
* MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
* an acknowledgment to be sent when the file has already been closed.
- * check for server null, since can race with kill_sb calling tree disconnect.
*/
spin_lock(&cinode->open_file_lock);
- if (tcon->ses && tcon->ses->server && !oplock_break_cancelled &&
- !list_empty(&cinode->openFileList)) {
+ /* check list empty since can race with kill_sb calling tree disconnect */
+ if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
spin_unlock(&cinode->open_file_lock);
- rc = tcon->ses->server->ops->oplock_response(tcon, persistent_fid,
- volatile_fid, net_fid, cinode);
+ rc = server->ops->oplock_response(tcon, persistent_fid,
+ volatile_fid, net_fid, cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
} else
spin_unlock(&cinode->open_file_lock);
+ cifs_put_tlink(tlink);
+out:
cifs_done_oplock_break(cinode);
}
diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c
index 4946a0c59600..67e16c2ac90e 100644
--- a/fs/smb/client/fs_context.c
+++ b/fs/smb/client/fs_context.c
@@ -231,6 +231,8 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c
break;
case Opt_sec_none:
ctx->nullauth = 1;
+ kfree(ctx->username);
+ ctx->username = NULL;
break;
default:
cifs_errorf(fc, "bad security option: %s\n", value);
@@ -1201,6 +1203,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
case Opt_user:
kfree(ctx->username);
ctx->username = NULL;
+ if (ctx->nullauth)
+ break;
if (strlen(param->string) == 0) {
/* null user, ie. anonymous authentication */
ctx->nullauth = 1;
diff --git a/fs/smb/client/fscache.h b/fs/smb/client/fscache.h
index 173999610997..84f3b09367d2 100644
--- a/fs/smb/client/fscache.h
+++ b/fs/smb/client/fscache.h
@@ -50,12 +50,13 @@ void cifs_fscache_fill_coherency(struct inode *inode,
struct cifs_fscache_inode_coherency_data *cd)
{
struct cifsInodeInfo *cifsi = CIFS_I(inode);
+ struct timespec64 ctime = inode_get_ctime(inode);
memset(cd, 0, sizeof(*cd));
cd->last_write_time_sec = cpu_to_le64(cifsi->netfs.inode.i_mtime.tv_sec);
cd->last_write_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_mtime.tv_nsec);
- cd->last_change_time_sec = cpu_to_le64(cifsi->netfs.inode.i_ctime.tv_sec);
- cd->last_change_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_ctime.tv_nsec);
+ cd->last_change_time_sec = cpu_to_le64(ctime.tv_sec);
+ cd->last_change_time_nsec = cpu_to_le32(ctime.tv_nsec);
}
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index c3eeae07e139..93fe43789d7a 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -172,7 +172,7 @@ cifs_fattr_to_inode(struct inode *inode, struct cifs_fattr *fattr)
else
inode->i_atime = fattr->cf_atime;
inode->i_mtime = fattr->cf_mtime;
- inode->i_ctime = fattr->cf_ctime;
+ inode_set_ctime_to_ts(inode, fattr->cf_ctime);
inode->i_rdev = fattr->cf_rdev;
cifs_nlink_fattr_to_inode(inode, fattr);
inode->i_uid = fattr->cf_uid;
@@ -1744,9 +1744,9 @@ out_reval:
cifs_inode = CIFS_I(inode);
cifs_inode->time = 0; /* will force revalidate to get info
when needed */
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
}
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
cifs_inode = CIFS_I(dir);
CIFS_I(dir)->time = 0; /* force revalidate of dir as well */
unlink_out:
@@ -2060,8 +2060,8 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
*/
cifsInode->time = 0;
- d_inode(direntry)->i_ctime = inode->i_ctime = inode->i_mtime =
- current_time(inode);
+ inode_set_ctime_current(d_inode(direntry));
+ inode->i_mtime = inode_set_ctime_current(inode);
rmdir_exit:
free_dentry_path(page);
@@ -2267,8 +2267,8 @@ unlink_target:
/* force revalidate to go get info when needed */
CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
- source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
- target_dir->i_mtime = current_time(source_dir);
+ source_dir->i_mtime = target_dir->i_mtime = inode_set_ctime_to_ts(source_dir,
+ inode_set_ctime_current(target_dir));
cifs_rename_exit:
kfree(info_buf_source);
@@ -2540,7 +2540,7 @@ int cifs_getattr(struct mnt_idmap *idmap, const struct path *path,
return rc;
}
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->blksize = cifs_sb->ctx->bsize;
stat->ino = CIFS_I(inode)->uniqueid;
diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c
index fff092bbc7a3..f7160003e0ed 100644
--- a/fs/smb/client/ioctl.c
+++ b/fs/smb/client/ioctl.c
@@ -433,16 +433,21 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
* Dump encryption keys. This is an old ioctl that only
* handles AES-128-{CCM,GCM}.
*/
- if (pSMBFile == NULL)
- break;
if (!capable(CAP_SYS_ADMIN)) {
rc = -EACCES;
break;
}
- tcon = tlink_tcon(pSMBFile->tlink);
+ cifs_sb = CIFS_SB(inode->i_sb);
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink)) {
+ rc = PTR_ERR(tlink);
+ break;
+ }
+ tcon = tlink_tcon(tlink);
if (!smb3_encryption_required(tcon)) {
rc = -EOPNOTSUPP;
+ cifs_put_tlink(tlink);
break;
}
pkey_inf.cipher_type =
@@ -459,6 +464,7 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = -EFAULT;
else
rc = 0;
+ cifs_put_tlink(tlink);
break;
case CIFS_DUMP_FULL_KEY:
/*
@@ -470,8 +476,16 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = -EACCES;
break;
}
- tcon = tlink_tcon(pSMBFile->tlink);
+ cifs_sb = CIFS_SB(inode->i_sb);
+ tlink = cifs_sb_tlink(cifs_sb);
+ if (IS_ERR(tlink)) {
+ rc = PTR_ERR(tlink);
+ break;
+ }
+
+ tcon = tlink_tcon(tlink);
rc = cifs_dump_full_key(tcon, (void __user *)arg);
+ cifs_put_tlink(tlink);
break;
case CIFS_IOC_NOTIFY:
if (!S_ISDIR(inode->i_mode)) {
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index 70dbfe6584f9..d7e85d9a2655 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -95,6 +95,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
return;
}
+ unload_nls(buf_to_free->local_nls);
atomic_dec(&sesInfoAllocCount);
kfree(buf_to_free->serverOS);
kfree(buf_to_free->serverDomain);
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 335c078c42fb..c57ca2050b73 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -1013,6 +1013,7 @@ setup_ntlm_smb3_neg_ret:
}
+/* See MS-NLMP 2.2.1.3 */
int build_ntlmssp_auth_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
@@ -1047,7 +1048,8 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer,
flags = ses->ntlmssp->server_flags | NTLMSSP_REQUEST_TARGET |
NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_WORKSTATION_SUPPLIED;
-
+ /* we only send version information in ntlmssp negotiate, so do not set this flag */
+ flags = flags & ~NTLMSSP_NEGOTIATE_VERSION;
tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
sec_blob->NegotiateFlags = cpu_to_le32(flags);
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 0f62bc373ad0..182e2e879ecf 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -1396,7 +1396,8 @@ smb2_close_getattr(const unsigned int xid, struct cifs_tcon *tcon,
if (file_inf.LastWriteTime)
inode->i_mtime = cifs_NTtimeToUnix(file_inf.LastWriteTime);
if (file_inf.ChangeTime)
- inode->i_ctime = cifs_NTtimeToUnix(file_inf.ChangeTime);
+ inode_set_ctime_to_ts(inode,
+ cifs_NTtimeToUnix(file_inf.ChangeTime));
if (file_inf.LastAccessTime)
inode->i_atime = cifs_NTtimeToUnix(file_inf.LastAccessTime);
diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c
index e04766fe6f80..a457f07f820d 100644
--- a/fs/smb/client/smb2pdu.c
+++ b/fs/smb/client/smb2pdu.c
@@ -242,7 +242,7 @@ again:
}
spin_unlock(&server->srv_lock);
- nls_codepage = load_nls_default();
+ nls_codepage = ses->local_nls;
/*
* need to prevent multiple threads trying to simultaneously
@@ -324,7 +324,6 @@ out:
rc = -EAGAIN;
}
failed:
- unload_nls(nls_codepage);
return rc;
}
diff --git a/fs/smb/server/ksmbd_netlink.h b/fs/smb/server/ksmbd_netlink.h
index fb8b2d566efb..b7521e41402e 100644
--- a/fs/smb/server/ksmbd_netlink.h
+++ b/fs/smb/server/ksmbd_netlink.h
@@ -352,7 +352,8 @@ enum KSMBD_TREE_CONN_STATUS {
#define KSMBD_SHARE_FLAG_STREAMS BIT(11)
#define KSMBD_SHARE_FLAG_FOLLOW_SYMLINKS BIT(12)
#define KSMBD_SHARE_FLAG_ACL_XATTR BIT(13)
-#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
+#define KSMBD_SHARE_FLAG_UPDATE BIT(14)
+#define KSMBD_SHARE_FLAG_CROSSMNT BIT(15)
/*
* Tree connect request flags.
diff --git a/fs/smb/server/server.c b/fs/smb/server/server.c
index ced7a9e916f0..9df121bdf349 100644
--- a/fs/smb/server/server.c
+++ b/fs/smb/server/server.c
@@ -286,6 +286,7 @@ static void handle_ksmbd_work(struct work_struct *wk)
static int queue_ksmbd_work(struct ksmbd_conn *conn)
{
struct ksmbd_work *work;
+ int err;
work = ksmbd_alloc_work_struct();
if (!work) {
@@ -297,7 +298,11 @@ static int queue_ksmbd_work(struct ksmbd_conn *conn)
work->request_buf = conn->request_buf;
conn->request_buf = NULL;
- ksmbd_init_smb_server(work);
+ err = ksmbd_init_smb_server(work);
+ if (err) {
+ ksmbd_free_work_struct(work);
+ return 0;
+ }
ksmbd_conn_enqueue_request(work);
atomic_inc(&conn->r_count);
diff --git a/fs/smb/server/smb2misc.c b/fs/smb/server/smb2misc.c
index 33b7e6c4ceff..e881df1d10cb 100644
--- a/fs/smb/server/smb2misc.c
+++ b/fs/smb/server/smb2misc.c
@@ -380,13 +380,13 @@ int ksmbd_smb2_check_message(struct ksmbd_work *work)
}
if (smb2_req_struct_sizes[command] != pdu->StructureSize2) {
- if (command == SMB2_OPLOCK_BREAK_HE &&
- le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_20 &&
- le16_to_cpu(pdu->StructureSize2) != OP_BREAK_STRUCT_SIZE_21) {
+ if (!(command == SMB2_OPLOCK_BREAK_HE &&
+ (le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_20 ||
+ le16_to_cpu(pdu->StructureSize2) == OP_BREAK_STRUCT_SIZE_21))) {
/* special case for SMB2.1 lease break message */
ksmbd_debug(SMB,
- "Illegal request size %d for oplock break\n",
- le16_to_cpu(pdu->StructureSize2));
+ "Illegal request size %u for command %d\n",
+ le16_to_cpu(pdu->StructureSize2), command);
return 1;
}
}
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index cf8822103f50..a947c18915c2 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -87,9 +87,9 @@ struct channel *lookup_chann_list(struct ksmbd_session *sess, struct ksmbd_conn
*/
int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
{
- struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
+ struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
unsigned int cmd = le16_to_cpu(req_hdr->Command);
- int tree_id;
+ unsigned int tree_id;
if (cmd == SMB2_TREE_CONNECT_HE ||
cmd == SMB2_CANCEL_HE ||
@@ -114,7 +114,7 @@ int smb2_get_ksmbd_tcon(struct ksmbd_work *work)
pr_err("The first operation in the compound does not have tcon\n");
return -EINVAL;
}
- if (work->tcon->id != tree_id) {
+ if (tree_id != UINT_MAX && work->tcon->id != tree_id) {
pr_err("tree id(%u) is different with id(%u) in first operation\n",
tree_id, work->tcon->id);
return -EINVAL;
@@ -559,9 +559,9 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
*/
int smb2_check_user_session(struct ksmbd_work *work)
{
- struct smb2_hdr *req_hdr = smb2_get_msg(work->request_buf);
+ struct smb2_hdr *req_hdr = ksmbd_req_buf_next(work);
struct ksmbd_conn *conn = work->conn;
- unsigned int cmd = conn->ops->get_cmd_val(work);
+ unsigned int cmd = le16_to_cpu(req_hdr->Command);
unsigned long long sess_id;
/*
@@ -587,7 +587,7 @@ int smb2_check_user_session(struct ksmbd_work *work)
pr_err("The first operation in the compound does not have sess\n");
return -EINVAL;
}
- if (work->sess->id != sess_id) {
+ if (sess_id != ULLONG_MAX && work->sess->id != sess_id) {
pr_err("session id(%llu) is different with the first operation(%lld)\n",
sess_id, work->sess->id);
return -EINVAL;
@@ -2324,9 +2324,16 @@ next:
break;
buf_len -= next;
eabuf = (struct smb2_ea_info *)((char *)eabuf + next);
- if (next < (u32)eabuf->EaNameLength + le16_to_cpu(eabuf->EaValueLength))
+ if (buf_len < sizeof(struct smb2_ea_info)) {
+ rc = -EINVAL;
break;
+ }
+ if (buf_len < sizeof(struct smb2_ea_info) + eabuf->EaNameLength +
+ le16_to_cpu(eabuf->EaValueLength)) {
+ rc = -EINVAL;
+ break;
+ }
} while (next != 0);
kfree(attr_name);
@@ -2467,8 +2474,9 @@ static void smb2_update_xattrs(struct ksmbd_tree_connect *tcon,
}
}
-static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
- int open_flags, umode_t posix_mode, bool is_dir)
+static int smb2_creat(struct ksmbd_work *work, struct path *parent_path,
+ struct path *path, char *name, int open_flags,
+ umode_t posix_mode, bool is_dir)
{
struct ksmbd_tree_connect *tcon = work->tcon;
struct ksmbd_share_config *share = tcon->share_conf;
@@ -2495,7 +2503,7 @@ static int smb2_creat(struct ksmbd_work *work, struct path *path, char *name,
return rc;
}
- rc = ksmbd_vfs_kern_path_locked(work, name, 0, path, 0);
+ rc = ksmbd_vfs_kern_path_locked(work, name, 0, parent_path, path, 0);
if (rc) {
pr_err("cannot get linux path (%s), err = %d\n",
name, rc);
@@ -2565,7 +2573,7 @@ int smb2_open(struct ksmbd_work *work)
struct ksmbd_tree_connect *tcon = work->tcon;
struct smb2_create_req *req;
struct smb2_create_rsp *rsp;
- struct path path;
+ struct path path, parent_path;
struct ksmbd_share_config *share = tcon->share_conf;
struct ksmbd_file *fp = NULL;
struct file *filp = NULL;
@@ -2786,7 +2794,8 @@ int smb2_open(struct ksmbd_work *work)
goto err_out1;
}
- rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS, &path, 1);
+ rc = ksmbd_vfs_kern_path_locked(work, name, LOOKUP_NO_SYMLINKS,
+ &parent_path, &path, 1);
if (!rc) {
file_present = true;
@@ -2906,7 +2915,8 @@ int smb2_open(struct ksmbd_work *work)
/*create file if not present */
if (!file_present) {
- rc = smb2_creat(work, &path, name, open_flags, posix_mode,
+ rc = smb2_creat(work, &parent_path, &path, name, open_flags,
+ posix_mode,
req->CreateOptions & FILE_DIRECTORY_FILE_LE);
if (rc) {
if (rc == -ENOENT) {
@@ -3321,8 +3331,9 @@ int smb2_open(struct ksmbd_work *work)
err_out:
if (file_present || created) {
- inode_unlock(d_inode(path.dentry->d_parent));
- dput(path.dentry);
+ inode_unlock(d_inode(parent_path.dentry));
+ path_put(&path);
+ path_put(&parent_path);
}
ksmbd_revert_fsids(work);
err_out1:
@@ -4391,8 +4402,8 @@ static int get_file_basic_info(struct smb2_query_info_rsp *rsp,
}
basic_info = (struct smb2_file_basic_info *)rsp->Buffer;
- generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
- &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+ file_inode(fp->filp), &stat);
basic_info->CreationTime = cpu_to_le64(fp->create_time);
time = ksmbd_UnixTimeToNT(stat.atime);
basic_info->LastAccessTime = cpu_to_le64(time);
@@ -4417,7 +4428,7 @@ static void get_file_standard_info(struct smb2_query_info_rsp *rsp,
struct kstat stat;
inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
sinfo = (struct smb2_file_standard_info *)rsp->Buffer;
delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4471,7 +4482,7 @@ static int get_file_all_info(struct ksmbd_work *work,
return PTR_ERR(filename);
inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
ksmbd_debug(SMB, "filename = %s\n", filename);
delete_pending = ksmbd_inode_pending_delete(fp);
@@ -4548,8 +4559,8 @@ static void get_file_stream_info(struct ksmbd_work *work,
int buf_free_len;
struct smb2_query_info_req *req = ksmbd_req_buf_next(work);
- generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
- &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+ file_inode(fp->filp), &stat);
file_info = (struct smb2_file_stream_info *)rsp->Buffer;
buf_free_len =
@@ -4639,8 +4650,8 @@ static void get_file_internal_info(struct smb2_query_info_rsp *rsp,
struct smb2_file_internal_info *file_info;
struct kstat stat;
- generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
- &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+ file_inode(fp->filp), &stat);
file_info = (struct smb2_file_internal_info *)rsp->Buffer;
file_info->IndexNumber = cpu_to_le64(stat.ino);
rsp->OutputBufferLength =
@@ -4665,7 +4676,7 @@ static int get_file_network_open_info(struct smb2_query_info_rsp *rsp,
file_info = (struct smb2_file_ntwrk_info *)rsp->Buffer;
inode = file_inode(fp->filp);
- generic_fillattr(file_mnt_idmap(fp->filp), inode, &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS, inode, &stat);
file_info->CreationTime = cpu_to_le64(fp->create_time);
time = ksmbd_UnixTimeToNT(stat.atime);
@@ -4726,8 +4737,8 @@ static void get_file_compression_info(struct smb2_query_info_rsp *rsp,
struct smb2_file_comp_info *file_info;
struct kstat stat;
- generic_fillattr(file_mnt_idmap(fp->filp), file_inode(fp->filp),
- &stat);
+ generic_fillattr(file_mnt_idmap(fp->filp), STATX_BASIC_STATS,
+ file_inode(fp->filp), &stat);
file_info = (struct smb2_file_comp_info *)rsp->Buffer;
file_info->CompressedFileSize = cpu_to_le64(stat.blocks << 9);
@@ -4779,7 +4790,7 @@ static int find_file_posix_info(struct smb2_query_info_rsp *rsp,
file_info->LastAccessTime = cpu_to_le64(time);
time = ksmbd_UnixTimeToNT(inode->i_mtime);
file_info->LastWriteTime = cpu_to_le64(time);
- time = ksmbd_UnixTimeToNT(inode->i_ctime);
+ time = ksmbd_UnixTimeToNT(inode_get_ctime(inode));
file_info->ChangeTime = cpu_to_le64(time);
file_info->DosAttributes = fp->f_ci->m_fattr;
file_info->Inode = cpu_to_le64(inode->i_ino);
@@ -5422,7 +5433,7 @@ int smb2_close(struct ksmbd_work *work)
rsp->LastAccessTime = cpu_to_le64(time);
time = ksmbd_UnixTimeToNT(inode->i_mtime);
rsp->LastWriteTime = cpu_to_le64(time);
- time = ksmbd_UnixTimeToNT(inode->i_ctime);
+ time = ksmbd_UnixTimeToNT(inode_get_ctime(inode));
rsp->ChangeTime = cpu_to_le64(time);
ksmbd_fd_put(work, fp);
} else {
@@ -5545,7 +5556,7 @@ static int smb2_create_link(struct ksmbd_work *work,
struct nls_table *local_nls)
{
char *link_name = NULL, *target_name = NULL, *pathname = NULL;
- struct path path;
+ struct path path, parent_path;
bool file_present = false;
int rc;
@@ -5575,7 +5586,7 @@ static int smb2_create_link(struct ksmbd_work *work,
ksmbd_debug(SMB, "target name is %s\n", target_name);
rc = ksmbd_vfs_kern_path_locked(work, link_name, LOOKUP_NO_SYMLINKS,
- &path, 0);
+ &parent_path, &path, 0);
if (rc) {
if (rc != -ENOENT)
goto out;
@@ -5605,8 +5616,9 @@ static int smb2_create_link(struct ksmbd_work *work,
rc = -EINVAL;
out:
if (file_present) {
- inode_unlock(d_inode(path.dentry->d_parent));
+ inode_unlock(d_inode(parent_path.dentry));
path_put(&path);
+ path_put(&parent_path);
}
if (!IS_ERR(link_name))
kfree(link_name);
@@ -5644,7 +5656,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
if (file_info->ChangeTime)
attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
else
- attrs.ia_ctime = inode->i_ctime;
+ attrs.ia_ctime = inode_get_ctime(inode);
if (file_info->LastWriteTime) {
attrs.ia_mtime = ksmbd_NTtimeToUnix(file_info->LastWriteTime);
@@ -5689,7 +5701,7 @@ static int set_file_basic_info(struct ksmbd_file *fp,
return -EACCES;
inode_lock(inode);
- inode->i_ctime = attrs.ia_ctime;
+ inode_set_ctime_to_ts(inode, attrs.ia_ctime);
attrs.ia_valid &= ~ATTR_CTIME;
rc = notify_change(idmap, dentry, &attrs, NULL);
inode_unlock(inode);
@@ -6209,6 +6221,11 @@ int smb2_read(struct ksmbd_work *work)
unsigned int max_read_size = conn->vals->max_read_size;
WORK_BUFFERS(work, req, rsp);
+ if (work->next_smb2_rcv_hdr_off) {
+ work->send_no_response = 1;
+ err = -EOPNOTSUPP;
+ goto out;
+ }
if (test_share_config_flag(work->tcon->share_conf,
KSMBD_SHARE_FLAG_PIPE)) {
@@ -8609,7 +8626,8 @@ int smb3_decrypt_req(struct ksmbd_work *work)
struct smb2_transform_hdr *tr_hdr = smb2_get_msg(buf);
int rc = 0;
- if (buf_data_size < sizeof(struct smb2_hdr)) {
+ if (pdu_length < sizeof(struct smb2_transform_hdr) ||
+ buf_data_size < sizeof(struct smb2_hdr)) {
pr_err("Transform message is too small (%u)\n",
pdu_length);
return -ECONNABORTED;
diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c
index ef20f63e55e6..c2b75d898852 100644
--- a/fs/smb/server/smb_common.c
+++ b/fs/smb/server/smb_common.c
@@ -388,26 +388,29 @@ static struct smb_version_cmds smb1_server_cmds[1] = {
[SMB_COM_NEGOTIATE_EX] = { .proc = smb1_negotiate, },
};
-static void init_smb1_server(struct ksmbd_conn *conn)
+static int init_smb1_server(struct ksmbd_conn *conn)
{
conn->ops = &smb1_server_ops;
conn->cmds = smb1_server_cmds;
conn->max_cmds = ARRAY_SIZE(smb1_server_cmds);
+ return 0;
}
-void ksmbd_init_smb_server(struct ksmbd_work *work)
+int ksmbd_init_smb_server(struct ksmbd_work *work)
{
struct ksmbd_conn *conn = work->conn;
__le32 proto;
- if (conn->need_neg == false)
- return;
-
proto = *(__le32 *)((struct smb_hdr *)work->request_buf)->Protocol;
+ if (conn->need_neg == false) {
+ if (proto == SMB1_PROTO_NUMBER)
+ return -EINVAL;
+ return 0;
+ }
+
if (proto == SMB1_PROTO_NUMBER)
- init_smb1_server(conn);
- else
- init_smb3_11_server(conn);
+ return init_smb1_server(conn);
+ return init_smb3_11_server(conn);
}
int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index aeca0f46068f..f1092519c0c2 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -427,7 +427,7 @@ bool ksmbd_smb_request(struct ksmbd_conn *conn);
int ksmbd_lookup_dialect_by_id(__le16 *cli_dialects, __le16 dialects_count);
-void ksmbd_init_smb_server(struct ksmbd_work *work);
+int ksmbd_init_smb_server(struct ksmbd_work *work);
struct ksmbd_kstat;
int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c
index e35914457350..d48756a339a5 100644
--- a/fs/smb/server/vfs.c
+++ b/fs/smb/server/vfs.c
@@ -63,13 +63,13 @@ int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
char *pathname, unsigned int flags,
+ struct path *parent_path,
struct path *path)
{
struct qstr last;
struct filename *filename;
struct path *root_share_path = &share_conf->vfs_path;
int err, type;
- struct path parent_path;
struct dentry *d;
if (pathname[0] == '\0') {
@@ -84,7 +84,7 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
return PTR_ERR(filename);
err = vfs_path_parent_lookup(filename, flags,
- &parent_path, &last, &type,
+ parent_path, &last, &type,
root_share_path);
if (err) {
putname(filename);
@@ -92,13 +92,13 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
}
if (unlikely(type != LAST_NORM)) {
- path_put(&parent_path);
+ path_put(parent_path);
putname(filename);
return -ENOENT;
}
- inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
- d = lookup_one_qstr_excl(&last, parent_path.dentry, 0);
+ inode_lock_nested(parent_path->dentry->d_inode, I_MUTEX_PARENT);
+ d = lookup_one_qstr_excl(&last, parent_path->dentry, 0);
if (IS_ERR(d))
goto err_out;
@@ -108,15 +108,22 @@ static int ksmbd_vfs_path_lookup_locked(struct ksmbd_share_config *share_conf,
}
path->dentry = d;
- path->mnt = share_conf->vfs_path.mnt;
- path_put(&parent_path);
- putname(filename);
+ path->mnt = mntget(parent_path->mnt);
+ if (test_share_config_flag(share_conf, KSMBD_SHARE_FLAG_CROSSMNT)) {
+ err = follow_down(path, 0);
+ if (err < 0) {
+ path_put(path);
+ goto err_out;
+ }
+ }
+
+ putname(filename);
return 0;
err_out:
- inode_unlock(parent_path.dentry->d_inode);
- path_put(&parent_path);
+ inode_unlock(d_inode(parent_path->dentry));
+ path_put(parent_path);
putname(filename);
return -ENOENT;
}
@@ -412,7 +419,8 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
{
char *stream_buf = NULL, *wbuf;
struct mnt_idmap *idmap = file_mnt_idmap(fp->filp);
- size_t size, v_len;
+ size_t size;
+ ssize_t v_len;
int err = 0;
ksmbd_debug(VFS, "write stream data pos : %llu, count : %zd\n",
@@ -429,9 +437,9 @@ static int ksmbd_vfs_stream_write(struct ksmbd_file *fp, char *buf, loff_t *pos,
fp->stream.name,
fp->stream.size,
&stream_buf);
- if ((int)v_len < 0) {
+ if (v_len < 0) {
pr_err("not found stream in xattr : %zd\n", v_len);
- err = (int)v_len;
+ err = v_len;
goto out;
}
@@ -1194,14 +1202,14 @@ static int ksmbd_vfs_lookup_in_dir(const struct path *dir, char *name,
* Return: 0 on success, otherwise error
*/
int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
- unsigned int flags, struct path *path,
- bool caseless)
+ unsigned int flags, struct path *parent_path,
+ struct path *path, bool caseless)
{
struct ksmbd_share_config *share_conf = work->tcon->share_conf;
int err;
- struct path parent_path;
- err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, path);
+ err = ksmbd_vfs_path_lookup_locked(share_conf, name, flags, parent_path,
+ path);
if (!err)
return 0;
@@ -1216,10 +1224,10 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
path_len = strlen(filepath);
remain_len = path_len;
- parent_path = share_conf->vfs_path;
- path_get(&parent_path);
+ *parent_path = share_conf->vfs_path;
+ path_get(parent_path);
- while (d_can_lookup(parent_path.dentry)) {
+ while (d_can_lookup(parent_path->dentry)) {
char *filename = filepath + path_len - remain_len;
char *next = strchrnul(filename, '/');
size_t filename_len = next - filename;
@@ -1228,7 +1236,7 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
if (filename_len == 0)
break;
- err = ksmbd_vfs_lookup_in_dir(&parent_path, filename,
+ err = ksmbd_vfs_lookup_in_dir(parent_path, filename,
filename_len,
work->conn->um);
if (err)
@@ -1245,8 +1253,8 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
goto out2;
else if (is_last)
goto out1;
- path_put(&parent_path);
- parent_path = *path;
+ path_put(parent_path);
+ *parent_path = *path;
next[0] = '/';
remain_len -= filename_len + 1;
@@ -1254,16 +1262,17 @@ int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
err = -EINVAL;
out2:
- path_put(&parent_path);
+ path_put(parent_path);
out1:
kfree(filepath);
}
if (!err) {
- err = ksmbd_vfs_lock_parent(parent_path.dentry, path->dentry);
- if (err)
- dput(path->dentry);
- path_put(&parent_path);
+ err = ksmbd_vfs_lock_parent(parent_path->dentry, path->dentry);
+ if (err) {
+ path_put(path);
+ path_put(parent_path);
+ }
}
return err;
}
@@ -1650,7 +1659,8 @@ int ksmbd_vfs_fill_dentry_attrs(struct ksmbd_work *work,
u64 time;
int rc;
- generic_fillattr(idmap, d_inode(dentry), ksmbd_kstat->kstat);
+ generic_fillattr(idmap, STATX_BASIC_STATS, d_inode(dentry),
+ ksmbd_kstat->kstat);
time = ksmbd_UnixTimeToNT(ksmbd_kstat->kstat->ctime);
ksmbd_kstat->create_time = time;
diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h
index 80039312c255..72f9fb4b48d1 100644
--- a/fs/smb/server/vfs.h
+++ b/fs/smb/server/vfs.h
@@ -115,8 +115,8 @@ int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name,
int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap,
const struct path *path, char *attr_name);
int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name,
- unsigned int flags, struct path *path,
- bool caseless);
+ unsigned int flags, struct path *parent_path,
+ struct path *path, bool caseless);
struct dentry *ksmbd_vfs_kern_path_create(struct ksmbd_work *work,
const char *name,
unsigned int flags,
diff --git a/fs/splice.c b/fs/splice.c
index 004eb1c4ce31..02631013b09f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -120,17 +120,17 @@ static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- struct page *page = buf->page;
+ struct folio *folio = page_folio(buf->page);
int err;
- if (!PageUptodate(page)) {
- lock_page(page);
+ if (!folio_test_uptodate(folio)) {
+ folio_lock(folio);
/*
- * Page got truncated/unhashed. This will cause a 0-byte
+ * Folio got truncated/unhashed. This will cause a 0-byte
* splice, if this is the first page.
*/
- if (!page->mapping) {
+ if (!folio->mapping) {
err = -ENODATA;
goto error;
}
@@ -138,20 +138,18 @@ static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
/*
* Uh oh, read-error from disk.
*/
- if (!PageUptodate(page)) {
+ if (!folio_test_uptodate(folio)) {
err = -EIO;
goto error;
}
- /*
- * Page is ok afterall, we are done.
- */
- unlock_page(page);
+ /* Folio is ok after all, we are done */
+ folio_unlock(folio);
}
return 0;
error:
- unlock_page(page);
+ folio_unlock(folio);
return err;
}
@@ -876,6 +874,8 @@ ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out,
msg.msg_flags |= MSG_MORE;
if (remain && pipe_occupancy(pipe->head, tail) > 0)
msg.msg_flags |= MSG_MORE;
+ if (out->f_flags & O_NONBLOCK)
+ msg.msg_flags |= MSG_DONTWAIT;
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc,
len - remain);
@@ -1267,10 +1267,8 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
if ((in->f_flags | out->f_flags) & O_NONBLOCK)
flags |= SPLICE_F_NONBLOCK;
- return splice_pipe_to_pipe(ipipe, opipe, len, flags);
- }
-
- if (ipipe) {
+ ret = splice_pipe_to_pipe(ipipe, opipe, len, flags);
+ } else if (ipipe) {
if (off_in)
return -ESPIPE;
if (off_out) {
@@ -1295,18 +1293,11 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
ret = do_splice_from(ipipe, out, &offset, len, flags);
file_end_write(out);
- if (ret > 0)
- fsnotify_modify(out);
-
if (!off_out)
out->f_pos = offset;
else
*off_out = offset;
-
- return ret;
- }
-
- if (opipe) {
+ } else if (opipe) {
if (off_out)
return -ESPIPE;
if (off_in) {
@@ -1322,18 +1313,25 @@ long do_splice(struct file *in, loff_t *off_in, struct file *out,
ret = splice_file_to_pipe(in, opipe, &offset, len, flags);
- if (ret > 0)
- fsnotify_access(in);
-
if (!off_in)
in->f_pos = offset;
else
*off_in = offset;
+ } else {
+ ret = -EINVAL;
+ }
- return ret;
+ if (ret > 0) {
+ /*
+ * Generate modify out before access in:
+ * do_splice_from() may've already sent modify out,
+ * and this ensures the events get merged.
+ */
+ fsnotify_modify(out);
+ fsnotify_access(in);
}
- return -EINVAL;
+ return ret;
}
static long __do_splice(struct file *in, loff_t __user *off_in,
@@ -1462,6 +1460,9 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
pipe_unlock(pipe);
}
+ if (ret > 0)
+ fsnotify_access(file);
+
return ret;
}
@@ -1491,8 +1492,10 @@ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
if (!ret)
ret = iter_to_pipe(iter, pipe, buf_flag);
pipe_unlock(pipe);
- if (ret > 0)
+ if (ret > 0) {
wakeup_pipe_readers(pipe);
+ fsnotify_modify(file);
+ }
return ret;
}
@@ -1926,6 +1929,11 @@ long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags)
}
}
+ if (ret > 0) {
+ fsnotify_access(in);
+ fsnotify_modify(out);
+ }
+
return ret;
}
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 24463145b351..c6e626b00546 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -61,7 +61,7 @@ static int squashfs_new_inode(struct super_block *sb, struct inode *inode,
inode->i_ino = le32_to_cpu(sqsh_ino->inode_number);
inode->i_mtime.tv_sec = le32_to_cpu(sqsh_ino->mtime);
inode->i_atime.tv_sec = inode->i_mtime.tv_sec;
- inode->i_ctime.tv_sec = inode->i_mtime.tv_sec;
+ inode_set_ctime(inode, inode->i_mtime.tv_sec, 0);
inode->i_mode = le16_to_cpu(sqsh_ino->mode);
inode->i_size = 0;
diff --git a/fs/stack.c b/fs/stack.c
index c9830924eb12..b5e01bdb5f5f 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -68,7 +68,7 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
dest->i_rdev = src->i_rdev;
dest->i_atime = src->i_atime;
dest->i_mtime = src->i_mtime;
- dest->i_ctime = src->i_ctime;
+ inode_set_ctime_to_ts(dest, inode_get_ctime(src));
dest->i_blkbits = src->i_blkbits;
dest->i_flags = src->i_flags;
set_nlink(dest, src->i_nlink);
diff --git a/fs/stat.c b/fs/stat.c
index 7c238da22ef0..136711ae72fb 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -27,10 +27,42 @@
#include "mount.h"
/**
+ * fill_mg_cmtime - Fill in the mtime and ctime and flag ctime as QUERIED
+ * @stat: where to store the resulting values
+ * @request_mask: STATX_* values requested
+ * @inode: inode from which to grab the c/mtime
+ *
+ * Given @inode, grab the ctime and mtime out if it and store the result
+ * in @stat. When fetching the value, flag it as queried so the next write
+ * will use a fine-grained timestamp.
+ */
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode)
+{
+ atomic_long_t *pnsec = (atomic_long_t *)&inode->__i_ctime.tv_nsec;
+
+ /* If neither time was requested, then don't report them */
+ if (!(request_mask & (STATX_CTIME|STATX_MTIME))) {
+ stat->result_mask &= ~(STATX_CTIME|STATX_MTIME);
+ return;
+ }
+
+ stat->mtime = inode->i_mtime;
+ stat->ctime.tv_sec = inode->__i_ctime.tv_sec;
+ /*
+ * Atomically set the QUERIED flag and fetch the new value with
+ * the flag masked off.
+ */
+ stat->ctime.tv_nsec = atomic_long_fetch_or(I_CTIME_QUERIED, pnsec) &
+ ~I_CTIME_QUERIED;
+}
+EXPORT_SYMBOL(fill_mg_cmtime);
+
+/**
* generic_fillattr - Fill in the basic attributes from the inode struct
- * @idmap: idmap of the mount the inode was found from
- * @inode: Inode to use as the source
- * @stat: Where to fill in the attributes
+ * @idmap: idmap of the mount the inode was found from
+ * @request_mask: statx request_mask
+ * @inode: Inode to use as the source
+ * @stat: Where to fill in the attributes
*
* Fill in the basic attributes in the kstat structure from data that's to be
* found on the VFS inode structure. This is the default if no getattr inode
@@ -42,8 +74,8 @@
* uid and gid filds. On non-idmapped mounts or if permission checking is to be
* performed on the raw inode simply passs @nop_mnt_idmap.
*/
-void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode,
- struct kstat *stat)
+void generic_fillattr(struct mnt_idmap *idmap, u32 request_mask,
+ struct inode *inode, struct kstat *stat)
{
vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode);
vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode);
@@ -57,10 +89,22 @@ void generic_fillattr(struct mnt_idmap *idmap, struct inode *inode,
stat->rdev = inode->i_rdev;
stat->size = i_size_read(inode);
stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
+
+ if (is_mgtime(inode)) {
+ fill_mg_cmtime(stat, request_mask, inode);
+ } else {
+ stat->mtime = inode->i_mtime;
+ stat->ctime = inode_get_ctime(inode);
+ }
+
stat->blksize = i_blocksize(inode);
stat->blocks = inode->i_blocks;
+
+ if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
+ stat->result_mask |= STATX_CHANGE_COOKIE;
+ stat->change_cookie = inode_query_iversion(inode);
+ }
+
}
EXPORT_SYMBOL(generic_fillattr);
@@ -123,17 +167,12 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
stat->attributes_mask |= (STATX_ATTR_AUTOMOUNT |
STATX_ATTR_DAX);
- if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
- stat->result_mask |= STATX_CHANGE_COOKIE;
- stat->change_cookie = inode_query_iversion(inode);
- }
-
idmap = mnt_idmap(path->mnt);
if (inode->i_op->getattr)
return inode->i_op->getattr(idmap, path, stat,
request_mask, query_flags);
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
return 0;
}
EXPORT_SYMBOL(vfs_getattr_nosec);
diff --git a/fs/super.c b/fs/super.c
index e781226e2880..692654c2af36 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -39,7 +39,7 @@
#include <uapi/linux/mount.h>
#include "internal.h"
-static int thaw_super_locked(struct super_block *sb);
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who);
static LIST_HEAD(super_blocks);
static DEFINE_SPINLOCK(sb_lock);
@@ -50,6 +50,130 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
"sb_internal",
};
+static inline void __super_lock(struct super_block *sb, bool excl)
+{
+ if (excl)
+ down_write(&sb->s_umount);
+ else
+ down_read(&sb->s_umount);
+}
+
+static inline void super_unlock(struct super_block *sb, bool excl)
+{
+ if (excl)
+ up_write(&sb->s_umount);
+ else
+ up_read(&sb->s_umount);
+}
+
+static inline void __super_lock_excl(struct super_block *sb)
+{
+ __super_lock(sb, true);
+}
+
+static inline void super_unlock_excl(struct super_block *sb)
+{
+ super_unlock(sb, true);
+}
+
+static inline void super_unlock_shared(struct super_block *sb)
+{
+ super_unlock(sb, false);
+}
+
+static inline bool wait_born(struct super_block *sb)
+{
+ unsigned int flags;
+
+ /*
+ * Pairs with smp_store_release() in super_wake() and ensures
+ * that we see SB_BORN or SB_DYING after we're woken.
+ */
+ flags = smp_load_acquire(&sb->s_flags);
+ return flags & (SB_BORN | SB_DYING);
+}
+
+/**
+ * super_lock - wait for superblock to become ready and lock it
+ * @sb: superblock to wait for
+ * @excl: whether exclusive access is required
+ *
+ * If the superblock has neither passed through vfs_get_tree() or
+ * generic_shutdown_super() yet wait for it to happen. Either superblock
+ * creation will succeed and SB_BORN is set by vfs_get_tree() or we're
+ * woken and we'll see SB_DYING.
+ *
+ * The caller must have acquired a temporary reference on @sb->s_count.
+ *
+ * Return: This returns true if SB_BORN was set, false if SB_DYING was
+ * set. The function acquires s_umount and returns with it held.
+ */
+static __must_check bool super_lock(struct super_block *sb, bool excl)
+{
+
+ lockdep_assert_not_held(&sb->s_umount);
+
+relock:
+ __super_lock(sb, excl);
+
+ /*
+ * Has gone through generic_shutdown_super() in the meantime.
+ * @sb->s_root is NULL and @sb->s_active is 0. No one needs to
+ * grab a reference to this. Tell them so.
+ */
+ if (sb->s_flags & SB_DYING)
+ return false;
+
+ /* Has called ->get_tree() successfully. */
+ if (sb->s_flags & SB_BORN)
+ return true;
+
+ super_unlock(sb, excl);
+
+ /* wait until the superblock is ready or dying */
+ wait_var_event(&sb->s_flags, wait_born(sb));
+
+ /*
+ * Neither SB_BORN nor SB_DYING are ever unset so we never loop.
+ * Just reacquire @sb->s_umount for the caller.
+ */
+ goto relock;
+}
+
+/* wait and acquire read-side of @sb->s_umount */
+static inline bool super_lock_shared(struct super_block *sb)
+{
+ return super_lock(sb, false);
+}
+
+/* wait and acquire write-side of @sb->s_umount */
+static inline bool super_lock_excl(struct super_block *sb)
+{
+ return super_lock(sb, true);
+}
+
+/* wake waiters */
+#define SUPER_WAKE_FLAGS (SB_BORN | SB_DYING | SB_DEAD)
+static void super_wake(struct super_block *sb, unsigned int flag)
+{
+ WARN_ON_ONCE((flag & ~SUPER_WAKE_FLAGS));
+ WARN_ON_ONCE(hweight32(flag & SUPER_WAKE_FLAGS) > 1);
+
+ /*
+ * Pairs with smp_load_acquire() in super_lock() to make sure
+ * all initializations in the superblock are seen by the user
+ * seeing SB_BORN sent.
+ */
+ smp_store_release(&sb->s_flags, sb->s_flags | flag);
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees SB_BORN set or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
+ smp_mb();
+ wake_up_var(&sb->s_flags);
+}
+
/*
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
@@ -76,7 +200,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
if (!(sc->gfp_mask & __GFP_FS))
return SHRINK_STOP;
- if (!trylock_super(sb))
+ if (!super_trylock_shared(sb))
return SHRINK_STOP;
if (sb->s_op->nr_cached_objects)
@@ -110,7 +234,7 @@ static unsigned long super_cache_scan(struct shrinker *shrink,
freed += sb->s_op->free_cached_objects(sb, sc);
}
- up_read(&sb->s_umount);
+ super_unlock_shared(sb);
return freed;
}
@@ -123,17 +247,17 @@ static unsigned long super_cache_count(struct shrinker *shrink,
sb = container_of(shrink, struct super_block, s_shrink);
/*
- * We don't call trylock_super() here as it is a scalability bottleneck,
- * so we're exposed to partial setup state. The shrinker rwsem does not
- * protect filesystem operations backing list_lru_shrink_count() or
- * s_op->nr_cached_objects(). Counts can change between
- * super_cache_count and super_cache_scan, so we really don't need locks
- * here.
+ * We don't call super_trylock_shared() here as it is a scalability
+ * bottleneck, so we're exposed to partial setup state. The shrinker
+ * rwsem does not protect filesystem operations backing
+ * list_lru_shrink_count() or s_op->nr_cached_objects(). Counts can
+ * change between super_cache_count and super_cache_scan, so we really
+ * don't need locks here.
*
* However, if we are currently mounting the superblock, the underlying
* filesystem might be in a state of partial construction and hence it
- * is dangerous to access it. trylock_super() uses a SB_BORN check to
- * avoid this situation, so do the same here. The memory barrier is
+ * is dangerous to access it. super_trylock_shared() uses a SB_BORN check
+ * to avoid this situation, so do the same here. The memory barrier is
* matched with the one in mount_fs() as we don't hold locks here.
*/
if (!(sb->s_flags & SB_BORN))
@@ -176,7 +300,7 @@ static void destroy_unused_super(struct super_block *s)
{
if (!s)
return;
- up_write(&s->s_umount);
+ super_unlock_excl(s);
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
security_sb_free(s);
@@ -337,10 +461,29 @@ void deactivate_locked_super(struct super_block *s)
list_lru_destroy(&s->s_dentry_lru);
list_lru_destroy(&s->s_inode_lru);
+ /*
+ * Remove it from @fs_supers so it isn't found by new
+ * sget{_fc}() walkers anymore. Any concurrent mounter still
+ * managing to grab a temporary reference is guaranteed to
+ * already see SB_DYING and will wait until we notify them about
+ * SB_DEAD.
+ */
+ spin_lock(&sb_lock);
+ hlist_del_init(&s->s_instances);
+ spin_unlock(&sb_lock);
+
+ /*
+ * Let concurrent mounts know that this thing is really dead.
+ * We don't need @sb->s_umount here as every concurrent caller
+ * will see SB_DYING and either discard the superblock or wait
+ * for SB_DEAD.
+ */
+ super_wake(s, SB_DEAD);
+
put_filesystem(fs);
put_super(s);
} else {
- up_write(&s->s_umount);
+ super_unlock_excl(s);
}
}
@@ -357,7 +500,7 @@ EXPORT_SYMBOL(deactivate_locked_super);
void deactivate_super(struct super_block *s)
{
if (!atomic_add_unless(&s->s_active, -1, 1)) {
- down_write(&s->s_umount);
+ __super_lock_excl(s);
deactivate_locked_super(s);
}
}
@@ -379,20 +522,61 @@ EXPORT_SYMBOL(deactivate_super);
*/
static int grab_super(struct super_block *s) __releases(sb_lock)
{
+ bool born;
+
s->s_count++;
spin_unlock(&sb_lock);
- down_write(&s->s_umount);
- if ((s->s_flags & SB_BORN) && atomic_inc_not_zero(&s->s_active)) {
+ born = super_lock_excl(s);
+ if (born && atomic_inc_not_zero(&s->s_active)) {
put_super(s);
return 1;
}
- up_write(&s->s_umount);
+ super_unlock_excl(s);
put_super(s);
return 0;
}
+static inline bool wait_dead(struct super_block *sb)
+{
+ unsigned int flags;
+
+ /*
+ * Pairs with memory barrier in super_wake() and ensures
+ * that we see SB_DEAD after we're woken.
+ */
+ flags = smp_load_acquire(&sb->s_flags);
+ return flags & SB_DEAD;
+}
+
+/**
+ * grab_super_dead - acquire an active reference to a superblock
+ * @sb: superblock to acquire
+ *
+ * Acquire a temporary reference on a superblock and try to trade it for
+ * an active reference. This is used in sget{_fc}() to wait for a
+ * superblock to either become SB_BORN or for it to pass through
+ * sb->kill() and be marked as SB_DEAD.
+ *
+ * Return: This returns true if an active reference could be acquired,
+ * false if not.
+ */
+static bool grab_super_dead(struct super_block *sb)
+{
+
+ sb->s_count++;
+ if (grab_super(sb)) {
+ put_super(sb);
+ lockdep_assert_held(&sb->s_umount);
+ return true;
+ }
+ wait_var_event(&sb->s_flags, wait_dead(sb));
+ put_super(sb);
+ lockdep_assert_not_held(&sb->s_umount);
+ return false;
+}
+
/*
- * trylock_super - try to grab ->s_umount shared
+ * super_trylock_shared - try to grab ->s_umount shared
* @sb: reference we are trying to grab
*
* Try to prevent fs shutdown. This is used in places where we
@@ -408,13 +592,13 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
* of down_read(). There's a couple of places that are OK with that, but
* it's very much not a general-purpose interface.
*/
-bool trylock_super(struct super_block *sb)
+bool super_trylock_shared(struct super_block *sb)
{
if (down_read_trylock(&sb->s_umount)) {
- if (!hlist_unhashed(&sb->s_instances) &&
- sb->s_root && (sb->s_flags & SB_BORN))
+ if (!(sb->s_flags & SB_DYING) && sb->s_root &&
+ (sb->s_flags & SB_BORN))
return true;
- up_read(&sb->s_umount);
+ super_unlock_shared(sb);
}
return false;
@@ -439,13 +623,13 @@ bool trylock_super(struct super_block *sb)
void retire_super(struct super_block *sb)
{
WARN_ON(!sb->s_bdev);
- down_write(&sb->s_umount);
+ __super_lock_excl(sb);
if (sb->s_iflags & SB_I_PERSB_BDI) {
bdi_unregister(sb->s_bdi);
sb->s_iflags &= ~SB_I_PERSB_BDI;
}
sb->s_iflags |= SB_I_RETIRED;
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
}
EXPORT_SYMBOL(retire_super);
@@ -517,11 +701,17 @@ void generic_shutdown_super(struct super_block *sb)
spin_unlock(&sb->s_inode_list_lock);
}
}
- spin_lock(&sb_lock);
- /* should be initialized for __put_super_and_need_restart() */
- hlist_del_init(&sb->s_instances);
- spin_unlock(&sb_lock);
- up_write(&sb->s_umount);
+ /*
+ * Broadcast to everyone that grabbed a temporary reference to this
+ * superblock before we removed it from @fs_supers that the superblock
+ * is dying. Every walker of @fs_supers outside of sget{_fc}() will now
+ * discard this superblock and treat it as dead.
+ *
+ * We leave the superblock on @fs_supers so it can be found by
+ * sget{_fc}() until we passed sb->kill_sb().
+ */
+ super_wake(sb, SB_DYING);
+ super_unlock_excl(sb);
if (sb->s_bdi != &noop_backing_dev_info) {
if (sb->s_iflags & SB_I_PERSB_BDI)
bdi_unregister(sb->s_bdi);
@@ -546,17 +736,31 @@ bool mount_capable(struct fs_context *fc)
* @test: Comparison callback
* @set: Setup callback
*
- * Find or create a superblock using the parameters stored in the filesystem
- * context and the two callback functions.
+ * Create a new superblock or find an existing one.
+ *
+ * The @test callback is used to find a matching existing superblock.
+ * Whether or not the requested parameters in @fc are taken into account
+ * is specific to the @test callback that is used. They may even be
+ * completely ignored.
+ *
+ * If an extant superblock is matched, it will be returned unless:
*
- * If an extant superblock is matched, then that will be returned with an
- * elevated reference count that the caller must transfer or discard.
+ * (1) the namespace the filesystem context @fc and the extant
+ * superblock's namespace differ
+ *
+ * (2) the filesystem context @fc has requested that reusing an extant
+ * superblock is not allowed
+ *
+ * In both cases EBUSY will be returned.
*
* If no match is made, a new superblock will be allocated and basic
- * initialisation will be performed (s_type, s_fs_info and s_id will be set and
- * the set() callback will be invoked), the superblock will be published and it
- * will be returned in a partially constructed state with SB_BORN and SB_ACTIVE
- * as yet unset.
+ * initialisation will be performed (s_type, s_fs_info and s_id will be
+ * set and the @set callback will be invoked), the superblock will be
+ * published and it will be returned in a partially constructed state
+ * with SB_BORN and SB_ACTIVE as yet unset.
+ *
+ * Return: On success, an extant or newly created superblock is
+ * returned. On failure an error pointer is returned.
*/
struct super_block *sget_fc(struct fs_context *fc,
int (*test)(struct super_block *, struct fs_context *),
@@ -595,6 +799,11 @@ retry:
s->s_type = fc->fs_type;
s->s_iflags |= fc->s_iflags;
strscpy(s->s_id, s->s_type->name, sizeof(s->s_id));
+ /*
+ * Make the superblock visible on @super_blocks and @fs_supers.
+ * It's in a nascent state and users should wait on SB_BORN or
+ * SB_DYING to be set.
+ */
list_add_tail(&s->s_list, &super_blocks);
hlist_add_head(&s->s_instances, &s->s_type->fs_supers);
spin_unlock(&sb_lock);
@@ -603,12 +812,16 @@ retry:
return s;
share_extant_sb:
- if (user_ns != old->s_user_ns) {
+ if (user_ns != old->s_user_ns || fc->exclusive) {
spin_unlock(&sb_lock);
destroy_unused_super(s);
+ if (fc->exclusive)
+ warnfc(fc, "reusing existing filesystem not allowed");
+ else
+ warnfc(fc, "reusing existing filesystem in another namespace not allowed");
return ERR_PTR(-EBUSY);
}
- if (!grab_super(old))
+ if (!grab_super_dead(old))
goto retry;
destroy_unused_super(s);
return old;
@@ -652,7 +865,7 @@ retry:
destroy_unused_super(s);
return ERR_PTR(-EBUSY);
}
- if (!grab_super(old))
+ if (!grab_super_dead(old))
goto retry;
destroy_unused_super(s);
return old;
@@ -685,7 +898,7 @@ EXPORT_SYMBOL(sget);
void drop_super(struct super_block *sb)
{
- up_read(&sb->s_umount);
+ super_unlock_shared(sb);
put_super(sb);
}
@@ -693,7 +906,7 @@ EXPORT_SYMBOL(drop_super);
void drop_super_exclusive(struct super_block *sb)
{
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
put_super(sb);
}
EXPORT_SYMBOL(drop_super_exclusive);
@@ -704,7 +917,8 @@ static void __iterate_supers(void (*f)(struct super_block *))
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
- if (hlist_unhashed(&sb->s_instances))
+ /* Pairs with memory marrier in super_wake(). */
+ if (smp_load_acquire(&sb->s_flags) & SB_DYING)
continue;
sb->s_count++;
spin_unlock(&sb_lock);
@@ -734,15 +948,15 @@ void iterate_supers(void (*f)(struct super_block *, void *), void *arg)
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
- if (hlist_unhashed(&sb->s_instances))
- continue;
+ bool born;
+
sb->s_count++;
spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
- if (sb->s_root && (sb->s_flags & SB_BORN))
+ born = super_lock_shared(sb);
+ if (born && sb->s_root)
f(sb, arg);
- up_read(&sb->s_umount);
+ super_unlock_shared(sb);
spin_lock(&sb_lock);
if (p)
@@ -770,13 +984,15 @@ void iterate_supers_type(struct file_system_type *type,
spin_lock(&sb_lock);
hlist_for_each_entry(sb, &type->fs_supers, s_instances) {
+ bool born;
+
sb->s_count++;
spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
- if (sb->s_root && (sb->s_flags & SB_BORN))
+ born = super_lock_shared(sb);
+ if (born && sb->s_root)
f(sb, arg);
- up_read(&sb->s_umount);
+ super_unlock_shared(sb);
spin_lock(&sb_lock);
if (p)
@@ -791,43 +1007,6 @@ void iterate_supers_type(struct file_system_type *type,
EXPORT_SYMBOL(iterate_supers_type);
/**
- * get_super - get the superblock of a device
- * @bdev: device to get the superblock for
- *
- * Scans the superblock list and finds the superblock of the file system
- * mounted on the device given. %NULL is returned if no match is found.
- */
-struct super_block *get_super(struct block_device *bdev)
-{
- struct super_block *sb;
-
- if (!bdev)
- return NULL;
-
- spin_lock(&sb_lock);
-rescan:
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (hlist_unhashed(&sb->s_instances))
- continue;
- if (sb->s_bdev == bdev) {
- sb->s_count++;
- spin_unlock(&sb_lock);
- down_read(&sb->s_umount);
- /* still alive? */
- if (sb->s_root && (sb->s_flags & SB_BORN))
- return sb;
- up_read(&sb->s_umount);
- /* nope, got unmounted */
- spin_lock(&sb_lock);
- __put_super(sb);
- goto rescan;
- }
- }
- spin_unlock(&sb_lock);
- return NULL;
-}
-
-/**
* get_active_super - get an active reference to the superblock of a device
* @bdev: device to get the superblock for
*
@@ -842,15 +1021,12 @@ struct super_block *get_active_super(struct block_device *bdev)
if (!bdev)
return NULL;
-restart:
spin_lock(&sb_lock);
list_for_each_entry(sb, &super_blocks, s_list) {
- if (hlist_unhashed(&sb->s_instances))
- continue;
if (sb->s_bdev == bdev) {
if (!grab_super(sb))
- goto restart;
- up_write(&sb->s_umount);
+ return NULL;
+ super_unlock_excl(sb);
return sb;
}
}
@@ -863,28 +1039,21 @@ struct super_block *user_get_super(dev_t dev, bool excl)
struct super_block *sb;
spin_lock(&sb_lock);
-rescan:
list_for_each_entry(sb, &super_blocks, s_list) {
- if (hlist_unhashed(&sb->s_instances))
- continue;
if (sb->s_dev == dev) {
+ bool born;
+
sb->s_count++;
spin_unlock(&sb_lock);
- if (excl)
- down_write(&sb->s_umount);
- else
- down_read(&sb->s_umount);
/* still alive? */
- if (sb->s_root && (sb->s_flags & SB_BORN))
+ born = super_lock(sb, excl);
+ if (born && sb->s_root)
return sb;
- if (excl)
- up_write(&sb->s_umount);
- else
- up_read(&sb->s_umount);
+ super_unlock(sb, excl);
/* nope, got unmounted */
spin_lock(&sb_lock);
__put_super(sb);
- goto rescan;
+ break;
}
}
spin_unlock(&sb_lock);
@@ -926,9 +1095,9 @@ int reconfigure_super(struct fs_context *fc)
if (remount_ro) {
if (!hlist_empty(&sb->s_pins)) {
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
group_pin_kill(&sb->s_pins);
- down_write(&sb->s_umount);
+ __super_lock_excl(sb);
if (!sb->s_root)
return 0;
if (sb->s_writers.frozen != SB_UNFROZEN)
@@ -991,9 +1160,9 @@ cancel_readonly:
static void do_emergency_remount_callback(struct super_block *sb)
{
- down_write(&sb->s_umount);
- if (sb->s_root && sb->s_bdev && (sb->s_flags & SB_BORN) &&
- !sb_rdonly(sb)) {
+ bool born = super_lock_excl(sb);
+
+ if (born && sb->s_root && sb->s_bdev && !sb_rdonly(sb)) {
struct fs_context *fc;
fc = fs_context_for_reconfigure(sb->s_root,
@@ -1004,7 +1173,7 @@ static void do_emergency_remount_callback(struct super_block *sb)
put_fs_context(fc);
}
}
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
}
static void do_emergency_remount(struct work_struct *work)
@@ -1027,12 +1196,13 @@ void emergency_remount(void)
static void do_thaw_all_callback(struct super_block *sb)
{
- down_write(&sb->s_umount);
- if (sb->s_root && sb->s_flags & SB_BORN) {
+ bool born = super_lock_excl(sb);
+
+ if (born && sb->s_root) {
emergency_thaw_bdev(sb);
- thaw_super_locked(sb);
+ thaw_super_locked(sb, FREEZE_HOLDER_USERSPACE);
} else {
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
}
}
@@ -1136,7 +1306,7 @@ static int test_single_super(struct super_block *s, struct fs_context *fc)
return 1;
}
-static int vfs_get_super(struct fs_context *fc, bool reconf,
+static int vfs_get_super(struct fs_context *fc,
int (*test)(struct super_block *, struct fs_context *),
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
@@ -1154,19 +1324,9 @@ static int vfs_get_super(struct fs_context *fc, bool reconf,
goto error;
sb->s_flags |= SB_ACTIVE;
- fc->root = dget(sb->s_root);
- } else {
- fc->root = dget(sb->s_root);
- if (reconf) {
- err = reconfigure_super(fc);
- if (err < 0) {
- dput(fc->root);
- fc->root = NULL;
- goto error;
- }
- }
}
+ fc->root = dget(sb->s_root);
return 0;
error:
@@ -1178,7 +1338,7 @@ int get_tree_nodev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
- return vfs_get_super(fc, false, NULL, fill_super);
+ return vfs_get_super(fc, NULL, fill_super);
}
EXPORT_SYMBOL(get_tree_nodev);
@@ -1186,54 +1346,81 @@ int get_tree_single(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc))
{
- return vfs_get_super(fc, false, test_single_super, fill_super);
+ return vfs_get_super(fc, test_single_super, fill_super);
}
EXPORT_SYMBOL(get_tree_single);
-int get_tree_single_reconf(struct fs_context *fc,
- int (*fill_super)(struct super_block *sb,
- struct fs_context *fc))
-{
- return vfs_get_super(fc, true, test_single_super, fill_super);
-}
-EXPORT_SYMBOL(get_tree_single_reconf);
-
int get_tree_keyed(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc),
void *key)
{
fc->s_fs_info = key;
- return vfs_get_super(fc, false, test_keyed_super, fill_super);
+ return vfs_get_super(fc, test_keyed_super, fill_super);
}
EXPORT_SYMBOL(get_tree_keyed);
#ifdef CONFIG_BLOCK
-static void fs_mark_dead(struct block_device *bdev)
+/*
+ * Lock a super block that the callers holds a reference to.
+ *
+ * The caller needs to ensure that the super_block isn't being freed while
+ * calling this function, e.g. by holding a lock over the call to this function
+ * and the place that clears the pointer to the superblock used by this function
+ * before freeing the superblock.
+ */
+static bool super_lock_shared_active(struct super_block *sb)
{
- struct super_block *sb;
+ bool born = super_lock_shared(sb);
+
+ if (!born || !sb->s_root || !(sb->s_flags & SB_ACTIVE)) {
+ super_unlock_shared(sb);
+ return false;
+ }
+ return true;
+}
+
+static void fs_bdev_mark_dead(struct block_device *bdev, bool surprise)
+{
+ struct super_block *sb = bdev->bd_holder;
+
+ /* bd_holder_lock ensures that the sb isn't freed */
+ lockdep_assert_held(&bdev->bd_holder_lock);
- sb = get_super(bdev);
- if (!sb)
+ if (!super_lock_shared_active(sb))
return;
+ if (!surprise)
+ sync_filesystem(sb);
+ shrink_dcache_sb(sb);
+ invalidate_inodes(sb);
if (sb->s_op->shutdown)
sb->s_op->shutdown(sb);
- drop_super(sb);
+
+ super_unlock_shared(sb);
+}
+
+static void fs_bdev_sync(struct block_device *bdev)
+{
+ struct super_block *sb = bdev->bd_holder;
+
+ lockdep_assert_held(&bdev->bd_holder_lock);
+
+ if (!super_lock_shared_active(sb))
+ return;
+ sync_filesystem(sb);
+ super_unlock_shared(sb);
}
-static const struct blk_holder_ops fs_holder_ops = {
- .mark_dead = fs_mark_dead,
+const struct blk_holder_ops fs_holder_ops = {
+ .mark_dead = fs_bdev_mark_dead,
+ .sync = fs_bdev_sync,
};
+EXPORT_SYMBOL_GPL(fs_holder_ops);
static int set_bdev_super(struct super_block *s, void *data)
{
- s->s_bdev = data;
- s->s_dev = s->s_bdev->bd_dev;
- s->s_bdi = bdi_get(s->s_bdev->bd_disk->bdi);
-
- if (bdev_stable_writes(s->s_bdev))
- s->s_iflags |= SB_I_STABLE_WRITES;
+ s->s_dev = *(dev_t *)data;
return 0;
}
@@ -1244,8 +1431,63 @@ static int set_bdev_super_fc(struct super_block *s, struct fs_context *fc)
static int test_bdev_super_fc(struct super_block *s, struct fs_context *fc)
{
- return !(s->s_iflags & SB_I_RETIRED) && s->s_bdev == fc->sget_key;
+ return !(s->s_iflags & SB_I_RETIRED) &&
+ s->s_dev == *(dev_t *)fc->sget_key;
+}
+
+int setup_bdev_super(struct super_block *sb, int sb_flags,
+ struct fs_context *fc)
+{
+ blk_mode_t mode = sb_open_mode(sb_flags);
+ struct block_device *bdev;
+
+ bdev = blkdev_get_by_dev(sb->s_dev, mode, sb, &fs_holder_ops);
+ if (IS_ERR(bdev)) {
+ if (fc)
+ errorf(fc, "%s: Can't open blockdev", fc->source);
+ return PTR_ERR(bdev);
+ }
+
+ /*
+ * This really should be in blkdev_get_by_dev, but right now can't due
+ * to legacy issues that require us to allow opening a block device node
+ * writable from userspace even for a read-only block device.
+ */
+ if ((mode & BLK_OPEN_WRITE) && bdev_read_only(bdev)) {
+ blkdev_put(bdev, sb);
+ return -EACCES;
+ }
+
+ /*
+ * Until SB_BORN flag is set, there can be no active superblock
+ * references and thus no filesystem freezing. get_active_super() will
+ * just loop waiting for SB_BORN so even freeze_bdev() cannot proceed.
+ *
+ * It is enough to check bdev was not frozen before we set s_bdev.
+ */
+ mutex_lock(&bdev->bd_fsfreeze_mutex);
+ if (bdev->bd_fsfreeze_count > 0) {
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ if (fc)
+ warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
+ blkdev_put(bdev, sb);
+ return -EBUSY;
+ }
+ spin_lock(&sb_lock);
+ sb->s_bdev = bdev;
+ sb->s_bdi = bdi_get(bdev->bd_disk->bdi);
+ if (bdev_stable_writes(bdev))
+ sb->s_iflags |= SB_I_STABLE_WRITES;
+ spin_unlock(&sb_lock);
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+
+ snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
+ shrinker_debugfs_rename(&sb->s_shrink, "sb-%s:%s", sb->s_type->name,
+ sb->s_id);
+ sb_set_blocksize(sb, block_size(bdev));
+ return 0;
}
+EXPORT_SYMBOL_GPL(setup_bdev_super);
/**
* get_tree_bdev - Get a superblock based on a single block device
@@ -1256,73 +1498,49 @@ int get_tree_bdev(struct fs_context *fc,
int (*fill_super)(struct super_block *,
struct fs_context *))
{
- struct block_device *bdev;
struct super_block *s;
int error = 0;
+ dev_t dev;
if (!fc->source)
return invalf(fc, "No source specified");
- bdev = blkdev_get_by_path(fc->source, sb_open_mode(fc->sb_flags),
- fc->fs_type, &fs_holder_ops);
- if (IS_ERR(bdev)) {
- errorf(fc, "%s: Can't open blockdev", fc->source);
- return PTR_ERR(bdev);
- }
-
- /* Once the superblock is inserted into the list by sget_fc(), s_umount
- * will protect the lockfs code from trying to start a snapshot while
- * we are mounting
- */
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- warnf(fc, "%pg: Can't mount, blockdev is frozen", bdev);
- blkdev_put(bdev, fc->fs_type);
- return -EBUSY;
+ error = lookup_bdev(fc->source, &dev);
+ if (error) {
+ errorf(fc, "%s: Can't lookup blockdev", fc->source);
+ return error;
}
fc->sb_flags |= SB_NOSEC;
- fc->sget_key = bdev;
+ fc->sget_key = &dev;
s = sget_fc(fc, test_bdev_super_fc, set_bdev_super_fc);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- if (IS_ERR(s)) {
- blkdev_put(bdev, fc->fs_type);
+ if (IS_ERR(s))
return PTR_ERR(s);
- }
if (s->s_root) {
/* Don't summarily change the RO/RW state. */
if ((fc->sb_flags ^ s->s_flags) & SB_RDONLY) {
- warnf(fc, "%pg: Can't mount, would change RO state", bdev);
+ warnf(fc, "%pg: Can't mount, would change RO state", s->s_bdev);
deactivate_locked_super(s);
- blkdev_put(bdev, fc->fs_type);
return -EBUSY;
}
-
+ } else {
/*
- * s_umount nests inside open_mutex during
- * __invalidate_device(). blkdev_put() acquires
- * open_mutex and can't be called under s_umount. Drop
- * s_umount temporarily. This is safe as we're
- * holding an active reference.
+ * We drop s_umount here because we need to open the bdev and
+ * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+ * bdev_mark_dead()). It is safe because we have active sb
+ * reference and SB_BORN is not set yet.
*/
- up_write(&s->s_umount);
- blkdev_put(bdev, fc->fs_type);
- down_write(&s->s_umount);
- } else {
- snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
- shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
- fc->fs_type->name, s->s_id);
- sb_set_blocksize(s, block_size(bdev));
- error = fill_super(s, fc);
+ super_unlock_excl(s);
+ error = setup_bdev_super(s, fc->sb_flags, fc);
+ __super_lock_excl(s);
+ if (!error)
+ error = fill_super(s, fc);
if (error) {
deactivate_locked_super(s);
return error;
}
-
s->s_flags |= SB_ACTIVE;
- bdev->bd_super = s;
}
BUG_ON(fc->root);
@@ -1333,79 +1551,52 @@ EXPORT_SYMBOL(get_tree_bdev);
static int test_bdev_super(struct super_block *s, void *data)
{
- return !(s->s_iflags & SB_I_RETIRED) && (void *)s->s_bdev == data;
+ return !(s->s_iflags & SB_I_RETIRED) && s->s_dev == *(dev_t *)data;
}
struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int))
{
- struct block_device *bdev;
struct super_block *s;
- int error = 0;
+ int error;
+ dev_t dev;
- bdev = blkdev_get_by_path(dev_name, sb_open_mode(flags), fs_type,
- &fs_holder_ops);
- if (IS_ERR(bdev))
- return ERR_CAST(bdev);
+ error = lookup_bdev(dev_name, &dev);
+ if (error)
+ return ERR_PTR(error);
- /*
- * once the super is inserted into the list by sget, s_umount
- * will protect the lockfs code from trying to start a snapshot
- * while we are mounting
- */
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (bdev->bd_fsfreeze_count > 0) {
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- error = -EBUSY;
- goto error_bdev;
- }
- s = sget(fs_type, test_bdev_super, set_bdev_super, flags | SB_NOSEC,
- bdev);
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ flags |= SB_NOSEC;
+ s = sget(fs_type, test_bdev_super, set_bdev_super, flags, &dev);
if (IS_ERR(s))
- goto error_s;
+ return ERR_CAST(s);
if (s->s_root) {
if ((flags ^ s->s_flags) & SB_RDONLY) {
deactivate_locked_super(s);
- error = -EBUSY;
- goto error_bdev;
+ return ERR_PTR(-EBUSY);
}
-
+ } else {
/*
- * s_umount nests inside open_mutex during
- * __invalidate_device(). blkdev_put() acquires
- * open_mutex and can't be called under s_umount. Drop
- * s_umount temporarily. This is safe as we're
- * holding an active reference.
+ * We drop s_umount here because we need to open the bdev and
+ * bdev->open_mutex ranks above s_umount (blkdev_put() ->
+ * bdev_mark_dead()). It is safe because we have active sb
+ * reference and SB_BORN is not set yet.
*/
- up_write(&s->s_umount);
- blkdev_put(bdev, fs_type);
- down_write(&s->s_umount);
- } else {
- snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev);
- shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s",
- fs_type->name, s->s_id);
- sb_set_blocksize(s, block_size(bdev));
- error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
+ super_unlock_excl(s);
+ error = setup_bdev_super(s, flags, NULL);
+ __super_lock_excl(s);
+ if (!error)
+ error = fill_super(s, data, flags & SB_SILENT ? 1 : 0);
if (error) {
deactivate_locked_super(s);
- goto error;
+ return ERR_PTR(error);
}
s->s_flags |= SB_ACTIVE;
- bdev->bd_super = s;
}
return dget(s->s_root);
-
-error_s:
- error = PTR_ERR(s);
-error_bdev:
- blkdev_put(bdev, fs_type);
-error:
- return ERR_PTR(error);
}
EXPORT_SYMBOL(mount_bdev);
@@ -1413,10 +1604,11 @@ void kill_block_super(struct super_block *sb)
{
struct block_device *bdev = sb->s_bdev;
- bdev->bd_super = NULL;
generic_shutdown_super(sb);
- sync_blockdev(bdev);
- blkdev_put(bdev, sb->s_type);
+ if (bdev) {
+ sync_blockdev(bdev);
+ blkdev_put(bdev, sb);
+ }
}
EXPORT_SYMBOL(kill_block_super);
@@ -1533,13 +1725,13 @@ int vfs_get_tree(struct fs_context *fc)
WARN_ON(!sb->s_bdi);
/*
- * Write barrier is for super_cache_count(). We place it before setting
- * SB_BORN as the data dependency between the two functions is the
- * superblock structure contents that we just set up, not the SB_BORN
- * flag.
+ * super_wake() contains a memory barrier which also care of
+ * ordering for super_cache_count(). We place it before setting
+ * SB_BORN as the data dependency between the two functions is
+ * the superblock structure contents that we just set up, not
+ * the SB_BORN flag.
*/
- smp_wmb();
- sb->s_flags |= SB_BORN;
+ super_wake(sb, SB_BORN);
error = security_sb_set_mnt_opts(sb, fc->security, 0, NULL);
if (unlikely(error)) {
@@ -1644,14 +1836,43 @@ static void sb_freeze_unlock(struct super_block *sb, int level)
percpu_up_write(sb->s_writers.rw_sem + level);
}
+static int wait_for_partially_frozen(struct super_block *sb)
+{
+ int ret = 0;
+
+ do {
+ unsigned short old = sb->s_writers.frozen;
+
+ up_write(&sb->s_umount);
+ ret = wait_var_event_killable(&sb->s_writers.frozen,
+ sb->s_writers.frozen != old);
+ down_write(&sb->s_umount);
+ } while (ret == 0 &&
+ sb->s_writers.frozen != SB_UNFROZEN &&
+ sb->s_writers.frozen != SB_FREEZE_COMPLETE);
+
+ return ret;
+}
+
/**
* freeze_super - lock the filesystem and force it into a consistent state
* @sb: the super to lock
+ * @who: context that wants to freeze
*
* Syncs the super to make sure the filesystem is consistent and calls the fs's
- * freeze_fs. Subsequent calls to this without first thawing the fs will return
+ * freeze_fs. Subsequent calls to this without first thawing the fs may return
* -EBUSY.
*
+ * @who should be:
+ * * %FREEZE_HOLDER_USERSPACE if userspace wants to freeze the fs;
+ * * %FREEZE_HOLDER_KERNEL if the kernel wants to freeze the fs.
+ *
+ * The @who argument distinguishes between the kernel and userspace trying to
+ * freeze the filesystem. Although there cannot be multiple kernel freezes or
+ * multiple userspace freezes in effect at any given time, the kernel and
+ * userspace can both hold a filesystem frozen. The filesystem remains frozen
+ * until there are no kernel or userspace freezes in effect.
+ *
* During this function, sb->s_writers.frozen goes through these values:
*
* SB_UNFROZEN: File system is normal, all writes progress as usual.
@@ -1677,34 +1898,62 @@ static void sb_freeze_unlock(struct super_block *sb, int level)
*
* sb->s_writers.frozen is protected by sb->s_umount.
*/
-int freeze_super(struct super_block *sb)
+int freeze_super(struct super_block *sb, enum freeze_holder who)
{
int ret;
atomic_inc(&sb->s_active);
- down_write(&sb->s_umount);
+ if (!super_lock_excl(sb))
+ WARN(1, "Dying superblock while freezing!");
+
+retry:
+ if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
+ if (sb->s_writers.freeze_holders & who) {
+ deactivate_locked_super(sb);
+ return -EBUSY;
+ }
+
+ WARN_ON(sb->s_writers.freeze_holders == 0);
+
+ /*
+ * Someone else already holds this type of freeze; share the
+ * freeze and assign the active ref to the freeze.
+ */
+ sb->s_writers.freeze_holders |= who;
+ super_unlock_excl(sb);
+ return 0;
+ }
+
if (sb->s_writers.frozen != SB_UNFROZEN) {
- deactivate_locked_super(sb);
- return -EBUSY;
+ ret = wait_for_partially_frozen(sb);
+ if (ret) {
+ deactivate_locked_super(sb);
+ return ret;
+ }
+
+ goto retry;
}
if (!(sb->s_flags & SB_BORN)) {
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
return 0; /* sic - it's "nothing to do" */
}
if (sb_rdonly(sb)) {
/* Nothing to do really... */
+ sb->s_writers.freeze_holders |= who;
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
- up_write(&sb->s_umount);
+ wake_up_var(&sb->s_writers.frozen);
+ super_unlock_excl(sb);
return 0;
}
sb->s_writers.frozen = SB_FREEZE_WRITE;
/* Release s_umount to preserve sb_start_write -> s_umount ordering */
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
sb_wait_write(sb, SB_FREEZE_WRITE);
- down_write(&sb->s_umount);
+ if (!super_lock_excl(sb))
+ WARN(1, "Dying superblock while freezing!");
/* Now we go and block page faults... */
sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
@@ -1715,6 +1964,7 @@ int freeze_super(struct super_block *sb)
if (ret) {
sb->s_writers.frozen = SB_UNFROZEN;
sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT);
+ wake_up_var(&sb->s_writers.frozen);
deactivate_locked_super(sb);
return ret;
}
@@ -1730,6 +1980,7 @@ int freeze_super(struct super_block *sb)
"VFS:Filesystem freeze failed\n");
sb->s_writers.frozen = SB_UNFROZEN;
sb_freeze_unlock(sb, SB_FREEZE_FS);
+ wake_up_var(&sb->s_writers.frozen);
deactivate_locked_super(sb);
return ret;
}
@@ -1738,24 +1989,50 @@ int freeze_super(struct super_block *sb)
* For debugging purposes so that fs can warn if it sees write activity
* when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
*/
+ sb->s_writers.freeze_holders |= who;
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
+ wake_up_var(&sb->s_writers.frozen);
lockdep_sb_freeze_release(sb);
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
return 0;
}
EXPORT_SYMBOL(freeze_super);
-static int thaw_super_locked(struct super_block *sb)
+/*
+ * Undoes the effect of a freeze_super_locked call. If the filesystem is
+ * frozen both by userspace and the kernel, a thaw call from either source
+ * removes that state without releasing the other state or unlocking the
+ * filesystem.
+ */
+static int thaw_super_locked(struct super_block *sb, enum freeze_holder who)
{
int error;
- if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
- up_write(&sb->s_umount);
+ if (sb->s_writers.frozen == SB_FREEZE_COMPLETE) {
+ if (!(sb->s_writers.freeze_holders & who)) {
+ super_unlock_excl(sb);
+ return -EINVAL;
+ }
+
+ /*
+ * Freeze is shared with someone else. Release our hold and
+ * drop the active ref that freeze_super assigned to the
+ * freezer.
+ */
+ if (sb->s_writers.freeze_holders & ~who) {
+ sb->s_writers.freeze_holders &= ~who;
+ deactivate_locked_super(sb);
+ return 0;
+ }
+ } else {
+ super_unlock_excl(sb);
return -EINVAL;
}
if (sb_rdonly(sb)) {
+ sb->s_writers.freeze_holders &= ~who;
sb->s_writers.frozen = SB_UNFROZEN;
+ wake_up_var(&sb->s_writers.frozen);
goto out;
}
@@ -1764,15 +2041,16 @@ static int thaw_super_locked(struct super_block *sb)
if (sb->s_op->unfreeze_fs) {
error = sb->s_op->unfreeze_fs(sb);
if (error) {
- printk(KERN_ERR
- "VFS:Filesystem thaw failed\n");
+ printk(KERN_ERR "VFS:Filesystem thaw failed\n");
lockdep_sb_freeze_release(sb);
- up_write(&sb->s_umount);
+ super_unlock_excl(sb);
return error;
}
}
+ sb->s_writers.freeze_holders &= ~who;
sb->s_writers.frozen = SB_UNFROZEN;
+ wake_up_var(&sb->s_writers.frozen);
sb_freeze_unlock(sb, SB_FREEZE_FS);
out:
deactivate_locked_super(sb);
@@ -1782,13 +2060,20 @@ out:
/**
* thaw_super -- unlock filesystem
* @sb: the super to thaw
+ * @who: context that wants to freeze
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_super()
+ * if there are no remaining freezes on the filesystem.
*
- * Unlocks the filesystem and marks it writeable again after freeze_super().
+ * @who should be:
+ * * %FREEZE_HOLDER_USERSPACE if userspace wants to thaw the fs;
+ * * %FREEZE_HOLDER_KERNEL if the kernel wants to thaw the fs.
*/
-int thaw_super(struct super_block *sb)
+int thaw_super(struct super_block *sb, enum freeze_holder who)
{
- down_write(&sb->s_umount);
- return thaw_super_locked(sb);
+ if (!super_lock_excl(sb))
+ WARN(1, "Dying superblock while thawing!");
+ return thaw_super_locked(sb, who);
}
EXPORT_SYMBOL(thaw_super);
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 0140010aa0c3..2f5ead88d00b 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -224,7 +224,7 @@ got_it:
memset (de->name + namelen, 0, SYSV_DIRSIZE - namelen - 2);
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
dir_commit_chunk(page, pos, SYSV_DIRSIZE);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
err = sysv_handle_dirsync(dir);
out_page:
@@ -249,7 +249,7 @@ int sysv_delete_entry(struct sysv_dir_entry *de, struct page *page)
}
de->inode = 0;
dir_commit_chunk(page, pos, SYSV_DIRSIZE);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
return sysv_handle_dirsync(inode);
}
@@ -346,7 +346,7 @@ int sysv_set_link(struct sysv_dir_entry *de, struct page *page,
}
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
dir_commit_chunk(page, pos, SYSV_DIRSIZE);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
return sysv_handle_dirsync(inode);
}
diff --git a/fs/sysv/ialloc.c b/fs/sysv/ialloc.c
index e732879036ab..6719da5889d9 100644
--- a/fs/sysv/ialloc.c
+++ b/fs/sysv/ialloc.c
@@ -165,7 +165,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
dirty_sb(sb);
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_ino = fs16_to_cpu(sbi, ino);
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_blocks = 0;
memset(SYSV_I(inode)->i_data, 0, sizeof(SYSV_I(inode)->i_data));
SYSV_I(inode)->i_dir_start_lookup = 0;
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 9e8d4a6fb2f3..0aa3827d8178 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -202,8 +202,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
- inode->i_ctime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_ctime);
- inode->i_ctime.tv_nsec = 0;
+ inode_set_ctime(inode, fs32_to_cpu(sbi, raw_inode->i_ctime), 0);
inode->i_atime.tv_nsec = 0;
inode->i_mtime.tv_nsec = 0;
inode->i_blocks = 0;
@@ -256,7 +255,7 @@ static int __sysv_write_inode(struct inode *inode, int wait)
raw_inode->i_size = cpu_to_fs32(sbi, inode->i_size);
raw_inode->i_atime = cpu_to_fs32(sbi, inode->i_atime.tv_sec);
raw_inode->i_mtime = cpu_to_fs32(sbi, inode->i_mtime.tv_sec);
- raw_inode->i_ctime = cpu_to_fs32(sbi, inode->i_ctime.tv_sec);
+ raw_inode->i_ctime = cpu_to_fs32(sbi, inode_get_ctime(inode).tv_sec);
si = SYSV_I(inode);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c
index 58d7f43a1371..edb94e55de8e 100644
--- a/fs/sysv/itree.c
+++ b/fs/sysv/itree.c
@@ -183,7 +183,7 @@ static inline int splice_branch(struct inode *inode,
*where->p = where->key;
write_unlock(&pointers_lock);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
/* had we spliced it onto indirect block? */
if (where->bh)
@@ -423,7 +423,7 @@ do_indirects:
}
n++;
}
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
if (IS_SYNC(inode))
sysv_sync_inode (inode);
else
@@ -449,7 +449,8 @@ int sysv_getattr(struct mnt_idmap *idmap, const struct path *path,
struct kstat *stat, u32 request_mask, unsigned int flags)
{
struct super_block *s = path->dentry->d_sb;
- generic_fillattr(&nop_mnt_idmap, d_inode(path->dentry), stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(path->dentry),
+ stat);
stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
stat->blksize = s->s_blocksize;
return 0;
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index fcf163fea3ad..d6b73798071b 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -103,7 +103,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
{
struct inode *inode = d_inode(old_dentry);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_link_count(inode);
ihold(inode);
@@ -161,7 +161,7 @@ static int sysv_unlink(struct inode * dir, struct dentry * dentry)
err = sysv_delete_entry(de, page);
if (!err) {
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
inode_dec_link_count(inode);
}
unmap_and_put_page(page, de);
@@ -230,7 +230,7 @@ static int sysv_rename(struct mnt_idmap *idmap, struct inode *old_dir,
unmap_and_put_page(new_page, new_de);
if (err)
goto out_dir;
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
if (dir_de)
drop_nlink(new_inode);
inode_dec_link_count(new_inode);
diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c
index 57ac8aa4a724..2feb6c58648c 100644
--- a/fs/tracefs/inode.c
+++ b/fs/tracefs/inode.c
@@ -132,7 +132,7 @@ static struct inode *tracefs_get_inode(struct super_block *sb)
struct inode *inode = new_inode(sb);
if (inode) {
inode->i_ino = get_next_ino();
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
return inode;
}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 9c9d3f0e36a4..eef9e527d9ff 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -243,8 +243,8 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
(unsigned int)inode->i_mtime.tv_sec,
(unsigned int)inode->i_mtime.tv_nsec);
pr_err("\tctime %u.%u\n",
- (unsigned int)inode->i_ctime.tv_sec,
- (unsigned int)inode->i_ctime.tv_nsec);
+ (unsigned int) inode_get_ctime(inode).tv_sec,
+ (unsigned int) inode_get_ctime(inode).tv_nsec);
pr_err("\tcreat_sqnum %llu\n", ui->creat_sqnum);
pr_err("\txattr_size %u\n", ui->xattr_size);
pr_err("\txattr_cnt %u\n", ui->xattr_cnt);
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index ef0499edc248..2f48c58d47cd 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -96,8 +96,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
inode->i_flags |= S_NOCMTIME;
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
- inode->i_mtime = inode->i_atime = inode->i_ctime =
- current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_mapping->nrpages = 0;
if (!is_xattr) {
@@ -325,7 +324,7 @@ static int ubifs_create(struct mnt_idmap *idmap, struct inode *dir,
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = dir->i_ctime = inode->i_ctime;
+ dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
if (err)
goto out_cancel;
@@ -765,10 +764,10 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
inc_nlink(inode);
ihold(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = dir->i_ctime = inode->i_ctime;
+ dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
if (err)
goto out_cancel;
@@ -838,11 +837,11 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
}
lock_2_inodes(dir, inode);
- inode->i_ctime = current_time(dir);
+ inode_set_ctime_current(inode);
drop_nlink(inode);
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = dir->i_ctime = inode->i_ctime;
+ dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
if (err)
goto out_cancel;
@@ -940,12 +939,12 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
}
lock_2_inodes(dir, inode);
- inode->i_ctime = current_time(dir);
+ inode_set_ctime_current(inode);
clear_nlink(inode);
drop_nlink(dir);
dir->i_size -= sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = dir->i_ctime = inode->i_ctime;
+ dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
if (err)
goto out_cancel;
@@ -1019,7 +1018,7 @@ static int ubifs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
inc_nlink(dir);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = dir->i_ctime = inode->i_ctime;
+ dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
if (err) {
ubifs_err(c, "cannot create directory, error %d", err);
@@ -1110,7 +1109,7 @@ static int ubifs_mknod(struct mnt_idmap *idmap, struct inode *dir,
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = dir->i_ctime = inode->i_ctime;
+ dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
if (err)
goto out_cancel;
@@ -1210,7 +1209,7 @@ static int ubifs_symlink(struct mnt_idmap *idmap, struct inode *dir,
mutex_lock(&dir_ui->ui_mutex);
dir->i_size += sz_change;
dir_ui->ui_size = dir->i_size;
- dir->i_mtime = dir->i_ctime = inode->i_ctime;
+ dir->i_mtime = inode_set_ctime_to_ts(dir, inode_get_ctime(inode));
err = ubifs_jnl_update(c, dir, &nm, inode, 0, 0);
if (err)
goto out_cancel;
@@ -1298,7 +1297,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
.dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
struct ubifs_budget_req wht_req;
- struct timespec64 time;
unsigned int saved_nlink;
struct fscrypt_name old_nm, new_nm;
@@ -1414,8 +1412,7 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
* Like most other Unix systems, set the @i_ctime for inodes on a
* rename.
*/
- time = current_time(old_dir);
- old_inode->i_ctime = time;
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
/* We must adjust parent link count when renaming directories */
if (is_dir) {
@@ -1444,13 +1441,11 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
old_dir->i_size -= old_sz;
ubifs_inode(old_dir)->ui_size = old_dir->i_size;
- old_dir->i_mtime = old_dir->i_ctime = time;
- new_dir->i_mtime = new_dir->i_ctime = time;
/*
* And finally, if we unlinked a direntry which happened to have the
* same name as the moved direntry, we have to decrement @i_nlink of
- * the unlinked inode and change its ctime.
+ * the unlinked inode.
*/
if (unlink) {
/*
@@ -1462,7 +1457,6 @@ static int do_rename(struct inode *old_dir, struct dentry *old_dentry,
clear_nlink(new_inode);
else
drop_nlink(new_inode);
- new_inode->i_ctime = time;
} else {
new_dir->i_size += new_sz;
ubifs_inode(new_dir)->ui_size = new_dir->i_size;
@@ -1557,7 +1551,6 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
int sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
struct inode *fst_inode = d_inode(old_dentry);
struct inode *snd_inode = d_inode(new_dentry);
- struct timespec64 time;
int err;
struct fscrypt_name fst_nm, snd_nm;
@@ -1588,11 +1581,7 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
lock_4_inodes(old_dir, new_dir, NULL, NULL);
- time = current_time(old_dir);
- fst_inode->i_ctime = time;
- snd_inode->i_ctime = time;
- old_dir->i_mtime = old_dir->i_ctime = time;
- new_dir->i_mtime = new_dir->i_ctime = time;
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
if (old_dir != new_dir) {
if (S_ISDIR(fst_inode->i_mode) && !S_ISDIR(snd_inode->i_mode)) {
@@ -1665,7 +1654,7 @@ int ubifs_getattr(struct mnt_idmap *idmap, const struct path *path,
STATX_ATTR_ENCRYPTED |
STATX_ATTR_IMMUTABLE);
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
stat->blksize = UBIFS_BLOCK_SIZE;
stat->size = ui->ui_size;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 6738fe43040b..e5382f0b2587 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1092,7 +1092,7 @@ static void do_attr_changes(struct inode *inode, const struct iattr *attr)
if (attr->ia_valid & ATTR_MTIME)
inode->i_mtime = attr->ia_mtime;
if (attr->ia_valid & ATTR_CTIME)
- inode->i_ctime = attr->ia_ctime;
+ inode_set_ctime_to_ts(inode, attr->ia_ctime);
if (attr->ia_valid & ATTR_MODE) {
umode_t mode = attr->ia_mode;
@@ -1192,7 +1192,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
mutex_lock(&ui->ui_mutex);
ui->ui_size = inode->i_size;
/* Truncation changes inode [mc]time */
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
/* Other attributes may be changed at the same time as well */
do_attr_changes(inode, attr);
err = ubifs_jnl_truncate(c, inode, old_size, new_size);
@@ -1239,7 +1239,7 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
mutex_lock(&ui->ui_mutex);
if (attr->ia_valid & ATTR_SIZE) {
/* Truncation changes inode [mc]time */
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
/* 'truncate_setsize()' changed @i_size, update @ui_size */
ui->ui_size = inode->i_size;
}
@@ -1364,8 +1364,10 @@ out:
static inline int mctime_update_needed(const struct inode *inode,
const struct timespec64 *now)
{
+ struct timespec64 ctime = inode_get_ctime(inode);
+
if (!timespec64_equal(&inode->i_mtime, now) ||
- !timespec64_equal(&inode->i_ctime, now))
+ !timespec64_equal(&ctime, now))
return 1;
return 0;
}
@@ -1376,8 +1378,7 @@ static inline int mctime_update_needed(const struct inode *inode,
*
* This function updates time of the inode.
*/
-int ubifs_update_time(struct inode *inode, struct timespec64 *time,
- int flags)
+int ubifs_update_time(struct inode *inode, int flags)
{
struct ubifs_inode *ui = ubifs_inode(inode);
struct ubifs_info *c = inode->i_sb->s_fs_info;
@@ -1385,21 +1386,17 @@ int ubifs_update_time(struct inode *inode, struct timespec64 *time,
.dirtied_ino_d = ALIGN(ui->data_len, 8) };
int err, release;
- if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT))
- return generic_update_time(inode, time, flags);
+ if (!IS_ENABLED(CONFIG_UBIFS_ATIME_SUPPORT)) {
+ generic_update_time(inode, flags);
+ return 0;
+ }
err = ubifs_budget_space(c, &req);
if (err)
return err;
mutex_lock(&ui->ui_mutex);
- if (flags & S_ATIME)
- inode->i_atime = *time;
- if (flags & S_CTIME)
- inode->i_ctime = *time;
- if (flags & S_MTIME)
- inode->i_mtime = *time;
-
+ inode_update_timestamps(inode, flags);
release = ui->dirty;
__mark_inode_dirty(inode, I_DIRTY_SYNC);
mutex_unlock(&ui->ui_mutex);
@@ -1432,7 +1429,7 @@ static int update_mctime(struct inode *inode)
return err;
mutex_lock(&ui->ui_mutex);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
release = ui->dirty;
mark_inode_dirty_sync(inode);
mutex_unlock(&ui->ui_mutex);
@@ -1570,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(struct vm_fault *vmf)
struct ubifs_inode *ui = ubifs_inode(inode);
mutex_lock(&ui->ui_mutex);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
release = ui->dirty;
mark_inode_dirty_sync(inode);
mutex_unlock(&ui->ui_mutex);
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 67c5108abd89..d79cabe193c3 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -118,7 +118,7 @@ static int setflags(struct inode *inode, int flags)
ui->flags &= ~ioctl2ubifs(UBIFS_SETTABLE_IOCTL_FLAGS);
ui->flags |= ioctl2ubifs(flags);
ubifs_set_inode_flags(inode);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
release = ui->dirty;
mark_inode_dirty_sync(inode);
mutex_unlock(&ui->ui_mutex);
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index dc52ac0f4a34..ffc9beee7be6 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -454,8 +454,8 @@ static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec);
ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
- ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec);
- ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
+ ino->ctime_sec = cpu_to_le64(inode_get_ctime(inode).tv_sec);
+ ino->ctime_nsec = cpu_to_le32(inode_get_ctime(inode).tv_nsec);
ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
ino->uid = cpu_to_le32(i_uid_read(inode));
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 32cb14759796..b08fb28d16b5 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -146,8 +146,8 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec);
inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec);
- inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec);
- inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec);
+ inode_set_ctime(inode, (int64_t)le64_to_cpu(ino->ctime_sec),
+ le32_to_cpu(ino->ctime_nsec));
inode->i_mode = le32_to_cpu(ino->mode);
inode->i_size = le64_to_cpu(ino->size);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4c36044140e7..ebb3ad6b5e7e 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -2027,7 +2027,7 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc);
int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
int ubifs_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
struct iattr *attr);
-int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
+int ubifs_update_time(struct inode *inode, int flags);
/* dir.c */
struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 349228dd1191..406c82eab513 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -134,7 +134,7 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
ui->data_len = size;
mutex_lock(&host_ui->ui_mutex);
- host->i_ctime = current_time(host);
+ inode_set_ctime_current(host);
host_ui->xattr_cnt += 1;
host_ui->xattr_size += CALC_DENT_SIZE(fname_len(nm));
host_ui->xattr_size += CALC_XATTR_BYTES(size);
@@ -215,7 +215,7 @@ static int change_xattr(struct ubifs_info *c, struct inode *host,
ui->data_len = size;
mutex_lock(&host_ui->ui_mutex);
- host->i_ctime = current_time(host);
+ inode_set_ctime_current(host);
host_ui->xattr_size -= CALC_XATTR_BYTES(old_size);
host_ui->xattr_size += CALC_XATTR_BYTES(size);
@@ -474,7 +474,7 @@ static int remove_xattr(struct ubifs_info *c, struct inode *host,
return err;
mutex_lock(&host_ui->ui_mutex);
- host->i_ctime = current_time(host);
+ inode_set_ctime_current(host);
host_ui->xattr_cnt -= 1;
host_ui->xattr_size -= CALC_DENT_SIZE(fname_len(nm));
host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index 5f7ac8c84798..6b558cbbeb6b 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -100,7 +100,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT;
else
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
iinfo->i_crtime = inode->i_mtime;
if (unlikely(insert_inode_locked(inode) < 0)) {
make_bad_inode(inode);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 28cdfc57d946..d089795074e8 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -910,7 +910,7 @@ static int inode_getblk(struct inode *inode, struct udf_map_rq *map)
map->oflags = UDF_BLK_NEW | UDF_BLK_MAPPED;
iinfo->i_next_alloc_block = map->lblk + 1;
iinfo->i_next_alloc_goal = newblocknum + 1;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (IS_SYNC(inode))
udf_sync_inode(inode);
@@ -1298,7 +1298,7 @@ set_size:
goto out_unlock;
}
update_time:
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
if (IS_SYNC(inode))
udf_sync_inode(inode);
else
@@ -1329,6 +1329,7 @@ static int udf_read_inode(struct inode *inode, bool hidden_inode)
int bs = inode->i_sb->s_blocksize;
int ret = -EIO;
uint32_t uid, gid;
+ struct timespec64 ctime;
reread:
if (iloc->partitionReferenceNum >= sbi->s_partitions) {
@@ -1507,7 +1508,8 @@ reread:
udf_disk_stamp_to_time(&inode->i_atime, fe->accessTime);
udf_disk_stamp_to_time(&inode->i_mtime, fe->modificationTime);
- udf_disk_stamp_to_time(&inode->i_ctime, fe->attrTime);
+ udf_disk_stamp_to_time(&ctime, fe->attrTime);
+ inode_set_ctime_to_ts(inode, ctime);
iinfo->i_unique = le64_to_cpu(fe->uniqueID);
iinfo->i_lenEAttr = le32_to_cpu(fe->lengthExtendedAttr);
@@ -1522,7 +1524,8 @@ reread:
udf_disk_stamp_to_time(&inode->i_atime, efe->accessTime);
udf_disk_stamp_to_time(&inode->i_mtime, efe->modificationTime);
udf_disk_stamp_to_time(&iinfo->i_crtime, efe->createTime);
- udf_disk_stamp_to_time(&inode->i_ctime, efe->attrTime);
+ udf_disk_stamp_to_time(&ctime, efe->attrTime);
+ inode_set_ctime_to_ts(inode, ctime);
iinfo->i_unique = le64_to_cpu(efe->uniqueID);
iinfo->i_lenEAttr = le32_to_cpu(efe->lengthExtendedAttr);
@@ -1799,7 +1802,7 @@ static int udf_update_inode(struct inode *inode, int do_sync)
udf_time_to_disk_stamp(&fe->accessTime, inode->i_atime);
udf_time_to_disk_stamp(&fe->modificationTime, inode->i_mtime);
- udf_time_to_disk_stamp(&fe->attrTime, inode->i_ctime);
+ udf_time_to_disk_stamp(&fe->attrTime, inode_get_ctime(inode));
memset(&(fe->impIdent), 0, sizeof(struct regid));
strcpy(fe->impIdent.ident, UDF_ID_DEVELOPER);
fe->impIdent.identSuffix[0] = UDF_OS_CLASS_UNIX;
@@ -1830,12 +1833,12 @@ static int udf_update_inode(struct inode *inode, int do_sync)
udf_adjust_time(iinfo, inode->i_atime);
udf_adjust_time(iinfo, inode->i_mtime);
- udf_adjust_time(iinfo, inode->i_ctime);
+ udf_adjust_time(iinfo, inode_get_ctime(inode));
udf_time_to_disk_stamp(&efe->accessTime, inode->i_atime);
udf_time_to_disk_stamp(&efe->modificationTime, inode->i_mtime);
udf_time_to_disk_stamp(&efe->createTime, iinfo->i_crtime);
- udf_time_to_disk_stamp(&efe->attrTime, inode->i_ctime);
+ udf_time_to_disk_stamp(&efe->attrTime, inode_get_ctime(inode));
memset(&(efe->impIdent), 0, sizeof(efe->impIdent));
strcpy(efe->impIdent.ident, UDF_ID_DEVELOPER);
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index a95579b043ab..ae55ab8859b6 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -365,7 +365,7 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
*(__le32 *)((struct allocDescImpUse *)iter.fi.icb.impUse)->impUse =
cpu_to_le32(iinfo->i_unique & 0x00000000FFFFFFFFUL);
udf_fiiter_write_fi(&iter, NULL);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
udf_fiiter_release(&iter);
udf_add_fid_counter(dir->i_sb, false, 1);
@@ -471,7 +471,7 @@ static int udf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
udf_fiiter_release(&iter);
udf_add_fid_counter(dir->i_sb, true, 1);
inc_nlink(dir);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
d_instantiate_new(dentry, inode);
@@ -523,8 +523,8 @@ static int udf_rmdir(struct inode *dir, struct dentry *dentry)
inode->i_size = 0;
inode_dec_link_count(dir);
udf_add_fid_counter(dir->i_sb, true, -1);
- inode->i_ctime = dir->i_ctime = dir->i_mtime =
- current_time(inode);
+ dir->i_mtime = inode_set_ctime_to_ts(dir,
+ inode_set_ctime_current(inode));
mark_inode_dirty(dir);
ret = 0;
end_rmdir:
@@ -555,11 +555,11 @@ static int udf_unlink(struct inode *dir, struct dentry *dentry)
set_nlink(inode, 1);
}
udf_fiiter_delete_entry(&iter);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
inode_dec_link_count(inode);
udf_add_fid_counter(dir->i_sb, false, -1);
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
ret = 0;
end_unlink:
udf_fiiter_release(&iter);
@@ -746,9 +746,9 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
inc_nlink(inode);
udf_add_fid_counter(dir->i_sb, false, 1);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
ihold(inode);
d_instantiate(dentry, inode);
@@ -833,7 +833,7 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = current_time(old_inode);
+ inode_set_ctime_current(old_inode);
mark_inode_dirty(old_inode);
/*
@@ -861,13 +861,13 @@ static int udf_rename(struct mnt_idmap *idmap, struct inode *old_dir,
}
if (new_inode) {
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
inode_dec_link_count(new_inode);
udf_add_fid_counter(old_dir->i_sb, S_ISDIR(new_inode->i_mode),
-1);
}
- old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir);
- new_dir->i_ctime = new_dir->i_mtime = current_time(new_dir);
+ old_dir->i_mtime = inode_set_ctime_current(old_dir);
+ new_dir->i_mtime = inode_set_ctime_current(new_dir);
mark_inode_dirty(old_dir);
mark_inode_dirty(new_dir);
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 779b5c2c75f6..f7eaf7b14594 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -149,7 +149,7 @@ static int udf_symlink_getattr(struct mnt_idmap *idmap,
struct inode *inode = d_backing_inode(dentry);
struct page *page;
- generic_fillattr(&nop_mnt_idmap, inode, stat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
page = read_mapping_page(inode->i_mapping, 0, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 379d75796a5c..fd57f03b6c93 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -107,7 +107,7 @@ void ufs_set_link(struct inode *dir, struct ufs_dir_entry *de,
ufs_commit_chunk(page, pos, len);
ufs_put_page(page);
if (update_times)
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
ufs_handle_dirsync(dir);
}
@@ -397,7 +397,7 @@ got_it:
ufs_set_de_type(sb, de, inode->i_mode);
ufs_commit_chunk(page, pos, rec_len);
- dir->i_mtime = dir->i_ctime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
mark_inode_dirty(dir);
err = ufs_handle_dirsync(dir);
@@ -539,7 +539,7 @@ int ufs_delete_entry(struct inode *inode, struct ufs_dir_entry *dir,
pde->d_reclen = cpu_to_fs16(sb, to - from);
dir->d_ino = 0;
ufs_commit_chunk(page, pos, to - from);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
err = ufs_handle_dirsync(inode);
out:
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 06bd84d555bd..a1e7bd9d1f98 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -292,7 +292,7 @@ cg_found:
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
inode->i_blocks = 0;
inode->i_generation = 0;
- inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
ufsi->i_flags = UFS_I(dir)->i_flags;
ufsi->i_lastfrag = 0;
ufsi->i_shadow = 0;
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a4246c83a8cd..21a4779a2de5 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -296,7 +296,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
if (new)
*new = 1;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (IS_SYNC(inode))
ufs_sync_inode (inode);
mark_inode_dirty(inode);
@@ -378,7 +378,7 @@ ufs_inode_getblock(struct inode *inode, u64 ind_block,
mark_buffer_dirty(bh);
if (IS_SYNC(inode))
sync_dirty_buffer(bh);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
mark_inode_dirty(inode);
out:
brelse (bh);
@@ -580,11 +580,12 @@ static int ufs1_read_inode(struct inode *inode, struct ufs_inode *ufs_inode)
inode->i_size = fs64_to_cpu(sb, ufs_inode->ui_size);
inode->i_atime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_atime.tv_sec);
- inode->i_ctime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec);
+ inode_set_ctime(inode,
+ (signed)fs32_to_cpu(sb, ufs_inode->ui_ctime.tv_sec),
+ 0);
inode->i_mtime.tv_sec = (signed)fs32_to_cpu(sb, ufs_inode->ui_mtime.tv_sec);
inode->i_mtime.tv_nsec = 0;
inode->i_atime.tv_nsec = 0;
- inode->i_ctime.tv_nsec = 0;
inode->i_blocks = fs32_to_cpu(sb, ufs_inode->ui_blocks);
inode->i_generation = fs32_to_cpu(sb, ufs_inode->ui_gen);
ufsi->i_flags = fs32_to_cpu(sb, ufs_inode->ui_flags);
@@ -626,10 +627,10 @@ static int ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size);
inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime);
- inode->i_ctime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_ctime);
+ inode_set_ctime(inode, fs64_to_cpu(sb, ufs2_inode->ui_ctime),
+ fs32_to_cpu(sb, ufs2_inode->ui_ctimensec));
inode->i_mtime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_mtime);
inode->i_atime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_atimensec);
- inode->i_ctime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_ctimensec);
inode->i_mtime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_mtimensec);
inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen);
@@ -726,7 +727,8 @@ static void ufs1_update_inode(struct inode *inode, struct ufs_inode *ufs_inode)
ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
ufs_inode->ui_atime.tv_usec = 0;
- ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec);
+ ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb,
+ inode_get_ctime(inode).tv_sec);
ufs_inode->ui_ctime.tv_usec = 0;
ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec);
ufs_inode->ui_mtime.tv_usec = 0;
@@ -770,8 +772,9 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec);
ufs_inode->ui_atimensec = cpu_to_fs32(sb, inode->i_atime.tv_nsec);
- ufs_inode->ui_ctime = cpu_to_fs64(sb, inode->i_ctime.tv_sec);
- ufs_inode->ui_ctimensec = cpu_to_fs32(sb, inode->i_ctime.tv_nsec);
+ ufs_inode->ui_ctime = cpu_to_fs64(sb, inode_get_ctime(inode).tv_sec);
+ ufs_inode->ui_ctimensec = cpu_to_fs32(sb,
+ inode_get_ctime(inode).tv_nsec);
ufs_inode->ui_mtime = cpu_to_fs64(sb, inode->i_mtime.tv_sec);
ufs_inode->ui_mtimensec = cpu_to_fs32(sb, inode->i_mtime.tv_nsec);
@@ -1205,7 +1208,7 @@ static int ufs_truncate(struct inode *inode, loff_t size)
truncate_setsize(inode, size);
ufs_truncate_blocks(inode);
- inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
mark_inode_dirty(inode);
out:
UFSD("EXIT: err %d\n", err);
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index 36154b5aca6d..9cad29463791 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -153,7 +153,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
struct inode *inode = d_inode(old_dentry);
int error;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_link_count(inode);
ihold(inode);
@@ -220,7 +220,7 @@ static int ufs_unlink(struct inode *dir, struct dentry *dentry)
if (err)
goto out;
- inode->i_ctime = dir->i_ctime;
+ inode_set_ctime_to_ts(inode, inode_get_ctime(dir));
inode_dec_link_count(inode);
err = 0;
out:
@@ -282,7 +282,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
if (!new_de)
goto out_dir;
ufs_set_link(new_dir, new_de, new_page, old_inode, 1);
- new_inode->i_ctime = current_time(new_inode);
+ inode_set_ctime_current(new_inode);
if (dir_de)
drop_nlink(new_inode);
inode_dec_link_count(new_inode);
@@ -298,7 +298,7 @@ static int ufs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
* Like most other Unix systems, set the ctime for inodes on a
* rename.
*/
- old_inode->i_ctime = current_time(old_inode);
+ inode_set_ctime_current(old_inode);
ufs_delete_entry(old_dir, old_de, old_page);
mark_inode_dirty(old_inode);
diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c
index 075f15c43c78..5f1a14d5b927 100644
--- a/fs/vboxsf/dir.c
+++ b/fs/vboxsf/dir.c
@@ -179,9 +179,10 @@ static int vboxsf_dir_iterate(struct file *dir, struct dir_context *ctx)
return 0;
}
+WRAP_DIR_ITER(vboxsf_dir_iterate) // FIXME!
const struct file_operations vboxsf_dir_fops = {
.open = vboxsf_dir_open,
- .iterate = vboxsf_dir_iterate,
+ .iterate_shared = shared_vboxsf_dir_iterate,
.release = vboxsf_dir_release,
.read = generic_read_dir,
.llseek = generic_file_llseek,
diff --git a/fs/vboxsf/shfl_hostintf.h b/fs/vboxsf/shfl_hostintf.h
index aca829062c12..069a019c9247 100644
--- a/fs/vboxsf/shfl_hostintf.h
+++ b/fs/vboxsf/shfl_hostintf.h
@@ -68,9 +68,9 @@ struct shfl_string {
/** UTF-8 or UTF-16 string. Nul terminated. */
union {
- u8 utf8[2];
- u16 utf16[1];
- u16 ucs2[1]; /* misnomer, use utf16. */
+ u8 legacy_padding[2];
+ DECLARE_FLEX_ARRAY(u8, utf8);
+ DECLARE_FLEX_ARRAY(u16, utf16);
} string;
};
VMMDEV_ASSERT_SIZE(shfl_string, 6);
diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c
index dd0ae1188e87..83f20dd15522 100644
--- a/fs/vboxsf/utils.c
+++ b/fs/vboxsf/utils.c
@@ -128,8 +128,8 @@ int vboxsf_init_inode(struct vboxsf_sbi *sbi, struct inode *inode,
inode->i_atime = ns_to_timespec64(
info->access_time.ns_relative_to_unix_epoch);
- inode->i_ctime = ns_to_timespec64(
- info->change_time.ns_relative_to_unix_epoch);
+ inode_set_ctime_to_ts(inode,
+ ns_to_timespec64(info->change_time.ns_relative_to_unix_epoch));
inode->i_mtime = ns_to_timespec64(
info->modification_time.ns_relative_to_unix_epoch);
return 0;
@@ -252,7 +252,7 @@ int vboxsf_getattr(struct mnt_idmap *idmap, const struct path *path,
if (err)
return err;
- generic_fillattr(&nop_mnt_idmap, d_inode(dentry), kstat);
+ generic_fillattr(&nop_mnt_idmap, request_mask, d_inode(dentry), kstat);
return 0;
}
diff --git a/fs/verity/fsverity_private.h b/fs/verity/fsverity_private.h
index 49bf3a1eb2a0..d071a6e32581 100644
--- a/fs/verity/fsverity_private.h
+++ b/fs/verity/fsverity_private.h
@@ -118,16 +118,16 @@ void fsverity_free_info(struct fsverity_info *vi);
int fsverity_get_descriptor(struct inode *inode,
struct fsverity_descriptor **desc_ret);
-int __init fsverity_init_info_cache(void);
-void __init fsverity_exit_info_cache(void);
+void __init fsverity_init_info_cache(void);
/* signature.c */
#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+extern int fsverity_require_signatures;
int fsverity_verify_signature(const struct fsverity_info *vi,
const u8 *signature, size_t sig_size);
-int __init fsverity_init_signature(void);
+void __init fsverity_init_signature(void);
#else /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
static inline int
fsverity_verify_signature(const struct fsverity_info *vi,
@@ -136,15 +136,13 @@ fsverity_verify_signature(const struct fsverity_info *vi,
return 0;
}
-static inline int fsverity_init_signature(void)
+static inline void fsverity_init_signature(void)
{
- return 0;
}
#endif /* !CONFIG_FS_VERITY_BUILTIN_SIGNATURES */
/* verify.c */
-int __init fsverity_init_workqueue(void);
-void __init fsverity_exit_workqueue(void);
+void __init fsverity_init_workqueue(void);
#endif /* _FSVERITY_PRIVATE_H */
diff --git a/fs/verity/hash_algs.c b/fs/verity/hash_algs.c
index c598d2035476..6b08b1d9a7d7 100644
--- a/fs/verity/hash_algs.c
+++ b/fs/verity/hash_algs.c
@@ -226,6 +226,14 @@ void __init fsverity_check_hash_algs(void)
if (!alg->name)
continue;
+ /*
+ * 0 must never be allocated as an FS_VERITY_HASH_ALG_* value,
+ * as it is reserved for users that use 0 to mean unspecified or
+ * a default value. fs/verity/ itself doesn't care and doesn't
+ * have a default algorithm, but some users make use of this.
+ */
+ BUG_ON(i == 0);
+
BUG_ON(alg->digest_size > FS_VERITY_MAX_DIGEST_SIZE);
/*
diff --git a/fs/verity/init.c b/fs/verity/init.c
index 023905151035..a29f062f6047 100644
--- a/fs/verity/init.c
+++ b/fs/verity/init.c
@@ -9,6 +9,37 @@
#include <linux/ratelimit.h>
+#ifdef CONFIG_SYSCTL
+static struct ctl_table_header *fsverity_sysctl_header;
+
+static struct ctl_table fsverity_sysctl_table[] = {
+#ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES
+ {
+ .procname = "require_signatures",
+ .data = &fsverity_require_signatures,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
+ { }
+};
+
+static void __init fsverity_init_sysctl(void)
+{
+ fsverity_sysctl_header = register_sysctl("fs/verity",
+ fsverity_sysctl_table);
+ if (!fsverity_sysctl_header)
+ panic("fsverity sysctl registration failed");
+}
+#else /* CONFIG_SYSCTL */
+static inline void fsverity_init_sysctl(void)
+{
+}
+#endif /* !CONFIG_SYSCTL */
+
void fsverity_msg(const struct inode *inode, const char *level,
const char *fmt, ...)
{
@@ -33,28 +64,11 @@ void fsverity_msg(const struct inode *inode, const char *level,
static int __init fsverity_init(void)
{
- int err;
-
fsverity_check_hash_algs();
-
- err = fsverity_init_info_cache();
- if (err)
- return err;
-
- err = fsverity_init_workqueue();
- if (err)
- goto err_exit_info_cache;
-
- err = fsverity_init_signature();
- if (err)
- goto err_exit_workqueue;
-
+ fsverity_init_info_cache();
+ fsverity_init_workqueue();
+ fsverity_init_sysctl();
+ fsverity_init_signature();
return 0;
-
-err_exit_workqueue:
- fsverity_exit_workqueue();
-err_exit_info_cache:
- fsverity_exit_info_cache();
- return err;
}
late_initcall(fsverity_init)
diff --git a/fs/verity/open.c b/fs/verity/open.c
index 1db5106a9c38..6c31a871b84b 100644
--- a/fs/verity/open.c
+++ b/fs/verity/open.c
@@ -408,18 +408,10 @@ void __fsverity_cleanup_inode(struct inode *inode)
}
EXPORT_SYMBOL_GPL(__fsverity_cleanup_inode);
-int __init fsverity_init_info_cache(void)
+void __init fsverity_init_info_cache(void)
{
- fsverity_info_cachep = KMEM_CACHE_USERCOPY(fsverity_info,
- SLAB_RECLAIM_ACCOUNT,
- file_digest);
- if (!fsverity_info_cachep)
- return -ENOMEM;
- return 0;
-}
-
-void __init fsverity_exit_info_cache(void)
-{
- kmem_cache_destroy(fsverity_info_cachep);
- fsverity_info_cachep = NULL;
+ fsverity_info_cachep = KMEM_CACHE_USERCOPY(
+ fsverity_info,
+ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC,
+ file_digest);
}
diff --git a/fs/verity/signature.c b/fs/verity/signature.c
index 72034bc71c9d..90c07573dd77 100644
--- a/fs/verity/signature.c
+++ b/fs/verity/signature.c
@@ -24,7 +24,7 @@
* /proc/sys/fs/verity/require_signatures
* If 1, all verity files must have a valid builtin signature.
*/
-static int fsverity_require_signatures;
+int fsverity_require_signatures;
/*
* Keyring that contains the trusted X.509 certificates.
@@ -62,6 +62,22 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
return 0;
}
+ if (fsverity_keyring->keys.nr_leaves_on_tree == 0) {
+ /*
+ * The ".fs-verity" keyring is empty, due to builtin signatures
+ * being supported by the kernel but not actually being used.
+ * In this case, verify_pkcs7_signature() would always return an
+ * error, usually ENOKEY. It could also be EBADMSG if the
+ * PKCS#7 is malformed, but that isn't very important to
+ * distinguish. So, just skip to ENOKEY to avoid the attack
+ * surface of the PKCS#7 parser, which would otherwise be
+ * reachable by any task able to execute FS_IOC_ENABLE_VERITY.
+ */
+ fsverity_err(inode,
+ "fs-verity keyring is empty, rejecting signed file!");
+ return -ENOKEY;
+ }
+
d = kzalloc(sizeof(*d) + hash_alg->digest_size, GFP_KERNEL);
if (!d)
return -ENOMEM;
@@ -93,59 +109,14 @@ int fsverity_verify_signature(const struct fsverity_info *vi,
return 0;
}
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *fsverity_sysctl_header;
-
-static struct ctl_table fsverity_sysctl_table[] = {
- {
- .procname = "require_signatures",
- .data = &fsverity_require_signatures,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
- { }
-};
-
-static int __init fsverity_sysctl_init(void)
-{
- fsverity_sysctl_header = register_sysctl("fs/verity", fsverity_sysctl_table);
- if (!fsverity_sysctl_header) {
- pr_err("sysctl registration failed!\n");
- return -ENOMEM;
- }
- return 0;
-}
-#else /* !CONFIG_SYSCTL */
-static inline int __init fsverity_sysctl_init(void)
+void __init fsverity_init_signature(void)
{
- return 0;
-}
-#endif /* !CONFIG_SYSCTL */
-
-int __init fsverity_init_signature(void)
-{
- struct key *ring;
- int err;
-
- ring = keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
- current_cred(), KEY_POS_SEARCH |
+ fsverity_keyring =
+ keyring_alloc(".fs-verity", KUIDT_INIT(0), KGIDT_INIT(0),
+ current_cred(), KEY_POS_SEARCH |
KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE |
KEY_USR_SEARCH | KEY_USR_SETATTR,
- KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
- if (IS_ERR(ring))
- return PTR_ERR(ring);
-
- err = fsverity_sysctl_init();
- if (err)
- goto err_put_ring;
-
- fsverity_keyring = ring;
- return 0;
-
-err_put_ring:
- key_put(ring);
- return err;
+ KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL);
+ if (IS_ERR(fsverity_keyring))
+ panic("failed to allocate \".fs-verity\" keyring");
}
diff --git a/fs/verity/verify.c b/fs/verity/verify.c
index 433cef51f5f6..904ccd7e8e16 100644
--- a/fs/verity/verify.c
+++ b/fs/verity/verify.c
@@ -346,7 +346,7 @@ void fsverity_enqueue_verify_work(struct work_struct *work)
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
-int __init fsverity_init_workqueue(void)
+void __init fsverity_init_workqueue(void)
{
/*
* Use a high-priority workqueue to prioritize verification work, which
@@ -360,12 +360,5 @@ int __init fsverity_init_workqueue(void)
WQ_HIGHPRI,
num_online_cpus());
if (!fsverity_read_workqueue)
- return -ENOMEM;
- return 0;
-}
-
-void __init fsverity_exit_workqueue(void)
-{
- destroy_workqueue(fsverity_read_workqueue);
- fsverity_read_workqueue = NULL;
+ panic("failed to allocate fsverity_read_queue");
}
diff --git a/fs/xattr.c b/fs/xattr.c
index e7bbb7f57557..efd4736bc94b 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -1040,12 +1040,32 @@ const char *xattr_full_name(const struct xattr_handler *handler,
EXPORT_SYMBOL(xattr_full_name);
/**
- * free_simple_xattr - free an xattr object
+ * simple_xattr_space - estimate the memory used by a simple xattr
+ * @name: the full name of the xattr
+ * @size: the size of its value
+ *
+ * This takes no account of how much larger the two slab objects actually are:
+ * that would depend on the slab implementation, when what is required is a
+ * deterministic number, which grows with name length and size and quantity.
+ *
+ * Return: The approximate number of bytes of memory used by such an xattr.
+ */
+size_t simple_xattr_space(const char *name, size_t size)
+{
+ /*
+ * Use "40" instead of sizeof(struct simple_xattr), to return the
+ * same result on 32-bit and 64-bit, and even if simple_xattr grows.
+ */
+ return 40 + size + strlen(name);
+}
+
+/**
+ * simple_xattr_free - free an xattr object
* @xattr: the xattr object
*
* Free the xattr object. Can handle @xattr being NULL.
*/
-static inline void free_simple_xattr(struct simple_xattr *xattr)
+void simple_xattr_free(struct simple_xattr *xattr)
{
if (xattr)
kfree(xattr->name);
@@ -1073,7 +1093,7 @@ struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
if (len < sizeof(*new_xattr))
return NULL;
- new_xattr = kvmalloc(len, GFP_KERNEL);
+ new_xattr = kvmalloc(len, GFP_KERNEL_ACCOUNT);
if (!new_xattr)
return NULL;
@@ -1164,7 +1184,6 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
* @value: the value to store along the xattr
* @size: the size of @value
* @flags: the flags determining how to set the xattr
- * @removed_size: the size of the removed xattr
*
* Set a new xattr object.
* If @value is passed a new xattr object will be allocated. If XATTR_REPLACE
@@ -1181,29 +1200,27 @@ int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
* nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
* XATTR_REPLACE we fail as mentioned above.
*
- * Return: On success zero and on error a negative error code is returned.
+ * Return: On success, the removed or replaced xattr is returned, to be freed
+ * by the caller; or NULL if none. On failure a negative error code is returned.
*/
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags,
- ssize_t *removed_size)
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+ const char *name, const void *value,
+ size_t size, int flags)
{
- struct simple_xattr *xattr = NULL, *new_xattr = NULL;
+ struct simple_xattr *old_xattr = NULL, *new_xattr = NULL;
struct rb_node *parent = NULL, **rbp;
int err = 0, ret;
- if (removed_size)
- *removed_size = -1;
-
/* value == NULL means remove */
if (value) {
new_xattr = simple_xattr_alloc(value, size);
if (!new_xattr)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- new_xattr->name = kstrdup(name, GFP_KERNEL);
+ new_xattr->name = kstrdup(name, GFP_KERNEL_ACCOUNT);
if (!new_xattr->name) {
- free_simple_xattr(new_xattr);
- return -ENOMEM;
+ simple_xattr_free(new_xattr);
+ return ERR_PTR(-ENOMEM);
}
}
@@ -1217,12 +1234,12 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
else if (ret > 0)
rbp = &(*rbp)->rb_right;
else
- xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
- if (xattr)
+ old_xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
+ if (old_xattr)
break;
}
- if (xattr) {
+ if (old_xattr) {
/* Fail if XATTR_CREATE is requested and the xattr exists. */
if (flags & XATTR_CREATE) {
err = -EEXIST;
@@ -1230,12 +1247,10 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
}
if (new_xattr)
- rb_replace_node(&xattr->rb_node, &new_xattr->rb_node,
- &xattrs->rb_root);
+ rb_replace_node(&old_xattr->rb_node,
+ &new_xattr->rb_node, &xattrs->rb_root);
else
- rb_erase(&xattr->rb_node, &xattrs->rb_root);
- if (!err && removed_size)
- *removed_size = xattr->size;
+ rb_erase(&old_xattr->rb_node, &xattrs->rb_root);
} else {
/* Fail if XATTR_REPLACE is requested but no xattr is found. */
if (flags & XATTR_REPLACE) {
@@ -1260,12 +1275,10 @@ int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
out_unlock:
write_unlock(&xattrs->lock);
- if (err)
- free_simple_xattr(new_xattr);
- else
- free_simple_xattr(xattr);
- return err;
-
+ if (!err)
+ return old_xattr;
+ simple_xattr_free(new_xattr);
+ return ERR_PTR(err);
}
static bool xattr_is_trusted(const char *name)
@@ -1370,14 +1383,17 @@ void simple_xattrs_init(struct simple_xattrs *xattrs)
/**
* simple_xattrs_free - free xattrs
* @xattrs: xattr header whose xattrs to destroy
+ * @freed_space: approximate number of bytes of memory freed from @xattrs
*
* Destroy all xattrs in @xattr. When this is called no one can hold a
* reference to any of the xattrs anymore.
*/
-void simple_xattrs_free(struct simple_xattrs *xattrs)
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space)
{
struct rb_node *rbp;
+ if (freed_space)
+ *freed_space = 0;
rbp = rb_first(&xattrs->rb_root);
while (rbp) {
struct simple_xattr *xattr;
@@ -1386,7 +1402,10 @@ void simple_xattrs_free(struct simple_xattrs *xattrs)
rbp_next = rb_next(rbp);
xattr = rb_entry(rbp, struct simple_xattr, rb_node);
rb_erase(&xattr->rb_node, &xattrs->rb_root);
- free_simple_xattr(xattr);
+ if (freed_space)
+ *freed_space += simple_xattr_space(xattr->name,
+ xattr->size);
+ simple_xattr_free(xattr);
rbp = rbp_next;
}
}
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 25e2841084e1..f9015f88eca7 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -591,7 +591,7 @@ struct xfs_attr_shortform {
uint8_t valuelen; /* actual length of value (no NULL) */
uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
uint8_t nameval[]; /* name & value bytes concatenated */
- } list[1]; /* variable sized array */
+ } list[]; /* variable sized array */
};
typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
@@ -620,19 +620,29 @@ typedef struct xfs_attr_leaf_entry { /* sorted on key, not name */
typedef struct xfs_attr_leaf_name_local {
__be16 valuelen; /* number of bytes in value */
__u8 namelen; /* length of name bytes */
- __u8 nameval[1]; /* name/value bytes */
+ /*
+ * In Linux 6.5 this flex array was converted from nameval[1] to
+ * nameval[]. Be very careful here about extra padding at the end;
+ * see xfs_attr_leaf_entsize_local() for details.
+ */
+ __u8 nameval[]; /* name/value bytes */
} xfs_attr_leaf_name_local_t;
typedef struct xfs_attr_leaf_name_remote {
__be32 valueblk; /* block number of value bytes */
__be32 valuelen; /* number of bytes in value */
__u8 namelen; /* length of name bytes */
- __u8 name[1]; /* name bytes */
+ /*
+ * In Linux 6.5 this flex array was converted from name[1] to name[].
+ * Be very careful here about extra padding at the end; see
+ * xfs_attr_leaf_entsize_remote() for details.
+ */
+ __u8 name[]; /* name bytes */
} xfs_attr_leaf_name_remote_t;
typedef struct xfs_attr_leafblock {
xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
- xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
+ xfs_attr_leaf_entry_t entries[]; /* sorted on key, not name */
/*
* The rest of the block contains the following structures after the
* leaf entries, growing from the bottom up. The variables are never
@@ -664,7 +674,7 @@ struct xfs_attr3_leaf_hdr {
struct xfs_attr3_leafblock {
struct xfs_attr3_leaf_hdr hdr;
- struct xfs_attr_leaf_entry entries[1];
+ struct xfs_attr_leaf_entry entries[];
/*
* The rest of the block contains the following structures after the
@@ -747,14 +757,61 @@ xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx)
*/
static inline int xfs_attr_leaf_entsize_remote(int nlen)
{
- return round_up(sizeof(struct xfs_attr_leaf_name_remote) - 1 +
- nlen, XFS_ATTR_LEAF_NAME_ALIGN);
+ /*
+ * Prior to Linux 6.5, struct xfs_attr_leaf_name_remote ended with
+ * name[1], which was used as a flexarray. The layout of this struct
+ * is 9 bytes of fixed-length fields followed by a __u8 flex array at
+ * offset 9.
+ *
+ * On most architectures, struct xfs_attr_leaf_name_remote had two
+ * bytes of implicit padding at the end of the struct to make the
+ * struct length 12. After converting name[1] to name[], there are
+ * three implicit padding bytes and the struct size remains 12.
+ * However, there are compiler configurations that do not add implicit
+ * padding at all (m68k) and have been broken for years.
+ *
+ * This entsize computation historically added (the xattr name length)
+ * to (the padded struct length - 1) and rounded that sum up to the
+ * nearest multiple of 4 (NAME_ALIGN). IOWs, round_up(11 + nlen, 4).
+ * This is encoded in the ondisk format, so we cannot change this.
+ *
+ * Compute the entsize from offsetof of the flexarray and manually
+ * adding bytes for the implicit padding.
+ */
+ const size_t remotesize =
+ offsetof(struct xfs_attr_leaf_name_remote, name) + 2;
+
+ return round_up(remotesize + nlen, XFS_ATTR_LEAF_NAME_ALIGN);
}
static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen)
{
- return round_up(sizeof(struct xfs_attr_leaf_name_local) - 1 +
- nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN);
+ /*
+ * Prior to Linux 6.5, struct xfs_attr_leaf_name_local ended with
+ * nameval[1], which was used as a flexarray. The layout of this
+ * struct is 3 bytes of fixed-length fields followed by a __u8 flex
+ * array at offset 3.
+ *
+ * struct xfs_attr_leaf_name_local had zero bytes of implicit padding
+ * at the end of the struct to make the struct length 4. On most
+ * architectures, after converting nameval[1] to nameval[], there is
+ * one implicit padding byte and the struct size remains 4. However,
+ * there are compiler configurations that do not add implicit padding
+ * at all (m68k) and would break.
+ *
+ * This entsize computation historically added (the xattr name and
+ * value length) to (the padded struct length - 1) and rounded that sum
+ * up to the nearest multiple of 4 (NAME_ALIGN). IOWs, the formula is
+ * round_up(3 + nlen + vlen, 4). This is encoded in the ondisk format,
+ * so we cannot change this.
+ *
+ * Compute the entsize from offsetof of the flexarray and manually
+ * adding bytes for the implicit padding.
+ */
+ const size_t localsize =
+ offsetof(struct xfs_attr_leaf_name_local, nameval);
+
+ return round_up(localsize + nlen + vlen, XFS_ATTR_LEAF_NAME_ALIGN);
}
static inline int xfs_attr_leaf_entsize_local_max(int bsize)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 9c60ebb328b4..2cbf9ea39b8c 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -592,12 +592,12 @@ typedef struct xfs_attrlist_cursor {
struct xfs_attrlist {
__s32 al_count; /* number of entries in attrlist */
__s32 al_more; /* T/F: more attrs (do call again) */
- __s32 al_offset[1]; /* byte offsets of attrs [var-sized] */
+ __s32 al_offset[]; /* byte offsets of attrs [var-sized] */
};
struct xfs_attrlist_ent { /* data from attr_list() */
__u32 a_valuelen; /* number bytes in value of attr */
- char a_name[1]; /* attr name (NULL terminated) */
+ char a_name[]; /* attr name (NULL terminated) */
};
typedef struct xfs_fsop_attrlist_handlereq {
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 758aacd8166b..a35781577cad 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -222,7 +222,8 @@ xfs_inode_from_disk(
*/
inode->i_atime = xfs_inode_from_disk_ts(from, from->di_atime);
inode->i_mtime = xfs_inode_from_disk_ts(from, from->di_mtime);
- inode->i_ctime = xfs_inode_from_disk_ts(from, from->di_ctime);
+ inode_set_ctime_to_ts(inode,
+ xfs_inode_from_disk_ts(from, from->di_ctime));
ip->i_disk_size = be64_to_cpu(from->di_size);
ip->i_nblocks = be64_to_cpu(from->di_nblocks);
@@ -316,7 +317,7 @@ xfs_inode_to_disk(
to->di_atime = xfs_inode_to_disk_ts(ip, inode->i_atime);
to->di_mtime = xfs_inode_to_disk_ts(ip, inode->i_mtime);
- to->di_ctime = xfs_inode_to_disk_ts(ip, inode->i_ctime);
+ to->di_ctime = xfs_inode_to_disk_ts(ip, inode_get_ctime(inode));
to->di_nlink = cpu_to_be32(inode->i_nlink);
to->di_gen = cpu_to_be32(inode->i_generation);
to->di_mode = cpu_to_be16(inode->i_mode);
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index cb4796b6e693..ad22656376d3 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -62,12 +62,12 @@ xfs_trans_ichgtime(
ASSERT(tp);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- tv = current_time(inode);
+ /* If the mtime changes, then ctime must also change */
+ ASSERT(flags & XFS_ICHGTIME_CHG);
+ tv = inode_set_ctime_current(inode);
if (flags & XFS_ICHGTIME_MOD)
inode->i_mtime = tv;
- if (flags & XFS_ICHGTIME_CHG)
- inode->i_ctime = tv;
if (flags & XFS_ICHGTIME_CREATE)
ip->i_crtime = tv;
}
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index e382a35e98d8..05be757668bb 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0+
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2019-2023 Oracle. All Rights Reserved.
* Author: Darrick J. Wong <djwong@kernel.org>
@@ -8,6 +8,8 @@
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
#include "xfs_mount.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
@@ -16,6 +18,7 @@
#include "xfs_ag.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
+#include "xfs_icache.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -53,6 +56,7 @@ struct xchk_fscounters {
uint64_t frextents;
unsigned long long icount_min;
unsigned long long icount_max;
+ bool frozen;
};
/*
@@ -123,6 +127,82 @@ xchk_fscount_warmup(
return error;
}
+static inline int
+xchk_fsfreeze(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ error = freeze_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+ trace_xchk_fsfreeze(sc, error);
+ return error;
+}
+
+static inline int
+xchk_fsthaw(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ /* This should always succeed, we have a kernel freeze */
+ error = thaw_super(sc->mp->m_super, FREEZE_HOLDER_KERNEL);
+ trace_xchk_fsthaw(sc, error);
+ return error;
+}
+
+/*
+ * We couldn't stabilize the filesystem long enough to sample all the variables
+ * that comprise the summary counters and compare them to the percpu counters.
+ * We need to disable all writer threads, which means taking the first two
+ * freeze levels to put userspace to sleep, and the third freeze level to
+ * prevent background threads from starting new transactions. Take one level
+ * more to prevent other callers from unfreezing the filesystem while we run.
+ */
+STATIC int
+xchk_fscounters_freeze(
+ struct xfs_scrub *sc)
+{
+ struct xchk_fscounters *fsc = sc->buf;
+ int error = 0;
+
+ if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
+ sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
+ mnt_drop_write_file(sc->file);
+ }
+
+ /* Try to grab a kernel freeze. */
+ while ((error = xchk_fsfreeze(sc)) == -EBUSY) {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ delay(HZ / 10);
+ }
+ if (error)
+ return error;
+
+ fsc->frozen = true;
+ return 0;
+}
+
+/* Thaw the filesystem after checking or repairing fscounters. */
+STATIC void
+xchk_fscounters_cleanup(
+ void *buf)
+{
+ struct xchk_fscounters *fsc = buf;
+ struct xfs_scrub *sc = fsc->sc;
+ int error;
+
+ if (!fsc->frozen)
+ return;
+
+ error = xchk_fsthaw(sc);
+ if (error)
+ xfs_emerg(sc->mp, "still frozen after scrub, err=%d", error);
+ else
+ fsc->frozen = false;
+}
+
int
xchk_setup_fscounters(
struct xfs_scrub *sc)
@@ -140,6 +220,7 @@ xchk_setup_fscounters(
sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
if (!sc->buf)
return -ENOMEM;
+ sc->buf_cleanup = xchk_fscounters_cleanup;
fsc = sc->buf;
fsc->sc = sc;
@@ -150,7 +231,18 @@ xchk_setup_fscounters(
if (error)
return error;
- return xchk_trans_alloc(sc, 0);
+ /*
+ * Pause all writer activity in the filesystem while we're scrubbing to
+ * reduce the likelihood of background perturbations to the counters
+ * throwing off our calculations.
+ */
+ if (sc->flags & XCHK_TRY_HARDER) {
+ error = xchk_fscounters_freeze(sc);
+ if (error)
+ return error;
+ }
+
+ return xfs_trans_alloc_empty(sc->mp, &sc->tp);
}
/*
@@ -290,8 +382,7 @@ retry:
if (fsc->ifree > fsc->icount) {
if (tries--)
goto retry;
- xchk_set_incomplete(sc);
- return 0;
+ return -EDEADLOCK;
}
return 0;
@@ -367,6 +458,8 @@ xchk_fscount_count_frextents(
* Otherwise, we /might/ have a problem. If the change in the summations is
* more than we want to tolerate, the filesystem is probably busy and we should
* just send back INCOMPLETE and see if userspace will try again.
+ *
+ * If we're repairing then we require an exact match.
*/
static inline bool
xchk_fscount_within_range(
@@ -396,21 +489,7 @@ xchk_fscount_within_range(
if (expected >= min_value && expected <= max_value)
return true;
- /*
- * If the difference between the two summations is too large, the fs
- * might just be busy and so we'll mark the scrub incomplete. Return
- * true here so that we don't mark the counter corrupt.
- *
- * XXX: In the future when userspace can grant scrub permission to
- * quiesce the filesystem to solve the outsized variance problem, this
- * check should be moved up and the return code changed to signal to
- * userspace that we need quiesce permission.
- */
- if (max_value - min_value >= XCHK_FSCOUNT_MIN_VARIANCE) {
- xchk_set_incomplete(sc);
- return true;
- }
-
+ /* Everything else is bad. */
return false;
}
@@ -422,6 +501,7 @@ xchk_fscounters(
struct xfs_mount *mp = sc->mp;
struct xchk_fscounters *fsc = sc->buf;
int64_t icount, ifree, fdblocks, frextents;
+ bool try_again = false;
int error;
/* Snapshot the percpu counters. */
@@ -431,9 +511,26 @@ xchk_fscounters(
frextents = percpu_counter_sum(&mp->m_frextents);
/* No negative values, please! */
- if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0)
+ if (icount < 0 || ifree < 0)
xchk_set_corrupt(sc);
+ /*
+ * If the filesystem is not frozen, the counter summation calls above
+ * can race with xfs_mod_freecounter, which subtracts a requested space
+ * reservation from the counter and undoes the subtraction if that made
+ * the counter go negative. Therefore, it's possible to see negative
+ * values here, and we should only flag that as a corruption if we
+ * froze the fs. This is much more likely to happen with frextents
+ * since there are no reserved pools.
+ */
+ if (fdblocks < 0 || frextents < 0) {
+ if (!fsc->frozen)
+ return -EDEADLOCK;
+
+ xchk_set_corrupt(sc);
+ return 0;
+ }
+
/* See if icount is obviously wrong. */
if (icount < fsc->icount_min || icount > fsc->icount_max)
xchk_set_corrupt(sc);
@@ -447,12 +544,6 @@ xchk_fscounters(
xchk_set_corrupt(sc);
/*
- * XXX: We can't quiesce percpu counter updates, so exit early.
- * This can be re-enabled when we gain exclusive freeze functionality.
- */
- return 0;
-
- /*
* If ifree exceeds icount by more than the minimum variance then
* something's probably wrong with the counters.
*/
@@ -463,8 +554,6 @@ xchk_fscounters(
error = xchk_fscount_aggregate_agcounts(sc, fsc);
if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
return error;
- if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
- return 0;
/* Count the free extents counter for rt volumes. */
error = xchk_fscount_count_frextents(sc, fsc);
@@ -473,20 +562,45 @@ xchk_fscounters(
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
return 0;
- /* Compare the in-core counters with whatever we counted. */
- if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
- xchk_set_corrupt(sc);
+ /*
+ * Compare the in-core counters with whatever we counted. If the fs is
+ * frozen, we treat the discrepancy as a corruption because the freeze
+ * should have stabilized the counter values. Otherwise, we need
+ * userspace to call us back having granted us freeze permission.
+ */
+ if (!xchk_fscount_within_range(sc, icount, &mp->m_icount,
+ fsc->icount)) {
+ if (fsc->frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
- if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree))
- xchk_set_corrupt(sc);
+ if (!xchk_fscount_within_range(sc, ifree, &mp->m_ifree, fsc->ifree)) {
+ if (fsc->frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
if (!xchk_fscount_within_range(sc, fdblocks, &mp->m_fdblocks,
- fsc->fdblocks))
- xchk_set_corrupt(sc);
+ fsc->fdblocks)) {
+ if (fsc->frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
- fsc->frextents))
- xchk_set_corrupt(sc);
+ fsc->frextents)) {
+ if (fsc->frozen)
+ xchk_set_corrupt(sc);
+ else
+ try_again = true;
+ }
+
+ if (try_again)
+ return -EDEADLOCK;
return 0;
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 3d98f604765e..a0fffbcd022b 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -184,8 +184,10 @@ xchk_teardown(
xchk_irele(sc, sc->ip);
sc->ip = NULL;
}
- if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ if (sc->flags & XCHK_HAVE_FREEZE_PROT) {
+ sc->flags &= ~XCHK_HAVE_FREEZE_PROT;
mnt_drop_write_file(sc->file);
+ }
if (sc->buf) {
if (sc->buf_cleanup)
sc->buf_cleanup(sc->buf);
@@ -505,6 +507,8 @@ retry_op:
error = mnt_want_write_file(sc->file);
if (error)
goto out_sc;
+
+ sc->flags |= XCHK_HAVE_FREEZE_PROT;
}
/* Set up for the operation. */
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e113f2f5c254..f8ba00e51ca9 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -106,6 +106,7 @@ struct xfs_scrub {
/* XCHK state flags grow up from zero, XREP state flags grown down from 2^31 */
#define XCHK_TRY_HARDER (1U << 0) /* can't get resources, try again */
+#define XCHK_HAVE_FREEZE_PROT (1U << 1) /* do we have freeze protection? */
#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */
#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */
#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index b3894daeb86a..0b54f1a1cf0c 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -98,6 +98,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
#define XFS_SCRUB_STATE_STRINGS \
{ XCHK_TRY_HARDER, "try_harder" }, \
+ { XCHK_HAVE_FREEZE_PROT, "nofreeze" }, \
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
{ XCHK_NEED_DRAIN, "need_drain" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }
@@ -693,6 +694,31 @@ TRACE_EVENT(xchk_fscounters_within_range,
__entry->old_value)
)
+DECLARE_EVENT_CLASS(xchk_fsfreeze_class,
+ TP_PROTO(struct xfs_scrub *sc, int error),
+ TP_ARGS(sc, error),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, type)
+ __field(int, error)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->type = sc->sm->sm_type;
+ __entry->error = error;
+ ),
+ TP_printk("dev %d:%d type %s error %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __entry->error)
+);
+#define DEFINE_XCHK_FSFREEZE_EVENT(name) \
+DEFINE_EVENT(xchk_fsfreeze_class, name, \
+ TP_PROTO(struct xfs_scrub *sc, int error), \
+ TP_ARGS(sc, error))
+DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsfreeze);
+DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsthaw);
+
TRACE_EVENT(xchk_refcount_incorrect,
TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *irec,
xfs_nlink_t seen),
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 791db7d9c849..6b840301817a 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -233,7 +233,7 @@ xfs_acl_set_mode(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
inode->i_mode = mode;
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (xfs_has_wsync(mp))
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 451942fb38ec..2fca4b4e7fd8 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -578,7 +578,7 @@ const struct address_space_operations xfs_address_space_operations = {
.read_folio = xfs_vm_read_folio,
.readahead = xfs_vm_readahead,
.writepages = xfs_vm_writepages,
- .dirty_folio = filemap_dirty_folio,
+ .dirty_folio = iomap_dirty_folio,
.release_folio = iomap_release_folio,
.invalidate_folio = iomap_invalidate_folio,
.bmap = xfs_vm_bmap,
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index fbb675563208..fcefab687285 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1644,6 +1644,7 @@ xfs_swap_extents(
uint64_t f;
int resblks = 0;
unsigned int flags = 0;
+ struct timespec64 ctime;
/*
* Lock the inodes against other IO, page faults and truncate to
@@ -1756,8 +1757,9 @@ xfs_swap_extents(
* process that the file was not changed out from
* under it.
*/
- if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
- (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
+ ctime = inode_get_ctime(VFS_I(ip));
+ if ((sbp->bs_ctime.tv_sec != ctime.tv_sec) ||
+ (sbp->bs_ctime.tv_nsec != ctime.tv_nsec) ||
(sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
(sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
error = -EBUSY;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 15d1e5a7c2d3..3b903f6bce98 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1938,14 +1938,17 @@ void
xfs_free_buftarg(
struct xfs_buftarg *btp)
{
+ struct block_device *bdev = btp->bt_bdev;
+
unregister_shrinker(&btp->bt_shrinker);
ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
percpu_counter_destroy(&btp->bt_io_count);
list_lru_destroy(&btp->bt_lru);
- blkdev_issue_flush(btp->bt_bdev);
- invalidate_bdev(btp->bt_bdev);
fs_put_dax(btp->bt_daxdev, btp->bt_mount);
+ /* the main block device is closed by kill_block_super */
+ if (bdev != btp->bt_mount->m_super->s_bdev)
+ blkdev_put(bdev, btp->bt_mount->m_super);
kmem_free(btp);
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 9e62cc500140..360fe83a334f 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -843,10 +843,9 @@ xfs_init_new_inode(
ip->i_df.if_nextents = 0;
ASSERT(ip->i_nblocks == 0);
- tv = current_time(inode);
+ tv = inode_set_ctime_current(inode);
inode->i_mtime = tv;
inode->i_atime = tv;
- inode->i_ctime = tv;
ip->i_extsize = 0;
ip->i_diflags = 0;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 91c847a84e10..127b2410eb20 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -528,7 +528,7 @@ xfs_inode_to_log_dinode(
memset(to->di_pad3, 0, sizeof(to->di_pad3));
to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode->i_atime);
to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode->i_mtime);
- to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode->i_ctime);
+ to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode));
to->di_nlink = inode->i_nlink;
to->di_gen = inode->i_generation;
to->di_mode = inode->i_mode;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 24718adb3c16..2ededd3f6b8c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -573,10 +573,10 @@ xfs_vn_getattr(
stat->gid = vfsgid_into_kgid(vfsgid);
stat->ino = ip->i_ino;
stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
+ fill_mg_cmtime(stat, request_mask, inode);
+
if (xfs_has_v3inodes(mp)) {
if (request_mask & STATX_BTIME) {
stat->result_mask |= STATX_BTIME;
@@ -917,7 +917,7 @@ xfs_setattr_size(
if (newsize != oldsize &&
!(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {
iattr->ia_ctime = iattr->ia_mtime =
- current_time(inode);
+ current_mgtime(inode);
iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
}
@@ -1029,7 +1029,6 @@ xfs_vn_setattr(
STATIC int
xfs_vn_update_time(
struct inode *inode,
- struct timespec64 *now,
int flags)
{
struct xfs_inode *ip = XFS_I(inode);
@@ -1037,13 +1036,16 @@ xfs_vn_update_time(
int log_flags = XFS_ILOG_TIMESTAMP;
struct xfs_trans *tp;
int error;
+ struct timespec64 now;
trace_xfs_update_time(ip);
if (inode->i_sb->s_flags & SB_LAZYTIME) {
if (!((flags & S_VERSION) &&
- inode_maybe_inc_iversion(inode, false)))
- return generic_update_time(inode, now, flags);
+ inode_maybe_inc_iversion(inode, false))) {
+ generic_update_time(inode, flags);
+ return 0;
+ }
/* Capture the iversion update that just occurred */
log_flags |= XFS_ILOG_CORE;
@@ -1054,12 +1056,15 @@ xfs_vn_update_time(
return error;
xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (flags & S_CTIME)
- inode->i_ctime = *now;
+ if (flags & (S_CTIME|S_MTIME))
+ now = inode_set_ctime_current(inode);
+ else
+ now = current_time(inode);
+
if (flags & S_MTIME)
- inode->i_mtime = *now;
+ inode->i_mtime = now;
if (flags & S_ATIME)
- inode->i_atime = *now;
+ inode->i_atime = now;
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, log_flags);
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f225413a993c..c2093cb56092 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -100,8 +100,8 @@ xfs_bulkstat_one_int(
buf->bs_atime_nsec = inode->i_atime.tv_nsec;
buf->bs_mtime = inode->i_mtime.tv_sec;
buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
- buf->bs_ctime = inode->i_ctime.tv_sec;
- buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
+ buf->bs_ctime = inode_get_ctime(inode).tv_sec;
+ buf->bs_ctime_nsec = inode_get_ctime(inode).tv_nsec;
buf->bs_gen = inode->i_generation;
buf->bs_mode = inode->i_mode;
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index 9737b5a9f405..c4cc99b70dd3 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -56,7 +56,7 @@ xfs_check_ondisk_structs(void)
/* dir/attr trees */
XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);
- XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock, 88);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock, 80);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmt_hdr, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_da3_blkinfo, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_da3_intnode, 64);
@@ -88,7 +88,8 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valuelen, 4);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8);
XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 40);
+ XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_shortform, 4);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.totsize, 0);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, hdr.count, 2);
XFS_CHECK_OFFSET(struct xfs_attr_shortform, list[0].namelen, 4);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 818510243130..c79eac048456 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -377,17 +377,6 @@ disable_dax:
return 0;
}
-static void
-xfs_bdev_mark_dead(
- struct block_device *bdev)
-{
- xfs_force_shutdown(bdev->bd_holder, SHUTDOWN_DEVICE_REMOVED);
-}
-
-static const struct blk_holder_ops xfs_holder_ops = {
- .mark_dead = xfs_bdev_mark_dead,
-};
-
STATIC int
xfs_blkdev_get(
xfs_mount_t *mp,
@@ -396,8 +385,8 @@ xfs_blkdev_get(
{
int error = 0;
- *bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE, mp,
- &xfs_holder_ops);
+ *bdevp = blkdev_get_by_path(name, BLK_OPEN_READ | BLK_OPEN_WRITE,
+ mp->m_super, &fs_holder_ops);
if (IS_ERR(*bdevp)) {
error = PTR_ERR(*bdevp);
xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
@@ -407,31 +396,45 @@ xfs_blkdev_get(
}
STATIC void
-xfs_blkdev_put(
- struct xfs_mount *mp,
- struct block_device *bdev)
-{
- if (bdev)
- blkdev_put(bdev, mp);
-}
-
-STATIC void
-xfs_close_devices(
+xfs_shutdown_devices(
struct xfs_mount *mp)
{
+ /*
+ * Udev is triggered whenever anyone closes a block device or unmounts
+ * a file systemm on a block device.
+ * The default udev rules invoke blkid to read the fs super and create
+ * symlinks to the bdev under /dev/disk. For this, it uses buffered
+ * reads through the page cache.
+ *
+ * xfs_db also uses buffered reads to examine metadata. There is no
+ * coordination between xfs_db and udev, which means that they can run
+ * concurrently. Note there is no coordination between the kernel and
+ * blkid either.
+ *
+ * On a system with 64k pages, the page cache can cache the superblock
+ * and the root inode (and hence the root directory) with the same 64k
+ * page. If udev spawns blkid after the mkfs and the system is busy
+ * enough that it is still running when xfs_db starts up, they'll both
+ * read from the same page in the pagecache.
+ *
+ * The unmount writes updated inode metadata to disk directly. The XFS
+ * buffer cache does not use the bdev pagecache, so it needs to
+ * invalidate that pagecache on unmount. If the above scenario occurs,
+ * the pagecache no longer reflects what's on disk, xfs_db reads the
+ * stale metadata, and fails to find /a. Most of the time this succeeds
+ * because closing a bdev invalidates the page cache, but when processes
+ * race, everyone loses.
+ */
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
- struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
-
- xfs_free_buftarg(mp->m_logdev_targp);
- xfs_blkdev_put(mp, logdev);
+ blkdev_issue_flush(mp->m_logdev_targp->bt_bdev);
+ invalidate_bdev(mp->m_logdev_targp->bt_bdev);
}
if (mp->m_rtdev_targp) {
- struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
-
- xfs_free_buftarg(mp->m_rtdev_targp);
- xfs_blkdev_put(mp, rtdev);
+ blkdev_issue_flush(mp->m_rtdev_targp->bt_bdev);
+ invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
}
- xfs_free_buftarg(mp->m_ddev_targp);
+ blkdev_issue_flush(mp->m_ddev_targp->bt_bdev);
+ invalidate_bdev(mp->m_ddev_targp->bt_bdev);
}
/*
@@ -448,17 +451,24 @@ STATIC int
xfs_open_devices(
struct xfs_mount *mp)
{
- struct block_device *ddev = mp->m_super->s_bdev;
+ struct super_block *sb = mp->m_super;
+ struct block_device *ddev = sb->s_bdev;
struct block_device *logdev = NULL, *rtdev = NULL;
int error;
/*
+ * blkdev_put() can't be called under s_umount, see the comment
+ * in get_tree_bdev() for more details
+ */
+ up_write(&sb->s_umount);
+
+ /*
* Open real time and log devices - order is important.
*/
if (mp->m_logname) {
error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
if (error)
- return error;
+ goto out_relock;
}
if (mp->m_rtname) {
@@ -496,7 +506,10 @@ xfs_open_devices(
mp->m_logdev_targp = mp->m_ddev_targp;
}
- return 0;
+ error = 0;
+out_relock:
+ down_write(&sb->s_umount);
+ return error;
out_free_rtdev_targ:
if (mp->m_rtdev_targp)
@@ -504,11 +517,12 @@ xfs_open_devices(
out_free_ddev_targ:
xfs_free_buftarg(mp->m_ddev_targp);
out_close_rtdev:
- xfs_blkdev_put(mp, rtdev);
+ if (rtdev)
+ blkdev_put(rtdev, sb);
out_close_logdev:
if (logdev && logdev != ddev)
- xfs_blkdev_put(mp, logdev);
- return error;
+ blkdev_put(logdev, sb);
+ goto out_relock;
}
/*
@@ -758,6 +772,17 @@ static void
xfs_mount_free(
struct xfs_mount *mp)
{
+ /*
+ * Free the buftargs here because blkdev_put needs to be called outside
+ * of sb->s_umount, which is held around the call to ->put_super.
+ */
+ if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
+ xfs_free_buftarg(mp->m_logdev_targp);
+ if (mp->m_rtdev_targp)
+ xfs_free_buftarg(mp->m_rtdev_targp);
+ if (mp->m_ddev_targp)
+ xfs_free_buftarg(mp->m_ddev_targp);
+
kfree(mp->m_rtname);
kfree(mp->m_logname);
kmem_free(mp);
@@ -1133,10 +1158,6 @@ xfs_fs_put_super(
{
struct xfs_mount *mp = XFS_M(sb);
- /* if ->fill_super failed, we have no mount to tear down */
- if (!sb->s_fs_info)
- return;
-
xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
@@ -1147,10 +1168,7 @@ xfs_fs_put_super(
xfs_inodegc_free_percpu(mp);
xfs_destroy_percpu_counters(mp);
xfs_destroy_mount_workqueues(mp);
- xfs_close_devices(mp);
-
- sb->s_fs_info = NULL;
- xfs_mount_free(mp);
+ xfs_shutdown_devices(mp);
}
static long
@@ -1492,7 +1510,7 @@ xfs_fs_fill_super(
error = xfs_fs_validate_params(mp);
if (error)
- goto out_free_names;
+ return error;
sb_min_blocksize(sb, BBSIZE);
sb->s_xattr = xfs_xattr_handlers;
@@ -1519,11 +1537,11 @@ xfs_fs_fill_super(
error = xfs_open_devices(mp);
if (error)
- goto out_free_names;
+ return error;
error = xfs_init_mount_workqueues(mp);
if (error)
- goto out_close_devices;
+ goto out_shutdown_devices;
error = xfs_init_percpu_counters(mp);
if (error)
@@ -1737,11 +1755,8 @@ xfs_fs_fill_super(
xfs_destroy_percpu_counters(mp);
out_destroy_workqueues:
xfs_destroy_mount_workqueues(mp);
- out_close_devices:
- xfs_close_devices(mp);
- out_free_names:
- sb->s_fs_info = NULL;
- xfs_mount_free(mp);
+ out_shutdown_devices:
+ xfs_shutdown_devices(mp);
return error;
out_unmount:
@@ -1934,7 +1949,8 @@ xfs_fs_reconfigure(
return 0;
}
-static void xfs_fs_free(
+static void
+xfs_fs_free(
struct fs_context *fc)
{
struct xfs_mount *mp = fc->s_fs_info;
@@ -2003,13 +2019,21 @@ static int xfs_init_fs_context(
return 0;
}
+static void
+xfs_kill_sb(
+ struct super_block *sb)
+{
+ kill_block_super(sb);
+ xfs_mount_free(XFS_M(sb));
+}
+
static struct file_system_type xfs_fs_type = {
.owner = THIS_MODULE,
.name = "xfs",
.init_fs_context = xfs_init_fs_context,
.parameters = xfs_fs_parameters,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .kill_sb = xfs_kill_sb,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
};
MODULE_ALIAS_FS("xfs");
diff --git a/fs/zonefs/file.c b/fs/zonefs/file.c
index 92c9aaae3663..b2c9b35df8f7 100644
--- a/fs/zonefs/file.c
+++ b/fs/zonefs/file.c
@@ -175,7 +175,7 @@ const struct address_space_operations zonefs_file_aops = {
.read_folio = zonefs_read_folio,
.readahead = zonefs_readahead,
.writepages = zonefs_writepages,
- .dirty_folio = filemap_dirty_folio,
+ .dirty_folio = iomap_dirty_folio,
.release_folio = iomap_release_folio,
.invalidate_folio = iomap_invalidate_folio,
.migrate_folio = filemap_migrate_folio,
@@ -341,77 +341,6 @@ static loff_t zonefs_file_llseek(struct file *file, loff_t offset, int whence)
return generic_file_llseek_size(file, offset, whence, isize, isize);
}
-struct zonefs_zone_append_bio {
- /* The target inode of the BIO */
- struct inode *inode;
-
- /* For sync writes, the target append write offset */
- u64 append_offset;
-
- /*
- * This member must come last, bio_alloc_bioset will allocate enough
- * bytes for entire zonefs_bio but relies on bio being last.
- */
- struct bio bio;
-};
-
-static inline struct zonefs_zone_append_bio *
-zonefs_zone_append_bio(struct bio *bio)
-{
- return container_of(bio, struct zonefs_zone_append_bio, bio);
-}
-
-static void zonefs_file_zone_append_dio_bio_end_io(struct bio *bio)
-{
- struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
- struct zonefs_zone *z = zonefs_inode_zone(za_bio->inode);
- sector_t za_sector;
-
- if (bio->bi_status != BLK_STS_OK)
- goto bio_end;
-
- /*
- * If the file zone was written underneath the file system, the zone
- * append operation can still succedd (if the zone is not full) but
- * the write append location will not be where we expect it to be.
- * Check that we wrote where we intended to, that is, at z->z_wpoffset.
- */
- za_sector = z->z_sector + (za_bio->append_offset >> SECTOR_SHIFT);
- if (bio->bi_iter.bi_sector != za_sector) {
- zonefs_warn(za_bio->inode->i_sb,
- "Invalid write sector %llu for zone at %llu\n",
- bio->bi_iter.bi_sector, z->z_sector);
- bio->bi_status = BLK_STS_IOERR;
- }
-
-bio_end:
- iomap_dio_bio_end_io(bio);
-}
-
-static void zonefs_file_zone_append_dio_submit_io(const struct iomap_iter *iter,
- struct bio *bio,
- loff_t file_offset)
-{
- struct zonefs_zone_append_bio *za_bio = zonefs_zone_append_bio(bio);
- struct inode *inode = iter->inode;
- struct zonefs_zone *z = zonefs_inode_zone(inode);
-
- /*
- * Issue a zone append BIO to process sync dio writes. The append
- * file offset is saved to check the zone append write location
- * on completion of the BIO.
- */
- za_bio->inode = inode;
- za_bio->append_offset = file_offset;
-
- bio->bi_opf &= ~REQ_OP_WRITE;
- bio->bi_opf |= REQ_OP_ZONE_APPEND;
- bio->bi_iter.bi_sector = z->z_sector;
- bio->bi_end_io = zonefs_file_zone_append_dio_bio_end_io;
-
- submit_bio(bio);
-}
-
static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
int error, unsigned int flags)
{
@@ -442,14 +371,6 @@ static int zonefs_file_write_dio_end_io(struct kiocb *iocb, ssize_t size,
return 0;
}
-static struct bio_set zonefs_zone_append_bio_set;
-
-static const struct iomap_dio_ops zonefs_zone_append_dio_ops = {
- .submit_io = zonefs_file_zone_append_dio_submit_io,
- .end_io = zonefs_file_write_dio_end_io,
- .bio_set = &zonefs_zone_append_bio_set,
-};
-
static const struct iomap_dio_ops zonefs_write_dio_ops = {
.end_io = zonefs_file_write_dio_end_io,
};
@@ -533,9 +454,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct zonefs_zone *z = zonefs_inode_zone(inode);
struct super_block *sb = inode->i_sb;
- const struct iomap_dio_ops *dio_ops;
- bool sync = is_sync_kiocb(iocb);
- bool append = false;
ssize_t ret, count;
/*
@@ -543,7 +461,8 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
* as this can cause write reordering (e.g. the first aio gets EAGAIN
* on the inode lock but the second goes through but is now unaligned).
*/
- if (zonefs_zone_is_seq(z) && !sync && (iocb->ki_flags & IOCB_NOWAIT))
+ if (zonefs_zone_is_seq(z) && !is_sync_kiocb(iocb) &&
+ (iocb->ki_flags & IOCB_NOWAIT))
return -EOPNOTSUPP;
if (iocb->ki_flags & IOCB_NOWAIT) {
@@ -573,18 +492,6 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
goto inode_unlock;
}
mutex_unlock(&zi->i_truncate_mutex);
- append = sync;
- }
-
- if (append) {
- unsigned int max = bdev_max_zone_append_sectors(sb->s_bdev);
-
- max = ALIGN_DOWN(max << SECTOR_SHIFT, sb->s_blocksize);
- iov_iter_truncate(from, max);
-
- dio_ops = &zonefs_zone_append_dio_ops;
- } else {
- dio_ops = &zonefs_write_dio_ops;
}
/*
@@ -593,7 +500,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
* the user can make sense of the error.
*/
ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
- dio_ops, 0, NULL, 0);
+ &zonefs_write_dio_ops, 0, NULL, 0);
if (ret == -ENOTBLK)
ret = -EBUSY;
@@ -938,15 +845,3 @@ const struct file_operations zonefs_file_operations = {
.splice_write = iter_file_splice_write,
.iopoll = iocb_bio_iopoll,
};
-
-int zonefs_file_bioset_init(void)
-{
- return bioset_init(&zonefs_zone_append_bio_set, BIO_POOL_SIZE,
- offsetof(struct zonefs_zone_append_bio, bio),
- BIOSET_NEED_BVECS);
-}
-
-void zonefs_file_bioset_exit(void)
-{
- bioset_exit(&zonefs_zone_append_bio_set);
-}
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index bbe44a26a8e5..9d1a9808fbbb 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -658,7 +658,8 @@ static struct inode *zonefs_get_file_inode(struct inode *dir,
inode->i_ino = ino;
inode->i_mode = z->z_mode;
- inode->i_ctime = inode->i_mtime = inode->i_atime = dir->i_ctime;
+ inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+ inode_get_ctime(dir));
inode->i_uid = z->z_uid;
inode->i_gid = z->z_gid;
inode->i_size = z->z_wpoffset;
@@ -694,7 +695,8 @@ static struct inode *zonefs_get_zgroup_inode(struct super_block *sb,
inode->i_ino = ino;
inode_init_owner(&nop_mnt_idmap, inode, root, S_IFDIR | 0555);
inode->i_size = sbi->s_zgroup[ztype].g_nr_zones;
- inode->i_ctime = inode->i_mtime = inode->i_atime = root->i_ctime;
+ inode->i_mtime = inode->i_atime = inode_set_ctime_to_ts(inode,
+ inode_get_ctime(root));
inode->i_private = &sbi->s_zgroup[ztype];
set_nlink(inode, 2);
@@ -1317,7 +1319,7 @@ static int zonefs_fill_super(struct super_block *sb, void *data, int silent)
inode->i_ino = bdev_nr_zones(sb->s_bdev);
inode->i_mode = S_IFDIR | 0555;
- inode->i_ctime = inode->i_mtime = inode->i_atime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
inode->i_op = &zonefs_dir_inode_operations;
inode->i_fop = &zonefs_dir_operations;
inode->i_size = 2;
@@ -1412,13 +1414,9 @@ static int __init zonefs_init(void)
BUILD_BUG_ON(sizeof(struct zonefs_super) != ZONEFS_SUPER_SIZE);
- ret = zonefs_file_bioset_init();
- if (ret)
- return ret;
-
ret = zonefs_init_inodecache();
if (ret)
- goto destroy_bioset;
+ return ret;
ret = zonefs_sysfs_init();
if (ret)
@@ -1434,8 +1432,6 @@ sysfs_exit:
zonefs_sysfs_exit();
destroy_inodecache:
zonefs_destroy_inodecache();
-destroy_bioset:
- zonefs_file_bioset_exit();
return ret;
}
@@ -1445,7 +1441,6 @@ static void __exit zonefs_exit(void)
unregister_filesystem(&zonefs_type);
zonefs_sysfs_exit();
zonefs_destroy_inodecache();
- zonefs_file_bioset_exit();
}
MODULE_AUTHOR("Damien Le Moal");
diff --git a/fs/zonefs/zonefs.h b/fs/zonefs/zonefs.h
index f663b8ebc2cb..8175652241b5 100644
--- a/fs/zonefs/zonefs.h
+++ b/fs/zonefs/zonefs.h
@@ -279,8 +279,6 @@ extern const struct file_operations zonefs_dir_operations;
extern const struct address_space_operations zonefs_file_aops;
extern const struct file_operations zonefs_file_operations;
int zonefs_file_truncate(struct inode *inode, loff_t isize);
-int zonefs_file_bioset_init(void);
-void zonefs_file_bioset_exit(void);
/* In sysfs.c */
int zonefs_sysfs_register(struct super_block *sb);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 402a8c1c202d..a8f4b653ef4e 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -190,7 +190,7 @@ int hv_common_cpu_die(unsigned int cpu);
void *hv_alloc_hyperv_page(void);
void *hv_alloc_hyperv_zeroed_page(void);
-void hv_free_hyperv_page(unsigned long addr);
+void hv_free_hyperv_page(void *addr);
/**
* hv_cpu_number_to_vp_number() - Map CPU to VP.
diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
index 20c93f08c993..95a1d214108a 100644
--- a/include/asm-generic/word-at-a-time.h
+++ b/include/asm-generic/word-at-a-time.h
@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask)
return (mask >> 8) ? byte : byte + 1;
}
-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
{
unsigned long rhs = val | c->low_bits;
*data = rhs;
diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h
index 02f2ac4dd2df..e69cece404b3 100644
--- a/include/drm/display/drm_dp.h
+++ b/include/drm/display/drm_dp.h
@@ -1537,7 +1537,7 @@ enum drm_dp_phy {
#define DP_BRANCH_OUI_HEADER_SIZE 0xc
#define DP_RECEIVER_CAP_SIZE 0xf
-#define DP_DSC_RECEIVER_CAP_SIZE 0xf
+#define DP_DSC_RECEIVER_CAP_SIZE 0x10 /* DSC Capabilities 0x60 through 0x6F */
#define EDP_PSR_RECEIVER_CAP_SIZE 2
#define EDP_DISPLAY_CTL_CAP_SIZE 3
#define DP_LTTPR_COMMON_CAP_SIZE 8
diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h
index 169755d3de19..48e93f909ef6 100644
--- a/include/drm/drm_edid.h
+++ b/include/drm/drm_edid.h
@@ -61,15 +61,9 @@ struct std_timing {
u8 vfreq_aspect;
} __attribute__((packed));
-#define DRM_EDID_PT_SYNC_MASK (3 << 3)
-# define DRM_EDID_PT_ANALOG_CSYNC (0 << 3)
-# define DRM_EDID_PT_BIPOLAR_ANALOG_CSYNC (1 << 3)
-# define DRM_EDID_PT_DIGITAL_CSYNC (2 << 3)
-# define DRM_EDID_PT_CSYNC_ON_RGB (1 << 1) /* analog csync only */
-# define DRM_EDID_PT_CSYNC_SERRATE (1 << 2)
-# define DRM_EDID_PT_DIGITAL_SEPARATE_SYNC (3 << 3)
-# define DRM_EDID_PT_HSYNC_POSITIVE (1 << 1) /* also digital csync */
-# define DRM_EDID_PT_VSYNC_POSITIVE (1 << 2)
+#define DRM_EDID_PT_HSYNC_POSITIVE (1 << 1)
+#define DRM_EDID_PT_VSYNC_POSITIVE (1 << 2)
+#define DRM_EDID_PT_SEPARATE_SYNC (3 << 3)
#define DRM_EDID_PT_STEREO (1 << 5)
#define DRM_EDID_PT_INTERLACED (1 << 7)
diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h
index 4863b0f8299e..375737fd6c36 100644
--- a/include/drm/drm_fb_helper.h
+++ b/include/drm/drm_fb_helper.h
@@ -368,11 +368,6 @@ static inline void drm_fb_helper_deferred_io(struct fb_info *info,
{
}
-static inline int drm_fb_helper_defio_init(struct drm_fb_helper *fb_helper)
-{
- return -ENODEV;
-}
-
static inline void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper,
bool suspend)
{
diff --git a/include/drm/drm_probe_helper.h b/include/drm/drm_probe_helper.h
index 4977e0ab72db..fad3c4003b2b 100644
--- a/include/drm/drm_probe_helper.h
+++ b/include/drm/drm_probe_helper.h
@@ -25,6 +25,7 @@ void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector);
void drm_kms_helper_poll_disable(struct drm_device *dev);
void drm_kms_helper_poll_enable(struct drm_device *dev);
+void drm_kms_helper_poll_reschedule(struct drm_device *dev);
bool drm_kms_helper_is_poll_worker(void);
enum drm_mode_status drm_crtc_helper_mode_valid_fixed(struct drm_crtc *crtc,
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 402b545959af..5b27f94d4fad 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -431,7 +431,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
int vgic_v4_load(struct kvm_vcpu *vcpu);
void vgic_v4_commit(struct kvm_vcpu *vcpu);
-int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);
+int vgic_v4_put(struct kvm_vcpu *vcpu);
/* CPU HP callbacks */
void kvm_vgic_cpu_up(void);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index c4f5b5228105..11984ed29cb8 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -791,7 +791,7 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb)
{
bio->bi_opf |= REQ_POLLED;
- if (!is_sync_kiocb(kiocb))
+ if (kiocb->ki_flags & IOCB_NOWAIT)
bio->bi_opf |= REQ_NOWAIT;
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b96e00499f9e..495ca198775f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -397,8 +397,6 @@ struct blk_mq_hw_ctx {
*/
struct blk_mq_tags *sched_tags;
- /** @queued: Number of queued requests. */
- unsigned long queued;
/** @run: Number of dispatched requests. */
unsigned long run;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 0bad62cca3d0..d5c5e59ddbd2 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -52,7 +52,6 @@ struct block_device {
atomic_t bd_openers;
spinlock_t bd_size_lock; /* for bd_inode->i_size updates */
struct inode * bd_inode; /* will die */
- struct super_block * bd_super;
void * bd_claiming;
void * bd_holder;
const struct blk_holder_ops *bd_holder_ops;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ed44a997f629..83ce87354e9a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -750,7 +750,8 @@ static inline int bdev_read_only(struct block_device *bdev)
}
bool set_capacity_and_notify(struct gendisk *disk, sector_t size);
-bool disk_force_media_change(struct gendisk *disk, unsigned int events);
+void disk_force_media_change(struct gendisk *disk);
+void bdev_mark_dead(struct block_device *bdev, bool surprise);
void add_disk_randomness(struct gendisk *disk) __latent_entropy;
void rand_initialize_disk(struct gendisk *disk);
@@ -809,7 +810,6 @@ int __register_blkdev(unsigned int major, const char *name,
void unregister_blkdev(unsigned int major, const char *name);
bool disk_check_media_change(struct gendisk *disk);
-int __invalidate_device(struct block_device *bdev, bool kill_dirty);
void set_capacity(struct gendisk *disk, sector_t size);
#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
@@ -969,7 +969,6 @@ struct blk_plug {
bool multiple_queues;
bool has_elevator;
- bool nowait;
struct list_head cb_list; /* md requires an unplug callback */
};
@@ -1461,9 +1460,16 @@ void blkdev_show(struct seq_file *seqf, off_t offset);
#endif
struct blk_holder_ops {
- void (*mark_dead)(struct block_device *bdev);
+ void (*mark_dead)(struct block_device *bdev, bool surprise);
+
+ /*
+ * Sync the file system mounted on the block device.
+ */
+ void (*sync)(struct block_device *bdev);
};
+extern const struct blk_holder_ops fs_holder_ops;
+
/*
* Return the correct open flags for blkdev_get_by_* for super block flags
* as stored in sb->s_flags.
@@ -1522,8 +1528,6 @@ static inline int early_lookup_bdev(const char *pathname, dev_t *dev)
}
#endif /* CONFIG_BLOCK */
-int fsync_bdev(struct block_device *bdev);
-
int freeze_bdev(struct block_device *bdev);
int thaw_bdev(struct block_device *bdev);
diff --git a/include/linux/clk.h b/include/linux/clk.h
index 1ef013324237..06f1b292f8a0 100644
--- a/include/linux/clk.h
+++ b/include/linux/clk.h
@@ -183,6 +183,39 @@ int clk_get_scaled_duty_cycle(struct clk *clk, unsigned int scale);
*/
bool clk_is_match(const struct clk *p, const struct clk *q);
+/**
+ * clk_rate_exclusive_get - get exclusivity over the rate control of a
+ * producer
+ * @clk: clock source
+ *
+ * This function allows drivers to get exclusive control over the rate of a
+ * provider. It prevents any other consumer to execute, even indirectly,
+ * opereation which could alter the rate of the provider or cause glitches
+ *
+ * If exlusivity is claimed more than once on clock, even by the same driver,
+ * the rate effectively gets locked as exclusivity can't be preempted.
+ *
+ * Must not be called from within atomic context.
+ *
+ * Returns success (0) or negative errno.
+ */
+int clk_rate_exclusive_get(struct clk *clk);
+
+/**
+ * clk_rate_exclusive_put - release exclusivity over the rate control of a
+ * producer
+ * @clk: clock source
+ *
+ * This function allows drivers to release the exclusivity it previously got
+ * from clk_rate_exclusive_get()
+ *
+ * The caller must balance the number of clk_rate_exclusive_get() and
+ * clk_rate_exclusive_put() calls.
+ *
+ * Must not be called from within atomic context.
+ */
+void clk_rate_exclusive_put(struct clk *clk);
+
#else
static inline int clk_notifier_register(struct clk *clk,
@@ -236,6 +269,13 @@ static inline bool clk_is_match(const struct clk *p, const struct clk *q)
return p == q;
}
+static inline int clk_rate_exclusive_get(struct clk *clk)
+{
+ return 0;
+}
+
+static inline void clk_rate_exclusive_put(struct clk *clk) {}
+
#endif
#ifdef CONFIG_HAVE_CLK_PREPARE
@@ -583,38 +623,6 @@ struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id);
*/
struct clk *devm_get_clk_from_child(struct device *dev,
struct device_node *np, const char *con_id);
-/**
- * clk_rate_exclusive_get - get exclusivity over the rate control of a
- * producer
- * @clk: clock source
- *
- * This function allows drivers to get exclusive control over the rate of a
- * provider. It prevents any other consumer to execute, even indirectly,
- * opereation which could alter the rate of the provider or cause glitches
- *
- * If exlusivity is claimed more than once on clock, even by the same driver,
- * the rate effectively gets locked as exclusivity can't be preempted.
- *
- * Must not be called from within atomic context.
- *
- * Returns success (0) or negative errno.
- */
-int clk_rate_exclusive_get(struct clk *clk);
-
-/**
- * clk_rate_exclusive_put - release exclusivity over the rate control of a
- * producer
- * @clk: clock source
- *
- * This function allows drivers to release the exclusivity it previously got
- * from clk_rate_exclusive_get()
- *
- * The caller must balance the number of clk_rate_exclusive_get() and
- * clk_rate_exclusive_put() calls.
- *
- * Must not be called from within atomic context.
- */
-void clk_rate_exclusive_put(struct clk *clk);
/**
* clk_enable - inform the system when the clock source should be running.
@@ -974,14 +982,6 @@ static inline void clk_bulk_put_all(int num_clks, struct clk_bulk_data *clks) {}
static inline void devm_clk_put(struct device *dev, struct clk *clk) {}
-
-static inline int clk_rate_exclusive_get(struct clk *clk)
-{
- return 0;
-}
-
-static inline void clk_rate_exclusive_put(struct clk *clk) {}
-
static inline int clk_enable(struct clk *clk)
{
return 0;
diff --git a/include/linux/completion.h b/include/linux/completion.h
index 62b32b19e0a8..fb2915676574 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -116,6 +116,7 @@ extern bool try_wait_for_completion(struct completion *x);
extern bool completion_done(struct completion *x);
extern void complete(struct completion *);
+extern void complete_on_current_cpu(struct completion *x);
extern void complete_all(struct completion *);
#endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 6e6e57ec69e8..e006c719182b 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -70,6 +70,10 @@ extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
char *buf);
extern ssize_t cpu_show_retbleed(struct device *dev,
struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_spec_rstack_overflow(struct device *dev,
+ struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_gds(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 0d2e2a38b92d..f10fb87d49db 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -175,8 +175,8 @@ static inline unsigned int cpumask_first_zero(const struct cpumask *srcp)
/**
* cpumask_first_and - return the first cpu from *srcp1 & *srcp2
- * @src1p: the first input
- * @src2p: the second input
+ * @srcp1: the first input
+ * @srcp2: the second input
*
* Returns >= nr_cpu_ids if no cpus set in both. See also cpumask_next_and().
*/
@@ -1197,6 +1197,10 @@ cpumap_print_bitmask_to_buf(char *buf, const struct cpumask *mask,
/**
* cpumap_print_list_to_buf - copies the cpumask into the buffer as
* comma-separated list of cpus
+ * @buf: the buffer to copy into
+ * @mask: the cpumask to copy
+ * @off: in the string from which we are copying, we copy to @buf
+ * @count: the maximum number of bytes to print
*
* Everything is same with the above cpumap_print_bitmask_to_buf()
* except the print format.
diff --git a/include/linux/dnotify.h b/include/linux/dnotify.h
index b1d26f9f1c9f..9f183a679277 100644
--- a/include/linux/dnotify.h
+++ b/include/linux/dnotify.h
@@ -30,7 +30,7 @@ struct dnotify_struct {
FS_MOVED_FROM | FS_MOVED_TO)
extern void dnotify_flush(struct file *, fl_owner_t);
-extern int fcntl_dirnotify(int, struct file *, unsigned long);
+extern int fcntl_dirnotify(int, struct file *, unsigned int);
#else
@@ -38,7 +38,7 @@ static inline void dnotify_flush(struct file *filp, fl_owner_t id)
{
}
-static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg)
+static inline int fcntl_dirnotify(int fd, struct file *filp, unsigned int arg)
{
return -EINVAL;
}
diff --git a/include/linux/filelock.h b/include/linux/filelock.h
index efcdd1631d9b..95e868e09e29 100644
--- a/include/linux/filelock.h
+++ b/include/linux/filelock.h
@@ -144,7 +144,7 @@ int fcntl_setlk64(unsigned int, struct file *, unsigned int,
struct flock64 *);
#endif
-int fcntl_setlease(unsigned int fd, struct file *filp, long arg);
+int fcntl_setlease(unsigned int fd, struct file *filp, int arg);
int fcntl_getlease(struct file *filp);
/* fs/locks.c */
@@ -167,8 +167,8 @@ bool vfs_inode_has_locks(struct inode *inode);
int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
void lease_get_mtime(struct inode *, struct timespec64 *time);
-int generic_setlease(struct file *, long, struct file_lock **, void **priv);
-int vfs_setlease(struct file *, long, struct file_lock **, void **);
+int generic_setlease(struct file *, int, struct file_lock **, void **priv);
+int vfs_setlease(struct file *, int, struct file_lock **, void **);
int lease_modify(struct file_lock *, int, struct list_head *);
struct notifier_block;
@@ -213,7 +213,7 @@ static inline int fcntl_setlk64(unsigned int fd, struct file *file,
return -EACCES;
}
#endif
-static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
+static inline int fcntl_setlease(unsigned int fd, struct file *filp, int arg)
{
return -EINVAL;
}
@@ -306,13 +306,13 @@ static inline void lease_get_mtime(struct inode *inode,
return;
}
-static inline int generic_setlease(struct file *filp, long arg,
+static inline int generic_setlease(struct file *filp, int arg,
struct file_lock **flp, void **priv)
{
return -EINVAL;
}
-static inline int vfs_setlease(struct file *filp, long arg,
+static inline int vfs_setlease(struct file *filp, int arg,
struct file_lock **lease, void **priv)
{
return -EINVAL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 6867512907d6..dda08d973639 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -338,6 +338,20 @@ enum rw_hint {
#define IOCB_NOIO (1 << 20)
/* can use bio alloc cache */
#define IOCB_ALLOC_CACHE (1 << 21)
+/*
+ * IOCB_DIO_CALLER_COMP can be set by the iocb owner, to indicate that the
+ * iocb completion can be passed back to the owner for execution from a safe
+ * context rather than needing to be punted through a workqueue. If this
+ * flag is set, the bio completion handling may set iocb->dio_complete to a
+ * handler function and iocb->private to context information for that handler.
+ * The issuer should call the handler with that context information from task
+ * context to complete the processing of the iocb. Note that while this
+ * provides a task context for the dio_complete() callback, it should only be
+ * used on the completion side for non-IO generating completions. It's fine to
+ * call blocking functions from this callback, but they should not wait for
+ * unrelated IO (like cache flushing, new IO generation, etc).
+ */
+#define IOCB_DIO_CALLER_COMP (1 << 22)
/* for use in trace events */
#define TRACE_IOCB_STRINGS \
@@ -351,7 +365,8 @@ enum rw_hint {
{ IOCB_WRITE, "WRITE" }, \
{ IOCB_WAITQ, "WAITQ" }, \
{ IOCB_NOIO, "NOIO" }, \
- { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }
+ { IOCB_ALLOC_CACHE, "ALLOC_CACHE" }, \
+ { IOCB_DIO_CALLER_COMP, "CALLER_COMP" }
struct kiocb {
struct file *ki_filp;
@@ -360,7 +375,23 @@ struct kiocb {
void *private;
int ki_flags;
u16 ki_ioprio; /* See linux/ioprio.h */
- struct wait_page_queue *ki_waitq; /* for async buffered IO */
+ union {
+ /*
+ * Only used for async buffered reads, where it denotes the
+ * page waitqueue associated with completing the read. Valid
+ * IFF IOCB_WAITQ is set.
+ */
+ struct wait_page_queue *ki_waitq;
+ /*
+ * Can be used for O_DIRECT IO, where the completion handling
+ * is punted back to the issuer of the IO. May only be set
+ * if IOCB_DIO_CALLER_COMP is set by the issuer, and the issuer
+ * must then check for presence of this handler when ki_complete
+ * is invoked. The data passed in to this handler must be
+ * assigned to ->private when dio_complete is assigned.
+ */
+ ssize_t (*dio_complete)(void *data);
+ };
};
static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -642,7 +673,7 @@ struct inode {
loff_t i_size;
struct timespec64 i_atime;
struct timespec64 i_mtime;
- struct timespec64 i_ctime;
+ struct timespec64 __i_ctime; /* use inode_*_ctime accessors! */
spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */
unsigned short i_bytes;
u8 i_blkbits;
@@ -1069,7 +1100,7 @@ extern void fasync_free(struct fasync_struct *);
extern void kill_fasync(struct fasync_struct **, int, int);
extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
-extern int f_setown(struct file *filp, unsigned long arg, int force);
+extern int f_setown(struct file *filp, int who, int force);
extern void f_delown(struct file *filp);
extern pid_t f_getown(struct file *filp);
extern int send_sigurg(struct fown_struct *fown);
@@ -1095,6 +1126,8 @@ extern int send_sigurg(struct fown_struct *fown);
#define SB_LAZYTIME BIT(25) /* Update the on-disk [acm]times lazily */
/* These sb flags are internal to the kernel */
+#define SB_DEAD BIT(21)
+#define SB_DYING BIT(24)
#define SB_SUBMOUNT BIT(26)
#define SB_FORCE BIT(27)
#define SB_NOSEC BIT(28)
@@ -1147,7 +1180,8 @@ enum {
#define SB_FREEZE_LEVELS (SB_FREEZE_COMPLETE - 1)
struct sb_writers {
- int frozen; /* Is sb frozen? */
+ unsigned short frozen; /* Is sb frozen? */
+ unsigned short freeze_holders; /* Who froze fs? */
struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS];
};
@@ -1474,7 +1508,79 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
kgid_has_mapping(fs_userns, kgid);
}
-extern struct timespec64 current_time(struct inode *inode);
+struct timespec64 current_mgtime(struct inode *inode);
+struct timespec64 current_time(struct inode *inode);
+struct timespec64 inode_set_ctime_current(struct inode *inode);
+
+/*
+ * Multigrain timestamps
+ *
+ * Conditionally use fine-grained ctime and mtime timestamps when there
+ * are users actively observing them via getattr. The primary use-case
+ * for this is NFS clients that use the ctime to distinguish between
+ * different states of the file, and that are often fooled by multiple
+ * operations that occur in the same coarse-grained timer tick.
+ *
+ * The kernel always keeps normalized struct timespec64 values in the ctime,
+ * which means that only the first 30 bits of the value are used. Use the
+ * 31st bit of the ctime's tv_nsec field as a flag to indicate that the value
+ * has been queried since it was last updated.
+ */
+#define I_CTIME_QUERIED (1L<<30)
+
+/**
+ * inode_get_ctime - fetch the current ctime from the inode
+ * @inode: inode from which to fetch ctime
+ *
+ * Grab the current ctime tv_nsec field from the inode, mask off the
+ * I_CTIME_QUERIED flag and return it. This is mostly intended for use by
+ * internal consumers of the ctime that aren't concerned with ensuring a
+ * fine-grained update on the next change (e.g. when preparing to store
+ * the value in the backing store for later retrieval).
+ *
+ * This is safe to call regardless of whether the underlying filesystem
+ * is using multigrain timestamps.
+ */
+static inline struct timespec64 inode_get_ctime(const struct inode *inode)
+{
+ struct timespec64 ctime;
+
+ ctime.tv_sec = inode->__i_ctime.tv_sec;
+ ctime.tv_nsec = inode->__i_ctime.tv_nsec & ~I_CTIME_QUERIED;
+
+ return ctime;
+}
+
+/**
+ * inode_set_ctime_to_ts - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @ts: value to set in the ctime field
+ *
+ * Set the ctime in @inode to @ts
+ */
+static inline struct timespec64 inode_set_ctime_to_ts(struct inode *inode,
+ struct timespec64 ts)
+{
+ inode->__i_ctime = ts;
+ return ts;
+}
+
+/**
+ * inode_set_ctime - set the ctime in the inode
+ * @inode: inode in which to set the ctime
+ * @sec: tv_sec value to set
+ * @nsec: tv_nsec value to set
+ *
+ * Set the ctime in @inode to { @sec, @nsec }
+ */
+static inline struct timespec64 inode_set_ctime(struct inode *inode,
+ time64_t sec, long nsec)
+{
+ struct timespec64 ts = { .tv_sec = sec,
+ .tv_nsec = nsec };
+
+ return inode_set_ctime_to_ts(inode, ts);
+}
/*
* Snapshotting support.
@@ -1770,6 +1876,7 @@ struct dir_context {
struct iov_iter;
struct io_uring_cmd;
+struct offset_ctx;
struct file_operations {
struct module *owner;
@@ -1780,7 +1887,6 @@ struct file_operations {
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
unsigned int flags);
- int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -1799,7 +1905,7 @@ struct file_operations {
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
void (*splice_eof)(struct file *file);
- int (*setlease)(struct file *, long, struct file_lock **, void **);
+ int (*setlease)(struct file *, int, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
@@ -1817,6 +1923,13 @@ struct file_operations {
unsigned int poll_flags);
} __randomize_layout;
+/* Wrap a directory iterator that needs exclusive inode access */
+int wrap_directory_iterator(struct file *, struct dir_context *,
+ int (*) (struct file *, struct dir_context *));
+#define WRAP_DIR_ITER(x) \
+ static int shared_##x(struct file *file , struct dir_context *ctx) \
+ { return wrap_directory_iterator(file, ctx, x); }
+
struct inode_operations {
struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
@@ -1844,7 +1957,7 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
- int (*update_time)(struct inode *, struct timespec64 *, int);
+ int (*update_time)(struct inode *, int);
int (*atomic_open)(struct inode *, struct dentry *,
struct file *, unsigned open_flag,
umode_t create_mode);
@@ -1857,6 +1970,7 @@ struct inode_operations {
int (*fileattr_set)(struct mnt_idmap *idmap,
struct dentry *dentry, struct fileattr *fa);
int (*fileattr_get)(struct dentry *dentry, struct fileattr *fa);
+ struct offset_ctx *(*get_offset_ctx)(struct inode *inode);
} ____cacheline_aligned;
static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -1902,6 +2016,10 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
struct file *dst_file, loff_t dst_pos,
loff_t len, unsigned int remap_flags);
+enum freeze_holder {
+ FREEZE_HOLDER_KERNEL = (1U << 0),
+ FREEZE_HOLDER_USERSPACE = (1U << 1),
+};
struct super_operations {
struct inode *(*alloc_inode)(struct super_block *sb);
@@ -1914,9 +2032,9 @@ struct super_operations {
void (*evict_inode) (struct inode *);
void (*put_super) (struct super_block *);
int (*sync_fs)(struct super_block *sb, int wait);
- int (*freeze_super) (struct super_block *);
+ int (*freeze_super) (struct super_block *, enum freeze_holder who);
int (*freeze_fs) (struct super_block *);
- int (*thaw_super) (struct super_block *);
+ int (*thaw_super) (struct super_block *, enum freeze_holder who);
int (*unfreeze_fs) (struct super_block *);
int (*statfs) (struct dentry *, struct kstatfs *);
int (*remount_fs) (struct super_block *, int *, char *);
@@ -2194,7 +2312,7 @@ enum file_time_flags {
extern bool atime_needs_update(const struct path *, struct inode *);
extern void touch_atime(const struct path *);
-int inode_update_time(struct inode *inode, struct timespec64 *time, int flags);
+int inode_update_time(struct inode *inode, int flags);
static inline void file_accessed(struct file *file)
{
@@ -2216,6 +2334,7 @@ struct file_system_type {
#define FS_USERNS_MOUNT 8 /* Can be mounted by userns root */
#define FS_DISALLOW_NOTIFY_PERM 16 /* Disable fanotify permission events */
#define FS_ALLOW_IDMAP 32 /* FS has been updated to handle vfs idmappings. */
+#define FS_MGTIME 64 /* FS uses multigrain timestamps */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. */
int (*init_fs_context)(struct fs_context *);
const struct fs_parameter_spec *parameters;
@@ -2239,6 +2358,17 @@ struct file_system_type {
#define MODULE_ALIAS_FS(NAME) MODULE_ALIAS("fs-" NAME)
+/**
+ * is_mgtime: is this inode using multigrain timestamps
+ * @inode: inode to test for multigrain timestamps
+ *
+ * Return true if the inode uses multigrain timestamps, false otherwise.
+ */
+static inline bool is_mgtime(const struct inode *inode)
+{
+ return inode->i_sb->s_type->fs_flags & FS_MGTIME;
+}
+
extern struct dentry *mount_bdev(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data,
int (*fill_super)(struct super_block *, void *, int));
@@ -2290,8 +2420,8 @@ extern int unregister_filesystem(struct file_system_type *);
extern int vfs_statfs(const struct path *, struct kstatfs *);
extern int user_statfs(const char __user *, struct kstatfs *);
extern int fd_statfs(int, struct kstatfs *);
-extern int freeze_super(struct super_block *super);
-extern int thaw_super(struct super_block *super);
+int freeze_super(struct super_block *super, enum freeze_holder who);
+int thaw_super(struct super_block *super, enum freeze_holder who);
extern __printf(2, 3)
int super_setup_bdi_name(struct super_block *sb, char *fmt, ...);
extern int super_setup_bdi(struct super_block *sb);
@@ -2300,7 +2430,8 @@ extern int current_umask(void);
extern void ihold(struct inode * inode);
extern void iput(struct inode *);
-extern int generic_update_time(struct inode *, struct timespec64 *, int);
+int inode_update_timestamps(struct inode *inode, int flags);
+int generic_update_time(struct inode *, int);
/* /sys/fs */
extern struct kobject *fs_kobj;
@@ -2539,6 +2670,13 @@ static inline bool inode_wrong_type(const struct inode *inode, umode_t mode)
return (inode->i_mode ^ mode) & S_IFMT;
}
+/**
+ * file_start_write - get write access to a superblock for regular file io
+ * @file: the file we want to write to
+ *
+ * This is a variant of sb_start_write() which is a noop on non-regualr file.
+ * Should be matched with a call to file_end_write().
+ */
static inline void file_start_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
@@ -2553,11 +2691,53 @@ static inline bool file_start_write_trylock(struct file *file)
return sb_start_write_trylock(file_inode(file)->i_sb);
}
+/**
+ * file_end_write - drop write access to a superblock of a regular file
+ * @file: the file we wrote to
+ *
+ * Should be matched with a call to file_start_write().
+ */
static inline void file_end_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return;
- __sb_end_write(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ sb_end_write(file_inode(file)->i_sb);
+}
+
+/**
+ * kiocb_start_write - get write access to a superblock for async file io
+ * @iocb: the io context we want to submit the write with
+ *
+ * This is a variant of sb_start_write() for async io submission.
+ * Should be matched with a call to kiocb_end_write().
+ */
+static inline void kiocb_start_write(struct kiocb *iocb)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ sb_start_write(inode->i_sb);
+ /*
+ * Fool lockdep by telling it the lock got released so that it
+ * doesn't complain about the held lock when we return to userspace.
+ */
+ __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
+}
+
+/**
+ * kiocb_end_write - drop write access to a superblock after async file io
+ * @iocb: the io context we sumbitted the write with
+ *
+ * Should be matched with a call to kiocb_start_write().
+ */
+static inline void kiocb_end_write(struct kiocb *iocb)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+
+ /*
+ * Tell lockdep we inherited freeze protection from submission thread.
+ */
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ sb_end_write(inode->i_sb);
}
/*
@@ -2607,8 +2787,7 @@ static inline bool inode_is_open_for_write(const struct inode *inode)
#if defined(CONFIG_IMA) || defined(CONFIG_FILE_LOCKING)
static inline void i_readcount_dec(struct inode *inode)
{
- BUG_ON(!atomic_read(&inode->i_readcount));
- atomic_dec(&inode->i_readcount);
+ BUG_ON(atomic_dec_return(&inode->i_readcount) < 0);
}
static inline void i_readcount_inc(struct inode *inode)
{
@@ -2874,7 +3053,8 @@ extern void page_put_link(void *);
extern int page_symlink(struct inode *inode, const char *symname, int len);
extern const struct inode_operations page_symlink_inode_operations;
extern void kfree_link(void *);
-void generic_fillattr(struct mnt_idmap *, struct inode *, struct kstat *);
+void fill_mg_cmtime(struct kstat *stat, u32 request_mask, struct inode *inode);
+void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *);
void generic_fill_statx_attr(struct inode *inode, struct kstat *stat);
extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
@@ -2913,7 +3093,6 @@ extern int vfs_readlink(struct dentry *, char __user *, int);
extern struct file_system_type *get_filesystem(struct file_system_type *fs);
extern void put_filesystem(struct file_system_type *fs);
extern struct file_system_type *get_fs_type(const char *name);
-extern struct super_block *get_super(struct block_device *);
extern struct super_block *get_active_super(struct block_device *bdev);
extern void drop_super(struct super_block *sb);
extern void drop_super_exclusive(struct super_block *sb);
@@ -2934,6 +3113,8 @@ extern int simple_open(struct inode *inode, struct file *file);
extern int simple_link(struct dentry *, struct inode *, struct dentry *);
extern int simple_unlink(struct inode *, struct dentry *);
extern int simple_rmdir(struct inode *, struct dentry *);
+void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry);
extern int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry);
extern int simple_rename(struct mnt_idmap *, struct inode *,
@@ -2950,7 +3131,7 @@ extern int simple_write_begin(struct file *file, struct address_space *mapping,
extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
-extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
+extern int simple_nosetlease(struct file *, int, struct file_lock **, void **);
extern const struct dentry_operations simple_dentry_operations;
extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags);
@@ -2971,6 +3152,22 @@ extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
const void __user *from, size_t count);
+struct offset_ctx {
+ struct xarray xa;
+ u32 next_offset;
+};
+
+void simple_offset_init(struct offset_ctx *octx);
+int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry);
+void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry);
+int simple_offset_rename_exchange(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry);
+void simple_offset_destroy(struct offset_ctx *octx);
+
+extern const struct file_operations simple_offset_dir_operations;
+
extern int __generic_file_fsync(struct file *, loff_t, loff_t, int);
extern int generic_file_fsync(struct file *, loff_t, loff_t, int);
diff --git a/include/linux/fs_context.h b/include/linux/fs_context.h
index ff6341e09925..96332db693d5 100644
--- a/include/linux/fs_context.h
+++ b/include/linux/fs_context.h
@@ -109,6 +109,7 @@ struct fs_context {
bool need_free:1; /* Need to call ops->free() */
bool global:1; /* Goes into &init_user_ns */
bool oldapi:1; /* Coming from mount(2) */
+ bool exclusive:1; /* create new superblock, reject existing one */
};
struct fs_context_operations {
@@ -150,14 +151,13 @@ extern int get_tree_nodev(struct fs_context *fc,
extern int get_tree_single(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
-extern int get_tree_single_reconf(struct fs_context *fc,
- int (*fill_super)(struct super_block *sb,
- struct fs_context *fc));
extern int get_tree_keyed(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc),
void *key);
+int setup_bdev_super(struct super_block *sb, int sb_flags,
+ struct fs_context *fc);
extern int get_tree_bdev(struct fs_context *fc,
int (*fill_super)(struct super_block *sb,
struct fs_context *fc));
diff --git a/include/linux/fs_stack.h b/include/linux/fs_stack.h
index 54210a42c30d..010d39d0dc1c 100644
--- a/include/linux/fs_stack.h
+++ b/include/linux/fs_stack.h
@@ -24,7 +24,7 @@ static inline void fsstack_copy_attr_times(struct inode *dest,
{
dest->i_atime = src->i_atime;
dest->i_mtime = src->i_mtime;
- dest->i_ctime = src->i_ctime;
+ inode_set_ctime_to_ts(dest, inode_get_ctime(src));
}
#endif /* _LINUX_FS_STACK_H */
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ce156c7704ee..aad9cf8876b5 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -684,7 +684,6 @@ void __init
ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable);
/* defined in arch */
-extern int ftrace_ip_converted(unsigned long ip);
extern int ftrace_dyn_arch_init(void);
extern void ftrace_replace_code(int enable);
extern int ftrace_update_ftrace_func(ftrace_func_t func);
@@ -859,9 +858,6 @@ static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_a
}
#endif
-/* May be defined in arch */
-extern int ftrace_arch_read_dyn_info(char *buf, int size);
-
extern int skip_trace(unsigned long ip);
extern void ftrace_module_init(struct module *mod);
extern void ftrace_module_enable(struct module *mod);
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 20284387b841..e718dbe928ba 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -25,9 +25,6 @@ static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
#endif
vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf);
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
- unsigned long addr, pmd_t *pmd,
- unsigned int flags);
bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
pmd_t *pmd, unsigned long addr, unsigned long next);
int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index bfbc37ce223b..3ac3974b3c78 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1239,9 +1239,6 @@ extern int vmbus_recvpacket_raw(struct vmbus_channel *channel,
u32 *buffer_actual_len,
u64 *requestid);
-
-extern void vmbus_ontimer(unsigned long data);
-
/* Base driver object */
struct hv_driver {
const char *name;
diff --git a/include/linux/intel_rapl.h b/include/linux/intel_rapl.h
index e6936cb25047..33f21bd85dbf 100644
--- a/include/linux/intel_rapl.h
+++ b/include/linux/intel_rapl.h
@@ -100,10 +100,16 @@ struct rapl_package;
#define RAPL_DOMAIN_NAME_LENGTH 16
+union rapl_reg {
+ void __iomem *mmio;
+ u32 msr;
+ u64 val;
+};
+
struct rapl_domain {
char name[RAPL_DOMAIN_NAME_LENGTH];
enum rapl_domain_type id;
- u64 regs[RAPL_DOMAIN_REG_MAX];
+ union rapl_reg regs[RAPL_DOMAIN_REG_MAX];
struct powercap_zone power_zone;
struct rapl_domain_data rdd;
struct rapl_power_limit rpl[NR_POWER_LIMITS];
@@ -116,7 +122,7 @@ struct rapl_domain {
};
struct reg_action {
- u64 reg;
+ union rapl_reg reg;
u64 mask;
u64 value;
int err;
@@ -143,8 +149,8 @@ struct rapl_if_priv {
enum rapl_if_type type;
struct powercap_control_type *control_type;
enum cpuhp_state pcap_rapl_online;
- u64 reg_unit;
- u64 regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
+ union rapl_reg reg_unit;
+ union rapl_reg regs[RAPL_DOMAIN_MAX][RAPL_DOMAIN_REG_MAX];
int limits[RAPL_DOMAIN_MAX];
int (*read_raw)(int id, struct reg_action *ra);
int (*write_raw)(int id, struct reg_action *ra);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index e2b836c2e119..fdc6e64f49d6 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -261,9 +261,10 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
-struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos);
+struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len);
bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags);
void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len);
+bool iomap_dirty_folio(struct address_space *mapping, struct folio *folio);
int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len,
const struct iomap_ops *ops);
int iomap_zero_range(struct inode *inode, loff_t pos, loff_t len,
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index d860499e15e4..44c298aa58d4 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -614,12 +614,6 @@ struct transaction_s
struct journal_head *t_checkpoint_list;
/*
- * Doubly-linked circular list of all buffers submitted for IO while
- * checkpointing. [j_list_lock]
- */
- struct journal_head *t_checkpoint_io_list;
-
- /*
* Doubly-linked circular list of metadata buffers being
* shadowed by log IO. The IO buffers on the iobuf list and
* the shadow buffers on this list match each other one for
@@ -1449,6 +1443,7 @@ extern void jbd2_journal_commit_transaction(journal_t *);
void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
unsigned long jbd2_journal_shrink_checkpoint_list(journal_t *journal, unsigned long *nr_to_scan);
int __jbd2_journal_remove_checkpoint(struct journal_head *);
+int jbd2_journal_try_remove_checkpoint(struct journal_head *jh);
void jbd2_journal_destroy_checkpoint(journal_t *journal);
void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h
index 7308a1a7599b..af796986baee 100644
--- a/include/linux/lsm_hook_defs.h
+++ b/include/linux/lsm_hook_defs.h
@@ -54,6 +54,7 @@ LSM_HOOK(int, 0, bprm_creds_from_file, struct linux_binprm *bprm, struct file *f
LSM_HOOK(int, 0, bprm_check_security, struct linux_binprm *bprm)
LSM_HOOK(void, LSM_RET_VOID, bprm_committing_creds, struct linux_binprm *bprm)
LSM_HOOK(void, LSM_RET_VOID, bprm_committed_creds, struct linux_binprm *bprm)
+LSM_HOOK(int, 0, fs_context_submount, struct fs_context *fc, struct super_block *reference)
LSM_HOOK(int, 0, fs_context_dup, struct fs_context *fc,
struct fs_context *src_sc)
LSM_HOOK(int, -ENOPARAM, fs_context_parse_param, struct fs_context *fc,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2dd73e4f3d8e..34f9dba17c1a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -641,8 +641,14 @@ static inline void vma_numab_state_free(struct vm_area_struct *vma) {}
*/
static inline bool vma_start_read(struct vm_area_struct *vma)
{
- /* Check before locking. A race might cause false locked result. */
- if (vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))
+ /*
+ * Check before locking. A race might cause false locked result.
+ * We can use READ_ONCE() for the mm_lock_seq here, and don't need
+ * ACQUIRE semantics, because this is just a lockless check whose result
+ * we don't rely on for anything - the mm_lock_seq read against which we
+ * need ordering is below.
+ */
+ if (READ_ONCE(vma->vm_lock_seq) == READ_ONCE(vma->vm_mm->mm_lock_seq))
return false;
if (unlikely(down_read_trylock(&vma->vm_lock->lock) == 0))
@@ -653,8 +659,13 @@ static inline bool vma_start_read(struct vm_area_struct *vma)
* False unlocked result is impossible because we modify and check
* vma->vm_lock_seq under vma->vm_lock protection and mm->mm_lock_seq
* modification invalidates all existing locks.
+ *
+ * We must use ACQUIRE semantics for the mm_lock_seq so that if we are
+ * racing with vma_end_write_all(), we only start reading from the VMA
+ * after it has been unlocked.
+ * This pairs with RELEASE semantics in vma_end_write_all().
*/
- if (unlikely(vma->vm_lock_seq == READ_ONCE(vma->vm_mm->mm_lock_seq))) {
+ if (unlikely(vma->vm_lock_seq == smp_load_acquire(&vma->vm_mm->mm_lock_seq))) {
up_read(&vma->vm_lock->lock);
return false;
}
@@ -676,7 +687,7 @@ static bool __is_vma_write_locked(struct vm_area_struct *vma, int *mm_lock_seq)
* current task is holding mmap_write_lock, both vma->vm_lock_seq and
* mm->mm_lock_seq can't be concurrently modified.
*/
- *mm_lock_seq = READ_ONCE(vma->vm_mm->mm_lock_seq);
+ *mm_lock_seq = vma->vm_mm->mm_lock_seq;
return (vma->vm_lock_seq == *mm_lock_seq);
}
@@ -688,7 +699,13 @@ static inline void vma_start_write(struct vm_area_struct *vma)
return;
down_write(&vma->vm_lock->lock);
- vma->vm_lock_seq = mm_lock_seq;
+ /*
+ * We should use WRITE_ONCE() here because we can have concurrent reads
+ * from the early lockless pessimistic check in vma_start_read().
+ * We don't really care about the correctness of that early check, but
+ * we should use WRITE_ONCE() for cleanliness and to keep KCSAN happy.
+ */
+ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
up_write(&vma->vm_lock->lock);
}
@@ -702,7 +719,7 @@ static inline bool vma_try_start_write(struct vm_area_struct *vma)
if (!down_write_trylock(&vma->vm_lock->lock))
return false;
- vma->vm_lock_seq = mm_lock_seq;
+ WRITE_ONCE(vma->vm_lock_seq, mm_lock_seq);
up_write(&vma->vm_lock->lock);
return true;
}
@@ -3404,15 +3421,24 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
* Indicates whether GUP can follow a PROT_NONE mapped page, or whether
* a (NUMA hinting) fault is required.
*/
-static inline bool gup_can_follow_protnone(unsigned int flags)
+static inline bool gup_can_follow_protnone(struct vm_area_struct *vma,
+ unsigned int flags)
{
/*
- * FOLL_FORCE has to be able to make progress even if the VMA is
- * inaccessible. Further, FOLL_FORCE access usually does not represent
- * application behaviour and we should avoid triggering NUMA hinting
- * faults.
+ * If callers don't want to honor NUMA hinting faults, no need to
+ * determine if we would actually have to trigger a NUMA hinting fault.
+ */
+ if (!(flags & FOLL_HONOR_NUMA_FAULT))
+ return true;
+
+ /*
+ * NUMA hinting faults don't apply in inaccessible (PROT_NONE) VMAs.
+ *
+ * Requiring a fault here even for inaccessible VMAs would mean that
+ * FOLL_FORCE cannot make any progress, because handle_mm_fault()
+ * refuses to process NUMA hinting faults in inaccessible VMAs.
*/
- return flags & FOLL_FORCE;
+ return !vma_is_accessible(vma);
}
typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index de10fc797c8e..7d30dc4ff0ff 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -514,6 +514,20 @@ struct vm_area_struct {
};
#ifdef CONFIG_PER_VMA_LOCK
+ /*
+ * Can only be written (using WRITE_ONCE()) while holding both:
+ * - mmap_lock (in write mode)
+ * - vm_lock->lock (in write mode)
+ * Can be read reliably while holding one of:
+ * - mmap_lock (in read or write mode)
+ * - vm_lock->lock (in read or write mode)
+ * Can be read unreliably (using READ_ONCE()) for pessimistic bailout
+ * while holding nothing (except RCU to keep the VMA struct allocated).
+ *
+ * This sequence counter is explicitly allowed to overflow; sequence
+ * counter reuse can only lead to occasional unnecessary use of the
+ * slowpath.
+ */
int vm_lock_seq;
struct vma_lock *vm_lock;
@@ -679,6 +693,20 @@ struct mm_struct {
* by mmlist_lock
*/
#ifdef CONFIG_PER_VMA_LOCK
+ /*
+ * This field has lock-like semantics, meaning it is sometimes
+ * accessed with ACQUIRE/RELEASE semantics.
+ * Roughly speaking, incrementing the sequence number is
+ * equivalent to releasing locks on VMAs; reading the sequence
+ * number can be part of taking a read lock on a VMA.
+ *
+ * Can be modified under write mmap_lock using RELEASE
+ * semantics.
+ * Can be read with no other protection when holding write
+ * mmap_lock.
+ * Can be read with ACQUIRE semantics if not holding write
+ * mmap_lock.
+ */
int mm_lock_seq;
#endif
@@ -1258,6 +1286,15 @@ enum {
FOLL_PCI_P2PDMA = 1 << 10,
/* allow interrupts from generic signals */
FOLL_INTERRUPTIBLE = 1 << 11,
+ /*
+ * Always honor (trigger) NUMA hinting faults.
+ *
+ * FOLL_WRITE implicitly honors NUMA hinting faults because a
+ * PROT_NONE-mapped page is not writable (exceptions with FOLL_FORCE
+ * apply). get_user_pages_fast_only() always implicitly honors NUMA
+ * hinting faults.
+ */
+ FOLL_HONOR_NUMA_FAULT = 1 << 12,
/* See also internal only FOLL flags in mm/internal.h */
};
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index aab8f1b28d26..e05e167dbd16 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -76,8 +76,14 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm)
static inline void vma_end_write_all(struct mm_struct *mm)
{
mmap_assert_write_locked(mm);
- /* No races during update due to exclusive mmap_lock being held */
- WRITE_ONCE(mm->mm_lock_seq, mm->mm_lock_seq + 1);
+ /*
+ * Nobody can concurrently modify mm->mm_lock_seq due to exclusive
+ * mmap_lock being held.
+ * We need RELEASE semantics here to ensure that preceding stores into
+ * the VMA take effect before we unlock it with this store.
+ * Pairs with ACQUIRE semantics in vma_start_read().
+ */
+ smp_store_release(&mm->mm_lock_seq, mm->mm_lock_seq + 1);
}
#else
static inline void vma_end_write_all(struct mm_struct *mm) {}
diff --git a/include/linux/nls.h b/include/linux/nls.h
index 499e486b3722..e0bf8367b274 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h
@@ -47,7 +47,7 @@ enum utf16_endian {
/* nls_base.c */
extern int __register_nls(struct nls_table *, struct module *);
extern int unregister_nls(struct nls_table *);
-extern struct nls_table *load_nls(char *);
+extern struct nls_table *load_nls(const char *charset);
extern void unload_nls(struct nls_table *);
extern struct nls_table *load_nls_default(void);
#define register_nls(nls) __register_nls((nls), THIS_MODULE)
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 716953ee1ebd..d87840acbfb2 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -470,6 +470,19 @@ static inline void *detach_page_private(struct page *page)
return folio_detach_private(page_folio(page));
}
+/*
+ * There are some parts of the kernel which assume that PMD entries
+ * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
+ * limit the maximum allocation order to PMD size. I'm not aware of any
+ * assumptions about maximum order if THP are disabled, but 8 seems like
+ * a good order (that's 1MB if you're using 4kB pages)
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
+#else
+#define MAX_PAGECACHE_ORDER 8
+#endif
+
#ifdef CONFIG_NUMA
struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order);
#else
@@ -501,22 +514,69 @@ pgoff_t page_cache_next_miss(struct address_space *mapping,
pgoff_t page_cache_prev_miss(struct address_space *mapping,
pgoff_t index, unsigned long max_scan);
-#define FGP_ACCESSED 0x00000001
-#define FGP_LOCK 0x00000002
-#define FGP_CREAT 0x00000004
-#define FGP_WRITE 0x00000008
-#define FGP_NOFS 0x00000010
-#define FGP_NOWAIT 0x00000020
-#define FGP_FOR_MMAP 0x00000040
-#define FGP_STABLE 0x00000080
+/**
+ * typedef fgf_t - Flags for getting folios from the page cache.
+ *
+ * Most users of the page cache will not need to use these flags;
+ * there are convenience functions such as filemap_get_folio() and
+ * filemap_lock_folio(). For users which need more control over exactly
+ * what is done with the folios, these flags to __filemap_get_folio()
+ * are available.
+ *
+ * * %FGP_ACCESSED - The folio will be marked accessed.
+ * * %FGP_LOCK - The folio is returned locked.
+ * * %FGP_CREAT - If no folio is present then a new folio is allocated,
+ * added to the page cache and the VM's LRU list. The folio is
+ * returned locked.
+ * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
+ * folio is already in cache. If the folio was allocated, unlock it
+ * before returning so the caller can do the same dance.
+ * * %FGP_WRITE - The folio will be written to by the caller.
+ * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
+ * * %FGP_NOWAIT - Don't block on the folio lock.
+ * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
+ * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin()
+ * implementation.
+ */
+typedef unsigned int __bitwise fgf_t;
+
+#define FGP_ACCESSED ((__force fgf_t)0x00000001)
+#define FGP_LOCK ((__force fgf_t)0x00000002)
+#define FGP_CREAT ((__force fgf_t)0x00000004)
+#define FGP_WRITE ((__force fgf_t)0x00000008)
+#define FGP_NOFS ((__force fgf_t)0x00000010)
+#define FGP_NOWAIT ((__force fgf_t)0x00000020)
+#define FGP_FOR_MMAP ((__force fgf_t)0x00000040)
+#define FGP_STABLE ((__force fgf_t)0x00000080)
+#define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */
#define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE)
+/**
+ * fgf_set_order - Encode a length in the fgf_t flags.
+ * @size: The suggested size of the folio to create.
+ *
+ * The caller of __filemap_get_folio() can use this to suggest a preferred
+ * size for the folio that is created. If there is already a folio at
+ * the index, it will be returned, no matter what its size. If a folio
+ * is freshly created, it may be of a different size than requested
+ * due to alignment constraints, memory pressure, or the presence of
+ * other folios at nearby indices.
+ */
+static inline fgf_t fgf_set_order(size_t size)
+{
+ unsigned int shift = ilog2(size);
+
+ if (shift <= PAGE_SHIFT)
+ return 0;
+ return (__force fgf_t)((shift - PAGE_SHIFT) << 26);
+}
+
void *filemap_get_entry(struct address_space *mapping, pgoff_t index);
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp);
+ fgf_t fgp_flags, gfp_t gfp);
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp);
+ fgf_t fgp_flags, gfp_t gfp);
/**
* filemap_get_folio - Find and get a folio.
@@ -590,7 +650,7 @@ static inline struct page *find_get_page(struct address_space *mapping,
}
static inline struct page *find_get_page_flags(struct address_space *mapping,
- pgoff_t offset, int fgp_flags)
+ pgoff_t offset, fgf_t fgp_flags)
{
return pagecache_get_page(mapping, offset, fgp_flags, 0);
}
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 27a6df448ee5..27cd1e59ccf7 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -6,6 +6,16 @@
struct mm_walk;
+/* Locking requirement during a page walk. */
+enum page_walk_lock {
+ /* mmap_lock should be locked for read to stabilize the vma tree */
+ PGWALK_RDLOCK = 0,
+ /* vma will be write-locked during the walk */
+ PGWALK_WRLOCK = 1,
+ /* vma is expected to be already write-locked during the walk */
+ PGWALK_WRLOCK_VERIFY = 2,
+};
+
/**
* struct mm_walk_ops - callbacks for walk_page_range
* @pgd_entry: if set, called for each non-empty PGD (top-level) entry
@@ -66,6 +76,7 @@ struct mm_walk_ops {
int (*pre_vma)(unsigned long start, unsigned long end,
struct mm_walk *walk);
void (*post_vma)(struct mm_walk *walk);
+ enum page_walk_lock walk_lock;
};
/*
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 02e0086b10f6..608a9eb86bff 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -269,10 +269,10 @@ bool pipe_is_unprivileged_user(void);
/* for F_SETPIPE_SZ and F_GETPIPE_SZ */
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots);
-long pipe_fcntl(struct file *, unsigned int, unsigned long arg);
+long pipe_fcntl(struct file *, unsigned int, unsigned int arg);
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice);
int create_pipe_files(struct file **, int);
-unsigned int round_pipe_size(unsigned long size);
+unsigned int round_pipe_size(unsigned int size);
#endif
diff --git a/include/linux/pm_wakeirq.h b/include/linux/pm_wakeirq.h
index dd42d16945d0..d9642c6cf852 100644
--- a/include/linux/pm_wakeirq.h
+++ b/include/linux/pm_wakeirq.h
@@ -10,8 +10,6 @@ extern int dev_pm_set_wake_irq(struct device *dev, int irq);
extern int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq);
extern int dev_pm_set_dedicated_wake_irq_reverse(struct device *dev, int irq);
extern void dev_pm_clear_wake_irq(struct device *dev);
-extern void dev_pm_enable_wake_irq(struct device *dev);
-extern void dev_pm_disable_wake_irq(struct device *dev);
#else /* !CONFIG_PM */
@@ -34,13 +32,5 @@ static inline void dev_pm_clear_wake_irq(struct device *dev)
{
}
-static inline void dev_pm_enable_wake_irq(struct device *dev)
-{
-}
-
-static inline void dev_pm_disable_wake_irq(struct device *dev)
-{
-}
-
#endif /* CONFIG_PM */
#endif /* _LINUX_PM_WAKEIRQ_H */
diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h
index b83a3f944f28..b068e2e60939 100644
--- a/include/linux/prefetch.h
+++ b/include/linux/prefetch.h
@@ -25,11 +25,10 @@ struct page;
prefetch() should be defined by the architecture, if not, the
#define below provides a no-op define.
- There are 3 prefetch() macros:
+ There are 2 prefetch() macros:
prefetch(x) - prefetches the cacheline at "x" for read
prefetchw(x) - prefetches the cacheline at "x" for write
- spin_lock_prefetch(x) - prefetches the spinlock *x for taking
there is also PREFETCH_STRIDE which is the architecure-preferred
"lookahead" size for prefetching streamed operations.
@@ -44,10 +43,6 @@ struct page;
#define prefetchw(x) __builtin_prefetch(x,1)
#endif
-#ifndef ARCH_HAS_SPINLOCK_PREFETCH
-#define spin_lock_prefetch(x) prefetchw(x)
-#endif
-
#ifndef PREFETCH_STRIDE
#define PREFETCH_STRIDE (4*L1_CACHE_BYTES)
#endif
diff --git a/include/linux/raid_class.h b/include/linux/raid_class.h
index 6a9b177d5c41..e50416ba9cd9 100644
--- a/include/linux/raid_class.h
+++ b/include/linux/raid_class.h
@@ -77,7 +77,3 @@ DEFINE_RAID_ATTRIBUTE(enum raid_state, state)
struct raid_template *raid_class_attach(struct raid_function_template *);
void raid_class_release(struct raid_template *);
-
-int __must_check raid_component_add(struct raid_template *, struct device *,
- struct device *);
-
diff --git a/include/linux/security.h b/include/linux/security.h
index 32828502f09e..bac98ea18f78 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -293,6 +293,7 @@ int security_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file);
int security_bprm_check(struct linux_binprm *bprm);
void security_bprm_committing_creds(struct linux_binprm *bprm);
void security_bprm_committed_creds(struct linux_binprm *bprm);
+int security_fs_context_submount(struct fs_context *fc, struct super_block *reference);
int security_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc);
int security_fs_context_parse_param(struct fs_context *fc, struct fs_parameter *param);
int security_sb_alloc(struct super_block *sb);
@@ -629,6 +630,11 @@ static inline void security_bprm_committed_creds(struct linux_binprm *bprm)
{
}
+static inline int security_fs_context_submount(struct fs_context *fc,
+ struct super_block *reference)
+{
+ return 0;
+}
static inline int security_fs_context_dup(struct fs_context *fc,
struct fs_context *src_fc)
{
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 6d58c57acdaa..a156d2ed8d9e 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -459,7 +459,8 @@ struct uart_port {
struct serial_rs485 *rs485);
int (*iso7816_config)(struct uart_port *,
struct serial_iso7816 *iso7816);
- int ctrl_id; /* optional serial core controller id */
+ unsigned int ctrl_id; /* optional serial core controller id */
+ unsigned int port_id; /* optional serial core port id */
unsigned int irq; /* irq number */
unsigned long irqflags; /* irq flags */
unsigned int uartclk; /* base uart clock */
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 9029abd29b1c..6b0c626620f5 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -13,6 +13,10 @@
/* inode in-kernel data */
+#ifdef CONFIG_TMPFS_QUOTA
+#define SHMEM_MAXQUOTAS 2
+#endif
+
struct shmem_inode_info {
spinlock_t lock;
unsigned int seals; /* shmem seals */
@@ -27,6 +31,10 @@ struct shmem_inode_info {
atomic_t stop_eviction; /* hold when working on inode */
struct timespec64 i_crtime; /* file creation time */
unsigned int fsflags; /* flags for FS_IOC_[SG]ETFLAGS */
+#ifdef CONFIG_TMPFS_QUOTA
+ struct dquot *i_dquot[MAXQUOTAS];
+#endif
+ struct offset_ctx dir_offsets; /* stable entry offsets */
struct inode vfs_inode;
};
@@ -35,11 +43,18 @@ struct shmem_inode_info {
(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL | FS_NOATIME_FL)
#define SHMEM_FL_INHERITED (FS_NODUMP_FL | FS_NOATIME_FL)
+struct shmem_quota_limits {
+ qsize_t usrquota_bhardlimit; /* Default user quota block hard limit */
+ qsize_t usrquota_ihardlimit; /* Default user quota inode hard limit */
+ qsize_t grpquota_bhardlimit; /* Default group quota block hard limit */
+ qsize_t grpquota_ihardlimit; /* Default group quota inode hard limit */
+};
+
struct shmem_sb_info {
unsigned long max_blocks; /* How many blocks are allowed */
struct percpu_counter used_blocks; /* How many are allocated */
unsigned long max_inodes; /* How many inodes are allowed */
- unsigned long free_inodes; /* How many are left for allocation */
+ unsigned long free_ispace; /* How much ispace left for allocation */
raw_spinlock_t stat_lock; /* Serialize shmem_sb_info changes */
umode_t mode; /* Mount mode for root directory */
unsigned char huge; /* Whether to try for hugepages */
@@ -53,6 +68,7 @@ struct shmem_sb_info {
spinlock_t shrinklist_lock; /* Protects shrinklist */
struct list_head shrinklist; /* List of shinkable inodes */
unsigned long shrinklist_len; /* Length of shrinklist */
+ struct shmem_quota_limits qlimits; /* Default quota limits */
};
static inline struct shmem_inode_info *SHMEM_I(struct inode *inode)
@@ -172,4 +188,17 @@ extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
#endif /* CONFIG_SHMEM */
#endif /* CONFIG_USERFAULTFD */
+/*
+ * Used space is stored as unsigned 64-bit value in bytes but
+ * quota core supports only signed 64-bit values so use that
+ * as a limit
+ */
+#define SHMEM_QUOTA_MAX_SPC_LIMIT 0x7fffffffffffffffLL /* 2^63-1 */
+#define SHMEM_QUOTA_MAX_INO_LIMIT 0x7fffffffffffffffLL
+
+#ifdef CONFIG_TMPFS_QUOTA
+extern const struct dquot_operations shmem_quota_operations;
+extern struct quota_format_type shmem_quota_format;
+#endif /* CONFIG_TMPFS_QUOTA */
+
#endif
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index 054d7911bfc9..c1637515a8a4 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -62,6 +62,7 @@ struct sk_psock_progs {
enum sk_psock_state_bits {
SK_PSOCK_TX_ENABLED,
+ SK_PSOCK_RX_STRP_ENABLED,
};
struct sk_psock_link {
diff --git a/include/linux/spi/corgi_lcd.h b/include/linux/spi/corgi_lcd.h
index 0b857616919c..fc6c1515dc54 100644
--- a/include/linux/spi/corgi_lcd.h
+++ b/include/linux/spi/corgi_lcd.h
@@ -15,4 +15,6 @@ struct corgi_lcd_platform_data {
void (*kick_battery)(void);
};
+void corgi_lcd_limit_intensity(int limit);
+
#endif /* __LINUX_SPI_CORGI_LCD_H */
diff --git a/include/linux/spi/spi-mem.h b/include/linux/spi/spi-mem.h
index 8e984d75f5b6..6b0a7dc48a4b 100644
--- a/include/linux/spi/spi-mem.h
+++ b/include/linux/spi/spi-mem.h
@@ -101,6 +101,7 @@ struct spi_mem_op {
u8 nbytes;
u8 buswidth;
u8 dtr : 1;
+ u8 __pad : 7;
u16 opcode;
} cmd;
@@ -108,6 +109,7 @@ struct spi_mem_op {
u8 nbytes;
u8 buswidth;
u8 dtr : 1;
+ u8 __pad : 7;
u64 val;
} addr;
@@ -115,12 +117,14 @@ struct spi_mem_op {
u8 nbytes;
u8 buswidth;
u8 dtr : 1;
+ u8 __pad : 7;
} dummy;
struct {
u8 buswidth;
u8 dtr : 1;
u8 ecc : 1;
+ u8 __pad : 6;
enum spi_mem_data_dir dir;
unsigned int nbytes;
union {
diff --git a/include/linux/swait.h b/include/linux/swait.h
index 6a8c22b8c2a5..d324419482a0 100644
--- a/include/linux/swait.h
+++ b/include/linux/swait.h
@@ -146,7 +146,7 @@ static inline bool swq_has_sleeper(struct swait_queue_head *wq)
extern void swake_up_one(struct swait_queue_head *q);
extern void swake_up_all(struct swait_queue_head *q);
-extern void swake_up_locked(struct swait_queue_head *q);
+extern void swake_up_locked(struct swait_queue_head *q, int wake_flags);
extern void prepare_to_swait_exclusive(struct swait_queue_head *q, struct swait_queue *wait, int state);
extern long prepare_to_swait_event(struct swait_queue_head *q, struct swait_queue *wait, int state);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 03e3d0121d5e..5690aef7b3a8 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -438,8 +438,10 @@ asmlinkage long sys_chdir(const char __user *filename);
asmlinkage long sys_fchdir(unsigned int fd);
asmlinkage long sys_chroot(const char __user *filename);
asmlinkage long sys_fchmod(unsigned int fd, umode_t mode);
-asmlinkage long sys_fchmodat(int dfd, const char __user * filename,
+asmlinkage long sys_fchmodat(int dfd, const char __user *filename,
umode_t mode);
+asmlinkage long sys_fchmodat2(int dfd, const char __user *filename,
+ umode_t mode, unsigned int flags);
asmlinkage long sys_fchownat(int dfd, const char __user *filename, uid_t user,
gid_t group, int flag);
asmlinkage long sys_fchown(unsigned int fd, uid_t user, gid_t group);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b4c08ac86983..91a37c99ba66 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -513,7 +513,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog)
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn);
- queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn);
+ WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
}
static inline void tcp_move_syn(struct tcp_sock *tp,
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 87837094d549..dee66ade89a0 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -301,14 +301,14 @@ int thermal_acpi_critical_trip_temp(struct acpi_device *adev, int *ret_temp);
#ifdef CONFIG_THERMAL
struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
void *, struct thermal_zone_device_ops *,
- struct thermal_zone_params *, int, int);
+ const struct thermal_zone_params *, int, int);
void thermal_zone_device_unregister(struct thermal_zone_device *);
struct thermal_zone_device *
thermal_zone_device_register_with_trips(const char *, struct thermal_trip *, int, int,
void *, struct thermal_zone_device_ops *,
- struct thermal_zone_params *, int, int);
+ const struct thermal_zone_params *, int, int);
void *thermal_zone_device_priv(struct thermal_zone_device *tzd);
const char *thermal_zone_device_type(struct thermal_zone_device *tzd);
@@ -348,7 +348,7 @@ void thermal_zone_device_critical(struct thermal_zone_device *tz);
static inline struct thermal_zone_device *thermal_zone_device_register(
const char *type, int trips, int mask, void *devdata,
struct thermal_zone_device_ops *ops,
- struct thermal_zone_params *tzp,
+ const struct thermal_zone_params *tzp,
int passive_delay, int polling_delay)
{ return ERR_PTR(-ENODEV); }
static inline void thermal_zone_device_unregister(
diff --git a/include/linux/tpm.h b/include/linux/tpm.h
index 6a1e8f157255..4ee9d13749ad 100644
--- a/include/linux/tpm.h
+++ b/include/linux/tpm.h
@@ -283,6 +283,7 @@ enum tpm_chip_flags {
TPM_CHIP_FLAG_FIRMWARE_POWER_MANAGED = BIT(6),
TPM_CHIP_FLAG_FIRMWARE_UPGRADE = BIT(7),
TPM_CHIP_FLAG_SUSPENDED = BIT(8),
+ TPM_CHIP_FLAG_HWRNG_DISABLED = BIT(9),
};
#define to_tpm_chip(d) container_of(d, struct tpm_chip, dev)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 3930e676436c..1e8bbdb8da90 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -59,6 +59,17 @@ int trace_raw_output_prep(struct trace_iterator *iter,
extern __printf(2, 3)
void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...);
+/* Used to find the offset and length of dynamic fields in trace events */
+struct trace_dynamic_info {
+#ifdef CONFIG_CPU_BIG_ENDIAN
+ u16 offset;
+ u16 len;
+#else
+ u16 len;
+ u16 offset;
+#endif
+};
+
/*
* The trace entry - the most basic unit of tracing. This is what
* is printed in the end as a single line in the trace output, such as:
diff --git a/include/linux/uio.h b/include/linux/uio.h
index ff81e5ccaef2..42bce38a8e87 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -163,7 +163,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs)
return ret;
}
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset,
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
size_t bytes, struct iov_iter *i);
void iov_iter_advance(struct iov_iter *i, size_t bytes);
void iov_iter_revert(struct iov_iter *i, size_t bytes);
@@ -184,6 +184,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset,
{
return copy_page_to_iter(&folio->page, offset, bytes, i);
}
+
+static inline size_t copy_folio_from_iter_atomic(struct folio *folio,
+ size_t offset, size_t bytes, struct iov_iter *i)
+{
+ return copy_page_from_iter_atomic(&folio->page, offset, bytes, i);
+}
+
size_t copy_page_to_iter_nofault(struct page *page, unsigned offset,
size_t bytes, struct iov_iter *i);
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index bdf8de2cdd93..7b4dd69555e4 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -155,6 +155,10 @@ retry:
if (gso_type & SKB_GSO_UDP)
nh_off -= thlen;
+ /* Kernel has a special handling for GSO_BY_FRAGS. */
+ if (gso_size == GSO_BY_FRAGS)
+ return -EINVAL;
+
/* Too small packets are not really GSO ones. */
if (skb->len - nh_off > gso_size) {
shinfo->gso_size = gso_size;
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a0307b516b09..5ec7739400f4 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -210,6 +210,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq
}
int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
unsigned int mode, void *key, wait_queue_entry_t *bookmark);
@@ -237,6 +238,8 @@ void __wake_up_pollfree(struct wait_queue_head *wq_head);
#define key_to_poll(m) ((__force __poll_t)(uintptr_t)(void *)(m))
#define wake_up_poll(x, m) \
__wake_up(x, TASK_NORMAL, 1, poll_to_key(m))
+#define wake_up_poll_on_current_cpu(x, m) \
+ __wake_up_on_current_cpu(x, TASK_NORMAL, poll_to_key(m))
#define wake_up_locked_poll(x, m) \
__wake_up_locked_key((x), TASK_NORMAL, poll_to_key(m))
#define wake_up_interruptible_poll(x, m) \
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index fba937999fbf..083387c00f0c 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -375,11 +375,6 @@ void tag_pages_for_writeback(struct address_space *mapping,
pgoff_t start, pgoff_t end);
bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
-void folio_account_redirty(struct folio *folio);
-static inline void account_page_redirty(struct page *page)
-{
- folio_account_redirty(page_folio(page));
-}
bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
bool redirty_page_for_writepage(struct writeback_control *, struct page *);
diff --git a/include/linux/xattr.h b/include/linux/xattr.h
index d591ef59aa98..d20051865800 100644
--- a/include/linux/xattr.h
+++ b/include/linux/xattr.h
@@ -114,13 +114,15 @@ struct simple_xattr {
};
void simple_xattrs_init(struct simple_xattrs *xattrs);
-void simple_xattrs_free(struct simple_xattrs *xattrs);
+void simple_xattrs_free(struct simple_xattrs *xattrs, size_t *freed_space);
+size_t simple_xattr_space(const char *name, size_t size);
struct simple_xattr *simple_xattr_alloc(const void *value, size_t size);
+void simple_xattr_free(struct simple_xattr *xattr);
int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
void *buffer, size_t size);
-int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
- const void *value, size_t size, int flags,
- ssize_t *removed_size);
+struct simple_xattr *simple_xattr_set(struct simple_xattrs *xattrs,
+ const char *name, const void *value,
+ size_t size, int flags);
ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
char *buffer, size_t size);
void simple_xattr_add(struct simple_xattrs *xattrs,
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 9654567cfae3..e01d52cb668c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -593,9 +593,7 @@ struct hci_dev {
const char *fw_info;
struct dentry *debugfs;
-#ifdef CONFIG_DEV_COREDUMP
struct hci_devcoredump dump;
-#endif
struct device dev;
@@ -822,6 +820,7 @@ struct hci_conn_params {
struct hci_conn *conn;
bool explicit_connect;
+ /* Accessed without hdev->lock: */
hci_conn_flags_t flags;
u8 privacy_mode;
};
@@ -1573,7 +1572,11 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type);
void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type);
void hci_conn_params_clear_disabled(struct hci_dev *hdev);
+void hci_conn_params_free(struct hci_conn_params *param);
+void hci_pend_le_list_del_init(struct hci_conn_params *param);
+void hci_pend_le_list_add(struct hci_conn_params *param,
+ struct list_head *list);
struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
bdaddr_t *addr,
u8 addr_type);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index b57bec6e737e..5b8b1b644a2d 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -277,7 +277,7 @@ struct bond_vlan_tag {
unsigned short vlan_id;
};
-/**
+/*
* Returns NULL if the net_device does not belong to any of the bond's slaves
*
* Caller must hold bond lock for read
@@ -722,23 +722,14 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
}
/* Caller must hold rcu_read_lock() for read */
-static inline bool bond_slave_has_mac_rx(struct bonding *bond, const u8 *mac)
+static inline bool bond_slave_has_mac_rcu(struct bonding *bond, const u8 *mac)
{
struct list_head *iter;
struct slave *tmp;
- struct netdev_hw_addr *ha;
bond_for_each_slave_rcu(bond, tmp, iter)
if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
return true;
-
- if (netdev_uc_empty(bond->dev))
- return false;
-
- netdev_for_each_uc_addr(ha, bond->dev)
- if (ether_addr_equal_64bits(mac, ha->addr))
- return true;
-
return false;
}
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7c7d03aa9d06..d6fa7c8767ad 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -562,6 +562,9 @@ ieee80211_get_sband_iftype_data(const struct ieee80211_supported_band *sband,
if (WARN_ON(iftype >= NL80211_IFTYPE_MAX))
return NULL;
+ if (iftype == NL80211_IFTYPE_AP_VLAN)
+ iftype = NL80211_IFTYPE_AP;
+
for (i = 0; i < sband->n_iftype_data; i++) {
const struct ieee80211_sband_iftype_data *data =
&sband->iftype_data[i];
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index e00057984489..f79ce133e51a 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -170,7 +170,8 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b)
}
/**
- * @WPAN_PHY_FLAG_TRANSMIT_POWER: Indicates that transceiver will support
+ * enum wpan_phy_flags - WPAN PHY state flags
+ * @WPAN_PHY_FLAG_TXPOWER: Indicates that transceiver will support
* transmit power setting.
* @WPAN_PHY_FLAG_CCA_ED_LEVEL: Indicates that transceiver will support cca ed
* level setting.
diff --git a/include/net/codel.h b/include/net/codel.h
index 5fed2f16cb8d..aa80f744826c 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -145,8 +145,8 @@ struct codel_vars {
* @maxpacket: largest packet we've seen so far
* @drop_count: temp count of dropped packets in dequeue()
* @drop_len: bytes of dropped packets in dequeue()
- * ecn_mark: number of packets we ECN marked instead of dropping
- * ce_mark: number of packets CE marked because sojourn time was above ce_threshold
+ * @ecn_mark: number of packets we ECN marked instead of dropping
+ * @ce_mark: number of packets CE marked because sojourn time was above ce_threshold
*/
struct codel_stats {
u32 maxpacket;
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 9a3c51aa6e81..0cdb4b16e5b5 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -221,7 +221,7 @@ struct devlink_dpipe_field {
/**
* struct devlink_dpipe_header - dpipe header object
* @name: header name
- * @id: index, global/local detrmined by global bit
+ * @id: index, global/local determined by global bit
* @fields: fields
* @fields_count: number of fields
* @global: indicates if header is shared like most protocol header
@@ -241,7 +241,7 @@ struct devlink_dpipe_header {
* @header_index: header index (packets can have several headers of same
* type like in case of tunnels)
* @header: header
- * @fieled_id: field index
+ * @field_id: field index
*/
struct devlink_dpipe_match {
enum devlink_dpipe_match_type type;
@@ -256,7 +256,7 @@ struct devlink_dpipe_match {
* @header_index: header index (packets can have several headers of same
* type like in case of tunnels)
* @header: header
- * @fieled_id: field index
+ * @field_id: field index
*/
struct devlink_dpipe_action {
enum devlink_dpipe_action_type type;
@@ -292,7 +292,7 @@ struct devlink_dpipe_value {
* struct devlink_dpipe_entry - table entry object
* @index: index of the entry in the table
* @match_values: match values
- * @matche_values_count: count of matches tuples
+ * @match_values_count: count of matches tuples
* @action_values: actions values
* @action_values_count: count of actions values
* @counter: value of counter
@@ -342,7 +342,9 @@ struct devlink_dpipe_table_ops;
*/
struct devlink_dpipe_table {
void *priv;
+ /* private: */
struct list_head list;
+ /* public: */
const char *name;
bool counters_enabled;
bool counter_control_extern;
@@ -355,13 +357,13 @@ struct devlink_dpipe_table {
/**
* struct devlink_dpipe_table_ops - dpipe_table ops
- * @actions_dump - dumps all tables actions
- * @matches_dump - dumps all tables matches
- * @entries_dump - dumps all active entries in the table
- * @counters_set_update - when changing the counter status hardware sync
+ * @actions_dump: dumps all tables actions
+ * @matches_dump: dumps all tables matches
+ * @entries_dump: dumps all active entries in the table
+ * @counters_set_update: when changing the counter status hardware sync
* maybe needed to allocate/free counter related
* resources
- * @size_get - get size
+ * @size_get: get size
*/
struct devlink_dpipe_table_ops {
int (*actions_dump)(void *priv, struct sk_buff *skb);
@@ -374,8 +376,8 @@ struct devlink_dpipe_table_ops {
/**
* struct devlink_dpipe_headers - dpipe headers
- * @headers - header array can be shared (global bit) or driver specific
- * @headers_count - count of headers
+ * @headers: header array can be shared (global bit) or driver specific
+ * @headers_count: count of headers
*/
struct devlink_dpipe_headers {
struct devlink_dpipe_header **headers;
@@ -387,7 +389,7 @@ struct devlink_dpipe_headers {
* @size_min: minimum size which can be set
* @size_max: maximum size which can be set
* @size_granularity: size granularity
- * @size_unit: resource's basic unit
+ * @unit: resource's basic unit
*/
struct devlink_resource_size_params {
u64 size_min;
@@ -457,6 +459,7 @@ struct devlink_flash_notify {
/**
* struct devlink_param - devlink configuration parameter data
+ * @id: devlink parameter id number
* @name: name of the parameter
* @generic: indicates if the parameter is generic or driver specific
* @type: parameter type
@@ -632,6 +635,7 @@ enum devlink_param_generic_id {
* struct devlink_flash_update_params - Flash Update parameters
* @fw: pointer to the firmware data to update from
* @component: the flash component to update
+ * @overwrite_mask: which types of flash update are supported (may be %0)
*
* With the exception of fw, drivers must opt-in to parameters by
* setting the appropriate bit in the supported_flash_update_params field in
diff --git a/include/net/gro.h b/include/net/gro.h
index 75efa6fb8441..88644b3ca660 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -452,6 +452,49 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
gro_normal_list(napi);
}
+/* This function is the alternative of 'inet_iif' and 'inet_sdif'
+ * functions in case we can not rely on fields of IPCB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ *iif = inet_iif(skb) ?: skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
+/* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
+ * functions in case we can not rely on fields of IP6CB.
+ *
+ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
+ * The caller must hold the RCU read lock.
+ */
+static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
+{
+ /* using skb->dev->ifindex because skb_dst(skb) is not initialized */
+ *iif = skb->dev->ifindex;
+ *sdif = 0;
+
+#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
+ if (netif_is_l3_slave(skb->dev)) {
+ struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
+
+ *sdif = *iif;
+ *iif = master ? master->ifindex : 0;
+ }
+#endif
+}
+
extern struct list_head offload_base;
#endif /* _NET_IPV6_GRO_H */
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 325ad893f624..153960663ce4 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -29,7 +29,7 @@ struct fqdir {
};
/**
- * fragment queue flags
+ * enum: fragment queue flags
*
* @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index caa20a905531..491ceb7ebe5d 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark &&
- READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+ u32 mark = READ_ONCE(sk->sk_mark);
+
+ if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
- return sk->sk_mark;
+ return mark;
}
static inline int inet_request_bound_dev_if(const struct sock *sk,
@@ -221,8 +222,8 @@ struct inet_sock {
__s16 uc_ttl;
__u16 cmsg_flags;
struct ip_options_rcu __rcu *inet_opt;
+ atomic_t inet_id;
__be16 inet_sport;
- __u16 inet_id;
__u8 tos;
__u8 min_ttl;
diff --git a/include/net/ip.h b/include/net/ip.h
index 50d435855ae2..19adacd5ece0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
{
ipcm_init(ipcm);
- ipcm->sockc.mark = inet->sk.sk_mark;
+ ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
ipcm->sockc.tsflags = inet->sk.sk_tsflags;
ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
ipcm->addr = inet->inet_saddr;
@@ -538,8 +538,19 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb,
* generator as much as we can.
*/
if (sk && inet_sk(sk)->inet_daddr) {
- iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += segs;
+ int val;
+
+ /* avoid atomic operations for TCP,
+ * as we hold socket lock at this point.
+ */
+ if (sk_is_tcp(sk)) {
+ sock_owned_by_me(sk);
+ val = atomic_read(&inet_sk(sk)->inet_id);
+ atomic_set(&inet_sk(sk)->inet_id, val + segs);
+ } else {
+ val = atomic_add_return(segs, &inet_sk(sk)->inet_id);
+ }
+ iph->id = htons(val);
return;
}
if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) {
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7332296eca44..2acc4c808d45 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -752,12 +752,8 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a)
/* more secured version of ipv6_addr_hash() */
static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
{
- u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
-
- return jhash_3words(v,
- (__force u32)a->s6_addr32[2],
- (__force u32)a->s6_addr32[3],
- initval);
+ return jhash2((__force const u32 *)a->s6_addr32,
+ ARRAY_SIZE(a->s6_addr32), initval);
}
static inline bool ipv6_addr_loopback(const struct in6_addr *a)
diff --git a/include/net/llc_conn.h b/include/net/llc_conn.h
index 2c1ea3414640..374411b3066c 100644
--- a/include/net/llc_conn.h
+++ b/include/net/llc_conn.h
@@ -111,7 +111,7 @@ void llc_conn_resend_i_pdu_as_cmd(struct sock *sk, u8 nr, u8 first_p_bit);
void llc_conn_resend_i_pdu_as_rsp(struct sock *sk, u8 nr, u8 first_f_bit);
int llc_conn_remove_acked_pdus(struct sock *conn, u8 nr, u16 *how_many_unacked);
struct sock *llc_lookup_established(struct llc_sap *sap, struct llc_addr *daddr,
- struct llc_addr *laddr);
+ struct llc_addr *laddr, const struct net *net);
void llc_sap_add_socket(struct llc_sap *sap, struct sock *sk);
void llc_sap_remove_socket(struct llc_sap *sap, struct sock *sk);
diff --git a/include/net/llc_pdu.h b/include/net/llc_pdu.h
index 49aa79c7b278..7e73f8e5e497 100644
--- a/include/net/llc_pdu.h
+++ b/include/net/llc_pdu.h
@@ -269,7 +269,7 @@ static inline void llc_pdu_decode_sa(struct sk_buff *skb, u8 *sa)
/**
* llc_pdu_decode_da - extracts dest address of input frame
* @skb: input skb that destination address must be extracted from it
- * @sa: pointer to destination address (6 byte array).
+ * @da: pointer to destination address (6 byte array).
*
* This function extracts destination address(MAC) of input frame.
*/
@@ -321,7 +321,7 @@ static inline void llc_pdu_init_as_ui_cmd(struct sk_buff *skb)
/**
* llc_pdu_init_as_test_cmd - sets PDU as TEST
- * @skb - Address of the skb to build
+ * @skb: Address of the skb to build
*
* Sets a PDU as TEST
*/
@@ -369,6 +369,8 @@ struct llc_xid_info {
/**
* llc_pdu_init_as_xid_cmd - sets bytes 3, 4 & 5 of LLC header as XID
* @skb: input skb that header must be set into it.
+ * @svcs_supported: The class of the LLC (I or II)
+ * @rx_window: The size of the receive window of the LLC
*
* This function sets third,fourth,fifth and sixth bytes of LLC header as
* a XID PDU.
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 3a8a2d2c58c3..2a55ae932c56 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -6612,6 +6612,7 @@ void ieee80211_stop_rx_ba_session(struct ieee80211_vif *vif, u16 ba_rx_bitmap,
* marks frames marked in the bitmap as having been filtered. Afterwards, it
* checks if any frames in the window starting from @ssn can now be released
* (in case they were only waiting for frames that were filtered.)
+ * (Only work correctly if @max_rx_aggregation_subframes <= 64 frames)
*/
void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
u16 ssn, u64 filtered,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 640441a2f926..dd40c75011d2 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -512,6 +512,7 @@ struct nft_set_elem_expr {
*
* @list: table set list node
* @bindings: list of set bindings
+ * @refs: internal refcounting for async set destruction
* @table: table this set belongs to
* @net: netnamespace this set belongs to
* @name: name of the set
@@ -533,6 +534,7 @@ struct nft_set_elem_expr {
* @expr: stateful expression
* @ops: set ops
* @flags: set flags
+ * @dead: set will be freed, never cleared
* @genmask: generation mask
* @klen: key length
* @dlen: data length
@@ -541,6 +543,7 @@ struct nft_set_elem_expr {
struct nft_set {
struct list_head list;
struct list_head bindings;
+ refcount_t refs;
struct nft_table *table;
possible_net_t net;
char *name;
@@ -562,7 +565,8 @@ struct nft_set {
struct list_head pending_update;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
- u16 flags:14,
+ u16 flags:13,
+ dead:1,
genmask:2;
u8 klen;
u8 dlen;
@@ -583,6 +587,11 @@ static inline void *nft_set_priv(const struct nft_set *set)
return (void *)set->data;
}
+static inline bool nft_set_gc_is_pending(const struct nft_set *s)
+{
+ return refcount_read(&s->refs) != 1;
+}
+
static inline struct nft_set *nft_set_container_of(const void *priv)
{
return (void *)priv - offsetof(struct nft_set, data);
@@ -596,7 +605,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
const struct nft_set *set);
-void *nft_set_catchall_gc(const struct nft_set *set);
static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
{
@@ -813,62 +821,6 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
const struct nft_set *set, void *elem);
-/**
- * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
- *
- * @rcu: rcu head
- * @set: set the elements belong to
- * @cnt: count of elements
- */
-struct nft_set_gc_batch_head {
- struct rcu_head rcu;
- const struct nft_set *set;
- unsigned int cnt;
-};
-
-#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
- sizeof(struct nft_set_gc_batch_head)) / \
- sizeof(void *))
-
-/**
- * struct nft_set_gc_batch - nf_tables set garbage collection batch
- *
- * @head: GC batch head
- * @elems: garbage collection elements
- */
-struct nft_set_gc_batch {
- struct nft_set_gc_batch_head head;
- void *elems[NFT_SET_GC_BATCH_SIZE];
-};
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp);
-void nft_set_gc_batch_release(struct rcu_head *rcu);
-
-static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
-{
- if (gcb != NULL)
- call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
-}
-
-static inline struct nft_set_gc_batch *
-nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
- gfp_t gfp)
-{
- if (gcb != NULL) {
- if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
- return gcb;
- nft_set_gc_batch_complete(gcb);
- }
- return nft_set_gc_batch_alloc(set, gfp);
-}
-
-static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
- void *elem)
-{
- gcb->elems[gcb->head.cnt++] = elem;
-}
-
struct nft_expr_ops;
/**
* struct nft_expr_type - nf_tables expression type
@@ -1557,39 +1509,30 @@ static inline void nft_set_elem_change_active(const struct net *net,
#endif /* IS_ENABLED(CONFIG_NF_TABLES) */
-/*
- * We use a free bit in the genmask field to indicate the element
- * is busy, meaning it is currently being processed either by
- * the netlink API or GC.
- *
- * Even though the genmask is only a single byte wide, this works
- * because the extension structure if fully constant once initialized,
- * so there are no non-atomic write accesses unless it is already
- * marked busy.
- */
-#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+#define NFT_SET_ELEM_DEAD_MASK (1 << 2)
#if defined(__LITTLE_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT 2
+#define NFT_SET_ELEM_DEAD_BIT 2
#elif defined(__BIG_ENDIAN_BITFIELD)
-#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#define NFT_SET_ELEM_DEAD_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
#else
#error
#endif
-static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+static inline void nft_set_elem_dead(struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
- return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ set_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
-static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext)
{
unsigned long *word = (unsigned long *)ext;
- clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+ return test_bit(NFT_SET_ELEM_DEAD_BIT, word);
}
/**
@@ -1732,6 +1675,38 @@ struct nft_trans_flowtable {
#define nft_trans_flowtable_flags(trans) \
(((struct nft_trans_flowtable *)trans->data)->flags)
+#define NFT_TRANS_GC_BATCHCOUNT 256
+
+struct nft_trans_gc {
+ struct list_head list;
+ struct net *net;
+ struct nft_set *set;
+ u32 seq;
+ u8 count;
+ void *priv[NFT_TRANS_GC_BATCHCOUNT];
+ struct rcu_head rcu;
+};
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_destroy(struct nft_trans_gc *trans);
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp);
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *gc);
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp);
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans);
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *gc, void *priv);
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq);
+
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
int __init nft_chain_filter_init(void);
void nft_chain_filter_fini(void);
@@ -1758,6 +1733,8 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
unsigned int base_seq;
+ unsigned int gc_seq;
+ u8 validate_state;
};
extern unsigned int nf_tables_net_id;
diff --git a/include/net/nsh.h b/include/net/nsh.h
index 350b1ad11c7f..16a751093896 100644
--- a/include/net/nsh.h
+++ b/include/net/nsh.h
@@ -192,7 +192,7 @@
/**
* struct nsh_md1_ctx - Keeps track of NSH context data
- * @nshc<1-4>: NSH Contexts.
+ * @context: NSH Contexts.
*/
struct nsh_md1_ctx {
__be32 context[4];
diff --git a/include/net/pie.h b/include/net/pie.h
index 3fe2361e03b4..01cbc66825a4 100644
--- a/include/net/pie.h
+++ b/include/net/pie.h
@@ -17,7 +17,7 @@
/**
* struct pie_params - contains pie parameters
* @target: target delay in pschedtime
- * @tudpate: interval at which drop probability is calculated
+ * @tupdate: interval at which drop probability is calculated
* @limit: total number of packets that can be in the queue
* @alpha: parameter to control drop probability
* @beta: parameter to control drop probability
diff --git a/include/net/route.h b/include/net/route.h
index 5a5c726472bd..8c2a8e7d8f8e 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -168,7 +168,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
__be16 dport, __be16 sport,
__u8 proto, __u8 tos, int oif)
{
- flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
+ flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
RT_SCOPE_UNIVERSE, proto,
sk ? inet_sk_flowi_flags(sk) : 0,
daddr, saddr, dport, sport, sock_net_uid(net, sk));
@@ -301,7 +301,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
if (inet_sk(sk)->transparent)
flow_flags |= FLOWI_FLAG_ANYSRC;
- flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
+ flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
ip_sock_rt_scope(sk), protocol, flow_flags, dst,
src, dport, sport, sk->sk_uid);
}
diff --git a/include/net/rsi_91x.h b/include/net/rsi_91x.h
index 040f07b47f1f..b2283762e446 100644
--- a/include/net/rsi_91x.h
+++ b/include/net/rsi_91x.h
@@ -1,4 +1,4 @@
-/**
+/*
* Copyright (c) 2017 Redpine Signals Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index d9076a7a430c..6506221c5fe3 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -190,8 +190,8 @@ int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *
int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
u32 portid, const struct nlmsghdr *nlh);
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
- struct netlink_ext_ack *exterr);
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+ struct netlink_ext_ack *exterr);
struct net *rtnl_get_net_ns_capable(struct sock *sk, int netnsid);
#define MODULE_ALIAS_RTNL_LINK(kind) MODULE_ALIAS("rtnl-link-" kind)
diff --git a/include/net/sock.h b/include/net/sock.h
index 2eb916d1ff64..690e22139543 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1323,6 +1323,7 @@ struct proto {
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
+ * Make sure to use READ_ONCE()/WRITE_ONCE() for all reads/writes.
* All the __sk_mem_schedule() is of this nature: accounting
* is strict, actions are advisory and have some latency.
*/
@@ -1420,6 +1421,12 @@ static inline bool sk_has_memory_pressure(const struct sock *sk)
return sk->sk_prot->memory_pressure != NULL;
}
+static inline bool sk_under_global_memory_pressure(const struct sock *sk)
+{
+ return sk->sk_prot->memory_pressure &&
+ !!READ_ONCE(*sk->sk_prot->memory_pressure);
+}
+
static inline bool sk_under_memory_pressure(const struct sock *sk)
{
if (!sk->sk_prot->memory_pressure)
@@ -1429,7 +1436,7 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
mem_cgroup_under_socket_pressure(sk->sk_memcg))
return true;
- return !!*sk->sk_prot->memory_pressure;
+ return !!READ_ONCE(*sk->sk_prot->memory_pressure);
}
static inline long
@@ -1506,7 +1513,7 @@ proto_memory_pressure(struct proto *prot)
{
if (!prot->memory_pressure)
return false;
- return !!*prot->memory_pressure;
+ return !!READ_ONCE(*prot->memory_pressure);
}
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 226bce6d1e8c..0ca972ebd3dd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1509,25 +1509,38 @@ void tcp_leave_memory_pressure(struct sock *sk);
static inline int keepalive_intvl_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
- return tp->keepalive_intvl ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
+ * and do_tcp_setsockopt().
+ */
+ val = READ_ONCE(tp->keepalive_intvl);
+
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
- return tp->keepalive_time ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
+ val = READ_ONCE(tp->keepalive_time);
+
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
+ int val;
- return tp->keepalive_probes ? :
- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
+ /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
+ * and do_tcp_setsockopt().
+ */
+ val = READ_ONCE(tp->keepalive_probes);
+
+ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
@@ -2048,7 +2061,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+ u32 val;
+
+ val = READ_ONCE(tp->notsent_lowat);
+
+ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
bool tcp_stream_memory_free(const struct sock *sk, int wake);
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 0be91ca78d3a..6a9f8a5f387c 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -386,10 +386,15 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb,
return features;
}
-/* IP header + UDP + VXLAN + Ethernet header */
-#define VXLAN_HEADROOM (20 + 8 + 8 + 14)
-/* IPv6 header + UDP + VXLAN + Ethernet header */
-#define VXLAN6_HEADROOM (40 + 8 + 8 + 14)
+static inline int vxlan_headroom(u32 flags)
+{
+ /* VXLAN: IP4/6 header + UDP + VXLAN + Ethernet header */
+ /* VXLAN-GPE: IP4/6 header + UDP + VXLAN */
+ return (flags & VXLAN_F_IPV6 ? sizeof(struct ipv6hdr) :
+ sizeof(struct iphdr)) +
+ sizeof(struct udphdr) + sizeof(struct vxlanhdr) +
+ (flags & VXLAN_F_GPE ? 0 : ETH_HLEN);
+}
static inline struct vxlanhdr *vxlan_hdr(struct sk_buff *skb)
{
@@ -551,12 +556,12 @@ static inline void vxlan_flag_attr_error(int attrtype,
}
static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
- int hash,
+ u32 hash,
struct vxlan_rdst *rdst)
{
struct fib_nh_common *nhc;
- nhc = nexthop_path_fdb_result(nh, hash);
+ nhc = nexthop_path_fdb_result(nh, hash >> 1);
if (unlikely(!nhc))
return false;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 151ca95dd08d..363c7d510554 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1984,6 +1984,7 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
if (dev->xfrmdev_ops->xdo_dev_state_free)
dev->xfrmdev_ops->xdo_dev_state_free(x);
xso->dev = NULL;
+ xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
netdev_put(dev, &xso->dev_tracker);
}
}
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 75b2235b99e2..b9230b6add04 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -194,6 +194,7 @@ struct scsi_device {
unsigned no_start_on_add:1; /* do not issue start on add */
unsigned allow_restart:1; /* issue START_UNIT in error handler */
unsigned manage_start_stop:1; /* Let HLD (sd) manage start/stop */
+ unsigned no_start_on_resume:1; /* Do not issue START_STOP_UNIT on resume */
unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */
unsigned no_uld_attach:1; /* disable connecting to upper level drivers */
unsigned select_no_atn:1;
diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h
index fc3001483e62..a5ef84944a06 100644
--- a/include/soc/tegra/mc.h
+++ b/include/soc/tegra/mc.h
@@ -175,6 +175,9 @@ struct tegra_mc_icc_ops {
int (*get_bw)(struct icc_node *node, u32 *avg, u32 *peak);
};
+struct icc_node *tegra_mc_icc_xlate(struct of_phandle_args *spec, void *data);
+extern const struct tegra_mc_icc_ops tegra_mc_icc_ops;
+
struct tegra_mc_ops {
/*
* @probe: Callback to set up SoC-specific bits of the memory controller. This is called
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index a8206f5332e9..b2db2c2f1c57 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -38,7 +38,6 @@ struct find_free_extent_ctl;
__print_symbolic(type, \
{ BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \
{ BTRFS_EXTENT_DATA_REF_KEY, "EXTENT_DATA_REF" }, \
- { BTRFS_EXTENT_REF_V0_KEY, "EXTENT_REF_V0" }, \
{ BTRFS_SHARED_BLOCK_REF_KEY, "SHARED_BLOCK_REF" }, \
{ BTRFS_SHARED_DATA_REF_KEY, "SHARED_DATA_REF" })
@@ -2482,7 +2481,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio,
__entry->offset, __entry->opf, __entry->physical, __entry->len)
);
-DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
+DEFINE_EVENT(btrfs_raid56_bio, raid56_read,
TP_PROTO(const struct btrfs_raid_bio *rbio,
const struct bio *bio,
const struct raid56_bio_trace_info *trace_info),
@@ -2490,32 +2489,7 @@ DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
TP_ARGS(rbio, bio, trace_info)
);
-DEFINE_EVENT(btrfs_raid56_bio, raid56_write_stripe,
- TP_PROTO(const struct btrfs_raid_bio *rbio,
- const struct bio *bio,
- const struct raid56_bio_trace_info *trace_info),
-
- TP_ARGS(rbio, bio, trace_info)
-);
-
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_write_stripe,
- TP_PROTO(const struct btrfs_raid_bio *rbio,
- const struct bio *bio,
- const struct raid56_bio_trace_info *trace_info),
-
- TP_ARGS(rbio, bio, trace_info)
-);
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read,
- TP_PROTO(const struct btrfs_raid_bio *rbio,
- const struct bio *bio,
- const struct raid56_bio_trace_info *trace_info),
-
- TP_ARGS(rbio, bio, trace_info)
-);
-
-DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read_recover,
+DEFINE_EVENT(btrfs_raid56_bio, raid56_write,
TP_PROTO(const struct btrfs_raid_bio *rbio,
const struct bio *bio,
const struct raid56_bio_trace_info *trace_info),
diff --git a/include/trace/events/erofs.h b/include/trace/events/erofs.h
index 71dbe8bfa7db..e18684b02c3d 100644
--- a/include/trace/events/erofs.h
+++ b/include/trace/events/erofs.h
@@ -80,11 +80,11 @@ TRACE_EVENT(erofs_fill_inode,
__entry->blkaddr, __entry->ofs)
);
-TRACE_EVENT(erofs_readpage,
+TRACE_EVENT(erofs_read_folio,
- TP_PROTO(struct page *page, bool raw),
+ TP_PROTO(struct folio *folio, bool raw),
- TP_ARGS(page, raw),
+ TP_ARGS(folio, raw),
TP_STRUCT__entry(
__field(dev_t, dev )
@@ -96,11 +96,11 @@ TRACE_EVENT(erofs_readpage,
),
TP_fast_assign(
- __entry->dev = page->mapping->host->i_sb->s_dev;
- __entry->nid = EROFS_I(page->mapping->host)->nid;
- __entry->dir = S_ISDIR(page->mapping->host->i_mode);
- __entry->index = page->index;
- __entry->uptodate = PageUptodate(page);
+ __entry->dev = folio->mapping->host->i_sb->s_dev;
+ __entry->nid = EROFS_I(folio->mapping->host)->nid;
+ __entry->dir = S_ISDIR(folio->mapping->host->i_mode);
+ __entry->index = folio->index;
+ __entry->uptodate = folio_test_uptodate(folio);
__entry->raw = raw;
),
diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h
index 8f5ee380d309..5646ae15a957 100644
--- a/include/trace/events/jbd2.h
+++ b/include/trace/events/jbd2.h
@@ -462,11 +462,9 @@ TRACE_EVENT(jbd2_shrink_scan_exit,
TRACE_EVENT(jbd2_shrink_checkpoint_list,
TP_PROTO(journal_t *journal, tid_t first_tid, tid_t tid, tid_t last_tid,
- unsigned long nr_freed, unsigned long nr_scanned,
- tid_t next_tid),
+ unsigned long nr_freed, tid_t next_tid),
- TP_ARGS(journal, first_tid, tid, last_tid, nr_freed,
- nr_scanned, next_tid),
+ TP_ARGS(journal, first_tid, tid, last_tid, nr_freed, next_tid),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -474,7 +472,6 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list,
__field(tid_t, tid)
__field(tid_t, last_tid)
__field(unsigned long, nr_freed)
- __field(unsigned long, nr_scanned)
__field(tid_t, next_tid)
),
@@ -484,15 +481,14 @@ TRACE_EVENT(jbd2_shrink_checkpoint_list,
__entry->tid = tid;
__entry->last_tid = last_tid;
__entry->nr_freed = nr_freed;
- __entry->nr_scanned = nr_scanned;
__entry->next_tid = next_tid;
),
TP_printk("dev %d,%d shrink transaction %u-%u(%u) freed %lu "
- "scanned %lu next transaction %u",
+ "next transaction %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->first_tid, __entry->tid, __entry->last_tid,
- __entry->nr_freed, __entry->nr_scanned, __entry->next_tid)
+ __entry->nr_freed, __entry->next_tid)
);
#endif /* _TRACE_JBD2_H */
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index bf06db8d2046..7b1ddffa3dfc 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -381,6 +381,7 @@ TRACE_EVENT(tcp_cong_state_set,
__field(const void *, skaddr)
__field(__u16, sport)
__field(__u16, dport)
+ __field(__u16, family)
__array(__u8, saddr, 4)
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
@@ -396,6 +397,7 @@ TRACE_EVENT(tcp_cong_state_set,
__entry->sport = ntohs(inet->inet_sport);
__entry->dport = ntohs(inet->inet_dport);
+ __entry->family = sk->sk_family;
p32 = (__be32 *) __entry->saddr;
*p32 = inet->inet_saddr;
@@ -409,7 +411,8 @@ TRACE_EVENT(tcp_cong_state_set,
__entry->cong_state = ca_state;
),
- TP_printk("sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+ TP_printk("family=%s sport=%hu dport=%hu saddr=%pI4 daddr=%pI4 saddrv6=%pI6c daddrv6=%pI6c cong_state=%u",
+ show_family_name(__entry->family),
__entry->sport, __entry->dport,
__entry->saddr, __entry->daddr,
__entry->saddr_v6, __entry->daddr_v6,
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index fd6c1cb585db..abe087c53b4b 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -820,8 +820,11 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
#define __NR_cachestat 451
__SYSCALL(__NR_cachestat, sys_cachestat)
+#define __NR_fchmodat2 452
+__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
+
#undef __NR_syscalls
-#define __NR_syscalls 452
+#define __NR_syscalls 453
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h
index b80fcc9ea525..f85743ef6e7d 100644
--- a/include/uapi/linux/blkzoned.h
+++ b/include/uapi/linux/blkzoned.h
@@ -51,13 +51,13 @@ enum blk_zone_type {
*
* The Zone Condition state machine in the ZBC/ZAC standards maps the above
* deinitions as:
- * - ZC1: Empty | BLK_ZONE_EMPTY
+ * - ZC1: Empty | BLK_ZONE_COND_EMPTY
* - ZC2: Implicit Open | BLK_ZONE_COND_IMP_OPEN
* - ZC3: Explicit Open | BLK_ZONE_COND_EXP_OPEN
- * - ZC4: Closed | BLK_ZONE_CLOSED
- * - ZC5: Full | BLK_ZONE_FULL
- * - ZC6: Read Only | BLK_ZONE_READONLY
- * - ZC7: Offline | BLK_ZONE_OFFLINE
+ * - ZC4: Closed | BLK_ZONE_COND_CLOSED
+ * - ZC5: Full | BLK_ZONE_COND_FULL
+ * - ZC6: Read Only | BLK_ZONE_COND_READONLY
+ * - ZC7: Offline | BLK_ZONE_COND_OFFLINE
*
* Conditions 0x5 to 0xC are reserved by the current ZBC/ZAC spec and should
* be considered invalid.
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
index ab38d0f411fa..fc3c32186d7e 100644
--- a/include/uapi/linux/btrfs_tree.h
+++ b/include/uapi/linux/btrfs_tree.h
@@ -220,7 +220,11 @@
#define BTRFS_EXTENT_DATA_REF_KEY 178
-#define BTRFS_EXTENT_REF_V0_KEY 180
+/*
+ * Obsolete key. Defintion removed in 6.6, value may be reused in the future.
+ *
+ * #define BTRFS_EXTENT_REF_V0_KEY 180
+ */
#define BTRFS_SHARED_BLOCK_REF_KEY 182
diff --git a/include/uapi/linux/elf.h b/include/uapi/linux/elf.h
index 0c8cf359ea5b..e0e159138331 100644
--- a/include/uapi/linux/elf.h
+++ b/include/uapi/linux/elf.h
@@ -443,7 +443,6 @@ typedef struct elf64_shdr {
#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */
#define NT_MIPS_FP_MODE 0x801 /* MIPS floating-point mode */
#define NT_MIPS_MSA 0x802 /* MIPS SIMD registers */
-#define NT_RISCV_VECTOR 0x900 /* RISC-V vector registers */
#define NT_LOONGARCH_CPUCFG 0xa00 /* LoongArch CPU config registers */
#define NT_LOONGARCH_CSR 0xa01 /* LoongArch control and status registers */
#define NT_LOONGARCH_LSX 0xa02 /* LoongArch Loongson SIMD Extension registers */
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 1b9d0dfae72d..b3fcab13fcd3 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -206,6 +206,7 @@
* - add extension header
* - add FUSE_EXT_GROUPS
* - add FUSE_CREATE_SUPP_GROUP
+ * - add FUSE_HAS_EXPIRE_ONLY
*/
#ifndef _LINUX_FUSE_H
@@ -369,6 +370,7 @@ struct fuse_file_lock {
* FUSE_HAS_INODE_DAX: use per inode DAX
* FUSE_CREATE_SUPP_GROUP: add supplementary group info to create, mkdir,
* symlink and mknod (single group that matches parent)
+ * FUSE_HAS_EXPIRE_ONLY: kernel supports expiry-only entry invalidation
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -406,6 +408,7 @@ struct fuse_file_lock {
#define FUSE_SECURITY_CTX (1ULL << 32)
#define FUSE_HAS_INODE_DAX (1ULL << 33)
#define FUSE_CREATE_SUPP_GROUP (1ULL << 34)
+#define FUSE_HAS_EXPIRE_ONLY (1ULL << 35)
/**
* CUSE INIT request/reply flags
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 9efc42382fdb..4d0ad22f83b5 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -18,7 +18,11 @@ struct sockaddr_ll {
unsigned short sll_hatype;
unsigned char sll_pkttype;
unsigned char sll_halen;
- unsigned char sll_addr[8];
+ union {
+ unsigned char sll_addr[8];
+ /* Actual length is in sll_halen. */
+ __DECLARE_FLEX_ARRAY(unsigned char, sll_addr_flex);
+ };
};
/* Packet types */
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 8eb0d7b758d2..bb242fdcfe6b 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -100,8 +100,9 @@ enum fsconfig_command {
FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */
FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */
FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */
- FSCONFIG_CMD_CREATE = 6, /* Invoke superblock creation */
+ FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */
FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */
+ FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */
};
/*
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 7865f5a9885b..4f3932bb712d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -710,9 +710,11 @@ enum {
TCA_FLOWER_KEY_CFM_OPT_UNSPEC,
TCA_FLOWER_KEY_CFM_MD_LEVEL,
TCA_FLOWER_KEY_CFM_OPCODE,
- TCA_FLOWER_KEY_CFM_OPT_MAX,
+ __TCA_FLOWER_KEY_CFM_OPT_MAX,
};
+#define TCA_FLOWER_KEY_CFM_OPT_MAX (__TCA_FLOWER_KEY_CFM_OPT_MAX - 1)
+
#define TCA_FLOWER_MASK_FLAGS_RANGE (1 << 0) /* Range-based match */
/* Match-all classifier */
diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h
index f17c9636a859..52090105b828 100644
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -77,6 +77,7 @@
#define QFMT_VFS_V0 2
#define QFMT_OCFS2 3
#define QFMT_VFS_V1 4
+#define QFMT_SHMEM 5
/* Size of block in which space limits are passed through the quota
* interface */
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0fdc6ef02b94..dbfc9b37fcae 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -115,6 +115,8 @@ struct seccomp_notif_resp {
__u32 flags;
};
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+
/* valid flags for seccomp_notif_addfd */
#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
@@ -150,4 +152,6 @@ struct seccomp_notif_addfd {
#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
struct seccomp_notif_addfd)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+
#endif /* _UAPI_LINUX_SECCOMP_H */
diff --git a/include/uapi/xen/evtchn.h b/include/uapi/xen/evtchn.h
index 7fbf732f168f..aef2b75f3413 100644
--- a/include/uapi/xen/evtchn.h
+++ b/include/uapi/xen/evtchn.h
@@ -101,4 +101,13 @@ struct ioctl_evtchn_restrict_domid {
domid_t domid;
};
+/*
+ * Bind statically allocated @port.
+ */
+#define IOCTL_EVTCHN_BIND_STATIC \
+ _IOC(_IOC_NONE, 'E', 7, sizeof(struct ioctl_evtchn_bind))
+struct ioctl_evtchn_bind {
+ unsigned int port;
+};
+
#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
diff --git a/include/video/kyro.h b/include/video/kyro.h
index b958c2e9c915..418eef6c5523 100644
--- a/include/video/kyro.h
+++ b/include/video/kyro.h
@@ -38,18 +38,6 @@ struct kyrofb_info {
int wc_cookie;
};
-extern int kyro_dev_init(void);
-extern void kyro_dev_reset(void);
-
-extern unsigned char *kyro_dev_physical_fb_ptr(void);
-extern unsigned char *kyro_dev_virtual_fb_ptr(void);
-extern void *kyro_dev_physical_regs_ptr(void);
-extern void *kyro_dev_virtual_regs_ptr(void);
-extern unsigned int kyro_dev_fb_size(void);
-extern unsigned int kyro_dev_regs_size(void);
-
-extern u32 kyro_dev_overlay_offset(void);
-
/*
* benedict.gaster@superh.com
* Added the follow IOCTLS for the creation of overlay services...
diff --git a/include/xen/events.h b/include/xen/events.h
index ac1281c5ead6..95970a2f7695 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -69,7 +69,7 @@ int xen_set_irq_priority(unsigned irq, unsigned priority);
/*
* Allow extra references to event channels exposed to userspace by evtchn
*/
-int evtchn_make_refcounted(evtchn_port_t evtchn);
+int evtchn_make_refcounted(evtchn_port_t evtchn, bool is_static);
int evtchn_get(evtchn_port_t evtchn);
void evtchn_put(evtchn_port_t evtchn);
@@ -141,4 +141,13 @@ void xen_init_IRQ(void);
irqreturn_t xen_debug_interrupt(int irq, void *dev_id);
+static inline void xen_evtchn_close(evtchn_port_t port)
+{
+ struct evtchn_close close;
+
+ close.port = port;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ BUG();
+}
+
#endif /* _XEN_EVENTS_H */
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 1aa015883519..5dfd30b13f48 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/ramfs.h>
#include <linux/shmem_fs.h>
+#include <linux/ktime.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
@@ -71,12 +72,37 @@ static int __init rootwait_setup(char *str)
{
if (*str)
return 0;
- root_wait = 1;
+ root_wait = -1;
return 1;
}
__setup("rootwait", rootwait_setup);
+static int __init rootwait_timeout_setup(char *str)
+{
+ int sec;
+
+ if (kstrtoint(str, 0, &sec) || sec < 0) {
+ pr_warn("ignoring invalid rootwait value\n");
+ goto ignore;
+ }
+
+ if (check_mul_overflow(sec, MSEC_PER_SEC, &root_wait)) {
+ pr_warn("ignoring excessive rootwait value\n");
+ goto ignore;
+ }
+
+ return 1;
+
+ignore:
+ /* Fallback to indefinite wait */
+ root_wait = -1;
+
+ return 1;
+}
+
+__setup("rootwait=", rootwait_timeout_setup);
+
static char * __initdata root_mount_data;
static int __init root_data_setup(char *str)
{
@@ -384,14 +410,22 @@ void __init mount_root(char *root_device_name)
/* wait for any asynchronous scanning to complete */
static void __init wait_for_root(char *root_device_name)
{
+ ktime_t end;
+
if (ROOT_DEV != 0)
return;
pr_info("Waiting for root device %s...\n", root_device_name);
+ end = ktime_add_ms(ktime_get_raw(), root_wait);
+
while (!driver_probe_done() ||
- early_lookup_bdev(root_device_name, &ROOT_DEV) < 0)
+ early_lookup_bdev(root_device_name, &ROOT_DEV) < 0) {
msleep(5);
+ if (root_wait > 0 && ktime_after(ktime_get_raw(), end))
+ break;
+ }
+
async_synchronize_full();
}
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 7505de2428e0..93db3e4e7b68 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1948,6 +1948,14 @@ fail:
ret = io_issue_sqe(req, issue_flags);
if (ret != -EAGAIN)
break;
+
+ /*
+ * If REQ_F_NOWAIT is set, then don't wait or retry with
+ * poll. -EAGAIN is final for that case.
+ */
+ if (req->flags & REQ_F_NOWAIT)
+ break;
+
/*
* We can get EAGAIN for iopolled IO even though we're
* forcing a sync submission from here, since we can't
@@ -2485,11 +2493,20 @@ int io_run_task_work_sig(struct io_ring_ctx *ctx)
return 0;
}
+static bool current_pending_io(void)
+{
+ struct io_uring_task *tctx = current->io_uring;
+
+ if (!tctx)
+ return false;
+ return percpu_counter_read_positive(&tctx->inflight);
+}
+
/* when returns >0, the caller should retry */
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
struct io_wait_queue *iowq)
{
- int token, ret;
+ int io_wait, ret;
if (unlikely(READ_ONCE(ctx->check_cq)))
return 1;
@@ -2503,17 +2520,19 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
return 0;
/*
- * Use io_schedule_prepare/finish, so cpufreq can take into account
- * that the task is waiting for IO - turns out to be important for low
- * QD IO.
+ * Mark us as being in io_wait if we have pending requests, so cpufreq
+ * can take into account that the task is waiting for IO - turns out
+ * to be important for low QD IO.
*/
- token = io_schedule_prepare();
+ io_wait = current->in_iowait;
+ if (current_pending_io())
+ current->in_iowait = 1;
ret = 0;
if (iowq->timeout == KTIME_MAX)
schedule();
else if (!schedule_hrtimeout(&iowq->timeout, HRTIMER_MODE_ABS))
ret = -ETIME;
- io_schedule_finish(token);
+ current->in_iowait = io_wait;
return ret;
}
@@ -3429,8 +3448,6 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
{
- const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
- struct vm_unmapped_area_info info;
void *ptr;
/*
@@ -3445,32 +3462,29 @@ static unsigned long io_uring_mmu_get_unmapped_area(struct file *filp,
if (IS_ERR(ptr))
return -ENOMEM;
- info.flags = VM_UNMAPPED_AREA_TOPDOWN;
- info.length = len;
- info.low_limit = max(PAGE_SIZE, mmap_min_addr);
- info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
+ /*
+ * Some architectures have strong cache aliasing requirements.
+ * For such architectures we need a coherent mapping which aliases
+ * kernel memory *and* userspace memory. To achieve that:
+ * - use a NULL file pointer to reference physical memory, and
+ * - use the kernel virtual address of the shared io_uring context
+ * (instead of the userspace-provided address, which has to be 0UL
+ * anyway).
+ * - use the same pgoff which the get_unmapped_area() uses to
+ * calculate the page colouring.
+ * For architectures without such aliasing requirements, the
+ * architecture will return any suitable mapping because addr is 0.
+ */
+ filp = NULL;
+ flags |= MAP_SHARED;
+ pgoff = 0; /* has been translated to ptr above */
#ifdef SHM_COLOUR
- info.align_mask = PAGE_MASK & (SHM_COLOUR - 1UL);
+ addr = (uintptr_t) ptr;
+ pgoff = addr >> PAGE_SHIFT;
#else
- info.align_mask = PAGE_MASK & (SHMLBA - 1UL);
+ addr = 0UL;
#endif
- info.align_offset = (unsigned long) ptr;
-
- /*
- * A failed mmap() very likely causes application failure,
- * so fall back to the bottom-up function here. This scenario
- * can happen with large stack limits and large mmap()
- * allocations.
- */
- addr = vm_unmapped_area(&info);
- if (offset_in_page(addr)) {
- info.flags = 0;
- info.low_limit = TASK_UNMAPPED_BASE;
- info.high_limit = mmap_end;
- addr = vm_unmapped_area(&info);
- }
-
- return addr;
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
}
#else /* !CONFIG_MMU */
@@ -3870,7 +3884,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
ctx->syscall_iopoll = 1;
ctx->compat = in_compat_syscall();
- if (!capable(CAP_IPC_LOCK))
+ if (!ns_capable_noaudit(&init_user_ns, CAP_IPC_LOCK))
ctx->user = get_uid(current_user());
/*
diff --git a/io_uring/openclose.c b/io_uring/openclose.c
index 10ca57f5bd24..e3fae26e025d 100644
--- a/io_uring/openclose.c
+++ b/io_uring/openclose.c
@@ -35,9 +35,11 @@ static bool io_openat_force_async(struct io_open *open)
{
/*
* Don't bother trying for O_TRUNC, O_CREAT, or O_TMPFILE open,
- * it'll always -EAGAIN
+ * it'll always -EAGAIN. Note that we test for __O_TMPFILE because
+ * O_TMPFILE includes O_DIRECTORY, which isn't a flag we need to force
+ * async for.
*/
- return open->how.flags & (O_TRUNC | O_CREAT | O_TMPFILE);
+ return open->how.flags & (O_TRUNC | O_CREAT | __O_TMPFILE);
}
static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
diff --git a/io_uring/rw.c b/io_uring/rw.c
index 1bce2208b65c..b3435033fadf 100644
--- a/io_uring/rw.c
+++ b/io_uring/rw.c
@@ -105,6 +105,7 @@ int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
} else {
rw->kiocb.ki_ioprio = get_current_ioprio();
}
+ rw->kiocb.dio_complete = NULL;
rw->addr = READ_ONCE(sqe->addr);
rw->len = READ_ONCE(sqe->len);
@@ -220,17 +221,12 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
}
#endif
-static void kiocb_end_write(struct io_kiocb *req)
+static void io_req_end_write(struct io_kiocb *req)
{
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
if (req->flags & REQ_F_ISREG) {
- struct super_block *sb = file_inode(req->file)->i_sb;
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
- __sb_writers_acquired(sb, SB_FREEZE_WRITE);
- sb_end_write(sb);
+ kiocb_end_write(&rw->kiocb);
}
}
@@ -243,7 +239,7 @@ static void io_req_io_end(struct io_kiocb *req)
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
if (rw->kiocb.ki_flags & IOCB_WRITE) {
- kiocb_end_write(req);
+ io_req_end_write(req);
fsnotify_modify(req->file);
} else {
fsnotify_access(req->file);
@@ -285,6 +281,15 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
{
+ struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+ struct kiocb *kiocb = &rw->kiocb;
+
+ if ((kiocb->ki_flags & IOCB_DIO_CALLER_COMP) && kiocb->dio_complete) {
+ long res = kiocb->dio_complete(rw->kiocb.private);
+
+ io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+ }
+
io_req_io_end(req);
if (req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)) {
@@ -300,9 +305,11 @@ static void io_complete_rw(struct kiocb *kiocb, long res)
struct io_rw *rw = container_of(kiocb, struct io_rw, kiocb);
struct io_kiocb *req = cmd_to_io_kiocb(rw);
- if (__io_complete_rw_common(req, res))
- return;
- io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+ if (!kiocb->dio_complete || !(kiocb->ki_flags & IOCB_DIO_CALLER_COMP)) {
+ if (__io_complete_rw_common(req, res))
+ return;
+ io_req_set_res(req, io_fixup_rw_res(req, res), 0);
+ }
req->io_task_work.func = io_req_rw_complete;
__io_req_task_work_add(req, IOU_F_TWQ_LAZY_WAKE);
}
@@ -313,7 +320,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res)
struct io_kiocb *req = cmd_to_io_kiocb(rw);
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ io_req_end_write(req);
if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
@@ -902,19 +909,18 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
return ret;
}
+ if (req->flags & REQ_F_ISREG)
+ kiocb_start_write(kiocb);
+ kiocb->ki_flags |= IOCB_WRITE;
+
/*
- * Open-code file_start_write here to grab freeze protection,
- * which will be released by another thread in
- * io_complete_rw(). Fool lockdep by telling it the lock got
- * released so that it doesn't complain about the held lock when
- * we return to userspace.
+ * For non-polled IO, set IOCB_DIO_CALLER_COMP, stating that our handler
+ * groks deferring the completion to task context. This isn't
+ * necessary and useful for polled IO as that can always complete
+ * directly.
*/
- if (req->flags & REQ_F_ISREG) {
- sb_start_write(file_inode(req->file)->i_sb);
- __sb_writers_release(file_inode(req->file)->i_sb,
- SB_FREEZE_WRITE);
- }
- kiocb->ki_flags |= IOCB_WRITE;
+ if (!(kiocb->ki_flags & IOCB_HIPRI))
+ kiocb->ki_flags |= IOCB_DIO_CALLER_COMP;
if (likely(req->file->f_op->write_iter))
ret2 = call_write_iter(req->file, kiocb, &s->iter);
@@ -961,7 +967,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
io->bytes_done += ret2;
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ io_req_end_write(req);
return ret ? ret : -EAGAIN;
}
done:
@@ -972,7 +978,7 @@ copy_iov:
ret = io_setup_async_rw(req, iovec, s, false);
if (!ret) {
if (kiocb->ki_flags & IOCB_WRITE)
- kiocb_end_write(req);
+ io_req_end_write(req);
return -EAGAIN;
}
return ret;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 71881bddad25..ba8215ed663a 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -302,7 +302,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
inode->i_mode = mode;
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
- inode->i_mtime = inode->i_ctime = inode->i_atime = current_time(inode);
+ inode->i_mtime = inode->i_atime = inode_set_ctime_current(inode);
if (S_ISREG(mode)) {
struct mqueue_inode_info *info;
@@ -596,7 +596,7 @@ static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg)
put_ipc_ns(ipc_ns);
dir->i_size += DIRENT_SIZE;
- dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+ dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
d_instantiate(dentry, inode);
dget(dentry);
@@ -618,7 +618,7 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
- dir->i_ctime = dir->i_mtime = dir->i_atime = current_time(dir);
+ dir->i_mtime = dir->i_atime = inode_set_ctime_current(dir);
dir->i_size -= DIRENT_SIZE;
drop_nlink(inode);
dput(dentry);
@@ -635,7 +635,8 @@ static int mqueue_unlink(struct inode *dir, struct dentry *dentry)
static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
size_t count, loff_t *off)
{
- struct mqueue_inode_info *info = MQUEUE_I(file_inode(filp));
+ struct inode *inode = file_inode(filp);
+ struct mqueue_inode_info *info = MQUEUE_I(inode);
char buffer[FILENT_SIZE];
ssize_t ret;
@@ -656,7 +657,7 @@ static ssize_t mqueue_read_file(struct file *filp, char __user *u_data,
if (ret <= 0)
return ret;
- file_inode(filp)->i_atime = file_inode(filp)->i_ctime = current_time(file_inode(filp));
+ inode->i_atime = inode_set_ctime_current(inode);
return ret;
}
@@ -1162,8 +1163,7 @@ static int do_mq_timedsend(mqd_t mqdes, const char __user *u_msg_ptr,
goto out_unlock;
__do_notify(info);
}
- inode->i_atime = inode->i_mtime = inode->i_ctime =
- current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
}
out_unlock:
spin_unlock(&info->lock);
@@ -1257,8 +1257,7 @@ static int do_mq_timedreceive(mqd_t mqdes, char __user *u_msg_ptr,
msg_ptr = msg_get(info);
- inode->i_atime = inode->i_mtime = inode->i_ctime =
- current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
/* There is now free space in queue. */
pipelined_receive(&wake_q, info);
@@ -1396,7 +1395,7 @@ retry:
if (notification == NULL) {
if (info->notify_owner == task_tgid(current)) {
remove_notification(info);
- inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode_set_ctime_current(inode);
}
} else if (info->notify_owner != NULL) {
ret = -EBUSY;
@@ -1422,7 +1421,7 @@ retry:
info->notify_owner = get_pid(task_tgid(current));
info->notify_user_ns = get_user_ns(current_user_ns());
- inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode_set_ctime_current(inode);
}
spin_unlock(&info->lock);
out_fput:
@@ -1485,7 +1484,7 @@ static int do_mq_getsetattr(int mqdes, struct mq_attr *new, struct mq_attr *old)
f.file->f_flags &= ~O_NONBLOCK;
spin_unlock(&f.file->f_lock);
- inode->i_atime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode_set_ctime_current(inode);
}
spin_unlock(&info->lock);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 6ae02be7a48e..286ab3db0fde 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -28,6 +28,7 @@
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
+#include <linux/completion.h>
#include <trace/events/xdp.h>
#include <linux/btf_ids.h>
@@ -73,6 +74,7 @@ struct bpf_cpu_map_entry {
struct rcu_head rcu;
struct work_struct kthread_stop_wq;
+ struct completion kthread_running;
};
struct bpf_cpu_map {
@@ -129,11 +131,17 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
* invoked cpu_map_kthread_stop(). Catch any broken behaviour
* gracefully and warn once.
*/
- struct xdp_frame *xdpf;
+ void *ptr;
- while ((xdpf = ptr_ring_consume(ring)))
- if (WARN_ON_ONCE(xdpf))
- xdp_return_frame(xdpf);
+ while ((ptr = ptr_ring_consume(ring))) {
+ WARN_ON_ONCE(1);
+ if (unlikely(__ptr_test_bit(0, &ptr))) {
+ __ptr_clear_bit(0, &ptr);
+ kfree_skb(ptr);
+ continue;
+ }
+ xdp_return_frame(ptr);
+ }
}
static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
@@ -153,7 +161,6 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
static void cpu_map_kthread_stop(struct work_struct *work)
{
struct bpf_cpu_map_entry *rcpu;
- int err;
rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
@@ -163,14 +170,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
rcu_barrier();
/* kthread_stop will wake_up_process and wait for it to complete */
- err = kthread_stop(rcpu->kthread);
- if (err) {
- /* kthread_stop may be called before cpu_map_kthread_run
- * is executed, so we need to release the memory related
- * to rcpu.
- */
- put_cpu_map_entry(rcpu);
- }
+ kthread_stop(rcpu->kthread);
}
static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
@@ -298,11 +298,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
return nframes;
}
-
static int cpu_map_kthread_run(void *data)
{
struct bpf_cpu_map_entry *rcpu = data;
+ complete(&rcpu->kthread_running);
set_current_state(TASK_INTERRUPTIBLE);
/* When kthread gives stop order, then rcpu have been disconnected
@@ -467,6 +467,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
goto free_ptr_ring;
/* Setup kthread */
+ init_completion(&rcpu->kthread_running);
rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
"cpumap/%d/map:%d", cpu,
map->id);
@@ -480,6 +481,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
kthread_bind(rcpu->kthread, cpu);
wake_up_process(rcpu->kthread);
+ /* Make sure kthread has been running, so kthread_stop() will not
+ * stop the kthread prematurely and all pending frames or skbs
+ * will be handled by the kthread before kthread_stop() returns.
+ */
+ wait_for_completion(&rcpu->kthread_running);
+
return rcpu;
free_prog:
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 4174f76133df..99d0625b6c82 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -118,9 +118,8 @@ static struct inode *bpf_get_inode(struct super_block *sb,
return ERR_PTR(-ENOSPC);
inode->i_ino = get_next_ino();
- inode->i_atime = current_time(inode);
+ inode->i_atime = inode_set_ctime_current(inode);
inode->i_mtime = inode->i_atime;
- inode->i_ctime = inode->i_atime;
inode_init_owner(&nop_mnt_idmap, inode, dir, mode);
@@ -148,8 +147,7 @@ static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
d_instantiate(dentry, inode);
dget(dentry);
- dir->i_mtime = current_time(dir);
- dir->i_ctime = dir->i_mtime;
+ dir->i_mtime = inode_set_ctime_current(dir);
}
static int bpf_mkdir(struct mnt_idmap *idmap, struct inode *dir,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 930b5555cfd3..02a021c524ab 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5573,16 +5573,17 @@ static int update_stack_depth(struct bpf_verifier_env *env,
* Since recursion is prevented by check_cfg() this algorithm
* only needs a local stack of MAX_CALL_FRAMES to remember callsites
*/
-static int check_max_stack_depth(struct bpf_verifier_env *env)
+static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
{
- int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
+ int depth = 0, frame = 0, i, subprog_end;
bool tail_call_reachable = false;
int ret_insn[MAX_CALL_FRAMES];
int ret_prog[MAX_CALL_FRAMES];
int j;
+ i = subprog[idx].start;
process_func:
/* protect against potential stack overflow that might happen when
* bpf2bpf calls get combined with tailcalls. Limit the caller's stack
@@ -5621,7 +5622,7 @@ process_func:
continue_func:
subprog_end = subprog[idx + 1].start;
for (; i < subprog_end; i++) {
- int next_insn;
+ int next_insn, sidx;
if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
continue;
@@ -5631,14 +5632,14 @@ continue_func:
/* find the callee */
next_insn = i + insn[i].imm + 1;
- idx = find_subprog(env, next_insn);
- if (idx < 0) {
+ sidx = find_subprog(env, next_insn);
+ if (sidx < 0) {
WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
next_insn);
return -EFAULT;
}
- if (subprog[idx].is_async_cb) {
- if (subprog[idx].has_tail_call) {
+ if (subprog[sidx].is_async_cb) {
+ if (subprog[sidx].has_tail_call) {
verbose(env, "verifier bug. subprog has tail_call and async cb\n");
return -EFAULT;
}
@@ -5647,6 +5648,7 @@ continue_func:
continue;
}
i = next_insn;
+ idx = sidx;
if (subprog[idx].has_tail_call)
tail_call_reachable = true;
@@ -5682,6 +5684,22 @@ continue_func:
goto continue_func;
}
+static int check_max_stack_depth(struct bpf_verifier_env *env)
+{
+ struct bpf_subprog_info *si = env->subprog_info;
+ int ret;
+
+ for (int i = 0; i < env->subprog_cnt; i++) {
+ if (!i || si[i].is_async_cb) {
+ ret = check_max_stack_depth_subprog(env, i);
+ if (ret < 0)
+ return ret;
+ }
+ continue;
+ }
+ return 0;
+}
+
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
static int get_callee_stack_depth(struct bpf_verifier_env *env,
const struct bpf_insn *insn, int idx)
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index edec335c0a7a..5f2c66860ac6 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -68,11 +68,16 @@ static int irq_sw_resend(struct irq_desc *desc)
*/
if (!desc->parent_irq)
return -EINVAL;
+
+ desc = irq_to_desc(desc->parent_irq);
+ if (!desc)
+ return -EINVAL;
}
/* Add to resend_list and activate the softirq: */
raw_spin_lock(&irq_resend_lock);
- hlist_add_head(&desc->resend_node, &irq_resend_list);
+ if (hlist_unhashed(&desc->resend_node))
+ hlist_add_head(&desc->resend_node, &irq_resend_list);
raw_spin_unlock(&irq_resend_lock);
tasklet_schedule(&resend_tasklet);
return 0;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 1fc6095d502d..ca385b61d546 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1545,6 +1545,17 @@ static int check_ftrace_location(struct kprobe *p)
return 0;
}
+static bool is_cfi_preamble_symbol(unsigned long addr)
+{
+ char symbuf[KSYM_NAME_LEN];
+
+ if (lookup_symbol_name(addr, symbuf))
+ return false;
+
+ return str_has_prefix("__cfi_", symbuf) ||
+ str_has_prefix("__pfx_", symbuf);
+}
+
static int check_kprobe_address_safe(struct kprobe *p,
struct module **probed_mod)
{
@@ -1563,7 +1574,8 @@ static int check_kprobe_address_safe(struct kprobe *p,
within_kprobe_blacklist((unsigned long) p->addr) ||
jump_label_text_reserved(p->addr, p->addr) ||
static_call_text_reserved(p->addr, p->addr) ||
- find_bug((unsigned long)p->addr)) {
+ find_bug((unsigned long)p->addr) ||
+ is_cfi_preamble_symbol((unsigned long)p->addr)) {
ret = -EINVAL;
goto out;
}
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 728f434de2bb..21db0df0eb00 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -333,21 +333,43 @@ static __always_inline int __waiter_prio(struct task_struct *task)
return prio;
}
+/*
+ * Update the waiter->tree copy of the sort keys.
+ */
static __always_inline void
waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
{
- waiter->prio = __waiter_prio(task);
- waiter->deadline = task->dl.deadline;
+ lockdep_assert_held(&waiter->lock->wait_lock);
+ lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
+
+ waiter->tree.prio = __waiter_prio(task);
+ waiter->tree.deadline = task->dl.deadline;
+}
+
+/*
+ * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
+ */
+static __always_inline void
+waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
+{
+ lockdep_assert_held(&waiter->lock->wait_lock);
+ lockdep_assert_held(&task->pi_lock);
+ lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
+
+ waiter->pi_tree.prio = waiter->tree.prio;
+ waiter->pi_tree.deadline = waiter->tree.deadline;
}
/*
- * Only use with rt_mutex_waiter_{less,equal}()
+ * Only use with rt_waiter_node_{less,equal}()
*/
+#define task_to_waiter_node(p) \
+ &(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
#define task_to_waiter(p) \
- &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
+ &(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
-static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
- struct rt_mutex_waiter *right)
+static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
+ struct rt_waiter_node *right)
{
if (left->prio < right->prio)
return 1;
@@ -364,8 +386,8 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
return 0;
}
-static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
- struct rt_mutex_waiter *right)
+static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
+ struct rt_waiter_node *right)
{
if (left->prio != right->prio)
return 0;
@@ -385,7 +407,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
struct rt_mutex_waiter *top_waiter)
{
- if (rt_mutex_waiter_less(waiter, top_waiter))
+ if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
return true;
#ifdef RT_MUTEX_BUILD_SPINLOCKS
@@ -393,30 +415,30 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
* Note that RT tasks are excluded from same priority (lateral)
* steals to prevent the introduction of an unbounded latency.
*/
- if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
+ if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
return false;
- return rt_mutex_waiter_equal(waiter, top_waiter);
+ return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
#else
return false;
#endif
}
#define __node_2_waiter(node) \
- rb_entry((node), struct rt_mutex_waiter, tree_entry)
+ rb_entry((node), struct rt_mutex_waiter, tree.entry)
static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
{
struct rt_mutex_waiter *aw = __node_2_waiter(a);
struct rt_mutex_waiter *bw = __node_2_waiter(b);
- if (rt_mutex_waiter_less(aw, bw))
+ if (rt_waiter_node_less(&aw->tree, &bw->tree))
return 1;
if (!build_ww_mutex())
return 0;
- if (rt_mutex_waiter_less(bw, aw))
+ if (rt_waiter_node_less(&bw->tree, &aw->tree))
return 0;
/* NOTE: relies on waiter->ww_ctx being set before insertion */
@@ -434,48 +456,58 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod
static __always_inline void
rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
- rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
+ lockdep_assert_held(&lock->wait_lock);
+
+ rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
}
static __always_inline void
rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
{
- if (RB_EMPTY_NODE(&waiter->tree_entry))
+ lockdep_assert_held(&lock->wait_lock);
+
+ if (RB_EMPTY_NODE(&waiter->tree.entry))
return;
- rb_erase_cached(&waiter->tree_entry, &lock->waiters);
- RB_CLEAR_NODE(&waiter->tree_entry);
+ rb_erase_cached(&waiter->tree.entry, &lock->waiters);
+ RB_CLEAR_NODE(&waiter->tree.entry);
}
-#define __node_2_pi_waiter(node) \
- rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
+#define __node_2_rt_node(node) \
+ rb_entry((node), struct rt_waiter_node, entry)
-static __always_inline bool
-__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
+static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
{
- return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
+ return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
}
static __always_inline void
rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
- rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
+ lockdep_assert_held(&task->pi_lock);
+
+ rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
}
static __always_inline void
rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
{
- if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
+ lockdep_assert_held(&task->pi_lock);
+
+ if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
return;
- rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
- RB_CLEAR_NODE(&waiter->pi_tree_entry);
+ rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
+ RB_CLEAR_NODE(&waiter->pi_tree.entry);
}
-static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
+static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
+ struct task_struct *p)
{
struct task_struct *pi_task = NULL;
+ lockdep_assert_held(&lock->wait_lock);
+ lockdep_assert(rt_mutex_owner(lock) == p);
lockdep_assert_held(&p->pi_lock);
if (task_has_pi_waiters(p))
@@ -571,9 +603,14 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
* Chain walk basics and protection scope
*
* [R] refcount on task
- * [P] task->pi_lock held
+ * [Pn] task->pi_lock held
* [L] rtmutex->wait_lock held
*
+ * Normal locking order:
+ *
+ * rtmutex->wait_lock
+ * task->pi_lock
+ *
* Step Description Protected by
* function arguments:
* @task [R]
@@ -588,27 +625,32 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
* again:
* loop_sanity_check();
* retry:
- * [1] lock(task->pi_lock); [R] acquire [P]
- * [2] waiter = task->pi_blocked_on; [P]
- * [3] check_exit_conditions_1(); [P]
- * [4] lock = waiter->lock; [P]
- * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L]
- * unlock(task->pi_lock); release [P]
+ * [1] lock(task->pi_lock); [R] acquire [P1]
+ * [2] waiter = task->pi_blocked_on; [P1]
+ * [3] check_exit_conditions_1(); [P1]
+ * [4] lock = waiter->lock; [P1]
+ * [5] if (!try_lock(lock->wait_lock)) { [P1] try to acquire [L]
+ * unlock(task->pi_lock); release [P1]
* goto retry;
* }
- * [6] check_exit_conditions_2(); [P] + [L]
- * [7] requeue_lock_waiter(lock, waiter); [P] + [L]
- * [8] unlock(task->pi_lock); release [P]
+ * [6] check_exit_conditions_2(); [P1] + [L]
+ * [7] requeue_lock_waiter(lock, waiter); [P1] + [L]
+ * [8] unlock(task->pi_lock); release [P1]
* put_task_struct(task); release [R]
* [9] check_exit_conditions_3(); [L]
* [10] task = owner(lock); [L]
* get_task_struct(task); [L] acquire [R]
- * lock(task->pi_lock); [L] acquire [P]
- * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
- * [12] check_exit_conditions_4(); [P] + [L]
- * [13] unlock(task->pi_lock); release [P]
+ * lock(task->pi_lock); [L] acquire [P2]
+ * [11] requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
+ * [12] check_exit_conditions_4(); [P2] + [L]
+ * [13] unlock(task->pi_lock); release [P2]
* unlock(lock->wait_lock); release [L]
* goto again;
+ *
+ * Where P1 is the blocking task and P2 is the lock owner; going up one step
+ * the owner becomes the next blocked task etc..
+ *
+*
*/
static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
enum rtmutex_chainwalk chwalk,
@@ -756,7 +798,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* enabled we continue, but stop the requeueing in the chain
* walk.
*/
- if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+ if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
if (!detect_deadlock)
goto out_unlock_pi;
else
@@ -764,13 +806,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
}
/*
- * [4] Get the next lock
+ * [4] Get the next lock; per holding task->pi_lock we can't unblock
+ * and guarantee @lock's existence.
*/
lock = waiter->lock;
/*
* [5] We need to trylock here as we are holding task->pi_lock,
* which is the reverse lock order versus the other rtmutex
* operations.
+ *
+ * Per the above, holding task->pi_lock guarantees lock exists, so
+ * inverting this lock order is infeasible from a life-time
+ * perspective.
*/
if (!raw_spin_trylock(&lock->wait_lock)) {
raw_spin_unlock_irq(&task->pi_lock);
@@ -874,17 +921,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* or
*
* DL CBS enforcement advancing the effective deadline.
- *
- * Even though pi_waiters also uses these fields, and that tree is only
- * updated in [11], we can do this here, since we hold [L], which
- * serializes all pi_waiters access and rb_erase() does not care about
- * the values of the node being removed.
*/
waiter_update_prio(waiter, task);
rt_mutex_enqueue(lock, waiter);
- /* [8] Release the task */
+ /*
+ * [8] Release the (blocking) task in preparation for
+ * taking the owner task in [10].
+ *
+ * Since we hold lock->waiter_lock, task cannot unblock, even if we
+ * release task->pi_lock.
+ */
raw_spin_unlock(&task->pi_lock);
put_task_struct(task);
@@ -908,7 +956,12 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
return 0;
}
- /* [10] Grab the next task, i.e. the owner of @lock */
+ /*
+ * [10] Grab the next task, i.e. the owner of @lock
+ *
+ * Per holding lock->wait_lock and checking for !owner above, there
+ * must be an owner and it cannot go away.
+ */
task = get_task_struct(rt_mutex_owner(lock));
raw_spin_lock(&task->pi_lock);
@@ -921,8 +974,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
* and adjust the priority of the owner.
*/
rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
+ waiter_clone_prio(waiter, task);
rt_mutex_enqueue_pi(task, waiter);
- rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(lock, task);
} else if (prerequeue_top_waiter == waiter) {
/*
@@ -937,8 +991,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
*/
rt_mutex_dequeue_pi(task, waiter);
waiter = rt_mutex_top_waiter(lock);
+ waiter_clone_prio(waiter, task);
rt_mutex_enqueue_pi(task, waiter);
- rt_mutex_adjust_prio(task);
+ rt_mutex_adjust_prio(lock, task);
} else {
/*
* Nothing changed. No need to do any priority
@@ -1154,6 +1209,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
waiter->task = task;
waiter->lock = lock;
waiter_update_prio(waiter, task);
+ waiter_clone_prio(waiter, task);
/* Get the top priority waiter on the lock */
if (rt_mutex_has_waiters(lock))
@@ -1187,7 +1243,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
rt_mutex_dequeue_pi(owner, top_waiter);
rt_mutex_enqueue_pi(owner, waiter);
- rt_mutex_adjust_prio(owner);
+ rt_mutex_adjust_prio(lock, owner);
if (owner->pi_blocked_on)
chain_walk = 1;
} else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
@@ -1234,6 +1290,8 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
{
struct rt_mutex_waiter *waiter;
+ lockdep_assert_held(&lock->wait_lock);
+
raw_spin_lock(&current->pi_lock);
waiter = rt_mutex_top_waiter(lock);
@@ -1246,7 +1304,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
* task unblocks.
*/
rt_mutex_dequeue_pi(current, waiter);
- rt_mutex_adjust_prio(current);
+ rt_mutex_adjust_prio(lock, current);
/*
* As we are waking up the top waiter, and the waiter stays
@@ -1482,7 +1540,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
if (rt_mutex_has_waiters(lock))
rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
- rt_mutex_adjust_prio(owner);
+ rt_mutex_adjust_prio(lock, owner);
/* Store the lock on which owner is blocked or NULL */
next_lock = task_blocked_on_lock(owner);
diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
index cb9fdff76a8a..a6974d044593 100644
--- a/kernel/locking/rtmutex_api.c
+++ b/kernel/locking/rtmutex_api.c
@@ -459,7 +459,7 @@ void __sched rt_mutex_adjust_pi(struct task_struct *task)
raw_spin_lock_irqsave(&task->pi_lock, flags);
waiter = task->pi_blocked_on;
- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
+ if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
raw_spin_unlock_irqrestore(&task->pi_lock, flags);
return;
}
diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
index c47e8361bfb5..1162e07cdaea 100644
--- a/kernel/locking/rtmutex_common.h
+++ b/kernel/locking/rtmutex_common.h
@@ -17,27 +17,44 @@
#include <linux/rtmutex.h>
#include <linux/sched/wake_q.h>
+
+/*
+ * This is a helper for the struct rt_mutex_waiter below. A waiter goes in two
+ * separate trees and they need their own copy of the sort keys because of
+ * different locking requirements.
+ *
+ * @entry: rbtree node to enqueue into the waiters tree
+ * @prio: Priority of the waiter
+ * @deadline: Deadline of the waiter if applicable
+ *
+ * See rt_waiter_node_less() and waiter_*_prio().
+ */
+struct rt_waiter_node {
+ struct rb_node entry;
+ int prio;
+ u64 deadline;
+};
+
/*
* This is the control structure for tasks blocked on a rt_mutex,
* which is allocated on the kernel stack on of the blocked task.
*
- * @tree_entry: pi node to enqueue into the mutex waiters tree
- * @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree
+ * @tree: node to enqueue into the mutex waiters tree
+ * @pi_tree: node to enqueue into the mutex owner waiters tree
* @task: task reference to the blocked task
* @lock: Pointer to the rt_mutex on which the waiter blocks
* @wake_state: Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT)
- * @prio: Priority of the waiter
- * @deadline: Deadline of the waiter if applicable
* @ww_ctx: WW context pointer
+ *
+ * @tree is ordered by @lock->wait_lock
+ * @pi_tree is ordered by rt_mutex_owner(@lock)->pi_lock
*/
struct rt_mutex_waiter {
- struct rb_node tree_entry;
- struct rb_node pi_tree_entry;
+ struct rt_waiter_node tree;
+ struct rt_waiter_node pi_tree;
struct task_struct *task;
struct rt_mutex_base *lock;
unsigned int wake_state;
- int prio;
- u64 deadline;
struct ww_acquire_ctx *ww_ctx;
};
@@ -105,7 +122,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
{
struct rb_node *leftmost = rb_first_cached(&lock->waiters);
- return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
+ return rb_entry(leftmost, struct rt_mutex_waiter, tree.entry) == waiter;
}
static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
@@ -113,8 +130,10 @@ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *
struct rb_node *leftmost = rb_first_cached(&lock->waiters);
struct rt_mutex_waiter *w = NULL;
+ lockdep_assert_held(&lock->wait_lock);
+
if (leftmost) {
- w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry);
+ w = rb_entry(leftmost, struct rt_mutex_waiter, tree.entry);
BUG_ON(w->lock != lock);
}
return w;
@@ -127,8 +146,10 @@ static inline int task_has_pi_waiters(struct task_struct *p)
static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
{
+ lockdep_assert_held(&p->pi_lock);
+
return rb_entry(p->pi_waiters.rb_leftmost, struct rt_mutex_waiter,
- pi_tree_entry);
+ pi_tree.entry);
}
#define RT_MUTEX_HAS_WAITERS 1UL
@@ -190,8 +211,8 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
{
debug_rt_mutex_init_waiter(waiter);
- RB_CLEAR_NODE(&waiter->pi_tree_entry);
- RB_CLEAR_NODE(&waiter->tree_entry);
+ RB_CLEAR_NODE(&waiter->pi_tree.entry);
+ RB_CLEAR_NODE(&waiter->tree.entry);
waiter->wake_state = TASK_NORMAL;
waiter->task = NULL;
}
diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
index 56f139201f24..3ad2cc4823e5 100644
--- a/kernel/locking/ww_mutex.h
+++ b/kernel/locking/ww_mutex.h
@@ -96,25 +96,25 @@ __ww_waiter_first(struct rt_mutex *lock)
struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
if (!n)
return NULL;
- return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+ return rb_entry(n, struct rt_mutex_waiter, tree.entry);
}
static inline struct rt_mutex_waiter *
__ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w)
{
- struct rb_node *n = rb_next(&w->tree_entry);
+ struct rb_node *n = rb_next(&w->tree.entry);
if (!n)
return NULL;
- return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+ return rb_entry(n, struct rt_mutex_waiter, tree.entry);
}
static inline struct rt_mutex_waiter *
__ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
{
- struct rb_node *n = rb_prev(&w->tree_entry);
+ struct rb_node *n = rb_prev(&w->tree.entry);
if (!n)
return NULL;
- return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+ return rb_entry(n, struct rt_mutex_waiter, tree.entry);
}
static inline struct rt_mutex_waiter *
@@ -123,7 +123,7 @@ __ww_waiter_last(struct rt_mutex *lock)
struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
if (!n)
return NULL;
- return rb_entry(n, struct rt_mutex_waiter, tree_entry);
+ return rb_entry(n, struct rt_mutex_waiter, tree.entry);
}
static inline void
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index e1b4bfa938dd..2b4a946a6ff5 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -1166,7 +1166,7 @@ static ssize_t resume_store(struct kobject *kobj, struct kobj_attribute *attr,
int error;
if (!hibernation_available())
- return 0;
+ return n;
if (len && buf[len-1] == '\n')
len--;
diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c
index d57a5c1c1cd9..3561ab533dd4 100644
--- a/kernel/sched/completion.c
+++ b/kernel/sched/completion.c
@@ -13,6 +13,23 @@
* Waiting for completion is a typically sync point, but not an exclusion point.
*/
+static void complete_with_flags(struct completion *x, int wake_flags)
+{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&x->wait.lock, flags);
+
+ if (x->done != UINT_MAX)
+ x->done++;
+ swake_up_locked(&x->wait, wake_flags);
+ raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+
+void complete_on_current_cpu(struct completion *x)
+{
+ return complete_with_flags(x, WF_CURRENT_CPU);
+}
+
/**
* complete: - signals a single thread waiting on this completion
* @x: holds the state of this particular completion
@@ -27,14 +44,7 @@
*/
void complete(struct completion *x)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&x->wait.lock, flags);
-
- if (x->done != UINT_MAX)
- x->done++;
- swake_up_locked(&x->wait);
- raw_spin_unlock_irqrestore(&x->wait.lock, flags);
+ complete_with_flags(x, 0);
}
EXPORT_SYMBOL(complete);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c52c2eba7c73..4d63e063608a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4193,8 +4193,7 @@ bool ttwu_state_match(struct task_struct *p, unsigned int state, int *success)
* Return: %true if @p->state changes (an actual wakeup was done),
* %false otherwise.
*/
-static int
-try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
+int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
unsigned long flags;
int cpu, success = 0;
@@ -7030,7 +7029,7 @@ asmlinkage __visible void __sched preempt_schedule_irq(void)
int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flags,
void *key)
{
- WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~WF_SYNC);
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_SCHED_DEBUG) && wake_flags & ~(WF_SYNC|WF_CURRENT_CPU));
return try_to_wake_up(curr->private, mode, wake_flags);
}
EXPORT_SYMBOL(default_wake_function);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b3e25be58e2b..ceb5d4c4738e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -7741,6 +7741,10 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags)
if (wake_flags & WF_TTWU) {
record_wakee(p);
+ if ((wake_flags & WF_CURRENT_CPU) &&
+ cpumask_test_cpu(cpu, p->cpus_ptr))
+ return cpu;
+
if (sched_energy_enabled()) {
new_cpu = find_energy_efficient_cpu(p, prev_cpu);
if (new_cpu >= 0)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e93e006a942b..48d0be005f08 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2131,12 +2131,13 @@ static inline int task_on_rq_migrating(struct task_struct *p)
}
/* Wake flags. The first three directly map to some SD flag value */
-#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
-#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
-#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */
+#define WF_EXEC 0x02 /* Wakeup after exec; maps to SD_BALANCE_EXEC */
+#define WF_FORK 0x04 /* Wakeup after fork; maps to SD_BALANCE_FORK */
+#define WF_TTWU 0x08 /* Wakeup; maps to SD_BALANCE_WAKE */
-#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
-#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
+#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
+#define WF_MIGRATED 0x20 /* Internal use, task got migrated */
+#define WF_CURRENT_CPU 0x40 /* Prefer to move the wakee to the current CPU. */
#ifdef CONFIG_SMP
static_assert(WF_EXEC == SD_BALANCE_EXEC);
@@ -3229,6 +3230,8 @@ static inline bool is_per_cpu_kthread(struct task_struct *p)
extern void swake_up_all_locked(struct swait_queue_head *q);
extern void __prepare_to_swait(struct swait_queue_head *q, struct swait_queue *wait);
+extern int try_to_wake_up(struct task_struct *tsk, unsigned int state, int wake_flags);
+
#ifdef CONFIG_PREEMPT_DYNAMIC
extern int preempt_dynamic_mode;
extern int sched_dynamic_mode(const char *str);
diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c
index 76b9b796e695..72505cd3b60a 100644
--- a/kernel/sched/swait.c
+++ b/kernel/sched/swait.c
@@ -18,7 +18,7 @@ EXPORT_SYMBOL(__init_swait_queue_head);
* If for some reason it would return 0, that means the previously waiting
* task is already running, so it will observe condition true (or has already).
*/
-void swake_up_locked(struct swait_queue_head *q)
+void swake_up_locked(struct swait_queue_head *q, int wake_flags)
{
struct swait_queue *curr;
@@ -26,7 +26,7 @@ void swake_up_locked(struct swait_queue_head *q)
return;
curr = list_first_entry(&q->task_list, typeof(*curr), task_list);
- wake_up_process(curr->task);
+ try_to_wake_up(curr->task, TASK_NORMAL, wake_flags);
list_del_init(&curr->task_list);
}
EXPORT_SYMBOL(swake_up_locked);
@@ -41,7 +41,7 @@ EXPORT_SYMBOL(swake_up_locked);
void swake_up_all_locked(struct swait_queue_head *q)
{
while (!list_empty(&q->task_list))
- swake_up_locked(q);
+ swake_up_locked(q, 0);
}
void swake_up_one(struct swait_queue_head *q)
@@ -49,7 +49,7 @@ void swake_up_one(struct swait_queue_head *q)
unsigned long flags;
raw_spin_lock_irqsave(&q->lock, flags);
- swake_up_locked(q);
+ swake_up_locked(q, 0);
raw_spin_unlock_irqrestore(&q->lock, flags);
}
EXPORT_SYMBOL(swake_up_one);
diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
index 48c53e4739ea..802d98cf2de3 100644
--- a/kernel/sched/wait.c
+++ b/kernel/sched/wait.c
@@ -161,6 +161,11 @@ int __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
}
EXPORT_SYMBOL(__wake_up);
+void __wake_up_on_current_cpu(struct wait_queue_head *wq_head, unsigned int mode, void *key)
+{
+ __wake_up_common_lock(wq_head, mode, 1, WF_CURRENT_CPU, key);
+}
+
/*
* Same as __wake_up but called with the spinlock in wait_queue_head_t held.
*/
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index d3e584065c7f..255999ba9190 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -110,11 +110,13 @@ struct seccomp_knotif {
* @flags: The flags for the new file descriptor. At the moment, only O_CLOEXEC
* is allowed.
* @ioctl_flags: The flags used for the seccomp_addfd ioctl.
+ * @setfd: whether or not SECCOMP_ADDFD_FLAG_SETFD was set during notify_addfd
* @ret: The return value of the installing process. It is set to the fd num
* upon success (>= 0).
* @completion: Indicates that the installing process has completed fd
* installation, or gone away (either due to successful
* reply, or signal)
+ * @list: list_head for chaining seccomp_kaddfd together.
*
*/
struct seccomp_kaddfd {
@@ -138,14 +140,17 @@ struct seccomp_kaddfd {
* structure is fairly large, we store the notification-specific stuff in a
* separate structure.
*
- * @request: A semaphore that users of this notification can wait on for
- * changes. Actual reads and writes are still controlled with
- * filter->notify_lock.
+ * @requests: A semaphore that users of this notification can wait on for
+ * changes. Actual reads and writes are still controlled with
+ * filter->notify_lock.
+ * @flags: A set of SECCOMP_USER_NOTIF_FD_* flags.
* @next_id: The id of the next request.
* @notifications: A list of struct seccomp_knotif elements.
*/
+
struct notification {
- struct semaphore request;
+ atomic_t requests;
+ u32 flags;
u64 next_id;
struct list_head notifications;
};
@@ -555,6 +560,8 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)
* drop its reference count, and notify
* about unused filters
*
+ * @tsk: task the filter should be released from.
+ *
* This function should only be called when the task is exiting as
* it detaches it from its filter tree. As such, READ_ONCE() and
* barriers are not needed here, as would normally be needed.
@@ -574,6 +581,8 @@ void seccomp_filter_release(struct task_struct *tsk)
/**
* seccomp_sync_threads: sets all threads to use current's filter
*
+ * @flags: SECCOMP_FILTER_FLAG_* flags to set during sync.
+ *
* Expects sighand and cred_guard_mutex locks to be held, and for
* seccomp_can_sync_threads() to have returned success already
* without dropping the locks.
@@ -1116,8 +1125,11 @@ static int seccomp_do_user_notification(int this_syscall,
list_add_tail(&n.list, &match->notif->notifications);
INIT_LIST_HEAD(&n.addfd);
- up(&match->notif->request);
- wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
+ atomic_inc(&match->notif->requests);
+ if (match->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+ wake_up_poll_on_current_cpu(&match->wqh, EPOLLIN | EPOLLRDNORM);
+ else
+ wake_up_poll(&match->wqh, EPOLLIN | EPOLLRDNORM);
/*
* This is where we wait for a reply from userspace.
@@ -1450,6 +1462,37 @@ find_notification(struct seccomp_filter *filter, u64 id)
return NULL;
}
+static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int sync,
+ void *key)
+{
+ /* Avoid a wakeup if event not interesting for us. */
+ if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR)))
+ return 0;
+ return autoremove_wake_function(wait, mode, sync, key);
+}
+
+static int recv_wait_event(struct seccomp_filter *filter)
+{
+ DEFINE_WAIT_FUNC(wait, recv_wake_function);
+ int ret;
+
+ if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+ return 0;
+
+ for (;;) {
+ ret = prepare_to_wait_event(&filter->wqh, &wait, TASK_INTERRUPTIBLE);
+
+ if (atomic_dec_if_positive(&filter->notif->requests) >= 0)
+ break;
+
+ if (ret)
+ return ret;
+
+ schedule();
+ }
+ finish_wait(&filter->wqh, &wait);
+ return 0;
+}
static long seccomp_notify_recv(struct seccomp_filter *filter,
void __user *buf)
@@ -1467,7 +1510,7 @@ static long seccomp_notify_recv(struct seccomp_filter *filter,
memset(&unotif, 0, sizeof(unotif));
- ret = down_interruptible(&filter->notif->request);
+ ret = recv_wait_event(filter);
if (ret < 0)
return ret;
@@ -1515,7 +1558,8 @@ out:
if (should_sleep_killable(filter, knotif))
complete(&knotif->ready);
knotif->state = SECCOMP_NOTIFY_INIT;
- up(&filter->notif->request);
+ atomic_inc(&filter->notif->requests);
+ wake_up_poll(&filter->wqh, EPOLLIN | EPOLLRDNORM);
}
mutex_unlock(&filter->notify_lock);
}
@@ -1561,7 +1605,10 @@ static long seccomp_notify_send(struct seccomp_filter *filter,
knotif->error = resp.error;
knotif->val = resp.val;
knotif->flags = resp.flags;
- complete(&knotif->ready);
+ if (filter->notif->flags & SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+ complete_on_current_cpu(&knotif->ready);
+ else
+ complete(&knotif->ready);
out:
mutex_unlock(&filter->notify_lock);
return ret;
@@ -1591,6 +1638,22 @@ static long seccomp_notify_id_valid(struct seccomp_filter *filter,
return ret;
}
+static long seccomp_notify_set_flags(struct seccomp_filter *filter,
+ unsigned long flags)
+{
+ long ret;
+
+ if (flags & ~SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP)
+ return -EINVAL;
+
+ ret = mutex_lock_interruptible(&filter->notify_lock);
+ if (ret < 0)
+ return ret;
+ filter->notif->flags = flags;
+ mutex_unlock(&filter->notify_lock);
+ return 0;
+}
+
static long seccomp_notify_addfd(struct seccomp_filter *filter,
struct seccomp_notif_addfd __user *uaddfd,
unsigned int size)
@@ -1720,6 +1783,8 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd,
case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR:
case SECCOMP_IOCTL_NOTIF_ID_VALID:
return seccomp_notify_id_valid(filter, buf);
+ case SECCOMP_IOCTL_NOTIF_SET_FLAGS:
+ return seccomp_notify_set_flags(filter, arg);
}
/* Extensible Argument ioctls */
@@ -1777,7 +1842,6 @@ static struct file *init_listener(struct seccomp_filter *filter)
if (!filter->notif)
goto out;
- sema_init(&filter->notif->request, 0);
filter->notif->next_id = get_random_u64();
INIT_LIST_HEAD(&filter->notif->notifications);
diff --git a/kernel/signal.c b/kernel/signal.c
index b5370fe5c198..128e9bb3d1a2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -562,6 +562,10 @@ bool unhandled_signal(struct task_struct *tsk, int sig)
if (handler != SIG_IGN && handler != SIG_DFL)
return false;
+ /* If dying, we handle all new signals by ignoring them */
+ if (fatal_signal_pending(tsk))
+ return false;
+
/* if ptraced, let the tracer determine */
return !tsk->ptrace;
}
diff --git a/kernel/sys.c b/kernel/sys.c
index 05f838929e72..2410e3999ebe 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2535,11 +2535,6 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
return -EINVAL;
break;
- case PR_GET_AUXV:
- if (arg4 || arg5)
- return -EINVAL;
- error = prctl_get_auxv((void __user *)arg2, arg3);
- break;
default:
return -EINVAL;
}
@@ -2694,6 +2689,11 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_SET_VMA:
error = prctl_set_vma(arg2, arg3, arg4, arg5);
break;
+ case PR_GET_AUXV:
+ if (arg4 || arg5)
+ return -EINVAL;
+ error = prctl_get_auxv((void __user *)arg2, arg3);
+ break;
#ifdef CONFIG_KSM
case PR_SET_MEMORY_MERGE:
if (arg3 || arg4 || arg5)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 5f2dcabad202..bd1a42b23f3f 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -661,8 +661,7 @@ static DEFINE_PER_CPU(int, bpf_trace_nest_level);
BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
u64, flags, void *, data, u64, size)
{
- struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
- int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
+ struct bpf_trace_sample_data *sds;
struct perf_raw_record raw = {
.frag = {
.size = size,
@@ -670,7 +669,11 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
},
};
struct perf_sample_data *sd;
- int err;
+ int nest_level, err;
+
+ preempt_disable();
+ sds = this_cpu_ptr(&bpf_trace_sds);
+ nest_level = this_cpu_inc_return(bpf_trace_nest_level);
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
err = -EBUSY;
@@ -688,9 +691,9 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
perf_sample_save_raw_data(sd, &raw);
err = __bpf_perf_event_output(regs, map, flags, sd);
-
out:
this_cpu_dec(bpf_trace_nest_level);
+ preempt_enable();
return err;
}
@@ -715,7 +718,6 @@ static DEFINE_PER_CPU(struct bpf_trace_sample_data, bpf_misc_sds);
u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
{
- int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
struct perf_raw_frag frag = {
.copy = ctx_copy,
.size = ctx_size,
@@ -732,8 +734,12 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
};
struct perf_sample_data *sd;
struct pt_regs *regs;
+ int nest_level;
u64 ret;
+ preempt_disable();
+ nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
+
if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
ret = -EBUSY;
goto out;
@@ -748,6 +754,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
ret = __bpf_perf_event_output(regs, map, flags, sd);
out:
this_cpu_dec(bpf_event_output_nest_level);
+ preempt_enable();
return ret;
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 14d8001140c8..52dea5dd5362 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -523,6 +523,8 @@ struct ring_buffer_per_cpu {
rb_time_t before_stamp;
u64 event_stamp[MAX_NEST];
u64 read_stamp;
+ /* pages removed since last reset */
+ unsigned long pages_removed;
/* ring buffer pages to update, > 0 to add, < 0 to remove */
long nr_pages_to_update;
struct list_head new_pages; /* new pages to add */
@@ -536,6 +538,7 @@ struct trace_buffer {
unsigned flags;
int cpus;
atomic_t record_disabled;
+ atomic_t resizing;
cpumask_var_t cpumask;
struct lock_class_key *reader_lock_key;
@@ -558,6 +561,7 @@ struct ring_buffer_iter {
struct buffer_page *head_page;
struct buffer_page *cache_reader_page;
unsigned long cache_read;
+ unsigned long cache_pages_removed;
u64 read_stamp;
u64 page_stamp;
struct ring_buffer_event *event;
@@ -946,6 +950,7 @@ static void rb_wake_up_waiters(struct irq_work *work)
/**
* ring_buffer_wake_waiters - wake up any waiters on this ring buffer
* @buffer: The ring buffer to wake waiters on
+ * @cpu: The CPU buffer to wake waiters on
*
* In the case of a file that represents a ring buffer is closing,
* it is prudent to wake up any waiters that are on this.
@@ -1956,6 +1961,8 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
to_remove = rb_list_head(to_remove)->next;
head_bit |= (unsigned long)to_remove & RB_PAGE_HEAD;
}
+ /* Read iterators need to reset themselves when some pages removed */
+ cpu_buffer->pages_removed += nr_removed;
next_page = rb_list_head(to_remove)->next;
@@ -1977,12 +1984,6 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned long nr_pages)
cpu_buffer->head_page = list_entry(next_page,
struct buffer_page, list);
- /*
- * change read pointer to make sure any read iterators reset
- * themselves
- */
- cpu_buffer->read = 0;
-
/* pages are removed, resume tracing and then free the pages */
atomic_dec(&cpu_buffer->record_disabled);
raw_spin_unlock_irq(&cpu_buffer->reader_lock);
@@ -2167,7 +2168,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
/* prevent another thread from changing buffer sizes */
mutex_lock(&buffer->mutex);
-
+ atomic_inc(&buffer->resizing);
if (cpu_id == RING_BUFFER_ALL_CPUS) {
/*
@@ -2322,6 +2323,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
atomic_dec(&buffer->record_disabled);
}
+ atomic_dec(&buffer->resizing);
mutex_unlock(&buffer->mutex);
return 0;
@@ -2342,6 +2344,7 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
}
}
out_err_unlock:
+ atomic_dec(&buffer->resizing);
mutex_unlock(&buffer->mutex);
return err;
}
@@ -3373,7 +3376,6 @@ void ring_buffer_nest_end(struct trace_buffer *buffer)
/**
* ring_buffer_unlock_commit - commit a reserved
* @buffer: The buffer to commit to
- * @event: The event pointer to commit.
*
* This commits the data to the ring buffer, and releases any locks held.
*
@@ -4392,6 +4394,7 @@ static void rb_iter_reset(struct ring_buffer_iter *iter)
iter->cache_reader_page = iter->head_page;
iter->cache_read = cpu_buffer->read;
+ iter->cache_pages_removed = cpu_buffer->pages_removed;
if (iter->head) {
iter->read_stamp = cpu_buffer->read_stamp;
@@ -4846,12 +4849,13 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
buffer = cpu_buffer->buffer;
/*
- * Check if someone performed a consuming read to
- * the buffer. A consuming read invalidates the iterator
- * and we need to reset the iterator in this case.
+ * Check if someone performed a consuming read to the buffer
+ * or removed some pages from the buffer. In these cases,
+ * iterator was invalidated and we need to reset it.
*/
if (unlikely(iter->cache_read != cpu_buffer->read ||
- iter->cache_reader_page != cpu_buffer->reader_page))
+ iter->cache_reader_page != cpu_buffer->reader_page ||
+ iter->cache_pages_removed != cpu_buffer->pages_removed))
rb_iter_reset(iter);
again:
@@ -5295,6 +5299,7 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->last_overrun = 0;
rb_head_page_activate(cpu_buffer);
+ cpu_buffer->pages_removed = 0;
}
/* Must have disabled the cpu buffer then done a synchronize_rcu */
@@ -5353,7 +5358,6 @@ EXPORT_SYMBOL_GPL(ring_buffer_reset_cpu);
/**
* ring_buffer_reset_online_cpus - reset a ring buffer per CPU buffer
* @buffer: The ring buffer to reset a per cpu buffer of
- * @cpu: The CPU buffer to be reset
*/
void ring_buffer_reset_online_cpus(struct trace_buffer *buffer)
{
@@ -5541,6 +5545,15 @@ int ring_buffer_swap_cpu(struct trace_buffer *buffer_a,
if (local_read(&cpu_buffer_b->committing))
goto out_dec;
+ /*
+ * When resize is in progress, we cannot swap it because
+ * it will mess the state of the cpu buffer.
+ */
+ if (atomic_read(&buffer_a->resizing))
+ goto out_dec;
+ if (atomic_read(&buffer_b->resizing))
+ goto out_dec;
+
buffer_a->buffers[cpu] = cpu_buffer_b;
buffer_b->buffers[cpu] = cpu_buffer_a;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index be847d45d81c..8e64aaad5361 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1928,9 +1928,10 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
* place on this CPU. We fail to record, but we reset
* the max trace buffer (no one writes directly to it)
* and flag that it failed.
+ * Another reason is resize is in progress.
*/
trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
- "Failed to swap buffers due to commit in progress\n");
+ "Failed to swap buffers due to commit or resize in progress\n");
}
WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
@@ -4212,8 +4213,15 @@ static void *s_start(struct seq_file *m, loff_t *pos)
* will point to the same string as current_trace->name.
*/
mutex_lock(&trace_types_lock);
- if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name))
+ if (unlikely(tr->current_trace && iter->trace->name != tr->current_trace->name)) {
+ /* Close iter->trace before switching to the new current tracer */
+ if (iter->trace->close)
+ iter->trace->close(iter);
*iter->trace = *tr->current_trace;
+ /* Reopen the new current tracer */
+ if (iter->trace->open)
+ iter->trace->open(iter);
+ }
mutex_unlock(&trace_types_lock);
#ifdef CONFIG_TRACER_MAX_TRACE
@@ -5276,11 +5284,17 @@ int tracing_set_cpumask(struct trace_array *tr,
!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
+#ifdef CONFIG_TRACER_MAX_TRACE
+ ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
+#endif
}
if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
cpumask_test_cpu(cpu, tracing_cpumask_new)) {
atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
+#ifdef CONFIG_TRACER_MAX_TRACE
+ ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
+#endif
}
}
arch_spin_unlock(&tr->max_lock);
@@ -6704,10 +6718,36 @@ tracing_max_lat_write(struct file *filp, const char __user *ubuf,
#endif
+static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
+{
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+ if (cpumask_empty(tr->pipe_cpumask)) {
+ cpumask_setall(tr->pipe_cpumask);
+ return 0;
+ }
+ } else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
+ cpumask_set_cpu(cpu, tr->pipe_cpumask);
+ return 0;
+ }
+ return -EBUSY;
+}
+
+static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
+{
+ if (cpu == RING_BUFFER_ALL_CPUS) {
+ WARN_ON(!cpumask_full(tr->pipe_cpumask));
+ cpumask_clear(tr->pipe_cpumask);
+ } else {
+ WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
+ cpumask_clear_cpu(cpu, tr->pipe_cpumask);
+ }
+}
+
static int tracing_open_pipe(struct inode *inode, struct file *filp)
{
struct trace_array *tr = inode->i_private;
struct trace_iterator *iter;
+ int cpu;
int ret;
ret = tracing_check_open_get_tr(tr);
@@ -6715,13 +6755,16 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
return ret;
mutex_lock(&trace_types_lock);
+ cpu = tracing_get_cpu(inode);
+ ret = open_pipe_on_cpu(tr, cpu);
+ if (ret)
+ goto fail_pipe_on_cpu;
/* create a buffer to store the information to pass to userspace */
iter = kzalloc(sizeof(*iter), GFP_KERNEL);
if (!iter) {
ret = -ENOMEM;
- __trace_array_put(tr);
- goto out;
+ goto fail_alloc_iter;
}
trace_seq_init(&iter->seq);
@@ -6744,7 +6787,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
iter->tr = tr;
iter->array_buffer = &tr->array_buffer;
- iter->cpu_file = tracing_get_cpu(inode);
+ iter->cpu_file = cpu;
mutex_init(&iter->mutex);
filp->private_data = iter;
@@ -6754,12 +6797,15 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
nonseekable_open(inode, filp);
tr->trace_ref++;
-out:
+
mutex_unlock(&trace_types_lock);
return ret;
fail:
kfree(iter);
+fail_alloc_iter:
+ close_pipe_on_cpu(tr, cpu);
+fail_pipe_on_cpu:
__trace_array_put(tr);
mutex_unlock(&trace_types_lock);
return ret;
@@ -6776,7 +6822,7 @@ static int tracing_release_pipe(struct inode *inode, struct file *file)
if (iter->trace->pipe_close)
iter->trace->pipe_close(iter);
-
+ close_pipe_on_cpu(tr, iter->cpu_file);
mutex_unlock(&trace_types_lock);
free_cpumask_var(iter->started);
@@ -9440,6 +9486,9 @@ static struct trace_array *trace_array_create(const char *name)
if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
goto out_free_tr;
+ if (!alloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
+ goto out_free_tr;
+
tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
@@ -9481,6 +9530,7 @@ static struct trace_array *trace_array_create(const char *name)
out_free_tr:
ftrace_free_ftrace_ops(tr);
free_trace_buffers(tr);
+ free_cpumask_var(tr->pipe_cpumask);
free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
@@ -9583,6 +9633,7 @@ static int __remove_instance(struct trace_array *tr)
}
kfree(tr->topts);
+ free_cpumask_var(tr->pipe_cpumask);
free_cpumask_var(tr->tracing_cpumask);
kfree(tr->name);
kfree(tr);
@@ -10380,12 +10431,14 @@ __init static int tracer_alloc_buffers(void)
if (trace_create_savedcmd() < 0)
goto out_free_temp_buffer;
+ if (!alloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
+ goto out_free_savedcmd;
+
/* TODO: make the number of buffers hot pluggable with CPUS */
if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
- goto out_free_savedcmd;
+ goto out_free_pipe_cpumask;
}
-
if (global_trace.buffer_disabled)
tracing_off();
@@ -10438,6 +10491,8 @@ __init static int tracer_alloc_buffers(void)
return 0;
+out_free_pipe_cpumask:
+ free_cpumask_var(global_trace.pipe_cpumask);
out_free_savedcmd:
free_saved_cmdlines_buffer(savedcmd);
out_free_temp_buffer:
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index e1edc2197fc8..73eaec158473 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -377,6 +377,8 @@ struct trace_array {
struct list_head events;
struct trace_event_file *trace_marker_file;
cpumask_var_t tracing_cpumask; /* only trace on set CPUs */
+ /* one per_cpu trace_pipe can be opened by only one user */
+ cpumask_var_t pipe_cpumask;
int ref;
int trace_ref;
#ifdef CONFIG_FUNCTION_TRACER
@@ -1295,6 +1297,14 @@ static inline void trace_branch_disable(void)
/* set ring buffers to default size if not already done so */
int tracing_update_buffers(void);
+union trace_synth_field {
+ u8 as_u8;
+ u16 as_u16;
+ u32 as_u32;
+ u64 as_u64;
+ struct trace_dynamic_info as_dynamic;
+};
+
struct ftrace_event_field {
struct list_head link;
const char *name;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5d6ae4eae510..578f1f7d49a6 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -611,7 +611,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
{
struct trace_event_call *call = file->event_call;
struct trace_array *tr = file->tr;
- unsigned long file_flags = file->flags;
int ret = 0;
int disable;
@@ -635,6 +634,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
break;
disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
+ /* Disable use of trace_buffered_event */
+ trace_buffered_event_disable();
} else
disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
@@ -673,6 +674,8 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
if (atomic_inc_return(&file->sm_ref) > 1)
break;
set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
+ /* Enable use of trace_buffered_event */
+ trace_buffered_event_enable();
}
if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
@@ -712,15 +715,6 @@ static int __ftrace_event_enable_disable(struct trace_event_file *file,
break;
}
- /* Enable or disable use of trace_buffered_event */
- if ((file_flags & EVENT_FILE_FL_SOFT_DISABLED) !=
- (file->flags & EVENT_FILE_FL_SOFT_DISABLED)) {
- if (file->flags & EVENT_FILE_FL_SOFT_DISABLED)
- trace_buffered_event_enable();
- else
- trace_buffered_event_disable();
- }
-
return ret;
}
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index c8c61381eba4..d06938ae0717 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -6668,7 +6668,8 @@ static int event_hist_trigger_parse(struct event_command *cmd_ops,
goto out_unreg;
if (has_hist_vars(hist_data) || hist_data->n_var_refs) {
- if (save_hist_vars(hist_data))
+ ret = save_hist_vars(hist_data);
+ if (ret)
goto out_unreg;
}
diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index d6a70aff2410..9897d0bfcab7 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -127,7 +127,7 @@ static bool synth_event_match(const char *system, const char *event,
struct synth_trace_event {
struct trace_entry ent;
- u64 fields[];
+ union trace_synth_field fields[];
};
static int synth_event_define_fields(struct trace_event_call *call)
@@ -321,19 +321,19 @@ static const char *synth_field_fmt(char *type)
static void print_synth_event_num_val(struct trace_seq *s,
char *print_fmt, char *name,
- int size, u64 val, char *space)
+ int size, union trace_synth_field *val, char *space)
{
switch (size) {
case 1:
- trace_seq_printf(s, print_fmt, name, (u8)val, space);
+ trace_seq_printf(s, print_fmt, name, val->as_u8, space);
break;
case 2:
- trace_seq_printf(s, print_fmt, name, (u16)val, space);
+ trace_seq_printf(s, print_fmt, name, val->as_u16, space);
break;
case 4:
- trace_seq_printf(s, print_fmt, name, (u32)val, space);
+ trace_seq_printf(s, print_fmt, name, val->as_u32, space);
break;
default:
@@ -350,7 +350,7 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
struct trace_seq *s = &iter->seq;
struct synth_trace_event *entry;
struct synth_event *se;
- unsigned int i, n_u64;
+ unsigned int i, j, n_u64;
char print_fmt[32];
const char *fmt;
@@ -374,43 +374,28 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
/* parameter values */
if (se->fields[i]->is_string) {
if (se->fields[i]->is_dynamic) {
- u32 offset, data_offset;
- char *str_field;
-
- offset = (u32)entry->fields[n_u64];
- data_offset = offset & 0xffff;
-
- str_field = (char *)entry + data_offset;
+ union trace_synth_field *data = &entry->fields[n_u64];
trace_seq_printf(s, print_fmt, se->fields[i]->name,
STR_VAR_LEN_MAX,
- str_field,
+ (char *)entry + data->as_dynamic.offset,
i == se->n_fields - 1 ? "" : " ");
n_u64++;
} else {
trace_seq_printf(s, print_fmt, se->fields[i]->name,
STR_VAR_LEN_MAX,
- (char *)&entry->fields[n_u64],
+ (char *)&entry->fields[n_u64].as_u64,
i == se->n_fields - 1 ? "" : " ");
n_u64 += STR_VAR_LEN_MAX / sizeof(u64);
}
} else if (se->fields[i]->is_stack) {
- u32 offset, data_offset, len;
- unsigned long *p, *end;
-
- offset = (u32)entry->fields[n_u64];
- data_offset = offset & 0xffff;
- len = offset >> 16;
-
- p = (void *)entry + data_offset;
- end = (void *)p + len - (sizeof(long) - 1);
+ union trace_synth_field *data = &entry->fields[n_u64];
+ unsigned long *p = (void *)entry + data->as_dynamic.offset;
trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name);
-
- for (; *p && p < end; p++)
- trace_seq_printf(s, "=> %pS\n", (void *)*p);
+ for (j = 1; j < data->as_dynamic.len / sizeof(long); j++)
+ trace_seq_printf(s, "=> %pS\n", (void *)p[j]);
n_u64++;
-
} else {
struct trace_print_flags __flags[] = {
__def_gfpflag_names, {-1, NULL} };
@@ -419,13 +404,13 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter,
print_synth_event_num_val(s, print_fmt,
se->fields[i]->name,
se->fields[i]->size,
- entry->fields[n_u64],
+ &entry->fields[n_u64],
space);
if (strcmp(se->fields[i]->type, "gfp_t") == 0) {
trace_seq_puts(s, " (");
trace_print_flags_seq(s, "|",
- entry->fields[n_u64],
+ entry->fields[n_u64].as_u64,
__flags);
trace_seq_putc(s, ')');
}
@@ -454,21 +439,16 @@ static unsigned int trace_string(struct synth_trace_event *entry,
int ret;
if (is_dynamic) {
- u32 data_offset;
+ union trace_synth_field *data = &entry->fields[*n_u64];
- data_offset = struct_size(entry, fields, event->n_u64);
- data_offset += data_size;
-
- len = fetch_store_strlen((unsigned long)str_val);
-
- data_offset |= len << 16;
- *(u32 *)&entry->fields[*n_u64] = data_offset;
+ data->as_dynamic.offset = struct_size(entry, fields, event->n_u64) + data_size;
+ data->as_dynamic.len = fetch_store_strlen((unsigned long)str_val);
ret = fetch_store_string((unsigned long)str_val, &entry->fields[*n_u64], entry);
(*n_u64)++;
} else {
- str_field = (char *)&entry->fields[*n_u64];
+ str_field = (char *)&entry->fields[*n_u64].as_u64;
#ifdef CONFIG_ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
if ((unsigned long)str_val < TASK_SIZE)
@@ -492,6 +472,7 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
unsigned int data_size,
unsigned int *n_u64)
{
+ union trace_synth_field *data = &entry->fields[*n_u64];
unsigned int len;
u32 data_offset;
void *data_loc;
@@ -504,10 +485,6 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
break;
}
- /* Include the zero'd element if it fits */
- if (len < HIST_STACKTRACE_DEPTH)
- len++;
-
len *= sizeof(long);
/* Find the dynamic section to copy the stack into. */
@@ -515,8 +492,9 @@ static unsigned int trace_stack(struct synth_trace_event *entry,
memcpy(data_loc, stack, len);
/* Fill in the field that holds the offset/len combo */
- data_offset |= len << 16;
- *(u32 *)&entry->fields[*n_u64] = data_offset;
+
+ data->as_dynamic.offset = data_offset;
+ data->as_dynamic.len = len;
(*n_u64)++;
@@ -550,7 +528,8 @@ static notrace void trace_event_raw_event_synth(void *__data,
str_val = (char *)(long)var_ref_vals[val_idx];
if (event->dynamic_fields[i]->is_stack) {
- len = *((unsigned long *)str_val);
+ /* reserve one extra element for size */
+ len = *((unsigned long *)str_val) + 1;
len *= sizeof(unsigned long);
} else {
len = fetch_store_strlen((unsigned long)str_val);
@@ -592,19 +571,19 @@ static notrace void trace_event_raw_event_synth(void *__data,
switch (field->size) {
case 1:
- *(u8 *)&entry->fields[n_u64] = (u8)val;
+ entry->fields[n_u64].as_u8 = (u8)val;
break;
case 2:
- *(u16 *)&entry->fields[n_u64] = (u16)val;
+ entry->fields[n_u64].as_u16 = (u16)val;
break;
case 4:
- *(u32 *)&entry->fields[n_u64] = (u32)val;
+ entry->fields[n_u64].as_u32 = (u32)val;
break;
default:
- entry->fields[n_u64] = val;
+ entry->fields[n_u64].as_u64 = val;
break;
}
n_u64++;
@@ -1230,6 +1209,7 @@ EXPORT_SYMBOL_GPL(__synth_event_gen_cmd_start);
* synth_event_gen_cmd_array_start - Start synthetic event command from an array
* @cmd: A pointer to the dynevent_cmd struct representing the new event
* @name: The name of the synthetic event
+ * @mod: The module creating the event, NULL if not created from a module
* @fields: An array of type/name field descriptions
* @n_fields: The number of field descriptions contained in the fields array
*
@@ -1790,19 +1770,19 @@ int synth_event_trace(struct trace_event_file *file, unsigned int n_vals, ...)
switch (field->size) {
case 1:
- *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+ state.entry->fields[n_u64].as_u8 = (u8)val;
break;
case 2:
- *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+ state.entry->fields[n_u64].as_u16 = (u16)val;
break;
case 4:
- *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+ state.entry->fields[n_u64].as_u32 = (u32)val;
break;
default:
- state.entry->fields[n_u64] = val;
+ state.entry->fields[n_u64].as_u64 = val;
break;
}
n_u64++;
@@ -1883,19 +1863,19 @@ int synth_event_trace_array(struct trace_event_file *file, u64 *vals,
switch (field->size) {
case 1:
- *(u8 *)&state.entry->fields[n_u64] = (u8)val;
+ state.entry->fields[n_u64].as_u8 = (u8)val;
break;
case 2:
- *(u16 *)&state.entry->fields[n_u64] = (u16)val;
+ state.entry->fields[n_u64].as_u16 = (u16)val;
break;
case 4:
- *(u32 *)&state.entry->fields[n_u64] = (u32)val;
+ state.entry->fields[n_u64].as_u32 = (u32)val;
break;
default:
- state.entry->fields[n_u64] = val;
+ state.entry->fields[n_u64].as_u64 = val;
break;
}
n_u64++;
@@ -2030,19 +2010,19 @@ static int __synth_event_add_val(const char *field_name, u64 val,
} else {
switch (field->size) {
case 1:
- *(u8 *)&trace_state->entry->fields[field->offset] = (u8)val;
+ trace_state->entry->fields[field->offset].as_u8 = (u8)val;
break;
case 2:
- *(u16 *)&trace_state->entry->fields[field->offset] = (u16)val;
+ trace_state->entry->fields[field->offset].as_u16 = (u16)val;
break;
case 4:
- *(u32 *)&trace_state->entry->fields[field->offset] = (u32)val;
+ trace_state->entry->fields[field->offset].as_u32 = (u32)val;
break;
default:
- trace_state->entry->fields[field->offset] = val;
+ trace_state->entry->fields[field->offset].as_u64 = val;
break;
}
}
diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index e535959939d3..46439e3bcec4 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -31,7 +31,9 @@ void trigger_data_free(struct event_trigger_data *data)
/**
* event_triggers_call - Call triggers associated with a trace event
* @file: The trace_event_file associated with the event
+ * @buffer: The ring buffer that the event is being written to
* @rec: The trace entry for the event, NULL for unconditional invocation
+ * @event: The event meta data in the ring buffer
*
* For each trigger associated with an event, invoke the trigger
* function registered with the associated trigger command. If rec is
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 590b3d51afae..ba37f768e2f2 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -231,7 +231,8 @@ static void irqsoff_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
graph_trace_open(iter);
-
+ else
+ iter->private = NULL;
}
static void irqsoff_trace_close(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index b2b726bea1f9..c68a72707852 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -386,12 +386,12 @@ static const struct btf_type *find_btf_func_proto(const char *funcname)
/* Get BTF_KIND_FUNC type */
t = btf_type_by_id(btf, id);
- if (!btf_type_is_func(t))
+ if (!t || !btf_type_is_func(t))
return ERR_PTR(-ENOENT);
/* The type of BTF_KIND_FUNC is BTF_KIND_FUNC_PROTO */
t = btf_type_by_id(btf, t->type);
- if (!btf_type_is_func_proto(t))
+ if (!t || !btf_type_is_func_proto(t))
return ERR_PTR(-ENOENT);
return t;
@@ -443,7 +443,7 @@ static int parse_btf_arg(const char *varname, struct fetch_insn *code,
if (!ctx->params) {
params = find_btf_func_param(ctx->funcname, &ctx->nr_params,
ctx->flags & TPARG_FL_TPOINT);
- if (IS_ERR(params)) {
+ if (IS_ERR_OR_NULL(params)) {
trace_probe_log_err(ctx->offset, NO_BTF_ENTRY);
return PTR_ERR(params);
}
@@ -1273,7 +1273,7 @@ const char **traceprobe_expand_meta_args(int argc, const char *argv[],
params = find_btf_func_param(ctx->funcname, &nr_params,
ctx->flags & TPARG_FL_TPOINT);
- if (IS_ERR(params)) {
+ if (IS_ERR_OR_NULL(params)) {
if (args_idx != -1) {
/* $arg* requires BTF info */
trace_probe_log_err(0, NOSUP_BTFARG);
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index 330aee1c1a49..0469a04a355f 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -168,6 +168,8 @@ static void wakeup_trace_open(struct trace_iterator *iter)
{
if (is_graph(iter->tr))
graph_trace_open(iter);
+ else
+ iter->private = NULL;
}
static void wakeup_trace_close(struct trace_iterator *iter)
diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c
index e5e299260d0c..bac06ee3b98b 100644
--- a/kernel/trace/trace_seq.c
+++ b/kernel/trace/trace_seq.c
@@ -131,6 +131,7 @@ EXPORT_SYMBOL_GPL(trace_seq_bitmask);
* trace_seq_vprintf - sequence printing of trace information
* @s: trace sequence descriptor
* @fmt: printf format string
+ * @args: Arguments for the format string
*
* The tracer may use either sequence operations or its own
* copy to user routines. To simplify formatting of a trace
diff --git a/kernel/trace/tracing_map.h b/kernel/trace/tracing_map.h
index 2c765ee2a4d4..99c37eeebc16 100644
--- a/kernel/trace/tracing_map.h
+++ b/kernel/trace/tracing_map.h
@@ -272,10 +272,6 @@ extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i);
extern u64 tracing_map_read_var(struct tracing_map_elt *elt, unsigned int i);
extern u64 tracing_map_read_var_once(struct tracing_map_elt *elt, unsigned int i);
-extern void tracing_map_set_field_descr(struct tracing_map *map,
- unsigned int i,
- unsigned int key_offset,
- tracing_map_cmp_fn_t cmp_fn);
extern int
tracing_map_sort_entries(struct tracing_map *map,
struct tracing_map_sort_key *sort_keys,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 02a8f402eeb5..800b4208dba9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -52,6 +52,7 @@
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/kvm_para.h>
+#include <linux/delay.h>
#include "workqueue_internal.h"
@@ -338,8 +339,10 @@ static cpumask_var_t *wq_numa_possible_cpumask;
* Per-cpu work items which run for longer than the following threshold are
* automatically considered CPU intensive and excluded from concurrency
* management to prevent them from noticeably delaying other per-cpu work items.
+ * ULONG_MAX indicates that the user hasn't overridden it with a boot parameter.
+ * The actual value is initialized in wq_cpu_intensive_thresh_init().
*/
-static unsigned long wq_cpu_intensive_thresh_us = 10000;
+static unsigned long wq_cpu_intensive_thresh_us = ULONG_MAX;
module_param_named(cpu_intensive_thresh_us, wq_cpu_intensive_thresh_us, ulong, 0644);
static bool wq_disable_numa;
@@ -6513,6 +6516,42 @@ void __init workqueue_init_early(void)
!system_freezable_power_efficient_wq);
}
+static void __init wq_cpu_intensive_thresh_init(void)
+{
+ unsigned long thresh;
+ unsigned long bogo;
+
+ /* if the user set it to a specific value, keep it */
+ if (wq_cpu_intensive_thresh_us != ULONG_MAX)
+ return;
+
+ /*
+ * The default of 10ms is derived from the fact that most modern (as of
+ * 2023) processors can do a lot in 10ms and that it's just below what
+ * most consider human-perceivable. However, the kernel also runs on a
+ * lot slower CPUs including microcontrollers where the threshold is way
+ * too low.
+ *
+ * Let's scale up the threshold upto 1 second if BogoMips is below 4000.
+ * This is by no means accurate but it doesn't have to be. The mechanism
+ * is still useful even when the threshold is fully scaled up. Also, as
+ * the reports would usually be applicable to everyone, some machines
+ * operating on longer thresholds won't significantly diminish their
+ * usefulness.
+ */
+ thresh = 10 * USEC_PER_MSEC;
+
+ /* see init/calibrate.c for lpj -> BogoMIPS calculation */
+ bogo = max_t(unsigned long, loops_per_jiffy / 500000 * HZ, 1);
+ if (bogo < 4000)
+ thresh = min_t(unsigned long, thresh * 4000 / bogo, USEC_PER_SEC);
+
+ pr_debug("wq_cpu_intensive_thresh: lpj=%lu BogoMIPS=%lu thresh_us=%lu\n",
+ loops_per_jiffy, bogo, thresh);
+
+ wq_cpu_intensive_thresh_us = thresh;
+}
+
/**
* workqueue_init - bring workqueue subsystem fully online
*
@@ -6528,6 +6567,8 @@ void __init workqueue_init(void)
struct worker_pool *pool;
int cpu, bkt;
+ wq_cpu_intensive_thresh_init();
+
/*
* It'd be simpler to initialize NUMA in workqueue_init_early() but
* CPU to node mapping may not be available that early on some
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c7348d1fabe5..73a0c8e41559 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1200,7 +1200,7 @@ config WQ_CPU_INTENSIVE_REPORT
help
Say Y here to enable reporting of concurrency-managed per-cpu work
items that hog CPUs for longer than
- workqueue.cpu_intensive_threshold_us. Workqueue automatically
+ workqueue.cpu_intensive_thresh_us. Workqueue automatically
detects and excludes them from concurrency management to prevent
them from stalling other per-cpu work items. Occassional
triggering may not necessarily indicate a problem. Repeated
diff --git a/lib/Makefile b/lib/Makefile
index 110936c9a68b..d1397785ec16 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -82,7 +82,13 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_SCANF) += test_scanf.o
+
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
+ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy)
+# FIXME: Clang breaks test_bitmap_const_eval when KASAN and GCOV are enabled
+GCOV_PROFILE_test_bitmap.o := n
+endif
+
obj-$(CONFIG_TEST_UUID) += test_uuid.o
obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c
index 0d3a686b5ba2..fb8c0c5c2bd2 100644
--- a/lib/clz_ctz.c
+++ b/lib/clz_ctz.c
@@ -28,36 +28,16 @@ int __weak __clzsi2(int val)
}
EXPORT_SYMBOL(__clzsi2);
-int __weak __clzdi2(long val);
-int __weak __ctzdi2(long val);
-#if BITS_PER_LONG == 32
-
-int __weak __clzdi2(long val)
+int __weak __clzdi2(u64 val);
+int __weak __clzdi2(u64 val)
{
- return 32 - fls((int)val);
+ return 64 - fls64(val);
}
EXPORT_SYMBOL(__clzdi2);
-int __weak __ctzdi2(long val)
+int __weak __ctzdi2(u64 val);
+int __weak __ctzdi2(u64 val)
{
- return __ffs((u32)val);
+ return __ffs64(val);
}
EXPORT_SYMBOL(__ctzdi2);
-
-#elif BITS_PER_LONG == 64
-
-int __weak __clzdi2(long val)
-{
- return 64 - fls64((u64)val);
-}
-EXPORT_SYMBOL(__clzdi2);
-
-int __weak __ctzdi2(long val)
-{
- return __ffs64((u64)val);
-}
-EXPORT_SYMBOL(__ctzdi2);
-
-#else
-#error BITS_PER_LONG not 32 or 64
-#endif
diff --git a/lib/cpumask.c b/lib/cpumask.c
index de356f16773a..a7fd02b5ae26 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -45,6 +45,7 @@ EXPORT_SYMBOL(cpumask_next_wrap);
* alloc_cpumask_var_node - allocate a struct cpumask on a given node
* @mask: pointer to cpumask_var_t where the cpumask is returned
* @flags: GFP_ flags
+ * @node: memory node from which to allocate or %NUMA_NO_NODE
*
* Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is
* a nop returning a constant 1 (in <linux/cpumask.h>)
@@ -157,7 +158,9 @@ EXPORT_SYMBOL(cpumask_local_spread);
static DEFINE_PER_CPU(int, distribute_cpu_mask_prev);
/**
- * cpumask_any_and_distribute - Return an arbitrary cpu within srcp1 & srcp2.
+ * cpumask_any_and_distribute - Return an arbitrary cpu within src1p & src2p.
+ * @src1p: first &cpumask for intersection
+ * @src2p: second &cpumask for intersection
*
* Iterated calls using the same srcp1 and srcp2 will be distributed within
* their intersection.
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 0c883d6fbd44..6c644f954bc5 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -895,7 +895,7 @@ struct gen_pool *of_gen_pool_get(struct device_node *np,
of_property_read_string(np_pool, "label", &name);
if (!name)
- name = np_pool->name;
+ name = of_node_full_name(np_pool);
}
if (pdev)
pool = gen_pool_get(&pdev->dev, name);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e4dc809d1075..424737045b97 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -566,24 +566,37 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
}
EXPORT_SYMBOL(iov_iter_zero);
-size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
- struct iov_iter *i)
+size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
+ size_t bytes, struct iov_iter *i)
{
- char *kaddr = kmap_atomic(page), *p = kaddr + offset;
- if (!page_copy_sane(page, offset, bytes)) {
- kunmap_atomic(kaddr);
+ size_t n, copied = 0;
+
+ if (!page_copy_sane(page, offset, bytes))
return 0;
- }
- if (WARN_ON_ONCE(!i->data_source)) {
- kunmap_atomic(kaddr);
+ if (WARN_ON_ONCE(!i->data_source))
return 0;
- }
- iterate_and_advance(i, bytes, base, len, off,
- copyin(p + off, base, len),
- memcpy_from_iter(i, p + off, base, len)
- )
- kunmap_atomic(kaddr);
- return bytes;
+
+ do {
+ char *p;
+
+ n = bytes - copied;
+ if (PageHighMem(page)) {
+ page += offset / PAGE_SIZE;
+ offset %= PAGE_SIZE;
+ n = min_t(size_t, n, PAGE_SIZE - offset);
+ }
+
+ p = kmap_atomic(page) + offset;
+ iterate_and_advance(i, n, base, len, off,
+ copyin(p + off, base, len),
+ memcpy_from_iter(i, p + off, base, len)
+ )
+ kunmap_atomic(p);
+ copied += n;
+ offset += n;
+ } while (PageHighMem(page) && copied != bytes && n > 0);
+
+ return copied;
}
EXPORT_SYMBOL(copy_page_from_iter_atomic);
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index bfffbb7cab26..f723024e1426 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -3692,7 +3692,8 @@ static inline int mas_root_expand(struct ma_state *mas, void *entry)
mas->offset = slot;
pivots[slot] = mas->last;
if (mas->last != ULONG_MAX)
- slot++;
+ pivots[++slot] = ULONG_MAX;
+
mas->depth = 1;
mas_set_height(mas);
ma_set_meta(node, maple_leaf_64, 0, slot);
@@ -4264,6 +4265,10 @@ static inline unsigned char mas_wr_new_end(struct ma_wr_state *wr_mas)
* mas_wr_append: Attempt to append
* @wr_mas: the maple write state
*
+ * This is currently unsafe in rcu mode since the end of the node may be cached
+ * by readers while the node contents may be updated which could result in
+ * inaccurate information.
+ *
* Return: True if appended, false otherwise
*/
static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
@@ -4273,6 +4278,9 @@ static inline bool mas_wr_append(struct ma_wr_state *wr_mas)
struct ma_state *mas = wr_mas->mas;
unsigned char node_pivots = mt_pivots[wr_mas->type];
+ if (mt_in_rcu(mas->tree))
+ return false;
+
if (mas->offset != wr_mas->node_end)
return false;
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 1a31065b2036..976b9bd02a1b 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1136,7 +1136,6 @@ static void set_iter_tags(struct radix_tree_iter *iter,
void __rcu **radix_tree_iter_resume(void __rcu **slot,
struct radix_tree_iter *iter)
{
- slot++;
iter->index = __radix_tree_iter_add(iter, 1);
iter->next_index = iter->index;
iter->tags = 0;
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index eff4e42c425a..d0a5081dfd12 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -550,7 +550,7 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_min_shallow_depth);
static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
{
- int i, wake_index;
+ int i, wake_index, woken;
if (!atomic_read(&sbq->ws_active))
return;
@@ -567,13 +567,12 @@ static void __sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr)
*/
wake_index = sbq_index_inc(wake_index);
- /*
- * It is sufficient to wake up at least one waiter to
- * guarantee forward progress.
- */
- if (waitqueue_active(&ws->wait) &&
- wake_up_nr(&ws->wait, nr))
- break;
+ if (waitqueue_active(&ws->wait)) {
+ woken = wake_up_nr(&ws->wait, nr);
+ if (woken == nr)
+ break;
+ nr -= woken;
+ }
}
if (wake_index != atomic_read(&sbq->wake_index))
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index e86231a44c3d..c65566b4dc66 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -1148,7 +1148,7 @@ static ssize_t extract_user_to_sg(struct iov_iter *iter,
failed:
while (sgtable->nents > sgtable->orig_nents)
- put_page(sg_page(&sgtable->sgl[--sgtable->nents]));
+ unpin_user_page(sg_page(&sgtable->sgl[--sgtable->nents]));
return res;
}
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 187f5b2db4cf..f2ea9f30c7c5 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -1161,6 +1161,10 @@ static void __init test_bitmap_print_buf(void)
}
}
+/*
+ * FIXME: Clang breaks compile-time evaluations when KASAN and GCOV are enabled.
+ * To workaround it, GCOV is force-disabled in Makefile for this configuration.
+ */
static void __init test_bitmap_const_eval(void)
{
DECLARE_BITMAP(bitmap, BITS_PER_LONG);
@@ -1186,11 +1190,7 @@ static void __init test_bitmap_const_eval(void)
* the compiler is fixed.
*/
bitmap_clear(bitmap, 0, BITS_PER_LONG);
-#if defined(__s390__) && defined(__clang__)
- if (!const_test_bit(7, bitmap))
-#else
if (!test_bit(7, bitmap))
-#endif
bitmap_set(bitmap, 5, 2);
/* Equals to `unsigned long bitopvar = BIT(20)` */
diff --git a/lib/test_maple_tree.c b/lib/test_maple_tree.c
index 9939be34e516..8d4c92cbdd0c 100644
--- a/lib/test_maple_tree.c
+++ b/lib/test_maple_tree.c
@@ -1898,13 +1898,16 @@ static noinline void __init next_prev_test(struct maple_tree *mt)
725};
static const unsigned long level2_32[] = { 1747, 2000, 1750, 1755,
1760, 1765};
+ unsigned long last_index;
if (MAPLE_32BIT) {
nr_entries = 500;
level2 = level2_32;
+ last_index = 0x138e;
} else {
nr_entries = 200;
level2 = level2_64;
+ last_index = 0x7d6;
}
for (i = 0; i <= nr_entries; i++)
@@ -2011,7 +2014,7 @@ static noinline void __init next_prev_test(struct maple_tree *mt)
val = mas_next(&mas, ULONG_MAX);
MT_BUG_ON(mt, val != NULL);
- MT_BUG_ON(mt, mas.index != 0x7d6);
+ MT_BUG_ON(mt, mas.index != last_index);
MT_BUG_ON(mt, mas.last != ULONG_MAX);
val = mas_prev(&mas, 0);
diff --git a/mm/Makefile b/mm/Makefile
index 678530a07326..d4ee20988dd1 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -51,7 +51,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
readahead.o swap.o truncate.o vmscan.o shmem.o \
util.o mmzone.o vmstat.o backing-dev.o \
mm_init.o percpu.o slab_common.o \
- compaction.o show_mem.o\
+ compaction.o show_mem.o shmem_quota.o\
interval_tree.o list_lru.o workingset.o \
debug.o gup.o mmap_lock.o $(mmu-y)
diff --git a/mm/compaction.c b/mm/compaction.c
index dbc9f86b1934..eacca2794e47 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -912,11 +912,12 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
/*
* Check if the pageblock has already been marked skipped.
- * Only the aligned PFN is checked as the caller isolates
+ * Only the first PFN is checked as the caller isolates
* COMPACT_CLUSTER_MAX at a time so the second call must
* not falsely conclude that the block should be skipped.
*/
- if (!valid_page && pageblock_aligned(low_pfn)) {
+ if (!valid_page && (pageblock_aligned(low_pfn) ||
+ low_pfn == cc->zone->zone_start_pfn)) {
if (!isolation_suitable(cc, page)) {
low_pfn = end_pfn;
folio = NULL;
@@ -2002,7 +2003,8 @@ static isolate_migrate_t isolate_migratepages(struct compact_control *cc)
* before making it "skip" so other compaction instances do
* not scan the same block.
*/
- if (pageblock_aligned(low_pfn) &&
+ if ((pageblock_aligned(low_pfn) ||
+ low_pfn == cc->zone->zone_start_pfn) &&
!fast_find_block && !isolation_suitable(cc, page))
continue;
diff --git a/mm/damon/core-test.h b/mm/damon/core-test.h
index c11210124344..bb07721909e1 100644
--- a/mm/damon/core-test.h
+++ b/mm/damon/core-test.h
@@ -320,25 +320,25 @@ static void damon_test_update_monitoring_result(struct kunit *test)
static void damon_test_set_attrs(struct kunit *test)
{
- struct damon_ctx ctx;
+ struct damon_ctx *c = damon_new_ctx();
struct damon_attrs valid_attrs = {
.min_nr_regions = 10, .max_nr_regions = 1000,
.sample_interval = 5000, .aggr_interval = 100000,};
struct damon_attrs invalid_attrs;
- KUNIT_EXPECT_EQ(test, damon_set_attrs(&ctx, &valid_attrs), 0);
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &valid_attrs), 0);
invalid_attrs = valid_attrs;
invalid_attrs.min_nr_regions = 1;
- KUNIT_EXPECT_EQ(test, damon_set_attrs(&ctx, &invalid_attrs), -EINVAL);
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
invalid_attrs = valid_attrs;
invalid_attrs.max_nr_regions = 9;
- KUNIT_EXPECT_EQ(test, damon_set_attrs(&ctx, &invalid_attrs), -EINVAL);
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
invalid_attrs = valid_attrs;
invalid_attrs.aggr_interval = 4999;
- KUNIT_EXPECT_EQ(test, damon_set_attrs(&ctx, &invalid_attrs), -EINVAL);
+ KUNIT_EXPECT_EQ(test, damon_set_attrs(c, &invalid_attrs), -EINVAL);
}
static struct kunit_case damon_test_cases[] = {
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 91cff7f2997e..eb9580942a5c 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -273,6 +273,7 @@ struct damos_filter *damos_new_filter(enum damos_filter_type type,
return NULL;
filter->type = type;
filter->matching = matching;
+ INIT_LIST_HEAD(&filter->list);
return filter;
}
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 2fcc9731528a..e0e59d420fca 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -386,6 +386,7 @@ out:
static const struct mm_walk_ops damon_mkold_ops = {
.pmd_entry = damon_mkold_pmd_entry,
.hugetlb_entry = damon_mkold_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
};
static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -525,6 +526,7 @@ out:
static const struct mm_walk_ops damon_young_ops = {
.pmd_entry = damon_young_pmd_entry,
.hugetlb_entry = damon_young_hugetlb_entry,
+ .walk_lock = PGWALK_RDLOCK,
};
static bool damon_va_young(struct mm_struct *mm, unsigned long addr,
diff --git a/mm/filemap.c b/mm/filemap.c
index 9e44a49bbd74..baafbf324c9f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1855,30 +1855,15 @@ out:
*
* Looks up the page cache entry at @mapping & @index.
*
- * @fgp_flags can be zero or more of these flags:
- *
- * * %FGP_ACCESSED - The folio will be marked accessed.
- * * %FGP_LOCK - The folio is returned locked.
- * * %FGP_CREAT - If no page is present then a new page is allocated using
- * @gfp and added to the page cache and the VM's LRU list.
- * The page is returned locked and with an increased refcount.
- * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the
- * page is already in cache. If the page was allocated, unlock it before
- * returning so the caller can do the same dance.
- * * %FGP_WRITE - The page will be written to by the caller.
- * * %FGP_NOFS - __GFP_FS will get cleared in gfp.
- * * %FGP_NOWAIT - Don't get blocked by page lock.
- * * %FGP_STABLE - Wait for the folio to be stable (finished writeback)
- *
* If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even
* if the %GFP flags specified for %FGP_CREAT are atomic.
*
- * If there is a page cache page, it is returned with an increased refcount.
+ * If this function returns a folio, it is returned with an increased refcount.
*
* Return: The found folio or an ERR_PTR() otherwise.
*/
struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp)
+ fgf_t fgp_flags, gfp_t gfp)
{
struct folio *folio;
@@ -1920,7 +1905,9 @@ repeat:
folio_wait_stable(folio);
no_page:
if (!folio && (fgp_flags & FGP_CREAT)) {
+ unsigned order = FGF_GET_ORDER(fgp_flags);
int err;
+
if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping))
gfp |= __GFP_WRITE;
if (fgp_flags & FGP_NOFS)
@@ -1929,26 +1916,44 @@ no_page:
gfp &= ~GFP_KERNEL;
gfp |= GFP_NOWAIT | __GFP_NOWARN;
}
-
- folio = filemap_alloc_folio(gfp, 0);
- if (!folio)
- return ERR_PTR(-ENOMEM);
-
if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP))))
fgp_flags |= FGP_LOCK;
- /* Init accessed so avoid atomic mark_page_accessed later */
- if (fgp_flags & FGP_ACCESSED)
- __folio_set_referenced(folio);
+ if (!mapping_large_folio_support(mapping))
+ order = 0;
+ if (order > MAX_PAGECACHE_ORDER)
+ order = MAX_PAGECACHE_ORDER;
+ /* If we're not aligned, allocate a smaller folio */
+ if (index & ((1UL << order) - 1))
+ order = __ffs(index);
- err = filemap_add_folio(mapping, folio, index, gfp);
- if (unlikely(err)) {
+ do {
+ gfp_t alloc_gfp = gfp;
+
+ err = -ENOMEM;
+ if (order == 1)
+ order = 0;
+ if (order > 0)
+ alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
+ folio = filemap_alloc_folio(alloc_gfp, order);
+ if (!folio)
+ continue;
+
+ /* Init accessed so avoid atomic mark_page_accessed later */
+ if (fgp_flags & FGP_ACCESSED)
+ __folio_set_referenced(folio);
+
+ err = filemap_add_folio(mapping, folio, index, gfp);
+ if (!err)
+ break;
folio_put(folio);
folio = NULL;
- if (err == -EEXIST)
- goto repeat;
- }
+ } while (order-- > 0);
+ if (err == -EEXIST)
+ goto repeat;
+ if (err)
+ return ERR_PTR(err);
/*
* filemap_add_folio locks the page, and for mmap
* we expect an unlocked page.
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index c6f056c20503..10c3247542cb 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -92,7 +92,7 @@ EXPORT_SYMBOL(add_to_page_cache_lru);
noinline
struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
- int fgp_flags, gfp_t gfp)
+ fgf_t fgp_flags, gfp_t gfp)
{
struct folio *folio;
diff --git a/mm/gup.c b/mm/gup.c
index 76d222ccc3ff..6e2f9e9d6537 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -597,7 +597,7 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
pte = ptep_get(ptep);
if (!pte_present(pte))
goto no_page;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ if (pte_protnone(pte) && !gup_can_follow_protnone(vma, flags))
goto no_page;
page = vm_normal_page(vma, address, pte);
@@ -714,7 +714,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
if (likely(!pmd_trans_huge(pmdval)))
return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
- if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(pmdval) && !gup_can_follow_protnone(vma, flags))
return no_page_table(vma, flags);
ptl = pmd_lock(mm, pmd);
@@ -851,6 +851,10 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
if (WARN_ON_ONCE(foll_flags & FOLL_PIN))
return NULL;
+ /*
+ * We never set FOLL_HONOR_NUMA_FAULT because callers don't expect
+ * to fail on PROT_NONE-mapped pages.
+ */
page = follow_page_mask(vma, address, foll_flags, &ctx);
if (ctx.pgmap)
put_dev_pagemap(ctx.pgmap);
@@ -2227,6 +2231,13 @@ static bool is_valid_gup_args(struct page **pages, int *locked,
gup_flags |= FOLL_UNLOCKABLE;
}
+ /*
+ * For now, always trigger NUMA hinting faults. Some GUP users like
+ * KVM require the hint to be as the calling context of GUP is
+ * functionally similar to a memory reference from task context.
+ */
+ gup_flags |= FOLL_HONOR_NUMA_FAULT;
+
/* FOLL_GET and FOLL_PIN are mutually exclusive. */
if (WARN_ON_ONCE((gup_flags & (FOLL_PIN | FOLL_GET)) ==
(FOLL_PIN | FOLL_GET)))
@@ -2551,7 +2562,14 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
struct page *page;
struct folio *folio;
- if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
+ /*
+ * Always fallback to ordinary GUP on PROT_NONE-mapped pages:
+ * pte_access_permitted() better should reject these pages
+ * either way: otherwise, GUP-fast might succeed in
+ * cases where ordinary GUP would fail due to VMA access
+ * permissions.
+ */
+ if (pte_protnone(pte))
goto pte_unmap;
if (!pte_access_permitted(pte, flags & FOLL_WRITE))
@@ -2970,8 +2988,8 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
pmd_devmap(pmd))) {
- if (pmd_protnone(pmd) &&
- !gup_can_follow_protnone(flags))
+ /* See gup_pte_range() */
+ if (pmd_protnone(pmd))
return 0;
if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
@@ -3151,7 +3169,7 @@ static int internal_get_user_pages_fast(unsigned long start,
if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
FOLL_FORCE | FOLL_PIN | FOLL_GET |
FOLL_FAST_ONLY | FOLL_NOFAULT |
- FOLL_PCI_P2PDMA)))
+ FOLL_PCI_P2PDMA | FOLL_HONOR_NUMA_FAULT)))
return -EINVAL;
if (gup_flags & FOLL_PIN)
diff --git a/mm/hmm.c b/mm/hmm.c
index 855e25e59d8f..277ddcab4947 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -562,6 +562,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
.pte_hole = hmm_vma_walk_hole,
.hugetlb_entry = hmm_vma_walk_hugetlb_entry,
.test_walk = hmm_vma_walk_test,
+ .walk_lock = PGWALK_RDLOCK,
};
/**
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eb3678360b97..e4f0266a22d4 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1467,8 +1467,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
if ((flags & FOLL_DUMP) && is_huge_zero_pmd(*pmd))
return ERR_PTR(-EFAULT);
- /* Full NUMA hinting faults to serialise migration in fault paths */
- if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
+ if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags))
return NULL;
if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
@@ -1613,7 +1612,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
* If other processes are mapping this folio, we couldn't discard
* the folio unless they all do MADV_FREE so let's skip the folio.
*/
- if (folio_mapcount(folio) != 1)
+ if (folio_estimated_sharers(folio) != 1)
goto out;
if (!folio_trylock(folio))
@@ -2521,7 +2520,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
struct address_space *swap_cache = NULL;
unsigned long offset = 0;
unsigned int nr = thp_nr_pages(head);
- int i;
+ int i, nr_dropped = 0;
/* complete memcg works before add pages to LRU */
split_page_memcg(head, nr);
@@ -2546,7 +2545,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
struct folio *tail = page_folio(head + i);
if (shmem_mapping(head->mapping))
- shmem_uncharge(head->mapping->host, 1);
+ nr_dropped++;
else if (folio_test_clear_dirty(tail))
folio_account_cleaned(tail,
inode_to_wb(folio->mapping->host));
@@ -2583,6 +2582,8 @@ static void __split_huge_page(struct page *page, struct list_head *list,
}
local_irq_enable();
+ if (nr_dropped)
+ shmem_uncharge(head->mapping->host, nr_dropped);
remap_page(folio, nr);
if (PageSwapCache(head)) {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 64a3239b6407..6da626bfb52e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1579,9 +1579,37 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
unsigned int order) { }
#endif
+static inline void __clear_hugetlb_destructor(struct hstate *h,
+ struct folio *folio)
+{
+ lockdep_assert_held(&hugetlb_lock);
+
+ /*
+ * Very subtle
+ *
+ * For non-gigantic pages set the destructor to the normal compound
+ * page dtor. This is needed in case someone takes an additional
+ * temporary ref to the page, and freeing is delayed until they drop
+ * their reference.
+ *
+ * For gigantic pages set the destructor to the null dtor. This
+ * destructor will never be called. Before freeing the gigantic
+ * page destroy_compound_gigantic_folio will turn the folio into a
+ * simple group of pages. After this the destructor does not
+ * apply.
+ *
+ */
+ if (hstate_is_gigantic(h))
+ folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
+ else
+ folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
+}
+
/*
- * Remove hugetlb folio from lists, and update dtor so that the folio appears
- * as just a compound page.
+ * Remove hugetlb folio from lists.
+ * If vmemmap exists for the folio, update dtor so that the folio appears
+ * as just a compound page. Otherwise, wait until after allocating vmemmap
+ * to update dtor.
*
* A reference is held on the folio, except in the case of demote.
*
@@ -1612,31 +1640,19 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
}
/*
- * Very subtle
- *
- * For non-gigantic pages set the destructor to the normal compound
- * page dtor. This is needed in case someone takes an additional
- * temporary ref to the page, and freeing is delayed until they drop
- * their reference.
- *
- * For gigantic pages set the destructor to the null dtor. This
- * destructor will never be called. Before freeing the gigantic
- * page destroy_compound_gigantic_folio will turn the folio into a
- * simple group of pages. After this the destructor does not
- * apply.
- *
- * This handles the case where more than one ref is held when and
- * after update_and_free_hugetlb_folio is called.
- *
- * In the case of demote we do not ref count the page as it will soon
- * be turned into a page of smaller size.
+ * We can only clear the hugetlb destructor after allocating vmemmap
+ * pages. Otherwise, someone (memory error handling) may try to write
+ * to tail struct pages.
+ */
+ if (!folio_test_hugetlb_vmemmap_optimized(folio))
+ __clear_hugetlb_destructor(h, folio);
+
+ /*
+ * In the case of demote we do not ref count the page as it will soon
+ * be turned into a page of smaller size.
*/
if (!demote)
folio_ref_unfreeze(folio, 1);
- if (hstate_is_gigantic(h))
- folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
- else
- folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
h->nr_huge_pages--;
h->nr_huge_pages_node[nid]--;
@@ -1705,6 +1721,7 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
{
int i;
struct page *subpage;
+ bool clear_dtor = folio_test_hugetlb_vmemmap_optimized(folio);
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
@@ -1735,6 +1752,16 @@ static void __update_and_free_hugetlb_folio(struct hstate *h,
if (unlikely(folio_test_hwpoison(folio)))
folio_clear_hugetlb_hwpoison(folio);
+ /*
+ * If vmemmap pages were allocated above, then we need to clear the
+ * hugetlb destructor under the hugetlb lock.
+ */
+ if (clear_dtor) {
+ spin_lock_irq(&hugetlb_lock);
+ __clear_hugetlb_destructor(h, folio);
+ spin_unlock_irq(&hugetlb_lock);
+ }
+
for (i = 0; i < pages_per_huge_page(h); i++) {
subpage = folio_page(folio, i);
subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
diff --git a/mm/internal.h b/mm/internal.h
index a7d9e980429a..8ed127c1c808 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -924,6 +924,13 @@ int migrate_device_coherent_page(struct page *page);
struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags);
int __must_check try_grab_page(struct page *page, unsigned int flags);
+/*
+ * mm/huge_memory.c
+ */
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd,
+ unsigned int flags);
+
enum {
/* mark page accessed */
FOLL_TOUCH = 1 << 16,
@@ -998,6 +1005,16 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
smp_rmb();
/*
+ * During GUP-fast we might not get called on the head page for a
+ * hugetlb page that is mapped using cont-PTE, because GUP-fast does
+ * not work with the abstracted hugetlb PTEs that always point at the
+ * head page. For hugetlb, PageAnonExclusive only applies on the head
+ * page (as it cannot be partially COW-shared), so lookup the head page.
+ */
+ if (unlikely(!PageHead(page) && PageHuge(page)))
+ page = compound_head(page);
+
+ /*
* Note that PageKsm() pages cannot be exclusive, and consequently,
* cannot get pinned.
*/
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 78c8d5d8b628..47d1d32c734f 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1955,10 +1955,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
goto xa_locked;
}
}
- if (!shmem_charge(mapping->host, 1)) {
- result = SCAN_FAIL;
- goto xa_locked;
- }
nr_none++;
continue;
}
@@ -2145,8 +2141,13 @@ xa_unlocked:
*/
try_to_unmap_flush();
- if (result != SCAN_SUCCEED)
+ if (result == SCAN_SUCCEED && nr_none &&
+ !shmem_charge(mapping->host, nr_none))
+ result = SCAN_FAIL;
+ if (result != SCAN_SUCCEED) {
+ nr_none = 0;
goto rollback;
+ }
/*
* The old pages are locked, so they won't change anymore.
@@ -2283,8 +2284,8 @@ rollback:
if (nr_none) {
xas_lock_irq(&xas);
mapping->nrpages -= nr_none;
- shmem_uncharge(mapping->host, nr_none);
xas_unlock_irq(&xas);
+ shmem_uncharge(mapping->host, nr_none);
}
list_for_each_entry_safe(page, tmp, &pagelist, lru) {
diff --git a/mm/ksm.c b/mm/ksm.c
index ba266359da55..d7b5b95e936e 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -455,6 +455,12 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
static const struct mm_walk_ops break_ksm_ops = {
.pmd_entry = break_ksm_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+ .pmd_entry = break_ksm_pmd_entry,
+ .walk_lock = PGWALK_WRLOCK,
};
/*
@@ -470,16 +476,17 @@ static const struct mm_walk_ops break_ksm_ops = {
* of the process that owns 'vma'. We also do not want to enforce
* protection keys here anyway.
*/
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
{
vm_fault_t ret = 0;
+ const struct mm_walk_ops *ops = lock_vma ?
+ &break_ksm_lock_vma_ops : &break_ksm_ops;
do {
int ksm_page;
cond_resched();
- ksm_page = walk_page_range_vma(vma, addr, addr + 1,
- &break_ksm_ops, NULL);
+ ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
if (WARN_ON_ONCE(ksm_page < 0))
return ksm_page;
if (!ksm_page)
@@ -565,7 +572,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
mmap_read_lock(mm);
vma = find_mergeable_vma(mm, addr);
if (vma)
- break_ksm(vma, addr);
+ break_ksm(vma, addr, false);
mmap_read_unlock(mm);
}
@@ -871,7 +878,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
* in cmp_and_merge_page on one of the rmap_items we would be removing.
*/
static int unmerge_ksm_pages(struct vm_area_struct *vma,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end, bool lock_vma)
{
unsigned long addr;
int err = 0;
@@ -882,7 +889,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
if (signal_pending(current))
err = -ERESTARTSYS;
else
- err = break_ksm(vma, addr);
+ err = break_ksm(vma, addr, lock_vma);
}
return err;
}
@@ -1029,7 +1036,7 @@ static int unmerge_and_remove_all_rmap_items(void)
if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
continue;
err = unmerge_ksm_pages(vma,
- vma->vm_start, vma->vm_end);
+ vma->vm_start, vma->vm_end, false);
if (err)
goto error;
}
@@ -2530,7 +2537,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
return 0;
if (vma->anon_vma) {
- err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+ err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
if (err)
return err;
}
@@ -2668,7 +2675,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
return 0; /* just ignore the advice */
if (vma->anon_vma) {
- err = unmerge_ksm_pages(vma, start, end);
+ err = unmerge_ksm_pages(vma, start, end, true);
if (err)
return err;
}
@@ -2784,6 +2791,8 @@ struct page *ksm_might_need_to_copy(struct page *page,
anon_vma->root == vma->anon_vma->root) {
return page; /* still no need to copy it */
}
+ if (PageHWPoison(page))
+ return ERR_PTR(-EHWPOISON);
if (!PageUptodate(page))
return page; /* let do_swap_page report the error */
diff --git a/mm/madvise.c b/mm/madvise.c
index 886f06066622..ec30f48f8f2e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -233,6 +233,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
static const struct mm_walk_ops swapin_walk_ops = {
.pmd_entry = swapin_walk_pmd_entry,
+ .walk_lock = PGWALK_RDLOCK,
};
static void shmem_swapin_range(struct vm_area_struct *vma,
@@ -383,7 +384,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
folio = pfn_folio(pmd_pfn(orig_pmd));
/* Do not interfere with other mappings of this folio */
- if (folio_mapcount(folio) != 1)
+ if (folio_estimated_sharers(folio) != 1)
goto huge_unlock;
if (pageout_anon_only_filter && !folio_test_anon(folio))
@@ -457,7 +458,7 @@ regular_folio:
if (folio_test_large(folio)) {
int err;
- if (folio_mapcount(folio) != 1)
+ if (folio_estimated_sharers(folio) != 1)
break;
if (pageout_anon_only_filter && !folio_test_anon(folio))
break;
@@ -534,6 +535,7 @@ regular_folio:
static const struct mm_walk_ops cold_walk_ops = {
.pmd_entry = madvise_cold_or_pageout_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static void madvise_cold_page_range(struct mmu_gather *tlb,
@@ -678,7 +680,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
if (folio_test_large(folio)) {
int err;
- if (folio_mapcount(folio) != 1)
+ if (folio_estimated_sharers(folio) != 1)
break;
if (!folio_trylock(folio))
break;
@@ -757,6 +759,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
static const struct mm_walk_ops madvise_free_walk_ops = {
.pmd_entry = madvise_free_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static int madvise_free_single_vma(struct vm_area_struct *vma,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index e8ca4bdcb03c..315fd5f45e3c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6024,6 +6024,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
static const struct mm_walk_ops precharge_walk_ops = {
.pmd_entry = mem_cgroup_count_precharge_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6303,6 +6304,7 @@ put: /* get_mctgt_type() gets & locks the page */
static const struct mm_walk_ops charge_walk_ops = {
.pmd_entry = mem_cgroup_move_charge_pte_range,
+ .walk_lock = PGWALK_RDLOCK,
};
static void mem_cgroup_move_charge(void)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index e245191e6b04..fe121fdb05f7 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -831,6 +831,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
static const struct mm_walk_ops hwp_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
@@ -2466,7 +2467,7 @@ int unpoison_memory(unsigned long pfn)
{
struct folio *folio;
struct page *p;
- int ret = -EBUSY;
+ int ret = -EBUSY, ghp;
unsigned long count = 1;
bool huge = false;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -2487,7 +2488,7 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
- if (!folio_test_hwpoison(folio)) {
+ if (!PageHWPoison(p)) {
unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
pfn, &unpoison_rs);
goto unlock_mutex;
@@ -2499,6 +2500,13 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
+ if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+ goto unlock_mutex;
+
+ /*
+ * Note that folio->_mapcount is overloaded in SLAB, so the simple test
+ * in folio_mapped() has to be done after folio_test_slab() is checked.
+ */
if (folio_mapped(folio)) {
unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
pfn, &unpoison_rs);
@@ -2511,32 +2519,28 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
- if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
- goto unlock_mutex;
-
- ret = get_hwpoison_page(p, MF_UNPOISON);
- if (!ret) {
+ ghp = get_hwpoison_page(p, MF_UNPOISON);
+ if (!ghp) {
if (PageHuge(p)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
- if (count == 0) {
- ret = -EBUSY;
+ if (count == 0)
goto unlock_mutex;
- }
}
ret = folio_test_clear_hwpoison(folio) ? 0 : -EBUSY;
- } else if (ret < 0) {
- if (ret == -EHWPOISON) {
+ } else if (ghp < 0) {
+ if (ghp == -EHWPOISON) {
ret = put_page_back_buddy(p) ? 0 : -EBUSY;
- } else
+ } else {
+ ret = ghp;
unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
pfn, &unpoison_rs);
+ }
} else {
if (PageHuge(p)) {
huge = true;
count = folio_free_raw_hwp(folio, false);
if (count == 0) {
- ret = -EBUSY;
folio_put(folio);
goto unlock_mutex;
}
@@ -2737,10 +2741,13 @@ retry:
if (ret > 0) {
ret = soft_offline_in_use_page(page);
} else if (ret == 0) {
- if (!page_handle_poison(page, true, false) && try_again) {
- try_again = false;
- flags &= ~MF_COUNT_INCREASED;
- goto retry;
+ if (!page_handle_poison(page, true, false)) {
+ if (try_again) {
+ try_again = false;
+ flags &= ~MF_COUNT_INCREASED;
+ goto retry;
+ }
+ ret = -EBUSY;
}
}
diff --git a/mm/memory.c b/mm/memory.c
index 01f39e8144ef..cdc4d4c1c858 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5257,11 +5257,8 @@ EXPORT_SYMBOL_GPL(handle_mm_fault);
static inline bool get_mmap_lock_carefully(struct mm_struct *mm, struct pt_regs *regs)
{
- /* Even if this succeeds, make it clear we *might* have slept */
- if (likely(mmap_read_trylock(mm))) {
- might_sleep();
+ if (likely(mmap_read_trylock(mm)))
return true;
- }
if (regs && !user_mode(regs)) {
unsigned long ip = instruction_pointer(regs);
@@ -5393,27 +5390,28 @@ retry:
if (!vma_is_anonymous(vma) && !vma_is_tcp(vma))
goto inval;
- /* find_mergeable_anon_vma uses adjacent vmas which are not locked */
- if (!vma->anon_vma && !vma_is_tcp(vma))
- goto inval;
-
if (!vma_start_read(vma))
goto inval;
/*
+ * find_mergeable_anon_vma uses adjacent vmas which are not locked.
+ * This check must happen after vma_start_read(); otherwise, a
+ * concurrent mremap() with MREMAP_DONTUNMAP could dissociate the VMA
+ * from its anon_vma.
+ */
+ if (unlikely(!vma->anon_vma && !vma_is_tcp(vma)))
+ goto inval_end_read;
+
+ /*
* Due to the possibility of userfault handler dropping mmap_lock, avoid
* it for now and fall back to page fault handling under mmap_lock.
*/
- if (userfaultfd_armed(vma)) {
- vma_end_read(vma);
- goto inval;
- }
+ if (userfaultfd_armed(vma))
+ goto inval_end_read;
/* Check since vm_start/vm_end might change before we lock the VMA */
- if (unlikely(address < vma->vm_start || address >= vma->vm_end)) {
- vma_end_read(vma);
- goto inval;
- }
+ if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ goto inval_end_read;
/* Check if the VMA got isolated after we found it */
if (vma->detached) {
@@ -5425,6 +5423,9 @@ retry:
rcu_read_unlock();
return vma;
+
+inval_end_read:
+ vma_end_read(vma);
inval:
rcu_read_unlock();
count_vm_vma_lock_event(VMA_LOCK_ABORT);
@@ -5701,6 +5702,9 @@ int __access_remote_vm(struct mm_struct *mm, unsigned long addr, void *buf,
if (mmap_read_lock_killable(mm))
return 0;
+ /* Untag the address before looking up the VMA */
+ addr = untagged_addr_remote(mm, addr);
+
/* Avoid triggering the temporary warning in __get_user_pages */
if (!vma_lookup(mm, addr) && !expand_stack(mm, addr))
return 0;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index edc25195f5bd..ec2eaceffd74 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -384,8 +384,10 @@ void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
VMA_ITERATOR(vmi, mm, 0);
mmap_write_lock(mm);
- for_each_vma(vmi, vma)
+ for_each_vma(vmi, vma) {
+ vma_start_write(vma);
mpol_rebind_policy(vma->vm_policy, new);
+ }
mmap_write_unlock(mm);
}
@@ -716,6 +718,14 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
.hugetlb_entry = queue_folios_hugetlb,
.pmd_entry = queue_folios_pte_range,
.test_walk = queue_pages_test_walk,
+ .walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops queue_pages_lock_vma_walk_ops = {
+ .hugetlb_entry = queue_folios_hugetlb,
+ .pmd_entry = queue_folios_pte_range,
+ .test_walk = queue_pages_test_walk,
+ .walk_lock = PGWALK_WRLOCK,
};
/*
@@ -736,7 +746,7 @@ static const struct mm_walk_ops queue_pages_walk_ops = {
static int
queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
nodemask_t *nodes, unsigned long flags,
- struct list_head *pagelist)
+ struct list_head *pagelist, bool lock_vma)
{
int err;
struct queue_pages qp = {
@@ -747,8 +757,10 @@ queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
.end = end,
.first = NULL,
};
+ const struct mm_walk_ops *ops = lock_vma ?
+ &queue_pages_lock_vma_walk_ops : &queue_pages_walk_ops;
- err = walk_page_range(mm, start, end, &queue_pages_walk_ops, &qp);
+ err = walk_page_range(mm, start, end, ops, &qp);
if (!qp.first)
/* whole range in hole */
@@ -768,6 +780,8 @@ static int vma_replace_policy(struct vm_area_struct *vma,
struct mempolicy *old;
struct mempolicy *new;
+ vma_assert_write_locked(vma);
+
pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
vma->vm_start, vma->vm_end, vma->vm_pgoff,
vma->vm_ops, vma->vm_file,
@@ -1074,7 +1088,7 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
vma = find_vma(mm, 0);
VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
queue_pages_range(mm, vma->vm_start, mm->task_size, &nmask,
- flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+ flags | MPOL_MF_DISCONTIG_OK, &pagelist, false);
if (!list_empty(&pagelist)) {
err = migrate_pages(&pagelist, alloc_migration_target, NULL,
@@ -1313,8 +1327,12 @@ static long do_mbind(unsigned long start, unsigned long len,
if (err)
goto mpol_out;
+ /*
+ * Lock the VMAs before scanning for pages to migrate, to ensure we don't
+ * miss a concurrently inserted page.
+ */
ret = queue_pages_range(mm, start, end, nmask,
- flags | MPOL_MF_INVERT, &pagelist);
+ flags | MPOL_MF_INVERT, &pagelist, true);
if (ret < 0) {
err = ret;
@@ -1538,6 +1556,7 @@ SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, le
break;
}
+ vma_start_write(vma);
new->home_node = home_node;
err = mbind_range(&vmi, vma, &prev, start, end, new);
mpol_put(new);
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index 8365158460ed..d5f492356e3e 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -279,6 +279,7 @@ next:
static const struct mm_walk_ops migrate_vma_walk_ops = {
.pmd_entry = migrate_vma_collect_pmd,
.pte_hole = migrate_vma_collect_hole,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
diff --git a/mm/mincore.c b/mm/mincore.c
index b7f7a516b26c..dad3622cc963 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -176,6 +176,7 @@ static const struct mm_walk_ops mincore_walk_ops = {
.pmd_entry = mincore_pte_range,
.pte_hole = mincore_unmapped_range,
.hugetlb_entry = mincore_hugetlb,
+ .walk_lock = PGWALK_RDLOCK,
};
/*
diff --git a/mm/mlock.c b/mm/mlock.c
index d7db94519884..479e09d0994c 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -371,6 +371,7 @@ static void mlock_vma_pages_range(struct vm_area_struct *vma,
{
static const struct mm_walk_ops mlock_walk_ops = {
.pmd_entry = mlock_pte_range,
+ .walk_lock = PGWALK_WRLOCK_VERIFY,
};
/*
@@ -477,7 +478,6 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
{
unsigned long nstart, end, tmp;
struct vm_area_struct *vma, *prev;
- int error;
VMA_ITERATOR(vmi, current->mm, start);
VM_BUG_ON(offset_in_page(start));
@@ -498,6 +498,7 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
nstart = start;
tmp = vma->vm_start;
for_each_vma_range(vmi, vma, end) {
+ int error;
vm_flags_t newflags;
if (vma->vm_start != tmp)
@@ -511,14 +512,15 @@ static int apply_vma_lock_flags(unsigned long start, size_t len,
tmp = end;
error = mlock_fixup(&vmi, vma, &prev, nstart, tmp, newflags);
if (error)
- break;
+ return error;
+ tmp = vma_iter_end(&vmi);
nstart = tmp;
}
- if (vma_iter_end(&vmi) < end)
+ if (tmp < end)
return -ENOMEM;
- return error;
+ return 0;
}
/*
diff --git a/mm/mmap.c b/mm/mmap.c
index 3eda23c9ebe7..3937479d0e07 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -615,6 +615,7 @@ static inline int dup_anon_vma(struct vm_area_struct *dst,
* anon pages imported.
*/
if (src->anon_vma && !dst->anon_vma) {
+ vma_start_write(dst);
dst->anon_vma = src->anon_vma;
return anon_vma_clone(dst, src);
}
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6f658d483704..3aef1340533a 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -568,6 +568,7 @@ static const struct mm_walk_ops prot_none_walk_ops = {
.pte_entry = prot_none_pte_entry,
.hugetlb_entry = prot_none_hugetlb_entry,
.test_walk = prot_none_test,
+ .walk_lock = PGWALK_WRLOCK,
};
int
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d3f42009bb70..b8d3d7040a50 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1193,7 +1193,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
* write_bandwidth = ---------------------------------------------------
* period
*
- * @written may have decreased due to folio_account_redirty().
+ * @written may have decreased due to folio_redirty_for_writepage().
* Avoid underflowing @bw calculation.
*/
bw = written - min(written, wb->written_stamp);
@@ -2712,37 +2712,6 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
EXPORT_SYMBOL(filemap_dirty_folio);
/**
- * folio_account_redirty - Manually account for redirtying a page.
- * @folio: The folio which is being redirtied.
- *
- * Most filesystems should call folio_redirty_for_writepage() instead
- * of this fuction. If your filesystem is doing writeback outside the
- * context of a writeback_control(), it can call this when redirtying
- * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED,
- * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN,
- * WB_WRITTEN) in long term. The mismatches will lead to systematic errors
- * in balanced_dirty_ratelimit and the dirty pages position control.
- */
-void folio_account_redirty(struct folio *folio)
-{
- struct address_space *mapping = folio->mapping;
-
- if (mapping && mapping_can_writeback(mapping)) {
- struct inode *inode = mapping->host;
- struct bdi_writeback *wb;
- struct wb_lock_cookie cookie = {};
- long nr = folio_nr_pages(folio);
-
- wb = unlocked_inode_to_wb_begin(inode, &cookie);
- current->nr_dirtied -= nr;
- node_stat_mod_folio(folio, NR_DIRTIED, -nr);
- wb_stat_mod(wb, WB_DIRTIED, -nr);
- unlocked_inode_to_wb_end(inode, &cookie);
- }
-}
-EXPORT_SYMBOL(folio_account_redirty);
-
-/**
* folio_redirty_for_writepage - Decline to write a dirty folio.
* @wbc: The writeback control.
* @folio: The folio.
@@ -2757,13 +2726,23 @@ EXPORT_SYMBOL(folio_account_redirty);
bool folio_redirty_for_writepage(struct writeback_control *wbc,
struct folio *folio)
{
- bool ret;
+ struct address_space *mapping = folio->mapping;
long nr = folio_nr_pages(folio);
+ bool ret;
wbc->pages_skipped += nr;
- ret = filemap_dirty_folio(folio->mapping, folio);
- folio_account_redirty(folio);
+ ret = filemap_dirty_folio(mapping, folio);
+ if (mapping && mapping_can_writeback(mapping)) {
+ struct inode *inode = mapping->host;
+ struct bdi_writeback *wb;
+ struct wb_lock_cookie cookie = {};
+ wb = unlocked_inode_to_wb_begin(inode, &cookie);
+ current->nr_dirtied -= nr;
+ node_stat_mod_folio(folio, NR_DIRTIED, -nr);
+ wb_stat_mod(wb, WB_DIRTIED, -nr);
+ unlocked_inode_to_wb_end(inode, &cookie);
+ }
return ret;
}
EXPORT_SYMBOL(folio_redirty_for_writepage);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 64437105fe0d..9b2d23fbf4d3 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -48,8 +48,11 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (walk->no_vma) {
/*
* pte_offset_map() might apply user-specific validation.
+ * Indeed, on x86_64 the pmd entries set up by init_espfix_ap()
+ * fit its pmd_bad() check (_PAGE_NX set and _PAGE_RW clear),
+ * and CONFIG_EFI_PGT_DUMP efi_mm goes so far as to walk them.
*/
- if (walk->mm == &init_mm)
+ if (walk->mm == &init_mm || addr >= TASK_SIZE)
pte = pte_offset_kernel(pmd, addr);
else
pte = pte_offset_map(pmd, addr);
@@ -397,6 +400,33 @@ static int __walk_page_range(unsigned long start, unsigned long end,
return err;
}
+static inline void process_mm_walk_lock(struct mm_struct *mm,
+ enum page_walk_lock walk_lock)
+{
+ if (walk_lock == PGWALK_RDLOCK)
+ mmap_assert_locked(mm);
+ else
+ mmap_assert_write_locked(mm);
+}
+
+static inline void process_vma_walk_lock(struct vm_area_struct *vma,
+ enum page_walk_lock walk_lock)
+{
+#ifdef CONFIG_PER_VMA_LOCK
+ switch (walk_lock) {
+ case PGWALK_WRLOCK:
+ vma_start_write(vma);
+ break;
+ case PGWALK_WRLOCK_VERIFY:
+ vma_assert_write_locked(vma);
+ break;
+ case PGWALK_RDLOCK:
+ /* PGWALK_RDLOCK is handled by process_mm_walk_lock */
+ break;
+ }
+#endif
+}
+
/**
* walk_page_range - walk page table with caller specific callbacks
* @mm: mm_struct representing the target process of page table walk
@@ -456,7 +486,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
if (!walk.mm)
return -EINVAL;
- mmap_assert_locked(walk.mm);
+ process_mm_walk_lock(walk.mm, ops->walk_lock);
vma = find_vma(walk.mm, start);
do {
@@ -471,6 +501,7 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
if (ops->pte_hole)
err = ops->pte_hole(start, next, -1, &walk);
} else { /* inside vma */
+ process_vma_walk_lock(vma, ops->walk_lock);
walk.vma = vma;
next = min(end, vma->vm_end);
vma = find_vma(mm, vma->vm_end);
@@ -546,7 +577,8 @@ int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
if (start < vma->vm_start || end > vma->vm_end)
return -EINVAL;
- mmap_assert_locked(walk.mm);
+ process_mm_walk_lock(walk.mm, ops->walk_lock);
+ process_vma_walk_lock(vma, ops->walk_lock);
return __walk_page_range(start, end, &walk);
}
@@ -563,7 +595,8 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
if (!walk.mm)
return -EINVAL;
- mmap_assert_locked(walk.mm);
+ process_mm_walk_lock(walk.mm, ops->walk_lock);
+ process_vma_walk_lock(vma, ops->walk_lock);
return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}
diff --git a/mm/readahead.c b/mm/readahead.c
index a9c999aa19af..e815c114de21 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -461,19 +461,6 @@ static int try_context_readahead(struct address_space *mapping,
return 1;
}
-/*
- * There are some parts of the kernel which assume that PMD entries
- * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then,
- * limit the maximum allocation order to PMD size. I'm not aware of any
- * assumptions about maximum order if THP are disabled, but 8 seems like
- * a good order (that's 1MB if you're using 4kB pages)
- */
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER
-#else
-#define MAX_PAGECACHE_ORDER 8
-#endif
-
static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index,
pgoff_t mark, unsigned int order, gfp_t gfp)
{
diff --git a/mm/shmem.c b/mm/shmem.c
index 2f2e0e618072..479d1ce65868 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -78,6 +78,7 @@ static struct vfsmount *shm_mnt;
#include <uapi/linux/memfd.h>
#include <linux/rmap.h>
#include <linux/uuid.h>
+#include <linux/quotaops.h>
#include <linux/uaccess.h>
@@ -89,6 +90,9 @@ static struct vfsmount *shm_mnt;
/* Pretend that each entry is of this size in directory's i_size */
#define BOGO_DIRENT_SIZE 20
+/* Pretend that one inode + its dentry occupy this much memory */
+#define BOGO_INODE_SIZE 1024
+
/* Symlink up to this size is kmalloc'ed instead of using a swappable page */
#define SHORT_SYMLINK_LEN 128
@@ -116,11 +120,14 @@ struct shmem_options {
int huge;
int seen;
bool noswap;
+ unsigned short quota_types;
+ struct shmem_quota_limits qlimits;
#define SHMEM_SEEN_BLOCKS 1
#define SHMEM_SEEN_INODES 2
#define SHMEM_SEEN_HUGE 4
#define SHMEM_SEEN_INUMS 8
#define SHMEM_SEEN_NOSWAP 16
+#define SHMEM_SEEN_QUOTA 32
};
#ifdef CONFIG_TMPFS
@@ -133,7 +140,8 @@ static unsigned long shmem_default_max_inodes(void)
{
unsigned long nr_pages = totalram_pages();
- return min(nr_pages - totalhigh_pages(), nr_pages / 2);
+ return min3(nr_pages - totalhigh_pages(), nr_pages / 2,
+ ULONG_MAX / BOGO_INODE_SIZE);
}
#endif
@@ -199,33 +207,47 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
vm_unacct_memory(pages * VM_ACCT(PAGE_SIZE));
}
-static inline bool shmem_inode_acct_block(struct inode *inode, long pages)
+static int shmem_inode_acct_block(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ int err = -ENOSPC;
if (shmem_acct_block(info->flags, pages))
- return false;
+ return err;
+ might_sleep(); /* when quotas */
if (sbinfo->max_blocks) {
if (percpu_counter_compare(&sbinfo->used_blocks,
sbinfo->max_blocks - pages) > 0)
goto unacct;
+
+ err = dquot_alloc_block_nodirty(inode, pages);
+ if (err)
+ goto unacct;
+
percpu_counter_add(&sbinfo->used_blocks, pages);
+ } else {
+ err = dquot_alloc_block_nodirty(inode, pages);
+ if (err)
+ goto unacct;
}
- return true;
+ return 0;
unacct:
shmem_unacct_blocks(info->flags, pages);
- return false;
+ return err;
}
-static inline void shmem_inode_unacct_blocks(struct inode *inode, long pages)
+static void shmem_inode_unacct_blocks(struct inode *inode, long pages)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ might_sleep(); /* when quotas */
+ dquot_free_block_nodirty(inode, pages);
+
if (sbinfo->max_blocks)
percpu_counter_sub(&sbinfo->used_blocks, pages);
shmem_unacct_blocks(info->flags, pages);
@@ -254,6 +276,47 @@ bool vma_is_shmem(struct vm_area_struct *vma)
static LIST_HEAD(shmem_swaplist);
static DEFINE_MUTEX(shmem_swaplist_mutex);
+#ifdef CONFIG_TMPFS_QUOTA
+
+static int shmem_enable_quotas(struct super_block *sb,
+ unsigned short quota_types)
+{
+ int type, err = 0;
+
+ sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
+ for (type = 0; type < SHMEM_MAXQUOTAS; type++) {
+ if (!(quota_types & (1 << type)))
+ continue;
+ err = dquot_load_quota_sb(sb, type, QFMT_SHMEM,
+ DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED);
+ if (err)
+ goto out_err;
+ }
+ return 0;
+
+out_err:
+ pr_warn("tmpfs: failed to enable quota tracking (type=%d, err=%d)\n",
+ type, err);
+ for (type--; type >= 0; type--)
+ dquot_quota_off(sb, type);
+ return err;
+}
+
+static void shmem_disable_quotas(struct super_block *sb)
+{
+ int type;
+
+ for (type = 0; type < SHMEM_MAXQUOTAS; type++)
+ dquot_quota_off(sb, type);
+}
+
+static struct dquot **shmem_get_dquots(struct inode *inode)
+{
+ return SHMEM_I(inode)->i_dquot;
+}
+#endif /* CONFIG_TMPFS_QUOTA */
+
/*
* shmem_reserve_inode() performs bookkeeping to reserve a shmem inode, and
* produces a novel ino for the newly allocated inode.
@@ -272,11 +335,11 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
if (!(sb->s_flags & SB_KERNMOUNT)) {
raw_spin_lock(&sbinfo->stat_lock);
if (sbinfo->max_inodes) {
- if (!sbinfo->free_inodes) {
+ if (sbinfo->free_ispace < BOGO_INODE_SIZE) {
raw_spin_unlock(&sbinfo->stat_lock);
return -ENOSPC;
}
- sbinfo->free_inodes--;
+ sbinfo->free_ispace -= BOGO_INODE_SIZE;
}
if (inop) {
ino = sbinfo->next_ino++;
@@ -330,12 +393,12 @@ static int shmem_reserve_inode(struct super_block *sb, ino_t *inop)
return 0;
}
-static void shmem_free_inode(struct super_block *sb)
+static void shmem_free_inode(struct super_block *sb, size_t freed_ispace)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
if (sbinfo->max_inodes) {
raw_spin_lock(&sbinfo->stat_lock);
- sbinfo->free_inodes++;
+ sbinfo->free_ispace += BOGO_INODE_SIZE + freed_ispace;
raw_spin_unlock(&sbinfo->stat_lock);
}
}
@@ -343,62 +406,65 @@ static void shmem_free_inode(struct super_block *sb)
/**
* shmem_recalc_inode - recalculate the block usage of an inode
* @inode: inode to recalc
+ * @alloced: the change in number of pages allocated to inode
+ * @swapped: the change in number of pages swapped from inode
*
* We have to calculate the free blocks since the mm can drop
* undirtied hole pages behind our back.
*
* But normally info->alloced == inode->i_mapping->nrpages + info->swapped
* So mm freed is info->alloced - (inode->i_mapping->nrpages + info->swapped)
- *
- * It has to be called with the spinlock held.
*/
-static void shmem_recalc_inode(struct inode *inode)
+static void shmem_recalc_inode(struct inode *inode, long alloced, long swapped)
{
struct shmem_inode_info *info = SHMEM_I(inode);
long freed;
- freed = info->alloced - info->swapped - inode->i_mapping->nrpages;
- if (freed > 0) {
+ spin_lock(&info->lock);
+ info->alloced += alloced;
+ info->swapped += swapped;
+ freed = info->alloced - info->swapped -
+ READ_ONCE(inode->i_mapping->nrpages);
+ /*
+ * Special case: whereas normally shmem_recalc_inode() is called
+ * after i_mapping->nrpages has already been adjusted (up or down),
+ * shmem_writepage() has to raise swapped before nrpages is lowered -
+ * to stop a racing shmem_recalc_inode() from thinking that a page has
+ * been freed. Compensate here, to avoid the need for a followup call.
+ */
+ if (swapped > 0)
+ freed += swapped;
+ if (freed > 0)
info->alloced -= freed;
- inode->i_blocks -= freed * BLOCKS_PER_PAGE;
+ spin_unlock(&info->lock);
+
+ /* The quota case may block */
+ if (freed > 0)
shmem_inode_unacct_blocks(inode, freed);
- }
}
bool shmem_charge(struct inode *inode, long pages)
{
- struct shmem_inode_info *info = SHMEM_I(inode);
- unsigned long flags;
+ struct address_space *mapping = inode->i_mapping;
- if (!shmem_inode_acct_block(inode, pages))
+ if (shmem_inode_acct_block(inode, pages))
return false;
/* nrpages adjustment first, then shmem_recalc_inode() when balanced */
- inode->i_mapping->nrpages += pages;
-
- spin_lock_irqsave(&info->lock, flags);
- info->alloced += pages;
- inode->i_blocks += pages * BLOCKS_PER_PAGE;
- shmem_recalc_inode(inode);
- spin_unlock_irqrestore(&info->lock, flags);
+ xa_lock_irq(&mapping->i_pages);
+ mapping->nrpages += pages;
+ xa_unlock_irq(&mapping->i_pages);
+ shmem_recalc_inode(inode, pages, 0);
return true;
}
void shmem_uncharge(struct inode *inode, long pages)
{
- struct shmem_inode_info *info = SHMEM_I(inode);
- unsigned long flags;
-
+ /* pages argument is currently unused: keep it to help debugging */
/* nrpages adjustment done by __filemap_remove_folio() or caller */
- spin_lock_irqsave(&info->lock, flags);
- info->alloced -= pages;
- inode->i_blocks -= pages * BLOCKS_PER_PAGE;
- shmem_recalc_inode(inode);
- spin_unlock_irqrestore(&info->lock, flags);
-
- shmem_inode_unacct_blocks(inode, pages);
+ shmem_recalc_inode(inode, 0, 0);
}
/*
@@ -806,14 +872,16 @@ unsigned long shmem_partial_swap_usage(struct address_space *mapping,
XA_STATE(xas, &mapping->i_pages, start);
struct page *page;
unsigned long swapped = 0;
+ unsigned long max = end - 1;
rcu_read_lock();
- xas_for_each(&xas, page, end - 1) {
+ xas_for_each(&xas, page, max) {
if (xas_retry(&xas, page))
continue;
if (xa_is_value(page))
swapped++;
-
+ if (xas.xa_index == max)
+ break;
if (need_resched()) {
xas_pause(&xas);
cond_resched_rcu();
@@ -1038,16 +1106,13 @@ whole_folios:
folio_batch_release(&fbatch);
}
- spin_lock_irq(&info->lock);
- info->swapped -= nr_swaps_freed;
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, 0, -nr_swaps_freed);
}
void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
{
shmem_undo_range(inode, lstart, lend, false);
- inode->i_ctime = inode->i_mtime = current_time(inode);
+ inode->i_mtime = inode_set_ctime_current(inode);
inode_inc_iversion(inode);
}
EXPORT_SYMBOL_GPL(shmem_truncate_range);
@@ -1059,11 +1124,9 @@ static int shmem_getattr(struct mnt_idmap *idmap,
struct inode *inode = path->dentry->d_inode;
struct shmem_inode_info *info = SHMEM_I(inode);
- if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
- spin_lock_irq(&info->lock);
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
- }
+ if (info->alloced - info->swapped != inode->i_mapping->nrpages)
+ shmem_recalc_inode(inode, 0, 0);
+
if (info->fsflags & FS_APPEND_FL)
stat->attributes |= STATX_ATTR_APPEND;
if (info->fsflags & FS_IMMUTABLE_FL)
@@ -1073,7 +1136,7 @@ static int shmem_getattr(struct mnt_idmap *idmap,
stat->attributes_mask |= (STATX_ATTR_APPEND |
STATX_ATTR_IMMUTABLE |
STATX_ATTR_NODUMP);
- generic_fillattr(idmap, inode, stat);
+ generic_fillattr(idmap, request_mask, inode, stat);
if (shmem_is_huge(inode, 0, false, NULL, 0))
stat->blksize = HPAGE_PMD_SIZE;
@@ -1140,13 +1203,28 @@ static int shmem_setattr(struct mnt_idmap *idmap,
}
}
+ if (is_quota_modification(idmap, inode, attr)) {
+ error = dquot_initialize(inode);
+ if (error)
+ return error;
+ }
+
+ /* Transfer quota accounting */
+ if (i_uid_needs_update(idmap, attr, inode) ||
+ i_gid_needs_update(idmap, attr, inode)) {
+ error = dquot_transfer(idmap, inode, attr);
+
+ if (error)
+ return error;
+ }
+
setattr_copy(idmap, inode, attr);
if (attr->ia_valid & ATTR_MODE)
error = posix_acl_chmod(idmap, dentry, inode->i_mode);
if (!error && update_ctime) {
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
if (update_mtime)
- inode->i_mtime = inode->i_ctime;
+ inode->i_mtime = inode_get_ctime(inode);
inode_inc_iversion(inode);
}
return error;
@@ -1156,6 +1234,7 @@ static void shmem_evict_inode(struct inode *inode)
{
struct shmem_inode_info *info = SHMEM_I(inode);
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ size_t freed = 0;
if (shmem_mapping(inode->i_mapping)) {
shmem_unacct_size(info->flags, inode->i_size);
@@ -1182,10 +1261,14 @@ static void shmem_evict_inode(struct inode *inode)
}
}
- simple_xattrs_free(&info->xattrs);
+ simple_xattrs_free(&info->xattrs, sbinfo->max_inodes ? &freed : NULL);
+ shmem_free_inode(inode->i_sb, freed);
WARN_ON(inode->i_blocks);
- shmem_free_inode(inode->i_sb);
clear_inode(inode);
+#ifdef CONFIG_TMPFS_QUOTA
+ dquot_free_inode(inode);
+ dquot_drop(inode);
+#endif
}
static int shmem_find_swap_entries(struct address_space *mapping,
@@ -1429,11 +1512,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
if (add_to_swap_cache(folio, swap,
__GFP_HIGH | __GFP_NOMEMALLOC | __GFP_NOWARN,
NULL) == 0) {
- spin_lock_irq(&info->lock);
- shmem_recalc_inode(inode);
- info->swapped++;
- spin_unlock_irq(&info->lock);
-
+ shmem_recalc_inode(inode, 0, 1);
swap_shmem_alloc(swap);
shmem_delete_from_page_cache(folio, swp_to_radix_entry(swap));
@@ -1588,13 +1667,14 @@ static struct folio *shmem_alloc_and_acct_folio(gfp_t gfp, struct inode *inode,
struct shmem_inode_info *info = SHMEM_I(inode);
struct folio *folio;
int nr;
- int err = -ENOSPC;
+ int err;
if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
huge = false;
nr = huge ? HPAGE_PMD_NR : 1;
- if (!shmem_inode_acct_block(inode, nr))
+ err = shmem_inode_acct_block(inode, nr);
+ if (err)
goto failed;
if (huge)
@@ -1703,7 +1783,6 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
struct folio *folio, swp_entry_t swap)
{
struct address_space *mapping = inode->i_mapping;
- struct shmem_inode_info *info = SHMEM_I(inode);
swp_entry_t swapin_error;
void *old;
@@ -1716,16 +1795,12 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
folio_wait_writeback(folio);
delete_from_swap_cache(folio);
- spin_lock_irq(&info->lock);
/*
- * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks won't
- * be 0 when inode is released and thus trigger WARN_ON(inode->i_blocks) in
- * shmem_evict_inode.
+ * Don't treat swapin error folio as alloced. Otherwise inode->i_blocks
+ * won't be 0 when inode is released and thus trigger WARN_ON(i_blocks)
+ * in shmem_evict_inode().
*/
- info->alloced--;
- info->swapped--;
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, -1, -1);
swap_free(swap);
}
@@ -1812,10 +1887,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
if (error)
goto failed;
- spin_lock_irq(&info->lock);
- info->swapped--;
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, 0, -1);
if (sgp == SGP_WRITE)
folio_mark_accessed(folio);
@@ -1980,13 +2052,9 @@ alloc_nohuge:
charge_mm);
if (error)
goto unacct;
- folio_add_lru(folio);
- spin_lock_irq(&info->lock);
- info->alloced += folio_nr_pages(folio);
- inode->i_blocks += (blkcnt_t)BLOCKS_PER_PAGE << folio_order(folio);
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ folio_add_lru(folio);
+ shmem_recalc_inode(inode, folio_nr_pages(folio), 0);
alloced = true;
if (folio_test_pmd_mappable(folio) &&
@@ -2035,9 +2103,7 @@ clear:
if (alloced) {
folio_clear_dirty(folio);
filemap_remove_folio(folio);
- spin_lock_irq(&info->lock);
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, 0, 0);
}
error = -EINVAL;
goto unlock;
@@ -2063,9 +2129,7 @@ unlock:
folio_put(folio);
}
if (error == -ENOSPC && !once++) {
- spin_lock_irq(&info->lock);
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
+ shmem_recalc_inode(inode, 0, 0);
goto repeat;
}
if (error == -EEXIST)
@@ -2326,6 +2390,12 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
return 0;
}
+static int shmem_file_open(struct inode *inode, struct file *file)
+{
+ file->f_mode |= FMODE_CAN_ODIRECT;
+ return generic_file_open(inode, file);
+}
+
#ifdef CONFIG_TMPFS_XATTR
static int shmem_initxattrs(struct inode *, const struct xattr *, void *);
@@ -2355,77 +2425,127 @@ static void shmem_set_inode_flags(struct inode *inode, unsigned int fsflags)
#define shmem_initxattrs NULL
#endif
-static struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb,
- struct inode *dir, umode_t mode, dev_t dev,
- unsigned long flags)
+static struct offset_ctx *shmem_get_offset_ctx(struct inode *inode)
+{
+ return &SHMEM_I(inode)->dir_offsets;
+}
+
+static struct inode *__shmem_get_inode(struct mnt_idmap *idmap,
+ struct super_block *sb,
+ struct inode *dir, umode_t mode,
+ dev_t dev, unsigned long flags)
{
struct inode *inode;
struct shmem_inode_info *info;
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
ino_t ino;
+ int err;
+
+ err = shmem_reserve_inode(sb, &ino);
+ if (err)
+ return ERR_PTR(err);
- if (shmem_reserve_inode(sb, &ino))
- return NULL;
inode = new_inode(sb);
- if (inode) {
- inode->i_ino = ino;
- inode_init_owner(idmap, inode, dir, mode);
- inode->i_blocks = 0;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
- inode->i_generation = get_random_u32();
- info = SHMEM_I(inode);
- memset(info, 0, (char *)inode - (char *)info);
- spin_lock_init(&info->lock);
- atomic_set(&info->stop_eviction, 0);
- info->seals = F_SEAL_SEAL;
- info->flags = flags & VM_NORESERVE;
- info->i_crtime = inode->i_mtime;
- info->fsflags = (dir == NULL) ? 0 :
- SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
- if (info->fsflags)
- shmem_set_inode_flags(inode, info->fsflags);
- INIT_LIST_HEAD(&info->shrinklist);
- INIT_LIST_HEAD(&info->swaplist);
- if (sbinfo->noswap)
- mapping_set_unevictable(inode->i_mapping);
- simple_xattrs_init(&info->xattrs);
- cache_no_acl(inode);
- mapping_set_large_folios(inode->i_mapping);
-
- switch (mode & S_IFMT) {
- default:
- inode->i_op = &shmem_special_inode_operations;
- init_special_inode(inode, mode, dev);
- break;
- case S_IFREG:
- inode->i_mapping->a_ops = &shmem_aops;
- inode->i_op = &shmem_inode_operations;
- inode->i_fop = &shmem_file_operations;
- mpol_shared_policy_init(&info->policy,
- shmem_get_sbmpol(sbinfo));
- break;
- case S_IFDIR:
- inc_nlink(inode);
- /* Some things misbehave if size == 0 on a directory */
- inode->i_size = 2 * BOGO_DIRENT_SIZE;
- inode->i_op = &shmem_dir_inode_operations;
- inode->i_fop = &simple_dir_operations;
- break;
- case S_IFLNK:
- /*
- * Must not load anything in the rbtree,
- * mpol_free_shared_policy will not be called.
- */
- mpol_shared_policy_init(&info->policy, NULL);
- break;
- }
+ if (!inode) {
+ shmem_free_inode(sb, 0);
+ return ERR_PTR(-ENOSPC);
+ }
+
+ inode->i_ino = ino;
+ inode_init_owner(idmap, inode, dir, mode);
+ inode->i_blocks = 0;
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
+ inode->i_generation = get_random_u32();
+ info = SHMEM_I(inode);
+ memset(info, 0, (char *)inode - (char *)info);
+ spin_lock_init(&info->lock);
+ atomic_set(&info->stop_eviction, 0);
+ info->seals = F_SEAL_SEAL;
+ info->flags = flags & VM_NORESERVE;
+ info->i_crtime = inode->i_mtime;
+ info->fsflags = (dir == NULL) ? 0 :
+ SHMEM_I(dir)->fsflags & SHMEM_FL_INHERITED;
+ if (info->fsflags)
+ shmem_set_inode_flags(inode, info->fsflags);
+ INIT_LIST_HEAD(&info->shrinklist);
+ INIT_LIST_HEAD(&info->swaplist);
+ INIT_LIST_HEAD(&info->swaplist);
+ if (sbinfo->noswap)
+ mapping_set_unevictable(inode->i_mapping);
+ simple_xattrs_init(&info->xattrs);
+ cache_no_acl(inode);
+ mapping_set_large_folios(inode->i_mapping);
+
+ switch (mode & S_IFMT) {
+ default:
+ inode->i_op = &shmem_special_inode_operations;
+ init_special_inode(inode, mode, dev);
+ break;
+ case S_IFREG:
+ inode->i_mapping->a_ops = &shmem_aops;
+ inode->i_op = &shmem_inode_operations;
+ inode->i_fop = &shmem_file_operations;
+ mpol_shared_policy_init(&info->policy,
+ shmem_get_sbmpol(sbinfo));
+ break;
+ case S_IFDIR:
+ inc_nlink(inode);
+ /* Some things misbehave if size == 0 on a directory */
+ inode->i_size = 2 * BOGO_DIRENT_SIZE;
+ inode->i_op = &shmem_dir_inode_operations;
+ inode->i_fop = &simple_offset_dir_operations;
+ simple_offset_init(shmem_get_offset_ctx(inode));
+ break;
+ case S_IFLNK:
+ /*
+ * Must not load anything in the rbtree,
+ * mpol_free_shared_policy will not be called.
+ */
+ mpol_shared_policy_init(&info->policy, NULL);
+ break;
+ }
+
+ lockdep_annotate_inode_mutex_key(inode);
+ return inode;
+}
- lockdep_annotate_inode_mutex_key(inode);
- } else
- shmem_free_inode(sb);
+#ifdef CONFIG_TMPFS_QUOTA
+static struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+ struct super_block *sb, struct inode *dir,
+ umode_t mode, dev_t dev, unsigned long flags)
+{
+ int err;
+ struct inode *inode;
+
+ inode = __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
+ if (IS_ERR(inode))
+ return inode;
+
+ err = dquot_initialize(inode);
+ if (err)
+ goto errout;
+
+ err = dquot_alloc_inode(inode);
+ if (err) {
+ dquot_drop(inode);
+ goto errout;
+ }
return inode;
+
+errout:
+ inode->i_flags |= S_NOQUOTA;
+ iput(inode);
+ return ERR_PTR(err);
+}
+#else
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap,
+ struct super_block *sb, struct inode *dir,
+ umode_t mode, dev_t dev, unsigned long flags)
+{
+ return __shmem_get_inode(idmap, sb, dir, mode, dev, flags);
}
+#endif /* CONFIG_TMPFS_QUOTA */
#ifdef CONFIG_USERFAULTFD
int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
@@ -2445,7 +2565,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
int ret;
pgoff_t max_off;
- if (!shmem_inode_acct_block(inode, 1)) {
+ if (shmem_inode_acct_block(inode, 1)) {
/*
* We may have got a page, returned -ENOENT triggering a retry,
* and now we find ourselves with -ENOMEM. Release the page, to
@@ -2527,12 +2647,7 @@ int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
if (ret)
goto out_delete_from_cache;
- spin_lock_irq(&info->lock);
- info->alloced++;
- inode->i_blocks += BLOCKS_PER_PAGE;
- shmem_recalc_inode(inode);
- spin_unlock_irq(&info->lock);
-
+ shmem_recalc_inode(inode, 1, 0);
folio_unlock(folio);
return 0;
out_delete_from_cache:
@@ -2731,6 +2846,28 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
return retval ? retval : error;
}
+static ssize_t shmem_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file->f_mapping->host;
+ ssize_t ret;
+
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto unlock;
+ ret = file_remove_privs(file);
+ if (ret)
+ goto unlock;
+ ret = file_update_time(file);
+ if (ret)
+ goto unlock;
+ ret = generic_perform_write(iocb, from);
+unlock:
+ inode_unlock(inode);
+ return ret;
+}
+
static bool zero_pipe_buf_get(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
@@ -2796,7 +2933,8 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
if (*ppos >= i_size_read(inode))
break;
- error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio, SGP_READ);
+ error = shmem_get_folio(inode, *ppos / PAGE_SIZE, &folio,
+ SGP_READ);
if (error) {
if (error == -EINVAL)
error = 0;
@@ -2805,7 +2943,9 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
if (folio) {
folio_unlock(folio);
- if (folio_test_hwpoison(folio)) {
+ if (folio_test_hwpoison(folio) ||
+ (folio_test_large(folio) &&
+ folio_test_has_hwpoisoned(folio))) {
error = -EIO;
break;
}
@@ -2841,7 +2981,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
folio_put(folio);
folio = NULL;
} else {
- n = splice_zeropage_into_pipe(pipe, *ppos, len);
+ n = splice_zeropage_into_pipe(pipe, *ppos, part);
}
if (!n)
@@ -3052,7 +3192,7 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
}
if (sbinfo->max_inodes) {
buf->f_files = sbinfo->max_inodes;
- buf->f_ffree = sbinfo->free_inodes;
+ buf->f_ffree = sbinfo->free_ispace / BOGO_INODE_SIZE;
}
/* else leave those fields 0 like simple_statfs */
@@ -3069,27 +3209,32 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir,
struct dentry *dentry, umode_t mode, dev_t dev)
{
struct inode *inode;
- int error = -ENOSPC;
+ int error;
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, dev, VM_NORESERVE);
- if (inode) {
- error = simple_acl_create(dir, inode);
- if (error)
- goto out_iput;
- error = security_inode_init_security(inode, dir,
- &dentry->d_name,
- shmem_initxattrs, NULL);
- if (error && error != -EOPNOTSUPP)
- goto out_iput;
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
- error = 0;
- dir->i_size += BOGO_DIRENT_SIZE;
- dir->i_ctime = dir->i_mtime = current_time(dir);
- inode_inc_iversion(dir);
- d_instantiate(dentry, inode);
- dget(dentry); /* Extra count - pin the dentry in core */
- }
+ error = simple_acl_create(dir, inode);
+ if (error)
+ goto out_iput;
+ error = security_inode_init_security(inode, dir,
+ &dentry->d_name,
+ shmem_initxattrs, NULL);
+ if (error && error != -EOPNOTSUPP)
+ goto out_iput;
+
+ error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+ if (error)
+ goto out_iput;
+
+ dir->i_size += BOGO_DIRENT_SIZE;
+ dir->i_mtime = inode_set_ctime_current(dir);
+ inode_inc_iversion(dir);
+ d_instantiate(dentry, inode);
+ dget(dentry); /* Extra count - pin the dentry in core */
return error;
+
out_iput:
iput(inode);
return error;
@@ -3100,20 +3245,26 @@ shmem_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
struct file *file, umode_t mode)
{
struct inode *inode;
- int error = -ENOSPC;
+ int error;
inode = shmem_get_inode(idmap, dir->i_sb, dir, mode, 0, VM_NORESERVE);
- if (inode) {
- error = security_inode_init_security(inode, dir,
- NULL,
- shmem_initxattrs, NULL);
- if (error && error != -EOPNOTSUPP)
- goto out_iput;
- error = simple_acl_create(dir, inode);
- if (error)
- goto out_iput;
- d_tmpfile(file, inode);
+
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
+ goto err_out;
}
+
+ error = security_inode_init_security(inode, dir,
+ NULL,
+ shmem_initxattrs, NULL);
+ if (error && error != -EOPNOTSUPP)
+ goto out_iput;
+ error = simple_acl_create(dir, inode);
+ if (error)
+ goto out_iput;
+ d_tmpfile(file, inode);
+
+err_out:
return finish_open_simple(file, error);
out_iput:
iput(inode);
@@ -3159,8 +3310,16 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
goto out;
}
+ ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+ if (ret) {
+ if (inode->i_nlink)
+ shmem_free_inode(inode->i_sb, 0);
+ goto out;
+ }
+
dir->i_size += BOGO_DIRENT_SIZE;
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ dir->i_mtime = inode_set_ctime_to_ts(dir,
+ inode_set_ctime_current(inode));
inode_inc_iversion(dir);
inc_nlink(inode);
ihold(inode); /* New dentry reference */
@@ -3175,10 +3334,13 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
struct inode *inode = d_inode(dentry);
if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))
- shmem_free_inode(inode->i_sb);
+ shmem_free_inode(inode->i_sb, 0);
+
+ simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
dir->i_size -= BOGO_DIRENT_SIZE;
- inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
+ dir->i_mtime = inode_set_ctime_to_ts(dir,
+ inode_set_ctime_current(inode));
inode_inc_iversion(dir);
drop_nlink(inode);
dput(dentry); /* Undo the count from "create" - this does all the work */
@@ -3235,24 +3397,29 @@ static int shmem_rename2(struct mnt_idmap *idmap,
{
struct inode *inode = d_inode(old_dentry);
int they_are_dirs = S_ISDIR(inode->i_mode);
+ int error;
if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
return -EINVAL;
if (flags & RENAME_EXCHANGE)
- return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
+ return simple_offset_rename_exchange(old_dir, old_dentry,
+ new_dir, new_dentry);
if (!simple_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {
- int error;
-
error = shmem_whiteout(idmap, old_dir, old_dentry);
if (error)
return error;
}
+ simple_offset_remove(shmem_get_offset_ctx(old_dir), old_dentry);
+ error = simple_offset_add(shmem_get_offset_ctx(new_dir), old_dentry);
+ if (error)
+ return error;
+
if (d_really_is_positive(new_dentry)) {
(void) shmem_unlink(new_dir, new_dentry);
if (they_are_dirs) {
@@ -3266,9 +3433,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
old_dir->i_size -= BOGO_DIRENT_SIZE;
new_dir->i_size += BOGO_DIRENT_SIZE;
- old_dir->i_ctime = old_dir->i_mtime =
- new_dir->i_ctime = new_dir->i_mtime =
- inode->i_ctime = current_time(old_dir);
+ simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
return 0;
@@ -3288,31 +3453,32 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
inode = shmem_get_inode(idmap, dir->i_sb, dir, S_IFLNK | 0777, 0,
VM_NORESERVE);
- if (!inode)
- return -ENOSPC;
+
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
error = security_inode_init_security(inode, dir, &dentry->d_name,
shmem_initxattrs, NULL);
- if (error && error != -EOPNOTSUPP) {
- iput(inode);
- return error;
- }
+ if (error && error != -EOPNOTSUPP)
+ goto out_iput;
+
+ error = simple_offset_add(shmem_get_offset_ctx(dir), dentry);
+ if (error)
+ goto out_iput;
inode->i_size = len-1;
if (len <= SHORT_SYMLINK_LEN) {
inode->i_link = kmemdup(symname, len, GFP_KERNEL);
if (!inode->i_link) {
- iput(inode);
- return -ENOMEM;
+ error = -ENOMEM;
+ goto out_remove_offset;
}
inode->i_op = &shmem_short_symlink_operations;
} else {
inode_nohighmem(inode);
error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
- if (error) {
- iput(inode);
- return error;
- }
+ if (error)
+ goto out_remove_offset;
inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
memcpy(folio_address(folio), symname, len);
@@ -3322,11 +3488,17 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
folio_put(folio);
}
dir->i_size += BOGO_DIRENT_SIZE;
- dir->i_ctime = dir->i_mtime = current_time(dir);
+ dir->i_mtime = inode_set_ctime_current(dir);
inode_inc_iversion(dir);
d_instantiate(dentry, inode);
dget(dentry);
return 0;
+
+out_remove_offset:
+ simple_offset_remove(shmem_get_offset_ctx(dir), dentry);
+out_iput:
+ iput(inode);
+ return error;
}
static void shmem_put_link(void *arg)
@@ -3394,7 +3566,7 @@ static int shmem_fileattr_set(struct mnt_idmap *idmap,
(fa->flags & SHMEM_FL_USER_MODIFIABLE);
shmem_set_inode_flags(inode, info->fsflags);
- inode->i_ctime = current_time(inode);
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
return 0;
}
@@ -3414,21 +3586,40 @@ static int shmem_initxattrs(struct inode *inode,
void *fs_info)
{
struct shmem_inode_info *info = SHMEM_I(inode);
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
const struct xattr *xattr;
struct simple_xattr *new_xattr;
+ size_t ispace = 0;
size_t len;
+ if (sbinfo->max_inodes) {
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ ispace += simple_xattr_space(xattr->name,
+ xattr->value_len + XATTR_SECURITY_PREFIX_LEN);
+ }
+ if (ispace) {
+ raw_spin_lock(&sbinfo->stat_lock);
+ if (sbinfo->free_ispace < ispace)
+ ispace = 0;
+ else
+ sbinfo->free_ispace -= ispace;
+ raw_spin_unlock(&sbinfo->stat_lock);
+ if (!ispace)
+ return -ENOSPC;
+ }
+ }
+
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
new_xattr = simple_xattr_alloc(xattr->value, xattr->value_len);
if (!new_xattr)
- return -ENOMEM;
+ break;
len = strlen(xattr->name) + 1;
new_xattr->name = kmalloc(XATTR_SECURITY_PREFIX_LEN + len,
- GFP_KERNEL);
+ GFP_KERNEL_ACCOUNT);
if (!new_xattr->name) {
kvfree(new_xattr);
- return -ENOMEM;
+ break;
}
memcpy(new_xattr->name, XATTR_SECURITY_PREFIX,
@@ -3439,6 +3630,16 @@ static int shmem_initxattrs(struct inode *inode,
simple_xattr_add(&info->xattrs, new_xattr);
}
+ if (xattr->name != NULL) {
+ if (ispace) {
+ raw_spin_lock(&sbinfo->stat_lock);
+ sbinfo->free_ispace += ispace;
+ raw_spin_unlock(&sbinfo->stat_lock);
+ }
+ simple_xattrs_free(&info->xattrs, NULL);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -3459,15 +3660,40 @@ static int shmem_xattr_handler_set(const struct xattr_handler *handler,
size_t size, int flags)
{
struct shmem_inode_info *info = SHMEM_I(inode);
- int err;
+ struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
+ struct simple_xattr *old_xattr;
+ size_t ispace = 0;
name = xattr_full_name(handler, name);
- err = simple_xattr_set(&info->xattrs, name, value, size, flags, NULL);
- if (!err) {
- inode->i_ctime = current_time(inode);
+ if (value && sbinfo->max_inodes) {
+ ispace = simple_xattr_space(name, size);
+ raw_spin_lock(&sbinfo->stat_lock);
+ if (sbinfo->free_ispace < ispace)
+ ispace = 0;
+ else
+ sbinfo->free_ispace -= ispace;
+ raw_spin_unlock(&sbinfo->stat_lock);
+ if (!ispace)
+ return -ENOSPC;
+ }
+
+ old_xattr = simple_xattr_set(&info->xattrs, name, value, size, flags);
+ if (!IS_ERR(old_xattr)) {
+ ispace = 0;
+ if (old_xattr && sbinfo->max_inodes)
+ ispace = simple_xattr_space(old_xattr->name,
+ old_xattr->size);
+ simple_xattr_free(old_xattr);
+ old_xattr = NULL;
+ inode_set_ctime_current(inode);
inode_inc_iversion(inode);
}
- return err;
+ if (ispace) {
+ raw_spin_lock(&sbinfo->stat_lock);
+ sbinfo->free_ispace += ispace;
+ raw_spin_unlock(&sbinfo->stat_lock);
+ }
+ return PTR_ERR(old_xattr);
}
static const struct xattr_handler shmem_security_xattr_handler = {
@@ -3482,9 +3708,16 @@ static const struct xattr_handler shmem_trusted_xattr_handler = {
.set = shmem_xattr_handler_set,
};
+static const struct xattr_handler shmem_user_xattr_handler = {
+ .prefix = XATTR_USER_PREFIX,
+ .get = shmem_xattr_handler_get,
+ .set = shmem_xattr_handler_set,
+};
+
static const struct xattr_handler *shmem_xattr_handlers[] = {
&shmem_security_xattr_handler,
&shmem_trusted_xattr_handler,
+ &shmem_user_xattr_handler,
NULL
};
@@ -3497,6 +3730,7 @@ static ssize_t shmem_listxattr(struct dentry *dentry, char *buffer, size_t size)
static const struct inode_operations shmem_short_symlink_operations = {
.getattr = shmem_getattr,
+ .setattr = shmem_setattr,
.get_link = simple_get_link,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -3505,6 +3739,7 @@ static const struct inode_operations shmem_short_symlink_operations = {
static const struct inode_operations shmem_symlink_inode_operations = {
.getattr = shmem_getattr,
+ .setattr = shmem_setattr,
.get_link = shmem_get_link,
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -3604,6 +3839,13 @@ enum shmem_param {
Opt_inode32,
Opt_inode64,
Opt_noswap,
+ Opt_quota,
+ Opt_usrquota,
+ Opt_grpquota,
+ Opt_usrquota_block_hardlimit,
+ Opt_usrquota_inode_hardlimit,
+ Opt_grpquota_block_hardlimit,
+ Opt_grpquota_inode_hardlimit,
};
static const struct constant_table shmem_param_enums_huge[] = {
@@ -3626,6 +3868,15 @@ const struct fs_parameter_spec shmem_fs_parameters[] = {
fsparam_flag ("inode32", Opt_inode32),
fsparam_flag ("inode64", Opt_inode64),
fsparam_flag ("noswap", Opt_noswap),
+#ifdef CONFIG_TMPFS_QUOTA
+ fsparam_flag ("quota", Opt_quota),
+ fsparam_flag ("usrquota", Opt_usrquota),
+ fsparam_flag ("grpquota", Opt_grpquota),
+ fsparam_string("usrquota_block_hardlimit", Opt_usrquota_block_hardlimit),
+ fsparam_string("usrquota_inode_hardlimit", Opt_usrquota_inode_hardlimit),
+ fsparam_string("grpquota_block_hardlimit", Opt_grpquota_block_hardlimit),
+ fsparam_string("grpquota_inode_hardlimit", Opt_grpquota_inode_hardlimit),
+#endif
{}
};
@@ -3636,6 +3887,8 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
unsigned long long size;
char *rest;
int opt;
+ kuid_t kuid;
+ kgid_t kgid;
opt = fs_parse(fc, shmem_fs_parameters, param, &result);
if (opt < 0)
@@ -3657,13 +3910,13 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
break;
case Opt_nr_blocks:
ctx->blocks = memparse(param->string, &rest);
- if (*rest || ctx->blocks > S64_MAX)
+ if (*rest || ctx->blocks > LONG_MAX)
goto bad_value;
ctx->seen |= SHMEM_SEEN_BLOCKS;
break;
case Opt_nr_inodes:
ctx->inodes = memparse(param->string, &rest);
- if (*rest)
+ if (*rest || ctx->inodes > ULONG_MAX / BOGO_INODE_SIZE)
goto bad_value;
ctx->seen |= SHMEM_SEEN_INODES;
break;
@@ -3671,14 +3924,32 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->mode = result.uint_32 & 07777;
break;
case Opt_uid:
- ctx->uid = make_kuid(current_user_ns(), result.uint_32);
- if (!uid_valid(ctx->uid))
+ kuid = make_kuid(current_user_ns(), result.uint_32);
+ if (!uid_valid(kuid))
+ goto bad_value;
+
+ /*
+ * The requested uid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kuid_has_mapping(fc->user_ns, kuid))
goto bad_value;
+
+ ctx->uid = kuid;
break;
case Opt_gid:
- ctx->gid = make_kgid(current_user_ns(), result.uint_32);
- if (!gid_valid(ctx->gid))
+ kgid = make_kgid(current_user_ns(), result.uint_32);
+ if (!gid_valid(kgid))
+ goto bad_value;
+
+ /*
+ * The requested gid must be representable in the
+ * filesystem's idmapping.
+ */
+ if (!kgid_has_mapping(fc->user_ns, kgid))
goto bad_value;
+
+ ctx->gid = kgid;
break;
case Opt_huge:
ctx->huge = result.uint_32;
@@ -3717,6 +3988,60 @@ static int shmem_parse_one(struct fs_context *fc, struct fs_parameter *param)
ctx->noswap = true;
ctx->seen |= SHMEM_SEEN_NOSWAP;
break;
+ case Opt_quota:
+ if (fc->user_ns != &init_user_ns)
+ return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+ ctx->seen |= SHMEM_SEEN_QUOTA;
+ ctx->quota_types |= (QTYPE_MASK_USR | QTYPE_MASK_GRP);
+ break;
+ case Opt_usrquota:
+ if (fc->user_ns != &init_user_ns)
+ return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+ ctx->seen |= SHMEM_SEEN_QUOTA;
+ ctx->quota_types |= QTYPE_MASK_USR;
+ break;
+ case Opt_grpquota:
+ if (fc->user_ns != &init_user_ns)
+ return invalfc(fc, "Quotas in unprivileged tmpfs mounts are unsupported");
+ ctx->seen |= SHMEM_SEEN_QUOTA;
+ ctx->quota_types |= QTYPE_MASK_GRP;
+ break;
+ case Opt_usrquota_block_hardlimit:
+ size = memparse(param->string, &rest);
+ if (*rest || !size)
+ goto bad_value;
+ if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
+ return invalfc(fc,
+ "User quota block hardlimit too large.");
+ ctx->qlimits.usrquota_bhardlimit = size;
+ break;
+ case Opt_grpquota_block_hardlimit:
+ size = memparse(param->string, &rest);
+ if (*rest || !size)
+ goto bad_value;
+ if (size > SHMEM_QUOTA_MAX_SPC_LIMIT)
+ return invalfc(fc,
+ "Group quota block hardlimit too large.");
+ ctx->qlimits.grpquota_bhardlimit = size;
+ break;
+ case Opt_usrquota_inode_hardlimit:
+ size = memparse(param->string, &rest);
+ if (*rest || !size)
+ goto bad_value;
+ if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
+ return invalfc(fc,
+ "User quota inode hardlimit too large.");
+ ctx->qlimits.usrquota_ihardlimit = size;
+ break;
+ case Opt_grpquota_inode_hardlimit:
+ size = memparse(param->string, &rest);
+ if (*rest || !size)
+ goto bad_value;
+ if (size > SHMEM_QUOTA_MAX_INO_LIMIT)
+ return invalfc(fc,
+ "Group quota inode hardlimit too large.");
+ ctx->qlimits.grpquota_ihardlimit = size;
+ break;
}
return 0;
@@ -3772,21 +4097,17 @@ static int shmem_parse_options(struct fs_context *fc, void *data)
/*
* Reconfigure a shmem filesystem.
- *
- * Note that we disallow change from limited->unlimited blocks/inodes while any
- * are in use; but we must separately disallow unlimited->limited, because in
- * that case we have no record of how much is already in use.
*/
static int shmem_reconfigure(struct fs_context *fc)
{
struct shmem_options *ctx = fc->fs_private;
struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
- unsigned long inodes;
+ unsigned long used_isp;
struct mempolicy *mpol = NULL;
const char *err;
raw_spin_lock(&sbinfo->stat_lock);
- inodes = sbinfo->max_inodes - sbinfo->free_inodes;
+ used_isp = sbinfo->max_inodes * BOGO_INODE_SIZE - sbinfo->free_ispace;
if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
if (!sbinfo->max_blocks) {
@@ -3804,7 +4125,7 @@ static int shmem_reconfigure(struct fs_context *fc)
err = "Cannot retroactively limit inodes";
goto out;
}
- if (ctx->inodes < inodes) {
+ if (ctx->inodes * BOGO_INODE_SIZE < used_isp) {
err = "Too few inodes for current use";
goto out;
}
@@ -3824,6 +4145,24 @@ static int shmem_reconfigure(struct fs_context *fc)
goto out;
}
+ if (ctx->seen & SHMEM_SEEN_QUOTA &&
+ !sb_any_quota_loaded(fc->root->d_sb)) {
+ err = "Cannot enable quota on remount";
+ goto out;
+ }
+
+#ifdef CONFIG_TMPFS_QUOTA
+#define CHANGED_LIMIT(name) \
+ (ctx->qlimits.name## hardlimit && \
+ (ctx->qlimits.name## hardlimit != sbinfo->qlimits.name## hardlimit))
+
+ if (CHANGED_LIMIT(usrquota_b) || CHANGED_LIMIT(usrquota_i) ||
+ CHANGED_LIMIT(grpquota_b) || CHANGED_LIMIT(grpquota_i)) {
+ err = "Cannot change global quota limit on remount";
+ goto out;
+ }
+#endif /* CONFIG_TMPFS_QUOTA */
+
if (ctx->seen & SHMEM_SEEN_HUGE)
sbinfo->huge = ctx->huge;
if (ctx->seen & SHMEM_SEEN_INUMS)
@@ -3832,7 +4171,7 @@ static int shmem_reconfigure(struct fs_context *fc)
sbinfo->max_blocks = ctx->blocks;
if (ctx->seen & SHMEM_SEEN_INODES) {
sbinfo->max_inodes = ctx->inodes;
- sbinfo->free_inodes = ctx->inodes - inodes;
+ sbinfo->free_ispace = ctx->inodes * BOGO_INODE_SIZE - used_isp;
}
/*
@@ -3915,6 +4254,9 @@ static void shmem_put_super(struct super_block *sb)
{
struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
+#ifdef CONFIG_TMPFS_QUOTA
+ shmem_disable_quotas(sb);
+#endif
free_percpu(sbinfo->ino_batch);
percpu_counter_destroy(&sbinfo->used_blocks);
mpol_put(sbinfo->mpol);
@@ -3927,12 +4269,13 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
struct shmem_options *ctx = fc->fs_private;
struct inode *inode;
struct shmem_sb_info *sbinfo;
+ int error = -ENOMEM;
/* Round up to L1_CACHE_BYTES to resist false sharing */
sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
L1_CACHE_BYTES), GFP_KERNEL);
if (!sbinfo)
- return -ENOMEM;
+ return error;
sb->s_fs_info = sbinfo;
@@ -3959,7 +4302,8 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
sb->s_flags |= SB_NOUSER;
#endif
sbinfo->max_blocks = ctx->blocks;
- sbinfo->free_inodes = sbinfo->max_inodes = ctx->inodes;
+ sbinfo->max_inodes = ctx->inodes;
+ sbinfo->free_ispace = sbinfo->max_inodes * BOGO_INODE_SIZE;
if (sb->s_flags & SB_KERNMOUNT) {
sbinfo->ino_batch = alloc_percpu(ino_t);
if (!sbinfo->ino_batch)
@@ -3993,10 +4337,27 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#endif
uuid_gen(&sb->s_uuid);
+#ifdef CONFIG_TMPFS_QUOTA
+ if (ctx->seen & SHMEM_SEEN_QUOTA) {
+ sb->dq_op = &shmem_quota_operations;
+ sb->s_qcop = &dquot_quotactl_sysfile_ops;
+ sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
+
+ /* Copy the default limits from ctx into sbinfo */
+ memcpy(&sbinfo->qlimits, &ctx->qlimits,
+ sizeof(struct shmem_quota_limits));
+
+ if (shmem_enable_quotas(sb, ctx->quota_types))
+ goto failed;
+ }
+#endif /* CONFIG_TMPFS_QUOTA */
+
inode = shmem_get_inode(&nop_mnt_idmap, sb, NULL, S_IFDIR | sbinfo->mode, 0,
VM_NORESERVE);
- if (!inode)
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
goto failed;
+ }
inode->i_uid = sbinfo->uid;
inode->i_gid = sbinfo->gid;
sb->s_root = d_make_root(inode);
@@ -4006,7 +4367,7 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
failed:
shmem_put_super(sb);
- return -ENOMEM;
+ return error;
}
static int shmem_get_tree(struct fs_context *fc)
@@ -4056,6 +4417,8 @@ static void shmem_destroy_inode(struct inode *inode)
{
if (S_ISREG(inode->i_mode))
mpol_free_shared_policy(&SHMEM_I(inode)->policy);
+ if (S_ISDIR(inode->i_mode))
+ simple_offset_destroy(shmem_get_offset_ctx(inode));
}
static void shmem_init_inode(void *foo)
@@ -4099,12 +4462,12 @@ EXPORT_SYMBOL(shmem_aops);
static const struct file_operations shmem_file_operations = {
.mmap = shmem_mmap,
- .open = generic_file_open,
+ .open = shmem_file_open,
.get_unmapped_area = shmem_get_unmapped_area,
#ifdef CONFIG_TMPFS
.llseek = shmem_file_llseek,
.read_iter = shmem_file_read_iter,
- .write_iter = generic_file_write_iter,
+ .write_iter = shmem_file_write_iter,
.fsync = noop_fsync,
.splice_read = shmem_file_splice_read,
.splice_write = iter_file_splice_write,
@@ -4136,6 +4499,7 @@ static const struct inode_operations shmem_dir_inode_operations = {
.mknod = shmem_mknod,
.rename = shmem_rename2,
.tmpfile = shmem_tmpfile,
+ .get_offset_ctx = shmem_get_offset_ctx,
#endif
#ifdef CONFIG_TMPFS_XATTR
.listxattr = shmem_listxattr,
@@ -4167,6 +4531,9 @@ static const struct super_operations shmem_ops = {
.statfs = shmem_statfs,
.show_options = shmem_show_options,
#endif
+#ifdef CONFIG_TMPFS_QUOTA
+ .get_dquots = shmem_get_dquots,
+#endif
.evict_inode = shmem_evict_inode,
.drop_inode = generic_delete_inode,
.put_super = shmem_put_super,
@@ -4220,7 +4587,7 @@ static struct file_system_type shmem_fs_type = {
#endif
.kill_sb = kill_litter_super,
#ifdef CONFIG_SHMEM
- .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
+ .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME,
#else
.fs_flags = FS_USERNS_MOUNT,
#endif
@@ -4232,6 +4599,14 @@ void __init shmem_init(void)
shmem_init_inodecache();
+#ifdef CONFIG_TMPFS_QUOTA
+ error = register_quota_format(&shmem_quota_format);
+ if (error < 0) {
+ pr_err("Could not register quota format\n");
+ goto out3;
+ }
+#endif
+
error = register_filesystem(&shmem_fs_type);
if (error) {
pr_err("Could not register tmpfs\n");
@@ -4256,6 +4631,10 @@ void __init shmem_init(void)
out1:
unregister_filesystem(&shmem_fs_type);
out2:
+#ifdef CONFIG_TMPFS_QUOTA
+ unregister_quota_format(&shmem_quota_format);
+out3:
+#endif
shmem_destroy_inodecache();
shm_mnt = ERR_PTR(error);
}
@@ -4375,10 +4754,16 @@ EXPORT_SYMBOL_GPL(shmem_truncate_range);
#define shmem_vm_ops generic_file_vm_ops
#define shmem_anon_vm_ops generic_file_vm_ops
#define shmem_file_operations ramfs_file_operations
-#define shmem_get_inode(idmap, sb, dir, mode, dev, flags) ramfs_get_inode(sb, dir, mode, dev)
#define shmem_acct_size(flags, size) 0
#define shmem_unacct_size(flags, size) do {} while (0)
+static inline struct inode *shmem_get_inode(struct mnt_idmap *idmap, struct super_block *sb, struct inode *dir,
+ umode_t mode, dev_t dev, unsigned long flags)
+{
+ struct inode *inode = ramfs_get_inode(sb, dir, mode, dev);
+ return inode ? inode : ERR_PTR(-ENOSPC);
+}
+
#endif /* CONFIG_SHMEM */
/* common code */
@@ -4403,9 +4788,10 @@ static struct file *__shmem_file_setup(struct vfsmount *mnt, const char *name, l
inode = shmem_get_inode(&nop_mnt_idmap, mnt->mnt_sb, NULL,
S_IFREG | S_IRWXUGO, 0, flags);
- if (unlikely(!inode)) {
+
+ if (IS_ERR(inode)) {
shmem_unacct_size(flags, size);
- return ERR_PTR(-ENOSPC);
+ return ERR_CAST(inode);
}
inode->i_flags |= i_flags;
inode->i_size = size;
diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c
new file mode 100644
index 000000000000..062d1c1097ae
--- /dev/null
+++ b/mm/shmem_quota.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * In memory quota format relies on quota infrastructure to store dquot
+ * information for us. While conventional quota formats for file systems
+ * with persistent storage can load quota information into dquot from the
+ * storage on-demand and hence quota dquot shrinker can free any dquot
+ * that is not currently being used, it must be avoided here. Otherwise we
+ * can lose valuable information, user provided limits, because there is
+ * no persistent storage to load the information from afterwards.
+ *
+ * One information that in-memory quota format needs to keep track of is
+ * a sorted list of ids for each quota type. This is done by utilizing
+ * an rb tree which root is stored in mem_dqinfo->dqi_priv for each quota
+ * type.
+ *
+ * This format can be used to support quota on file system without persistent
+ * storage such as tmpfs.
+ *
+ * Author: Lukas Czerner <lczerner@redhat.com>
+ * Carlos Maiolino <cmaiolino@redhat.com>
+ *
+ * Copyright (C) 2023 Red Hat, Inc.
+ */
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/rbtree.h>
+#include <linux/shmem_fs.h>
+
+#include <linux/quotaops.h>
+#include <linux/quota.h>
+
+#ifdef CONFIG_TMPFS_QUOTA
+
+/*
+ * The following constants define the amount of time given a user
+ * before the soft limits are treated as hard limits (usually resulting
+ * in an allocation failure). The timer is started when the user crosses
+ * their soft limit, it is reset when they go below their soft limit.
+ */
+#define SHMEM_MAX_IQ_TIME 604800 /* (7*24*60*60) 1 week */
+#define SHMEM_MAX_DQ_TIME 604800 /* (7*24*60*60) 1 week */
+
+struct quota_id {
+ struct rb_node node;
+ qid_t id;
+ qsize_t bhardlimit;
+ qsize_t bsoftlimit;
+ qsize_t ihardlimit;
+ qsize_t isoftlimit;
+};
+
+static int shmem_check_quota_file(struct super_block *sb, int type)
+{
+ /* There is no real quota file, nothing to do */
+ return 1;
+}
+
+/*
+ * There is no real quota file. Just allocate rb_root for quota ids and
+ * set limits
+ */
+static int shmem_read_file_info(struct super_block *sb, int type)
+{
+ struct quota_info *dqopt = sb_dqopt(sb);
+ struct mem_dqinfo *info = &dqopt->info[type];
+
+ info->dqi_priv = kzalloc(sizeof(struct rb_root), GFP_NOFS);
+ if (!info->dqi_priv)
+ return -ENOMEM;
+
+ info->dqi_max_spc_limit = SHMEM_QUOTA_MAX_SPC_LIMIT;
+ info->dqi_max_ino_limit = SHMEM_QUOTA_MAX_INO_LIMIT;
+
+ info->dqi_bgrace = SHMEM_MAX_DQ_TIME;
+ info->dqi_igrace = SHMEM_MAX_IQ_TIME;
+ info->dqi_flags = 0;
+
+ return 0;
+}
+
+static int shmem_write_file_info(struct super_block *sb, int type)
+{
+ /* There is no real quota file, nothing to do */
+ return 0;
+}
+
+/*
+ * Free all the quota_id entries in the rb tree and rb_root.
+ */
+static int shmem_free_file_info(struct super_block *sb, int type)
+{
+ struct mem_dqinfo *info = &sb_dqopt(sb)->info[type];
+ struct rb_root *root = info->dqi_priv;
+ struct quota_id *entry;
+ struct rb_node *node;
+
+ info->dqi_priv = NULL;
+ node = rb_first(root);
+ while (node) {
+ entry = rb_entry(node, struct quota_id, node);
+ node = rb_next(&entry->node);
+
+ rb_erase(&entry->node, root);
+ kfree(entry);
+ }
+
+ kfree(root);
+ return 0;
+}
+
+static int shmem_get_next_id(struct super_block *sb, struct kqid *qid)
+{
+ struct mem_dqinfo *info = sb_dqinfo(sb, qid->type);
+ struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+ qid_t id = from_kqid(&init_user_ns, *qid);
+ struct quota_info *dqopt = sb_dqopt(sb);
+ struct quota_id *entry = NULL;
+ int ret = 0;
+
+ if (!sb_has_quota_active(sb, qid->type))
+ return -ESRCH;
+
+ down_read(&dqopt->dqio_sem);
+ while (node) {
+ entry = rb_entry(node, struct quota_id, node);
+
+ if (id < entry->id)
+ node = node->rb_left;
+ else if (id > entry->id)
+ node = node->rb_right;
+ else
+ goto got_next_id;
+ }
+
+ if (!entry) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (id > entry->id) {
+ node = rb_next(&entry->node);
+ if (!node) {
+ ret = -ENOENT;
+ goto out_unlock;
+ }
+ entry = rb_entry(node, struct quota_id, node);
+ }
+
+got_next_id:
+ *qid = make_kqid(&init_user_ns, qid->type, entry->id);
+out_unlock:
+ up_read(&dqopt->dqio_sem);
+ return ret;
+}
+
+/*
+ * Load dquot with limits from existing entry, or create the new entry if
+ * it does not exist.
+ */
+static int shmem_acquire_dquot(struct dquot *dquot)
+{
+ struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
+ struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node;
+ struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
+ struct rb_node *parent = NULL, *new_node = NULL;
+ struct quota_id *new_entry, *entry;
+ qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+ int ret = 0;
+
+ mutex_lock(&dquot->dq_lock);
+
+ down_write(&dqopt->dqio_sem);
+ while (*n) {
+ parent = *n;
+ entry = rb_entry(parent, struct quota_id, node);
+
+ if (id < entry->id)
+ n = &(*n)->rb_left;
+ else if (id > entry->id)
+ n = &(*n)->rb_right;
+ else
+ goto found;
+ }
+
+ /* We don't have entry for this id yet, create it */
+ new_entry = kzalloc(sizeof(struct quota_id), GFP_NOFS);
+ if (!new_entry) {
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ new_entry->id = id;
+ if (dquot->dq_id.type == USRQUOTA) {
+ new_entry->bhardlimit = sbinfo->qlimits.usrquota_bhardlimit;
+ new_entry->ihardlimit = sbinfo->qlimits.usrquota_ihardlimit;
+ } else if (dquot->dq_id.type == GRPQUOTA) {
+ new_entry->bhardlimit = sbinfo->qlimits.grpquota_bhardlimit;
+ new_entry->ihardlimit = sbinfo->qlimits.grpquota_ihardlimit;
+ }
+
+ new_node = &new_entry->node;
+ rb_link_node(new_node, parent, n);
+ rb_insert_color(new_node, (struct rb_root *)info->dqi_priv);
+ entry = new_entry;
+
+found:
+ /* Load the stored limits from the tree */
+ spin_lock(&dquot->dq_dqb_lock);
+ dquot->dq_dqb.dqb_bhardlimit = entry->bhardlimit;
+ dquot->dq_dqb.dqb_bsoftlimit = entry->bsoftlimit;
+ dquot->dq_dqb.dqb_ihardlimit = entry->ihardlimit;
+ dquot->dq_dqb.dqb_isoftlimit = entry->isoftlimit;
+
+ if (!dquot->dq_dqb.dqb_bhardlimit &&
+ !dquot->dq_dqb.dqb_bsoftlimit &&
+ !dquot->dq_dqb.dqb_ihardlimit &&
+ !dquot->dq_dqb.dqb_isoftlimit)
+ set_bit(DQ_FAKE_B, &dquot->dq_flags);
+ spin_unlock(&dquot->dq_dqb_lock);
+
+ /* Make sure flags update is visible after dquot has been filled */
+ smp_mb__before_atomic();
+ set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+out_unlock:
+ up_write(&dqopt->dqio_sem);
+ mutex_unlock(&dquot->dq_lock);
+ return ret;
+}
+
+static bool shmem_is_empty_dquot(struct dquot *dquot)
+{
+ struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info;
+ qsize_t bhardlimit;
+ qsize_t ihardlimit;
+
+ if (dquot->dq_id.type == USRQUOTA) {
+ bhardlimit = sbinfo->qlimits.usrquota_bhardlimit;
+ ihardlimit = sbinfo->qlimits.usrquota_ihardlimit;
+ } else if (dquot->dq_id.type == GRPQUOTA) {
+ bhardlimit = sbinfo->qlimits.grpquota_bhardlimit;
+ ihardlimit = sbinfo->qlimits.grpquota_ihardlimit;
+ }
+
+ if (test_bit(DQ_FAKE_B, &dquot->dq_flags) ||
+ (dquot->dq_dqb.dqb_curspace == 0 &&
+ dquot->dq_dqb.dqb_curinodes == 0 &&
+ dquot->dq_dqb.dqb_bhardlimit == bhardlimit &&
+ dquot->dq_dqb.dqb_ihardlimit == ihardlimit))
+ return true;
+
+ return false;
+}
+/*
+ * Store limits from dquot in the tree unless it's fake. If it is fake
+ * remove the id from the tree since there is no useful information in
+ * there.
+ */
+static int shmem_release_dquot(struct dquot *dquot)
+{
+ struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type);
+ struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node;
+ qid_t id = from_kqid(&init_user_ns, dquot->dq_id);
+ struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
+ struct quota_id *entry = NULL;
+
+ mutex_lock(&dquot->dq_lock);
+ /* Check whether we are not racing with some other dqget() */
+ if (dquot_is_busy(dquot))
+ goto out_dqlock;
+
+ down_write(&dqopt->dqio_sem);
+ while (node) {
+ entry = rb_entry(node, struct quota_id, node);
+
+ if (id < entry->id)
+ node = node->rb_left;
+ else if (id > entry->id)
+ node = node->rb_right;
+ else
+ goto found;
+ }
+
+ /* We should always find the entry in the rb tree */
+ WARN_ONCE(1, "quota id %u from dquot %p, not in rb tree!\n", id, dquot);
+ up_write(&dqopt->dqio_sem);
+ mutex_unlock(&dquot->dq_lock);
+ return -ENOENT;
+
+found:
+ if (shmem_is_empty_dquot(dquot)) {
+ /* Remove entry from the tree */
+ rb_erase(&entry->node, info->dqi_priv);
+ kfree(entry);
+ } else {
+ /* Store the limits in the tree */
+ spin_lock(&dquot->dq_dqb_lock);
+ entry->bhardlimit = dquot->dq_dqb.dqb_bhardlimit;
+ entry->bsoftlimit = dquot->dq_dqb.dqb_bsoftlimit;
+ entry->ihardlimit = dquot->dq_dqb.dqb_ihardlimit;
+ entry->isoftlimit = dquot->dq_dqb.dqb_isoftlimit;
+ spin_unlock(&dquot->dq_dqb_lock);
+ }
+
+ clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
+ up_write(&dqopt->dqio_sem);
+
+out_dqlock:
+ mutex_unlock(&dquot->dq_lock);
+ return 0;
+}
+
+static int shmem_mark_dquot_dirty(struct dquot *dquot)
+{
+ return 0;
+}
+
+static int shmem_dquot_write_info(struct super_block *sb, int type)
+{
+ return 0;
+}
+
+static const struct quota_format_ops shmem_format_ops = {
+ .check_quota_file = shmem_check_quota_file,
+ .read_file_info = shmem_read_file_info,
+ .write_file_info = shmem_write_file_info,
+ .free_file_info = shmem_free_file_info,
+};
+
+struct quota_format_type shmem_quota_format = {
+ .qf_fmt_id = QFMT_SHMEM,
+ .qf_ops = &shmem_format_ops,
+ .qf_owner = THIS_MODULE
+};
+
+const struct dquot_operations shmem_quota_operations = {
+ .acquire_dquot = shmem_acquire_dquot,
+ .release_dquot = shmem_release_dquot,
+ .alloc_dquot = dquot_alloc,
+ .destroy_dquot = dquot_destroy,
+ .write_info = shmem_dquot_write_info,
+ .mark_dirty = shmem_mark_dquot_dirty,
+ .get_next_id = shmem_get_next_id,
+};
+#endif /* CONFIG_TMPFS_QUOTA */
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8e6dde68b389..b15112b1f1a8 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1746,7 +1746,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
struct page *swapcache;
spinlock_t *ptl;
pte_t *pte, new_pte, old_pte;
- bool hwposioned = false;
+ bool hwpoisoned = PageHWPoison(page);
int ret = 1;
swapcache = page;
@@ -1754,7 +1754,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
if (unlikely(!page))
return -ENOMEM;
else if (unlikely(PTR_ERR(page) == -EHWPOISON))
- hwposioned = true;
+ hwpoisoned = true;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
if (unlikely(!pte || !pte_same_as_swp(ptep_get(pte),
@@ -1765,11 +1765,11 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
old_pte = ptep_get(pte);
- if (unlikely(hwposioned || !PageUptodate(page))) {
+ if (unlikely(hwpoisoned || !PageUptodate(page))) {
swp_entry_t swp_entry;
dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
- if (hwposioned) {
+ if (hwpoisoned) {
swp_entry = make_hwpoison_entry(swapcache);
page = swapcache;
} else {
diff --git a/mm/truncate.c b/mm/truncate.c
index 95d1291d269b..c3320e66d6ea 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -657,11 +657,11 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
}
folio_lock(folio);
- VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
- if (folio->mapping != mapping) {
+ if (unlikely(folio->mapping != mapping)) {
folio_unlock(folio);
continue;
}
+ VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
folio_wait_writeback(folio);
if (folio_mapped(folio))
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 93cf99aba335..228a4a5312f2 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2979,6 +2979,10 @@ void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot)
free_vm_area(area);
return NULL;
}
+
+ flush_cache_vmap((unsigned long)area->addr,
+ (unsigned long)area->addr + count * PAGE_SIZE);
+
return area->addr;
}
EXPORT_SYMBOL_GPL(vmap_pfn);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1080209a568b..2fe4a11d63f4 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4284,6 +4284,7 @@ static void walk_mm(struct lruvec *lruvec, struct mm_struct *mm, struct lru_gen_
static const struct mm_walk_ops mm_walk_ops = {
.test_walk = should_skip_vma,
.p4d_entry = walk_pud_range,
+ .walk_lock = PGWALK_RDLOCK,
};
int err;
@@ -4853,16 +4854,17 @@ void lru_gen_release_memcg(struct mem_cgroup *memcg)
spin_lock_irq(&pgdat->memcg_lru.lock);
- VM_WARN_ON_ONCE(hlist_nulls_unhashed(&lruvec->lrugen.list));
+ if (hlist_nulls_unhashed(&lruvec->lrugen.list))
+ goto unlock;
gen = lruvec->lrugen.gen;
- hlist_nulls_del_rcu(&lruvec->lrugen.list);
+ hlist_nulls_del_init_rcu(&lruvec->lrugen.list);
pgdat->memcg_lru.nr_memcgs[gen]--;
if (!pgdat->memcg_lru.nr_memcgs[gen] && gen == get_memcg_gen(pgdat->memcg_lru.seq))
WRITE_ONCE(pgdat->memcg_lru.seq, pgdat->memcg_lru.seq + 1);
-
+unlock:
spin_unlock_irq(&pgdat->memcg_lru.lock);
}
}
@@ -5434,8 +5436,10 @@ restart:
rcu_read_lock();
hlist_nulls_for_each_entry_rcu(lrugen, pos, &pgdat->memcg_lru.fifo[gen][bin], list) {
- if (op)
+ if (op) {
lru_gen_rotate_memcg(lruvec, op);
+ op = 0;
+ }
mem_cgroup_put(memcg);
@@ -5443,7 +5447,7 @@ restart:
memcg = lruvec_memcg(lruvec);
if (!mem_cgroup_tryget(memcg)) {
- op = 0;
+ lru_gen_release_memcg(memcg);
memcg = NULL;
continue;
}
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 3f057970504e..32916d28d9d9 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1798,6 +1798,7 @@ static void replace_sub_page(struct size_class *class, struct zspage *zspage,
static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
{
+ struct zs_pool *pool;
struct zspage *zspage;
/*
@@ -1807,9 +1808,10 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
VM_BUG_ON_PAGE(PageIsolated(page), page);
zspage = get_zspage(page);
- migrate_write_lock(zspage);
+ pool = zspage->pool;
+ spin_lock(&pool->lock);
inc_zspage_isolation(zspage);
- migrate_write_unlock(zspage);
+ spin_unlock(&pool->lock);
return true;
}
@@ -1875,12 +1877,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
kunmap_atomic(s_addr);
replace_sub_page(class, zspage, newpage, page);
+ dec_zspage_isolation(zspage);
/*
* Since we complete the data copy and set up new zspage structure,
* it's okay to release the pool's lock.
*/
spin_unlock(&pool->lock);
- dec_zspage_isolation(zspage);
migrate_write_unlock(zspage);
get_page(newpage);
@@ -1897,14 +1899,16 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
static void zs_page_putback(struct page *page)
{
+ struct zs_pool *pool;
struct zspage *zspage;
VM_BUG_ON_PAGE(!PageIsolated(page), page);
zspage = get_zspage(page);
- migrate_write_lock(zspage);
+ pool = zspage->pool;
+ spin_lock(&pool->lock);
dec_zspage_isolation(zspage);
- migrate_write_unlock(zspage);
+ spin_unlock(&pool->lock);
}
static const struct movable_operations zsmalloc_mops = {
diff --git a/net/9p/client.c b/net/9p/client.c
index a3340268ec8d..86bbc7147fc1 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -904,7 +904,7 @@ EXPORT_SYMBOL(do_trace_9p_fid_put);
static int p9_client_version(struct p9_client *c)
{
- int err = 0;
+ int err;
struct p9_req_t *req;
char *version = NULL;
int msize;
@@ -975,7 +975,6 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
struct p9_client *clnt;
char *client_id;
- err = 0;
clnt = kmalloc(sizeof(*clnt), GFP_KERNEL);
if (!clnt)
return ERR_PTR(-ENOMEM);
@@ -1094,7 +1093,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
const char *uname, kuid_t n_uname,
const char *aname)
{
- int err = 0;
+ int err;
struct p9_req_t *req;
struct p9_fid *fid;
struct p9_qid qid;
@@ -1147,7 +1146,6 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
struct p9_req_t *req;
u16 nwqids, count;
- err = 0;
wqids = NULL;
clnt = oldfid->clnt;
if (clone) {
@@ -1224,7 +1222,6 @@ int p9_client_open(struct p9_fid *fid, int mode)
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",
p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);
- err = 0;
if (fid->mode != -1)
return -EINVAL;
@@ -1262,7 +1259,7 @@ EXPORT_SYMBOL(p9_client_open);
int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags,
u32 mode, kgid_t gid, struct p9_qid *qid)
{
- int err = 0;
+ int err;
struct p9_client *clnt;
struct p9_req_t *req;
int iounit;
@@ -1314,7 +1311,6 @@ int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",
fid->fid, name, perm, mode);
- err = 0;
clnt = fid->clnt;
if (fid->mode != -1)
@@ -1350,7 +1346,7 @@ EXPORT_SYMBOL(p9_client_fcreate);
int p9_client_symlink(struct p9_fid *dfid, const char *name,
const char *symtgt, kgid_t gid, struct p9_qid *qid)
{
- int err = 0;
+ int err;
struct p9_client *clnt;
struct p9_req_t *req;
@@ -1402,13 +1398,12 @@ EXPORT_SYMBOL(p9_client_link);
int p9_client_fsync(struct p9_fid *fid, int datasync)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",
fid->fid, datasync);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync);
@@ -1428,7 +1423,7 @@ EXPORT_SYMBOL(p9_client_fsync);
int p9_client_clunk(struct p9_fid *fid)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
int retries = 0;
@@ -1436,7 +1431,6 @@ int p9_client_clunk(struct p9_fid *fid)
again:
p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n",
fid->fid, retries);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid);
@@ -1465,12 +1459,11 @@ EXPORT_SYMBOL(p9_client_clunk);
int p9_client_remove(struct p9_fid *fid)
{
- int err;
+ int err = 0;
struct p9_client *clnt;
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid);
@@ -1680,7 +1673,6 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
if (!ret)
return ERR_PTR(-ENOMEM);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid);
@@ -1733,7 +1725,6 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
if (!ret)
return ERR_PTR(-ENOMEM);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask);
@@ -1812,11 +1803,10 @@ static int p9_client_statsize(struct p9_wstat *wst, int proto_version)
int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
wst->size = p9_client_statsize(wst, clnt->proto_version);
p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n",
@@ -1851,11 +1841,10 @@ EXPORT_SYMBOL(p9_client_wstat);
int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid);
p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n",
@@ -1887,7 +1876,6 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);
@@ -1921,11 +1909,10 @@ EXPORT_SYMBOL(p9_client_statfs);
int p9_client_rename(struct p9_fid *fid,
struct p9_fid *newdirfid, const char *name)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",
@@ -1949,11 +1936,10 @@ EXPORT_SYMBOL(p9_client_rename);
int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name,
struct p9_fid *newdirfid, const char *new_name)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
- err = 0;
clnt = olddirfid->clnt;
p9_debug(P9_DEBUG_9P,
@@ -1986,7 +1972,6 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,
struct p9_client *clnt;
struct p9_fid *attr_fid;
- err = 0;
clnt = file_fid->clnt;
attr_fid = p9_fid_create(clnt);
if (!attr_fid) {
@@ -2027,14 +2012,13 @@ EXPORT_SYMBOL_GPL(p9_client_xattrwalk);
int p9_client_xattrcreate(struct p9_fid *fid, const char *name,
u64 attr_size, int flags)
{
- int err;
+ int err = 0;
struct p9_req_t *req;
struct p9_client *clnt;
p9_debug(P9_DEBUG_9P,
">>> TXATTRCREATE fid %d name %s size %llu flag %d\n",
fid->fid, name, attr_size, flags);
- err = 0;
clnt = fid->clnt;
req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd",
fid->fid, name, attr_size, flags);
@@ -2063,7 +2047,6 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
fid->fid, offset, count);
- err = 0;
clnt = fid->clnt;
rsize = fid->iounit;
@@ -2122,7 +2105,6 @@ int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TMKNOD fid %d name %s mode %d major %d minor %d\n",
@@ -2153,7 +2135,6 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
fid->fid, name, mode, from_kgid(&init_user_ns, gid));
@@ -2182,7 +2163,6 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n",
@@ -2214,7 +2194,6 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P,
">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n",
@@ -2251,7 +2230,6 @@ int p9_client_readlink(struct p9_fid *fid, char **target)
struct p9_client *clnt;
struct p9_req_t *req;
- err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3c27ffb781e3..e305071eb7b8 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -384,7 +384,7 @@ static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
void *to = req->rc.sdata + in_hdr_len;
// Fits entirely into the static data? Nothing to do.
- if (req->rc.size < in_hdr_len)
+ if (req->rc.size < in_hdr_len || !pages)
return;
// Really long error message? Tough, truncate the reply. Might get
@@ -428,7 +428,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
struct page **in_pages = NULL, **out_pages = NULL;
struct virtio_chan *chan = client->trans;
struct scatterlist *sgs[4];
- size_t offs;
+ size_t offs = 0;
int need_drop = 0;
int kicked = 0;
@@ -501,8 +501,8 @@ req_retry_pinned:
if (in_pages) {
sgs[out_sgs + in_sgs++] = chan->sg + out + in;
- in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
- in_pages, in_nr_pages, offs, inlen);
+ pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
+ in_pages, in_nr_pages, offs, inlen);
}
BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index acff565849ae..1d704574e6bf 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -505,7 +505,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_elp_packet *elp_packet;
struct batadv_hard_iface *primary_if;
- struct ethhdr *ethhdr = (struct ethhdr *)skb_mac_header(skb);
+ struct ethhdr *ethhdr;
bool res;
int ret = NET_RX_DROP;
@@ -513,6 +513,7 @@ int batadv_v_elp_packet_recv(struct sk_buff *skb,
if (!res)
goto free_skb;
+ ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
goto free_skb;
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index e710e9afe78f..e503ee0d896b 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -123,8 +123,10 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- if (hard_iface->if_status != BATADV_IF_ACTIVE)
+ if (hard_iface->if_status != BATADV_IF_ACTIVE) {
+ kfree_skb(skb);
return;
+ }
batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX);
batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES,
@@ -985,7 +987,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
{
struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface);
struct batadv_ogm2_packet *ogm_packet;
- struct ethhdr *ethhdr = eth_hdr(skb);
+ struct ethhdr *ethhdr;
int ogm_offset;
u8 *packet_pos;
int ret = NET_RX_DROP;
@@ -999,6 +1001,7 @@ int batadv_v_ogm_packet_recv(struct sk_buff *skb,
if (!batadv_check_management_packet(skb, if_incoming, BATADV_OGM2_HLEN))
goto free_skb;
+ ethhdr = eth_hdr(skb);
if (batadv_is_my_mac(bat_priv, ethhdr->h_source))
goto free_skb;
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 41c1ad33d009..24c9c0c3f316 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -630,7 +630,19 @@ out:
*/
void batadv_update_min_mtu(struct net_device *soft_iface)
{
- soft_iface->mtu = batadv_hardif_min_mtu(soft_iface);
+ struct batadv_priv *bat_priv = netdev_priv(soft_iface);
+ int limit_mtu;
+ int mtu;
+
+ mtu = batadv_hardif_min_mtu(soft_iface);
+
+ if (bat_priv->mtu_set_by_user)
+ limit_mtu = bat_priv->mtu_set_by_user;
+ else
+ limit_mtu = ETH_DATA_LEN;
+
+ mtu = min(mtu, limit_mtu);
+ dev_set_mtu(soft_iface, mtu);
/* Check if the local translate table should be cleaned up to match a
* new (and smaller) MTU.
diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c
index ad5714f737be..6efbc9275aec 100644
--- a/net/batman-adv/netlink.c
+++ b/net/batman-adv/netlink.c
@@ -495,7 +495,10 @@ static int batadv_netlink_set_mesh(struct sk_buff *skb, struct genl_info *info)
attr = info->attrs[BATADV_ATTR_FRAGMENTATION_ENABLED];
atomic_set(&bat_priv->fragmentation, !!nla_get_u8(attr));
+
+ rtnl_lock();
batadv_update_min_mtu(bat_priv->soft_iface);
+ rtnl_unlock();
}
if (info->attrs[BATADV_ATTR_GW_BANDWIDTH_DOWN]) {
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d3fdf82282af..85d00dc9ce32 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -153,11 +153,14 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
{
+ struct batadv_priv *bat_priv = netdev_priv(dev);
+
/* check ranges */
if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev))
return -EINVAL;
dev->mtu = new_mtu;
+ bat_priv->mtu_set_by_user = new_mtu;
return 0;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 36ca31252a73..b95c36765d04 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -774,7 +774,6 @@ check_roaming:
if (roamed_back) {
batadv_tt_global_free(bat_priv, tt_global,
"Roaming canceled");
- tt_global = NULL;
} else {
/* The global entry has to be marked as ROAMING and
* has to be kept for consistency purpose
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ca9449ec9836..cf1a0eafe3ab 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1547,6 +1547,12 @@ struct batadv_priv {
struct net_device *soft_iface;
/**
+ * @mtu_set_by_user: MTU was set once by user
+ * protected by rtnl_lock
+ */
+ int mtu_set_by_user;
+
+ /**
* @bat_counters: mesh internal traffic statistic counters (see
* batadv_counters)
*/
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 056f9516e46d..76222565e2df 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -118,7 +118,7 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
*/
params->explicit_connect = false;
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
switch (params->auto_connect) {
case HCI_AUTO_CONN_EXPLICIT:
@@ -127,10 +127,10 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
return;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
- list_add(&params->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(params, &hdev->pend_le_reports);
break;
default:
break;
@@ -1426,8 +1426,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
params->auto_connect == HCI_AUTO_CONN_REPORT ||
params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
}
params->explicit_connect = true;
@@ -1684,7 +1684,7 @@ struct hci_conn *hci_connect_sco(struct hci_dev *hdev, int type, bdaddr_t *dst,
if (!link) {
hci_conn_drop(acl);
hci_conn_drop(sco);
- return NULL;
+ return ERR_PTR(-ENOLINK);
}
sco->setting = setting;
@@ -2254,7 +2254,7 @@ struct hci_conn *hci_connect_cis(struct hci_dev *hdev, bdaddr_t *dst,
if (!link) {
hci_conn_drop(le);
hci_conn_drop(cis);
- return NULL;
+ return ERR_PTR(-ENOLINK);
}
/* If LE is already connected and CIS handle is already set proceed to
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 48917c68358d..1ec83985f1ab 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1972,6 +1972,7 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
struct adv_monitor *monitor)
{
int status = 0;
+ int handle;
switch (hci_get_adv_monitor_offload_ext(hdev)) {
case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
@@ -1980,9 +1981,10 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
goto free_monitor;
case HCI_ADV_MONITOR_EXT_MSFT:
+ handle = monitor->handle;
status = msft_remove_monitor(hdev, monitor);
bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
- hdev->name, monitor->handle, status);
+ hdev->name, handle, status);
break;
}
@@ -2249,22 +2251,46 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
return NULL;
}
-/* This function requires the caller holds hdev->lock */
+/* This function requires the caller holds hdev->lock or rcu_read_lock */
struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
bdaddr_t *addr, u8 addr_type)
{
struct hci_conn_params *param;
- list_for_each_entry(param, list, action) {
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(param, list, action) {
if (bacmp(&param->addr, addr) == 0 &&
- param->addr_type == addr_type)
+ param->addr_type == addr_type) {
+ rcu_read_unlock();
return param;
+ }
}
+ rcu_read_unlock();
+
return NULL;
}
/* This function requires the caller holds hdev->lock */
+void hci_pend_le_list_del_init(struct hci_conn_params *param)
+{
+ if (list_empty(&param->action))
+ return;
+
+ list_del_rcu(&param->action);
+ synchronize_rcu();
+ INIT_LIST_HEAD(&param->action);
+}
+
+/* This function requires the caller holds hdev->lock */
+void hci_pend_le_list_add(struct hci_conn_params *param,
+ struct list_head *list)
+{
+ list_add_rcu(&param->action, list);
+}
+
+/* This function requires the caller holds hdev->lock */
struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
bdaddr_t *addr, u8 addr_type)
{
@@ -2297,14 +2323,15 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
return params;
}
-static void hci_conn_params_free(struct hci_conn_params *params)
+void hci_conn_params_free(struct hci_conn_params *params)
{
+ hci_pend_le_list_del_init(params);
+
if (params->conn) {
hci_conn_drop(params->conn);
hci_conn_put(params->conn);
}
- list_del(&params->action);
list_del(&params->list);
kfree(params);
}
@@ -2342,8 +2369,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
continue;
}
- list_del(&params->list);
- kfree(params);
+ hci_conn_params_free(params);
}
BT_DBG("All LE disabled connection parameters were removed");
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 95816a938cea..31ca320ce38d 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -1564,7 +1564,7 @@ static u8 hci_cc_le_set_privacy_mode(struct hci_dev *hdev, void *data,
params = hci_conn_params_lookup(hdev, &cp->bdaddr, cp->bdaddr_type);
if (params)
- params->privacy_mode = cp->mode;
+ WRITE_ONCE(params->privacy_mode, cp->mode);
hci_dev_unlock(hdev);
@@ -2784,6 +2784,9 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
hci_enable_advertising(hdev);
}
+ /* Inform sockets conn is gone before we delete it */
+ hci_disconn_cfm(conn, HCI_ERROR_UNSPECIFIED);
+
goto done;
}
@@ -2804,8 +2807,8 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
default:
@@ -3423,8 +3426,8 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_del_init(&params->action);
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_del_init(params);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
hci_update_passive_scan(hdev);
break;
@@ -5962,7 +5965,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
conn->dst_type);
if (params) {
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
if (params->conn) {
hci_conn_drop(params->conn);
hci_conn_put(params->conn);
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 8561616abbe5..4d1e32bb6a9c 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -2160,15 +2160,23 @@ static int hci_le_del_accept_list_sync(struct hci_dev *hdev,
return 0;
}
+struct conn_params {
+ bdaddr_t addr;
+ u8 addr_type;
+ hci_conn_flags_t flags;
+ u8 privacy_mode;
+};
+
/* Adds connection to resolve list if needed.
* Setting params to NULL programs local hdev->irk
*/
static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
- struct hci_conn_params *params)
+ struct conn_params *params)
{
struct hci_cp_le_add_to_resolv_list cp;
struct smp_irk *irk;
struct bdaddr_list_with_irk *entry;
+ struct hci_conn_params *p;
if (!use_ll_privacy(hdev))
return 0;
@@ -2203,6 +2211,16 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
/* Default privacy mode is always Network */
params->privacy_mode = HCI_NETWORK_PRIVACY;
+ rcu_read_lock();
+ p = hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &params->addr, params->addr_type);
+ if (!p)
+ p = hci_pend_le_action_lookup(&hdev->pend_le_reports,
+ &params->addr, params->addr_type);
+ if (p)
+ WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY);
+ rcu_read_unlock();
+
done:
if (hci_dev_test_flag(hdev, HCI_PRIVACY))
memcpy(cp.local_irk, hdev->irk, 16);
@@ -2215,7 +2233,7 @@ done:
/* Set Device Privacy Mode. */
static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
- struct hci_conn_params *params)
+ struct conn_params *params)
{
struct hci_cp_le_set_privacy_mode cp;
struct smp_irk *irk;
@@ -2240,6 +2258,8 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
bacpy(&cp.bdaddr, &irk->bdaddr);
cp.mode = HCI_DEVICE_PRIVACY;
+ /* Note: params->privacy_mode is not updated since it is a copy */
+
return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE,
sizeof(cp), &cp, HCI_CMD_TIMEOUT);
}
@@ -2249,7 +2269,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
* properly set the privacy mode.
*/
static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
- struct hci_conn_params *params,
+ struct conn_params *params,
u8 *num_entries)
{
struct hci_cp_le_add_to_accept_list cp;
@@ -2447,6 +2467,52 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk);
}
+static struct conn_params *conn_params_copy(struct list_head *list, size_t *n)
+{
+ struct hci_conn_params *params;
+ struct conn_params *p;
+ size_t i;
+
+ rcu_read_lock();
+
+ i = 0;
+ list_for_each_entry_rcu(params, list, action)
+ ++i;
+ *n = i;
+
+ rcu_read_unlock();
+
+ p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL);
+ if (!p)
+ return NULL;
+
+ rcu_read_lock();
+
+ i = 0;
+ list_for_each_entry_rcu(params, list, action) {
+ /* Racing adds are handled in next scan update */
+ if (i >= *n)
+ break;
+
+ /* No hdev->lock, but: addr, addr_type are immutable.
+ * privacy_mode is only written by us or in
+ * hci_cc_le_set_privacy_mode that we wait for.
+ * We should be idempotent so MGMT updating flags
+ * while we are processing is OK.
+ */
+ bacpy(&p[i].addr, &params->addr);
+ p[i].addr_type = params->addr_type;
+ p[i].flags = READ_ONCE(params->flags);
+ p[i].privacy_mode = READ_ONCE(params->privacy_mode);
+ ++i;
+ }
+
+ rcu_read_unlock();
+
+ *n = i;
+ return p;
+}
+
/* Device must not be scanning when updating the accept list.
*
* Update is done using the following sequence:
@@ -2466,11 +2532,12 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
*/
static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
{
- struct hci_conn_params *params;
+ struct conn_params *params;
struct bdaddr_list *b, *t;
u8 num_entries = 0;
bool pend_conn, pend_report;
u8 filter_policy;
+ size_t i, n;
int err;
/* Pause advertising if resolving list can be used as controllers
@@ -2504,6 +2571,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type))
continue;
+ /* Pointers not dereferenced, no locks needed */
pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
&b->bdaddr,
b->bdaddr_type);
@@ -2532,23 +2600,50 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
* available accept list entries in the controller, then
* just abort and return filer policy value to not use the
* accept list.
+ *
+ * The list and params may be mutated while we wait for events,
+ * so make a copy and iterate it.
*/
- list_for_each_entry(params, &hdev->pend_le_conns, action) {
- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
- if (err)
+
+ params = conn_params_copy(&hdev->pend_le_conns, &n);
+ if (!params) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = 0; i < n; ++i) {
+ err = hci_le_add_accept_list_sync(hdev, &params[i],
+ &num_entries);
+ if (err) {
+ kvfree(params);
goto done;
+ }
}
+ kvfree(params);
+
/* After adding all new pending connections, walk through
* the list of pending reports and also add these to the
* accept list if there is still space. Abort if space runs out.
*/
- list_for_each_entry(params, &hdev->pend_le_reports, action) {
- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
- if (err)
+
+ params = conn_params_copy(&hdev->pend_le_reports, &n);
+ if (!params) {
+ err = -ENOMEM;
+ goto done;
+ }
+
+ for (i = 0; i < n; ++i) {
+ err = hci_le_add_accept_list_sync(hdev, &params[i],
+ &num_entries);
+ if (err) {
+ kvfree(params);
goto done;
+ }
}
+ kvfree(params);
+
/* Use the allowlist unless the following conditions are all true:
* - We are not currently suspending
* - There are 1 or more ADV monitors registered and it's not offloaded
@@ -4837,12 +4932,12 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
struct hci_conn_params *p;
list_for_each_entry(p, &hdev->le_conn_params, list) {
+ hci_pend_le_list_del_init(p);
if (p->conn) {
hci_conn_drop(p->conn);
hci_conn_put(p->conn);
p->conn = NULL;
}
- list_del_init(&p->action);
}
BT_DBG("All LE pending actions cleared");
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 0e6cc57b3911..505d62247268 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -123,8 +123,11 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
{
struct iso_conn *conn = hcon->iso_data;
- if (conn)
+ if (conn) {
+ if (!conn->hcon)
+ conn->hcon = hcon;
return conn;
+ }
conn = kzalloc(sizeof(*conn), GFP_KERNEL);
if (!conn)
@@ -300,14 +303,13 @@ static int iso_connect_bis(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
+ lock_sock(sk);
err = iso_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
- lock_sock(sk);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -321,7 +323,6 @@ static int iso_connect_bis(struct sock *sk)
}
release_sock(sk);
- return err;
unlock:
hci_dev_unlock(hdev);
@@ -389,14 +390,13 @@ static int iso_connect_cis(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
+ lock_sock(sk);
err = iso_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
- lock_sock(sk);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
@@ -413,7 +413,6 @@ static int iso_connect_cis(struct sock *sk)
}
release_sock(sk);
- return err;
unlock:
hci_dev_unlock(hdev);
@@ -1072,8 +1071,8 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
size_t len)
{
struct sock *sk = sock->sk;
- struct iso_conn *conn = iso_pi(sk)->conn;
struct sk_buff *skb, **frag;
+ size_t mtu;
int err;
BT_DBG("sock %p, sk %p", sock, sk);
@@ -1085,11 +1084,18 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
- if (sk->sk_state != BT_CONNECTED)
+ lock_sock(sk);
+
+ if (sk->sk_state != BT_CONNECTED) {
+ release_sock(sk);
return -ENOTCONN;
+ }
+
+ mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
+
+ release_sock(sk);
- skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
- HCI_ISO_DATA_HDR_SIZE, 0);
+ skb = bt_skb_sendmsg(sk, msg, len, mtu, HCI_ISO_DATA_HDR_SIZE, 0);
if (IS_ERR(skb))
return PTR_ERR(skb);
@@ -1102,8 +1108,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
while (len) {
struct sk_buff *tmp;
- tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
- 0, 0);
+ tmp = bt_skb_sendmsg(sk, msg, len, mtu, 0, 0);
if (IS_ERR(tmp)) {
kfree_skb(skb);
return PTR_ERR(tmp);
@@ -1158,15 +1163,19 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
BT_DBG("sk %p", sk);
if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
+ lock_sock(sk);
switch (sk->sk_state) {
case BT_CONNECT2:
- lock_sock(sk);
iso_conn_defer_accept(pi->conn->hcon);
sk->sk_state = BT_CONFIG;
release_sock(sk);
return 0;
case BT_CONNECT:
+ release_sock(sk);
return iso_connect_cis(sk);
+ default:
+ release_sock(sk);
+ break;
}
}
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f7b2d0971f24..d4498037fadc 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -1297,15 +1297,15 @@ static void restart_le_actions(struct hci_dev *hdev)
/* Needed for AUTO_OFF case where might not "really"
* have been powered off.
*/
- list_del_init(&p->action);
+ hci_pend_le_list_del_init(p);
switch (p->auto_connect) {
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
- list_add(&p->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(p, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
- list_add(&p->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(p, &hdev->pend_le_reports);
break;
default:
break;
@@ -5169,7 +5169,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
goto unlock;
}
- params->flags = current_flags;
+ WRITE_ONCE(params->flags, current_flags);
status = MGMT_STATUS_SUCCESS;
/* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
@@ -7285,7 +7285,7 @@ static void get_conn_info_complete(struct hci_dev *hdev, void *data, int err)
bt_dev_dbg(hdev, "err %d", err);
- memcpy(&rp.addr, &cp->addr.bdaddr, sizeof(rp.addr));
+ memcpy(&rp.addr, &cp->addr, sizeof(rp.addr));
status = mgmt_status(err);
if (status == MGMT_STATUS_SUCCESS) {
@@ -7580,7 +7580,7 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
if (params->auto_connect == auto_connect)
return 0;
- list_del_init(&params->action);
+ hci_pend_le_list_del_init(params);
switch (auto_connect) {
case HCI_AUTO_CONN_DISABLED:
@@ -7589,18 +7589,18 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
* connect to device, keep connecting.
*/
if (params->explicit_connect)
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
case HCI_AUTO_CONN_REPORT:
if (params->explicit_connect)
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
else
- list_add(&params->action, &hdev->pend_le_reports);
+ hci_pend_le_list_add(params, &hdev->pend_le_reports);
break;
case HCI_AUTO_CONN_DIRECT:
case HCI_AUTO_CONN_ALWAYS:
if (!is_connected(hdev, addr, addr_type))
- list_add(&params->action, &hdev->pend_le_conns);
+ hci_pend_le_list_add(params, &hdev->pend_le_conns);
break;
}
@@ -7823,9 +7823,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
goto unlock;
}
- list_del(&params->action);
- list_del(&params->list);
- kfree(params);
+ hci_conn_params_free(params);
device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type);
} else {
@@ -7856,9 +7854,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
p->auto_connect = HCI_AUTO_CONN_EXPLICIT;
continue;
}
- list_del(&p->action);
- list_del(&p->list);
- kfree(p);
+ hci_conn_params_free(p);
}
bt_dev_dbg(hdev, "All LE connection parameters were removed");
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index cd1a27ac555d..7762604ddfc0 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -126,8 +126,11 @@ static struct sco_conn *sco_conn_add(struct hci_conn *hcon)
struct hci_dev *hdev = hcon->hdev;
struct sco_conn *conn = hcon->sco_data;
- if (conn)
+ if (conn) {
+ if (!conn->hcon)
+ conn->hcon = hcon;
return conn;
+ }
conn = kzalloc(sizeof(struct sco_conn), GFP_KERNEL);
if (!conn)
@@ -268,21 +271,21 @@ static int sco_connect(struct sock *sk)
goto unlock;
}
- hci_dev_unlock(hdev);
- hci_dev_put(hdev);
-
conn = sco_conn_add(hcon);
if (!conn) {
hci_conn_drop(hcon);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto unlock;
}
- err = sco_chan_add(conn, sk, NULL);
- if (err)
- return err;
-
lock_sock(sk);
+ err = sco_chan_add(conn, sk, NULL);
+ if (err) {
+ release_sock(sk);
+ goto unlock;
+ }
+
/* Update source addr of the socket */
bacpy(&sco_pi(sk)->src, &hcon->src);
@@ -296,8 +299,6 @@ static int sco_connect(struct sock *sk)
release_sock(sk);
- return err;
-
unlock:
hci_dev_unlock(hdev);
hci_dev_put(hdev);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 9ba35685b043..9168114fc87f 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1526,6 +1526,12 @@ static int bcm_release(struct socket *sock)
lock_sock(sk);
+#if IS_ENABLED(CONFIG_PROC_FS)
+ /* remove procfs entry */
+ if (net->can.bcmproc_dir && bo->bcm_proc_read)
+ remove_proc_entry(bo->procname, net->can.bcmproc_dir);
+#endif /* CONFIG_PROC_FS */
+
list_for_each_entry_safe(op, next, &bo->tx_ops, list)
bcm_remove_op(op);
@@ -1561,12 +1567,6 @@ static int bcm_release(struct socket *sock)
list_for_each_entry_safe(op, next, &bo->rx_ops, list)
bcm_remove_op(op);
-#if IS_ENABLED(CONFIG_PROC_FS)
- /* remove procfs entry */
- if (net->can.bcmproc_dir && bo->bcm_proc_read)
- remove_proc_entry(bo->procname, net->can.bcmproc_dir);
-#endif /* CONFIG_PROC_FS */
-
/* remove device reference */
if (bo->bound) {
bo->bound = 0;
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 99770ed28531..f02b5d3e4733 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -188,12 +188,6 @@ static bool isotp_register_rxid(struct isotp_sock *so)
return (isotp_bc_flags(so) == 0);
}
-static bool isotp_register_txecho(struct isotp_sock *so)
-{
- /* all modes but SF_BROADCAST register for tx echo skbs */
- return (isotp_bc_flags(so) != CAN_ISOTP_SF_BROADCAST);
-}
-
static enum hrtimer_restart isotp_rx_timer_handler(struct hrtimer *hrtimer)
{
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
@@ -1209,7 +1203,7 @@ static int isotp_release(struct socket *sock)
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && isotp_register_txecho(so)) {
+ if (so->bound) {
if (so->ifindex) {
struct net_device *dev;
@@ -1332,14 +1326,12 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
can_rx_register(net, dev, rx_id, SINGLE_MASK(rx_id),
isotp_rcv, sk, "isotp", sk);
- if (isotp_register_txecho(so)) {
- /* no consecutive frame echo skb in flight */
- so->cfecho = 0;
+ /* no consecutive frame echo skb in flight */
+ so->cfecho = 0;
- /* register for echo skb's */
- can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
- isotp_rcv_echo, sk, "isotpe", sk);
- }
+ /* register for echo skb's */
+ can_rx_register(net, dev, tx_id, SINGLE_MASK(tx_id),
+ isotp_rcv_echo, sk, "isotpe", sk);
dev_put(dev);
@@ -1560,7 +1552,7 @@ static void isotp_notify(struct isotp_sock *so, unsigned long msg,
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (so->bound && isotp_register_txecho(so)) {
+ if (so->bound) {
if (isotp_register_rxid(so))
can_rx_unregister(dev_net(dev), dev, so->rxid,
SINGLE_MASK(so->rxid),
diff --git a/net/can/raw.c b/net/can/raw.c
index 15c79b079184..d50c3f3d892f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -84,6 +84,8 @@ struct raw_sock {
struct sock sk;
int bound;
int ifindex;
+ struct net_device *dev;
+ netdevice_tracker dev_tracker;
struct list_head notifier;
int loopback;
int recv_own_msgs;
@@ -277,21 +279,24 @@ static void raw_notify(struct raw_sock *ro, unsigned long msg,
if (!net_eq(dev_net(dev), sock_net(sk)))
return;
- if (ro->ifindex != dev->ifindex)
+ if (ro->dev != dev)
return;
switch (msg) {
case NETDEV_UNREGISTER:
lock_sock(sk);
/* remove current filters & unregister */
- if (ro->bound)
+ if (ro->bound) {
raw_disable_allfilters(dev_net(dev), dev, sk);
+ netdev_put(dev, &ro->dev_tracker);
+ }
if (ro->count > 1)
kfree(ro->filter);
ro->ifindex = 0;
ro->bound = 0;
+ ro->dev = NULL;
ro->count = 0;
release_sock(sk);
@@ -337,6 +342,7 @@ static int raw_init(struct sock *sk)
ro->bound = 0;
ro->ifindex = 0;
+ ro->dev = NULL;
/* set default filter to single entry dfilter */
ro->dfilter.can_id = 0;
@@ -383,18 +389,14 @@ static int raw_release(struct socket *sock)
list_del(&ro->notifier);
spin_unlock(&raw_notifier_lock);
+ rtnl_lock();
lock_sock(sk);
/* remove current filters & unregister */
if (ro->bound) {
- if (ro->ifindex) {
- struct net_device *dev;
-
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (dev) {
- raw_disable_allfilters(dev_net(dev), dev, sk);
- dev_put(dev);
- }
+ if (ro->dev) {
+ raw_disable_allfilters(dev_net(ro->dev), ro->dev, sk);
+ netdev_put(ro->dev, &ro->dev_tracker);
} else {
raw_disable_allfilters(sock_net(sk), NULL, sk);
}
@@ -405,6 +407,7 @@ static int raw_release(struct socket *sock)
ro->ifindex = 0;
ro->bound = 0;
+ ro->dev = NULL;
ro->count = 0;
free_percpu(ro->uniq);
@@ -412,6 +415,8 @@ static int raw_release(struct socket *sock)
sock->sk = NULL;
release_sock(sk);
+ rtnl_unlock();
+
sock_put(sk);
return 0;
@@ -422,6 +427,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
struct sockaddr_can *addr = (struct sockaddr_can *)uaddr;
struct sock *sk = sock->sk;
struct raw_sock *ro = raw_sk(sk);
+ struct net_device *dev = NULL;
int ifindex;
int err = 0;
int notify_enetdown = 0;
@@ -431,24 +437,23 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (addr->can_family != AF_CAN)
return -EINVAL;
+ rtnl_lock();
lock_sock(sk);
if (ro->bound && addr->can_ifindex == ro->ifindex)
goto out;
if (addr->can_ifindex) {
- struct net_device *dev;
-
dev = dev_get_by_index(sock_net(sk), addr->can_ifindex);
if (!dev) {
err = -ENODEV;
goto out;
}
if (dev->type != ARPHRD_CAN) {
- dev_put(dev);
err = -ENODEV;
- goto out;
+ goto out_put_dev;
}
+
if (!(dev->flags & IFF_UP))
notify_enetdown = 1;
@@ -456,7 +461,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
/* filters set by default/setsockopt */
err = raw_enable_allfilters(sock_net(sk), dev, sk);
- dev_put(dev);
+ if (err)
+ goto out_put_dev;
+
} else {
ifindex = 0;
@@ -467,26 +474,30 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (!err) {
if (ro->bound) {
/* unregister old filters */
- if (ro->ifindex) {
- struct net_device *dev;
-
- dev = dev_get_by_index(sock_net(sk),
- ro->ifindex);
- if (dev) {
- raw_disable_allfilters(dev_net(dev),
- dev, sk);
- dev_put(dev);
- }
+ if (ro->dev) {
+ raw_disable_allfilters(dev_net(ro->dev),
+ ro->dev, sk);
+ /* drop reference to old ro->dev */
+ netdev_put(ro->dev, &ro->dev_tracker);
} else {
raw_disable_allfilters(sock_net(sk), NULL, sk);
}
}
ro->ifindex = ifindex;
ro->bound = 1;
+ /* bind() ok -> hold a reference for new ro->dev */
+ ro->dev = dev;
+ if (ro->dev)
+ netdev_hold(ro->dev, &ro->dev_tracker, GFP_KERNEL);
}
- out:
+out_put_dev:
+ /* remove potential reference from dev_get_by_index() */
+ if (dev)
+ dev_put(dev);
+out:
release_sock(sk);
+ rtnl_unlock();
if (notify_enetdown) {
sk->sk_err = ENETDOWN;
@@ -553,9 +564,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex) {
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (!dev) {
+ dev = ro->dev;
+ if (ro->bound && dev) {
+ if (dev->reg_state != NETREG_REGISTERED) {
if (count > 1)
kfree(filter);
err = -ENODEV;
@@ -596,7 +607,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->count = count;
out_fil:
- dev_put(dev);
release_sock(sk);
rtnl_unlock();
@@ -614,9 +624,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
rtnl_lock();
lock_sock(sk);
- if (ro->bound && ro->ifindex) {
- dev = dev_get_by_index(sock_net(sk), ro->ifindex);
- if (!dev) {
+ dev = ro->dev;
+ if (ro->bound && dev) {
+ if (dev->reg_state != NETREG_REGISTERED) {
err = -ENODEV;
goto out_err;
}
@@ -640,7 +650,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
ro->err_mask = err_mask;
out_err:
- dev_put(dev);
release_sock(sk);
rtnl_unlock();
@@ -873,7 +882,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
skb->dev = dev;
skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
skb_setup_tx_timestamp(skb, sockc.tsflags);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index cd7b0bf5369e..5eb4898cccd4 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1123,6 +1123,7 @@ bool ceph_addr_is_blank(const struct ceph_entity_addr *addr)
return true;
}
}
+EXPORT_SYMBOL(ceph_addr_is_blank);
int ceph_addr_port(const struct ceph_entity_addr *addr)
{
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 11c04e7d928e..658a6f2320cf 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
int ret;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
+ ret = wait_for_completion_killable(&lreq->reg_commit_wait);
return ret ?: lreq->reg_commit_error;
}
-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
+static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
+ unsigned long timeout)
{
- int ret;
+ long left;
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
- ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
- return ret ?: lreq->notify_finish_error;
+ left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
+ ceph_timeout_jiffies(timeout));
+ if (left <= 0)
+ left = left ?: -ETIMEDOUT;
+ else
+ left = lreq->notify_finish_error; /* completed */
+
+ return left;
}
/*
@@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
linger_submit(lreq);
ret = linger_reg_commit_wait(lreq);
if (!ret)
- ret = linger_notify_finish_wait(lreq);
+ ret = linger_notify_finish_wait(lreq,
+ msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
else
dout("lreq %p failed to initiate notify %d\n", lreq, ret);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index d4172534dfa8..cca7594be92e 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -496,8 +496,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
return ERR_PTR(-EPERM);
nla_for_each_nested(nla, nla_stgs, rem) {
- if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
+ if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
+ if (nla_len(nla) != sizeof(u32))
+ return ERR_PTR(-EINVAL);
nr_maps++;
+ }
}
diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
diff --git a/net/core/filter.c b/net/core/filter.c
index 06ba0e56e369..28a59596987a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4116,12 +4116,6 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
if (unlikely(data_end > data_hard_end))
return -EINVAL;
- /* ALL drivers MUST init xdp->frame_sz, chicken check below */
- if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
- WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
- return -EINVAL;
- }
-
if (unlikely(data_end < xdp->data + ETH_HLEN))
return -EINVAL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 3ad4e030846d..00c94d9622b4 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2268,13 +2268,27 @@ out_err:
return err;
}
-int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
- struct netlink_ext_ack *exterr)
+int rtnl_nla_parse_ifinfomsg(struct nlattr **tb, const struct nlattr *nla_peer,
+ struct netlink_ext_ack *exterr)
{
- return nla_parse_deprecated(tb, IFLA_MAX, head, len, ifla_policy,
+ const struct ifinfomsg *ifmp;
+ const struct nlattr *attrs;
+ size_t len;
+
+ ifmp = nla_data(nla_peer);
+ attrs = nla_data(nla_peer) + sizeof(struct ifinfomsg);
+ len = nla_len(nla_peer) - sizeof(struct ifinfomsg);
+
+ if (ifmp->ifi_index < 0) {
+ NL_SET_ERR_MSG_ATTR(exterr, nla_peer,
+ "ifindex can't be negative");
+ return -EINVAL;
+ }
+
+ return nla_parse_deprecated(tb, IFLA_MAX, attrs, len, ifla_policy,
exterr);
}
-EXPORT_SYMBOL(rtnl_nla_parse_ifla);
+EXPORT_SYMBOL(rtnl_nla_parse_ifinfomsg);
struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
{
@@ -3547,6 +3561,9 @@ replay:
if (ifm->ifi_index > 0) {
link_specified = true;
dev = __dev_get_by_index(net, ifm->ifi_index);
+ } else if (ifm->ifi_index < 0) {
+ NL_SET_ERR_MSG(extack, "ifindex can't be negative");
+ return -EINVAL;
} else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) {
link_specified = true;
dev = rtnl_dev_get(net, tb);
@@ -5140,13 +5157,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
if (br_spec) {
nla_for_each_nested(attr, br_spec, rem) {
- if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
+ if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
if (nla_len(attr) < sizeof(flags))
return -EINVAL;
have_flags = true;
flags = nla_get_u16(attr);
- break;
+ }
+
+ if (nla_type(attr) == IFLA_BRIDGE_MODE) {
+ if (nla_len(attr) < sizeof(u16))
+ return -EINVAL;
}
}
}
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index a29508e1ff35..ef1a2eb6520b 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -1120,13 +1120,19 @@ static void sk_psock_strp_data_ready(struct sock *sk)
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
{
+ int ret;
+
static const struct strp_callbacks cb = {
.rcv_msg = sk_psock_strp_read,
.read_sock_done = sk_psock_strp_read_done,
.parse_msg = sk_psock_strp_parse,
};
- return strp_init(&psock->strp, sk, &cb);
+ ret = strp_init(&psock->strp, sk, &cb);
+ if (!ret)
+ sk_psock_set_state(psock, SK_PSOCK_RX_STRP_ENABLED);
+
+ return ret;
}
void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock)
@@ -1154,7 +1160,7 @@ void sk_psock_stop_strp(struct sock *sk, struct sk_psock *psock)
static void sk_psock_done_strp(struct sk_psock *psock)
{
/* Parser has been stopped */
- if (psock->progs.stream_parser)
+ if (sk_psock_test_state(psock, SK_PSOCK_RX_STRP_ENABLED))
strp_done(&psock->strp);
}
#else
diff --git a/net/core/sock.c b/net/core/sock.c
index 9370fd50aa2c..c9cffb7acbea 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -429,6 +429,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
{
struct __kernel_sock_timeval tv;
int err = sock_copy_user_timeval(&tv, optval, optlen, old_timeval);
+ long val;
if (err)
return err;
@@ -439,7 +440,7 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
if (tv.tv_sec < 0) {
static int warned __read_mostly;
- *timeo_p = 0;
+ WRITE_ONCE(*timeo_p, 0);
if (warned < 10 && net_ratelimit()) {
warned++;
pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
@@ -447,11 +448,12 @@ static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
}
return 0;
}
- *timeo_p = MAX_SCHEDULE_TIMEOUT;
- if (tv.tv_sec == 0 && tv.tv_usec == 0)
- return 0;
- if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
- *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
+ val = MAX_SCHEDULE_TIMEOUT;
+ if ((tv.tv_sec || tv.tv_usec) &&
+ (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1)))
+ val = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec,
+ USEC_PER_SEC / HZ);
+ WRITE_ONCE(*timeo_p, val);
return 0;
}
@@ -804,7 +806,7 @@ EXPORT_SYMBOL(sock_no_linger);
void sock_set_priority(struct sock *sk, u32 priority)
{
lock_sock(sk);
- sk->sk_priority = priority;
+ WRITE_ONCE(sk->sk_priority, priority);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_priority);
@@ -813,9 +815,9 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs)
{
lock_sock(sk);
if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
- sk->sk_sndtimeo = secs * HZ;
+ WRITE_ONCE(sk->sk_sndtimeo, secs * HZ);
else
- sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+ WRITE_ONCE(sk->sk_sndtimeo, MAX_SCHEDULE_TIMEOUT);
release_sock(sk);
}
EXPORT_SYMBOL(sock_set_sndtimeo);
@@ -988,7 +990,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
static void __sock_set_mark(struct sock *sk, u32 val)
{
if (val != sk->sk_mark) {
- sk->sk_mark = val;
+ WRITE_ONCE(sk->sk_mark, val);
sk_dst_reset(sk);
}
}
@@ -1007,7 +1009,7 @@ static void sock_release_reserved_memory(struct sock *sk, int bytes)
bytes = round_down(bytes, PAGE_SIZE);
WARN_ON(bytes > sk->sk_reserved_mem);
- sk->sk_reserved_mem -= bytes;
+ WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
sk_mem_reclaim(sk);
}
@@ -1044,7 +1046,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
}
sk->sk_forward_alloc += pages << PAGE_SHIFT;
- sk->sk_reserved_mem += pages << PAGE_SHIFT;
+ WRITE_ONCE(sk->sk_reserved_mem,
+ sk->sk_reserved_mem + (pages << PAGE_SHIFT));
return 0;
}
@@ -1213,7 +1216,7 @@ set_sndbuf:
if ((val >= 0 && val <= 6) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
- sk->sk_priority = val;
+ WRITE_ONCE(sk->sk_priority, val);
else
ret = -EPERM;
break;
@@ -1438,7 +1441,8 @@ set_sndbuf:
cmpxchg(&sk->sk_pacing_status,
SK_PACING_NONE,
SK_PACING_NEEDED);
- sk->sk_max_pacing_rate = ulval;
+ /* Pairs with READ_ONCE() from sk_getsockopt() */
+ WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
break;
}
@@ -1533,7 +1537,9 @@ set_sndbuf:
}
if ((u8)val == SOCK_TXREHASH_DEFAULT)
val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
- /* Paired with READ_ONCE() in tcp_rtx_synack() */
+ /* Paired with READ_ONCE() in tcp_rtx_synack()
+ * and sk_getsockopt().
+ */
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;
@@ -1633,11 +1639,11 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_SNDBUF:
- v.val = sk->sk_sndbuf;
+ v.val = READ_ONCE(sk->sk_sndbuf);
break;
case SO_RCVBUF:
- v.val = sk->sk_rcvbuf;
+ v.val = READ_ONCE(sk->sk_rcvbuf);
break;
case SO_REUSEADDR:
@@ -1679,7 +1685,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_PRIORITY:
- v.val = sk->sk_priority;
+ v.val = READ_ONCE(sk->sk_priority);
break;
case SO_LINGER:
@@ -1717,16 +1723,18 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
case SO_RCVTIMEO_OLD:
case SO_RCVTIMEO_NEW:
- lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
+ lv = sock_get_timeout(READ_ONCE(sk->sk_rcvtimeo), &v,
+ SO_RCVTIMEO_OLD == optname);
break;
case SO_SNDTIMEO_OLD:
case SO_SNDTIMEO_NEW:
- lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
+ lv = sock_get_timeout(READ_ONCE(sk->sk_sndtimeo), &v,
+ SO_SNDTIMEO_OLD == optname);
break;
case SO_RCVLOWAT:
- v.val = sk->sk_rcvlowat;
+ v.val = READ_ONCE(sk->sk_rcvlowat);
break;
case SO_SNDLOWAT:
@@ -1770,7 +1778,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
spin_unlock(&sk->sk_peer_lock);
if (!peer_pid)
- return -ESRCH;
+ return -ENODATA;
pidfd = pidfd_prepare(peer_pid, 0, &pidfd_file);
put_pid(peer_pid);
@@ -1843,7 +1851,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
optval, optlen, len);
case SO_MARK:
- v.val = sk->sk_mark;
+ v.val = READ_ONCE(sk->sk_mark);
break;
case SO_RCVMARK:
@@ -1862,7 +1870,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
if (!sock->ops->set_peek_off)
return -EOPNOTSUPP;
- v.val = sk->sk_peek_off;
+ v.val = READ_ONCE(sk->sk_peek_off);
break;
case SO_NOFCS:
v.val = sock_flag(sk, SOCK_NOFCS);
@@ -1892,7 +1900,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_NET_RX_BUSY_POLL
case SO_BUSY_POLL:
- v.val = sk->sk_ll_usec;
+ v.val = READ_ONCE(sk->sk_ll_usec);
break;
case SO_PREFER_BUSY_POLL:
v.val = READ_ONCE(sk->sk_prefer_busy_poll);
@@ -1900,12 +1908,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
#endif
case SO_MAX_PACING_RATE:
+ /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
lv = sizeof(v.ulval);
- v.ulval = sk->sk_max_pacing_rate;
+ v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
} else {
/* 32bit version */
- v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
+ v.val = min_t(unsigned long, ~0U,
+ READ_ONCE(sk->sk_max_pacing_rate));
}
break;
@@ -1973,11 +1983,12 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
break;
case SO_RESERVE_MEM:
- v.val = sk->sk_reserved_mem;
+ v.val = READ_ONCE(sk->sk_reserved_mem);
break;
case SO_TXREHASH:
- v.val = sk->sk_txrehash;
+ /* Paired with WRITE_ONCE() in sk_setsockopt() */
+ v.val = READ_ONCE(sk->sk_txrehash);
break;
default:
@@ -3148,7 +3159,7 @@ void __sk_mem_reduce_allocated(struct sock *sk, int amount)
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
- if (sk_under_memory_pressure(sk) &&
+ if (sk_under_global_memory_pressure(sk) &&
(sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
sk_leave_memory_pressure(sk);
}
@@ -3168,7 +3179,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
int sk_set_peek_off(struct sock *sk, int val)
{
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
return 0;
}
EXPORT_SYMBOL_GPL(sk_set_peek_off);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 19538d628714..8f07fea39d9e 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -115,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk)
__acquires(&sk->sk_lock.slock)
{
lock_sock(sk);
- preempt_disable();
rcu_read_lock();
}
@@ -123,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk)
__releases(&sk->sk_lock.slock)
{
rcu_read_unlock();
- preempt_enable();
release_sock(sk);
}
@@ -148,13 +146,13 @@ static void sock_map_del_link(struct sock *sk,
list_for_each_entry_safe(link, tmp, &psock->link, list) {
if (link->link_raw == link_raw) {
struct bpf_map *map = link->map;
- struct bpf_stab *stab = container_of(map, struct bpf_stab,
- map);
- if (psock->saved_data_ready && stab->progs.stream_parser)
+ struct sk_psock_progs *progs = sock_map_progs(map);
+
+ if (psock->saved_data_ready && progs->stream_parser)
strp_stop = true;
- if (psock->saved_data_ready && stab->progs.stream_verdict)
+ if (psock->saved_data_ready && progs->stream_verdict)
verdict_stop = true;
- if (psock->saved_data_ready && stab->progs.skb_verdict)
+ if (psock->saved_data_ready && progs->skb_verdict)
verdict_stop = true;
list_del(&link->list);
sk_psock_free_link(link);
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index c0c438128575..2e6b8c8fd2de 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -980,7 +980,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
return -EOPNOTSUPP;
ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
- tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
+ tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
NULL);
if (ret)
return ret;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index fa8079303cb0..a545ad71201c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -130,7 +130,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
inet->inet_dport);
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
err = dccp_connect(sk);
rt = NULL;
@@ -432,7 +432,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL)
goto put_and_exit;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 7249ef218178..d29d1163203d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -238,8 +238,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
- sk->sk_priority);
+ err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
+ np->tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b8a24734385e..fd2eb148d24d 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -187,7 +187,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
/* And store cached results */
icsk->icsk_pmtu_cookie = pmtu;
- dp->dccps_mss_cache = cur_mps;
+ WRITE_ONCE(dp->dccps_mss_cache, cur_mps);
return cur_mps;
}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index f331e5977a84..fcc5c9d64f46 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -315,11 +315,15 @@ EXPORT_SYMBOL_GPL(dccp_disconnect);
__poll_t dccp_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- __poll_t mask;
struct sock *sk = sock->sk;
+ __poll_t mask;
+ u8 shutdown;
+ int state;
sock_poll_wait(file, sock, wait);
- if (sk->sk_state == DCCP_LISTEN)
+
+ state = inet_sk_state_load(sk);
+ if (state == DCCP_LISTEN)
return inet_csk_listen_poll(sk);
/* Socket is not locked. We are protected from async events
@@ -328,20 +332,21 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
*/
mask = 0;
- if (sk->sk_err)
+ if (READ_ONCE(sk->sk_err))
mask = EPOLLERR;
+ shutdown = READ_ONCE(sk->sk_shutdown);
- if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
+ if (shutdown == SHUTDOWN_MASK || state == DCCP_CLOSED)
mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
+ if (shutdown & RCV_SHUTDOWN)
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* Connected? */
- if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
+ if ((1 << state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
if (atomic_read(&sk->sk_rmem_alloc) > 0)
mask |= EPOLLIN | EPOLLRDNORM;
- if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
+ if (!(shutdown & SEND_SHUTDOWN)) {
if (sk_stream_is_writeable(sk)) {
mask |= EPOLLOUT | EPOLLWRNORM;
} else { /* send SIGIO later */
@@ -359,7 +364,6 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
}
return mask;
}
-
EXPORT_SYMBOL_GPL(dccp_poll);
int dccp_ioctl(struct sock *sk, int cmd, int *karg)
@@ -630,7 +634,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
return dccp_getsockopt_service(sk, len,
(__be32 __user *)optval, optlen);
case DCCP_SOCKOPT_GET_CUR_MPS:
- val = dp->dccps_mss_cache;
+ val = READ_ONCE(dp->dccps_mss_cache);
break;
case DCCP_SOCKOPT_AVAILABLE_CCIDS:
return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
@@ -739,7 +743,7 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
trace_dccp_probe(sk, len);
- if (len > dp->dccps_mss_cache)
+ if (len > READ_ONCE(dp->dccps_mss_cache))
return -EMSGSIZE;
lock_sock(sk);
@@ -772,6 +776,12 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
goto out_discard;
}
+ /* We need to check dccps_mss_cache after socket is locked. */
+ if (len > dp->dccps_mss_cache) {
+ rc = -EMSGSIZE;
+ goto out_discard;
+ }
+
skb_reserve(skb, sk->sk_prot->max_header);
rc = memcpy_from_msg(skb_put(skb, len), msg, len);
if (rc != 0)
diff --git a/net/devlink/leftover.c b/net/devlink/leftover.c
index 1f00f874471f..bfed7929a904 100644
--- a/net/devlink/leftover.c
+++ b/net/devlink/leftover.c
@@ -6704,6 +6704,7 @@ void devlink_notify_unregister(struct devlink *devlink)
struct devlink_param_item *param_item;
struct devlink_trap_item *trap_item;
struct devlink_port *devlink_port;
+ struct devlink_linecard *linecard;
struct devlink_rate *rate_node;
struct devlink_region *region;
unsigned long port_index;
@@ -6732,6 +6733,8 @@ void devlink_notify_unregister(struct devlink *devlink)
xa_for_each(&devlink->ports, port_index, devlink_port)
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
+ list_for_each_entry_reverse(linecard, &devlink->linecard_list, list)
+ devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
devlink_notify(devlink, DEVLINK_CMD_DEL);
}
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 0ce8fd311c78..2f6195d7b741 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -1727,8 +1727,15 @@ int dsa_port_phylink_create(struct dsa_port *dp)
ds->ops->phylink_mac_an_restart)
dp->pl_config.legacy_pre_march2020 = true;
- if (ds->ops->phylink_get_caps)
+ if (ds->ops->phylink_get_caps) {
ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config);
+ } else {
+ /* For legacy drivers */
+ __set_bit(PHY_INTERFACE_MODE_INTERNAL,
+ dp->pl_config.supported_interfaces);
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ dp->pl_config.supported_interfaces);
+ }
pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn),
mode, &dsa_port_phylink_mac_ops);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9b2ca2fcc5a1..02736b83c303 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -340,7 +340,7 @@ lookup_protocol:
else
inet->pmtudisc = IP_PMTUDISC_WANT;
- inet->inet_id = 0;
+ atomic_set(&inet->inet_id, 0);
sock_init_data(sock, sk);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 4d1af0cd7d99..cb5dbee9e018 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -73,7 +73,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
reuseport_has_conns_set(sk);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
sk_dst_set(sk, &rt->dst);
err = 0;
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index ba06ed42e428..2be2d4922557 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -1132,7 +1132,7 @@ static int esp_init_authenc(struct xfrm_state *x,
err = crypto_aead_setkey(aead, key, keylen);
free_key:
- kfree(key);
+ kfree_sensitive(key);
error:
return err;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 0cc19cfbb673..aeebe8816689 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -1019,7 +1019,7 @@ static void reqsk_timer_handler(struct timer_list *t)
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
- max_syn_ack_retries = icsk->icsk_syn_retries ? :
+ max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b812eb36f0e3..f7426926a104 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
}
#endif
- if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
+ if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
goto errout;
if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
@@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry.ifindex = sk->sk_bound_dev_if;
entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
if (sk_fullsock(sk))
- entry.mark = sk->sk_mark;
+ entry.mark = READ_ONCE(sk->sk_mark);
else if (sk->sk_state == TCP_NEW_SYN_RECV)
entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
else if (sk->sk_state == TCP_TIME_WAIT)
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e7391bf310a7..0819d6001b9a 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -650,20 +650,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
spin_lock(lock);
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
- ret = sk_hashed(osk);
- if (ret) {
- /* Before deleting the node, we insert a new one to make
- * sure that the look-up-sk process would not miss either
- * of them and that at least one node would exist in ehash
- * table all the time. Otherwise there's a tiny chance
- * that lookup process could find nothing in ehash table.
- */
- __sk_nulls_add_node_tail_rcu(sk, list);
- sk_nulls_del_node_init_rcu(osk);
- }
- goto unlock;
- }
- if (found_dup_sk) {
+ ret = sk_nulls_del_node_init_rcu(osk);
+ } else if (found_dup_sk) {
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
if (*found_dup_sk)
ret = false;
@@ -672,7 +660,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ret)
__sk_nulls_add_node_rcu(sk, list);
-unlock:
spin_unlock(lock);
return ret;
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 40052414c7c7..2c1b245dba8e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -88,10 +88,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
- struct hlist_nulls_head *list)
+static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
+ struct hlist_nulls_head *list)
{
- hlist_nulls_add_tail_rcu(&tw->tw_node, list);
+ hlist_nulls_add_head_rcu(&tw->tw_node, list);
}
static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -144,7 +144,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
spin_lock(lock);
- inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
+ inet_twsk_add_node_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 81a1cce1a7d1..22a26d1d29a0 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -548,7 +548,8 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
goto err_free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto err_free_skb;
truncate = true;
}
@@ -689,7 +690,8 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
goto free_skb;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto free_skb;
truncate = true;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6e70839257f7..6ba1a0fafbaa 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -184,9 +184,9 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
ip_options_build(skb, &opt->opt, daddr, rt);
}
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
if (!skb->mark)
- skb->mark = sk->sk_mark;
+ skb->mark = READ_ONCE(sk->sk_mark);
/* Send it out. */
return ip_local_out(net, skb->sk, skb);
@@ -528,8 +528,8 @@ packet_routed:
skb_shinfo(skb)->gso_segs ?: 1);
/* TODO : should we use skb->sk here instead of sk ? */
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);
res = ip_local_out(net, sk, skb);
rcu_read_unlock();
@@ -1158,10 +1158,15 @@ alloc_new_skb:
}
copy = datalen - transhdrlen - fraggap - pagedlen;
+ /* [!] NOTE: copy will be negative if pagedlen>0
+ * because then the equation reduces to -fraggap.
+ */
if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
err = -EFAULT;
kfree_skb(skb);
goto error;
+ } else if (flags & MSG_SPLICE_PAGES) {
+ copy = 0;
}
offset += copy;
@@ -1209,6 +1214,10 @@ alloc_new_skb:
} else if (flags & MSG_SPLICE_PAGES) {
struct msghdr *msg = from;
+ err = -EIO;
+ if (WARN_ON_ONCE(copy > msg->msg_iter.count))
+ goto error;
+
err = skb_splice_from_iter(skb, &msg->msg_iter, copy,
sk->sk_allocation);
if (err < 0)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8e97d8d4cc9d..d41bce8927b2 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -592,7 +592,7 @@ void __ip_sock_set_tos(struct sock *sk, int val)
}
if (inet_sk(sk)->tos != val) {
inet_sk(sk)->tos = val;
- sk->sk_priority = rt_tos2priority(val);
+ WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
sk_dst_reset(sk);
}
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 92c02c886fe7..586b1b3e35b8 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -224,7 +224,7 @@ static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
.un.frag.__unused = 0,
.un.frag.mtu = htons(mtu),
};
- icmph->checksum = ip_compute_csum(icmph, len);
+ icmph->checksum = csum_fold(skb_checksum(skb, 0, len, 0));
skb_reset_transport_header(skb);
niph = skb_push(skb, sizeof(*niph));
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 53bfd8af6920..d1e7d0ceb7ed 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -287,12 +287,12 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
break;
case htons(ETH_P_IPV6):
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
break;
default:
goto tx_err;
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index f95142e56da0..be5498f5dd31 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -3221,13 +3221,9 @@ static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
&rtm_dump_nexthop_cb, &filter);
if (err < 0) {
if (likely(skb->len))
- goto out;
- goto out_err;
+ err = skb->len;
}
-out:
- err = skb->len;
-out_err:
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
@@ -3367,25 +3363,19 @@ static int rtm_dump_nexthop_bucket_nh(struct sk_buff *skb,
dd->filter.res_bucket_nh_id != nhge->nh->id)
continue;
+ dd->ctx->bucket_index = bucket_index;
err = nh_fill_res_bucket(skb, nh, bucket, bucket_index,
RTM_NEWNEXTHOPBUCKET, portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
cb->extack);
- if (err < 0) {
- if (likely(skb->len))
- goto out;
- goto out_err;
- }
+ if (err)
+ return err;
}
dd->ctx->done_nh_idx = dd->ctx->nh.idx + 1;
- bucket_index = 0;
+ dd->ctx->bucket_index = 0;
-out:
- err = skb->len;
-out_err:
- dd->ctx->bucket_index = bucket_index;
- return err;
+ return 0;
}
static int rtm_dump_nexthop_bucket_cb(struct sk_buff *skb,
@@ -3434,13 +3424,9 @@ static int rtm_dump_nexthop_bucket(struct sk_buff *skb,
if (err < 0) {
if (likely(skb->len))
- goto out;
- goto out_err;
+ err = skb->len;
}
-out:
- err = skb->len;
-out_err:
cb->seq = net->nexthop.seq;
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
return err;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 7782ff5e6539..cb381f5aa464 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -348,7 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
goto error;
skb_reserve(skb, hlen);
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
skb_dst_set(skb, &rt->dst);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 98d7e6ba7493..92fede388d52 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
const struct inet_sock *inet = inet_sk(sk);
oif = sk->sk_bound_dev_if;
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
tos = ip_sock_rt_tos(sk);
scope = ip_sock_rt_scope(sk);
prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
@@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
inet_opt = rcu_dereference(inet->inet_opt);
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
- flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
+ flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
ip_sock_rt_scope(sk),
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e03e08745308..8ed52e1e3c99 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3291,7 +3291,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
return -EINVAL;
lock_sock(sk);
- inet_csk(sk)->icsk_syn_retries = val;
+ WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
release_sock(sk);
return 0;
}
@@ -3300,7 +3300,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt);
void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
{
lock_sock(sk);
- inet_csk(sk)->icsk_user_timeout = val;
+ WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
release_sock(sk);
}
EXPORT_SYMBOL(tcp_sock_set_user_timeout);
@@ -3312,7 +3312,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
if (val < 1 || val > MAX_TCP_KEEPIDLE)
return -EINVAL;
- tp->keepalive_time = val * HZ;
+ /* Paired with WRITE_ONCE() in keepalive_time_when() */
+ WRITE_ONCE(tp->keepalive_time, val * HZ);
if (sock_flag(sk, SOCK_KEEPOPEN) &&
!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
u32 elapsed = keepalive_time_elapsed(tp);
@@ -3344,7 +3345,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
return -EINVAL;
lock_sock(sk);
- tcp_sk(sk)->keepalive_intvl = val * HZ;
+ WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
release_sock(sk);
return 0;
}
@@ -3356,7 +3357,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
return -EINVAL;
lock_sock(sk);
- tcp_sk(sk)->keepalive_probes = val;
+ /* Paired with READ_ONCE() in keepalive_probes() */
+ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
release_sock(sk);
return 0;
}
@@ -3558,19 +3560,19 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (val < 1 || val > MAX_TCP_KEEPINTVL)
err = -EINVAL;
else
- tp->keepalive_intvl = val * HZ;
+ WRITE_ONCE(tp->keepalive_intvl, val * HZ);
break;
case TCP_KEEPCNT:
if (val < 1 || val > MAX_TCP_KEEPCNT)
err = -EINVAL;
else
- tp->keepalive_probes = val;
+ WRITE_ONCE(tp->keepalive_probes, val);
break;
case TCP_SYNCNT:
if (val < 1 || val > MAX_TCP_SYNCNT)
err = -EINVAL;
else
- icsk->icsk_syn_retries = val;
+ WRITE_ONCE(icsk->icsk_syn_retries, val);
break;
case TCP_SAVE_SYN:
@@ -3583,18 +3585,18 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_LINGER2:
if (val < 0)
- tp->linger2 = -1;
+ WRITE_ONCE(tp->linger2, -1);
else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
- tp->linger2 = TCP_FIN_TIMEOUT_MAX;
+ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
else
- tp->linger2 = val * HZ;
+ WRITE_ONCE(tp->linger2, val * HZ);
break;
case TCP_DEFER_ACCEPT:
/* Translate value in seconds to number of retransmits */
- icsk->icsk_accept_queue.rskq_defer_accept =
- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
- TCP_RTO_MAX / HZ);
+ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ));
break;
case TCP_WINDOW_CLAMP:
@@ -3618,7 +3620,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (val < 0)
err = -EINVAL;
else
- icsk->icsk_user_timeout = val;
+ WRITE_ONCE(icsk->icsk_user_timeout, val);
break;
case TCP_FASTOPEN:
@@ -3656,13 +3658,13 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
if (!tp->repair)
err = -EPERM;
else
- tp->tsoffset = val - tcp_time_stamp_raw();
+ WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
break;
case TCP_REPAIR_WINDOW:
err = tcp_repair_set_window(tp, optval, optlen);
break;
case TCP_NOTSENT_LOWAT:
- tp->notsent_lowat = val;
+ WRITE_ONCE(tp->notsent_lowat, val);
sk->sk_write_space(sk);
break;
case TCP_INQ:
@@ -3674,7 +3676,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
case TCP_TX_DELAY:
if (val)
tcp_enable_tx_delay();
- tp->tcp_tx_delay = val;
+ WRITE_ONCE(tp->tcp_tx_delay, val);
break;
default:
err = -ENOPROTOOPT;
@@ -3991,17 +3993,18 @@ int do_tcp_getsockopt(struct sock *sk, int level,
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? :
+ val = READ_ONCE(icsk->icsk_syn_retries) ? :
READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
break;
case TCP_LINGER2:
- val = tp->linger2;
+ val = READ_ONCE(tp->linger2);
if (val >= 0)
val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
+ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
+ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
val = tp->window_clamp;
@@ -4138,11 +4141,11 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_USER_TIMEOUT:
- val = icsk->icsk_user_timeout;
+ val = READ_ONCE(icsk->icsk_user_timeout);
break;
case TCP_FASTOPEN:
- val = icsk->icsk_accept_queue.fastopenq.max_qlen;
+ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
break;
case TCP_FASTOPEN_CONNECT:
@@ -4154,14 +4157,14 @@ int do_tcp_getsockopt(struct sock *sk, int level,
break;
case TCP_TX_DELAY:
- val = tp->tcp_tx_delay;
+ val = READ_ONCE(tp->tcp_tx_delay);
break;
case TCP_TIMESTAMP:
- val = tcp_time_stamp_raw() + tp->tsoffset;
+ val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
break;
case TCP_NOTSENT_LOWAT:
- val = tp->notsent_lowat;
+ val = READ_ONCE(tp->notsent_lowat);
break;
case TCP_INQ:
val = tp->recvmsg_inq;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index 45cc7f1ca296..85e4953f1182 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
static bool tcp_fastopen_queue_check(struct sock *sk)
{
struct fastopen_queue *fastopenq;
+ int max_qlen;
/* Make sure the listener has enabled fastopen, and we don't
* exceed the max # of pending TFO requests allowed before trying
@@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
* temporarily vs a server not supporting Fast Open at all.
*/
fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
- if (fastopenq->max_qlen == 0)
+ max_qlen = READ_ONCE(fastopenq->max_qlen);
+ if (max_qlen == 0)
return false;
- if (fastopenq->qlen >= fastopenq->max_qlen) {
+ if (fastopenq->qlen >= max_qlen) {
struct request_sock *req1;
spin_lock(&fastopenq->lock);
req1 = fastopenq->rskq_rst_head;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fd365de4d5ff..2dbdc26da86e 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -307,11 +307,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_daddr,
inet->inet_sport,
usin->sin_port));
- tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr,
- inet->inet_daddr);
+ WRITE_ONCE(tp->tsoffset,
+ secure_tcp_ts_off(net, inet->inet_saddr,
+ inet->inet_daddr));
}
- inet->inet_id = get_random_u16();
+ atomic_set(&inet->inet_id, get_random_u16());
if (tcp_fastopen_defer_connect(sk, &err))
return err;
@@ -930,9 +931,9 @@ static void tcp_v4_send_ack(const struct sock *sk,
ctl_sk = this_cpu_read(ipv4_tcp_sk);
sock_net_set(ctl_sk, net);
ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_mark : sk->sk_mark;
+ inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
- inet_twsk(sk)->tw_priority : sk->sk_priority;
+ inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
transmit_time = tcp_transmit_time(sk);
ip_send_unicast_reply(ctl_sk,
skb, &TCP_SKB_CB(skb)->header.h4.opt,
@@ -988,11 +989,12 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
- req->ts_recent,
+ READ_ONCE(req->ts_recent),
0,
tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
- ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
+ ip_hdr(skb)->tos,
+ READ_ONCE(tcp_rsk(req)->txhash));
}
/*
@@ -1594,7 +1596,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
inet_csk(newsk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 82f4575f9cd9..99ac5efe244d 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
struct tcp_metrics_block {
struct tcp_metrics_block __rcu *tcpm_next;
- possible_net_t tcpm_net;
+ struct net *tcpm_net;
struct inetpeer_addr tcpm_saddr;
struct inetpeer_addr tcpm_daddr;
unsigned long tcpm_stamp;
@@ -51,34 +51,38 @@ struct tcp_metrics_block {
struct rcu_head rcu_head;
};
-static inline struct net *tm_net(struct tcp_metrics_block *tm)
+static inline struct net *tm_net(const struct tcp_metrics_block *tm)
{
- return read_pnet(&tm->tcpm_net);
+ /* Paired with the WRITE_ONCE() in tcpm_new() */
+ return READ_ONCE(tm->tcpm_net);
}
static bool tcp_metric_locked(struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_lock & (1 << idx);
+ /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
+ return READ_ONCE(tm->tcpm_lock) & (1 << idx);
}
-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
enum tcp_metric_index idx)
{
- return tm->tcpm_vals[idx];
+ /* Paired with WRITE_ONCE() in tcp_metric_set() */
+ return READ_ONCE(tm->tcpm_vals[idx]);
}
static void tcp_metric_set(struct tcp_metrics_block *tm,
enum tcp_metric_index idx,
u32 val)
{
- tm->tcpm_vals[idx] = val;
+ /* Paired with READ_ONCE() in tcp_metric_get() */
+ WRITE_ONCE(tm->tcpm_vals[idx], val);
}
static bool addr_same(const struct inetpeer_addr *a,
const struct inetpeer_addr *b)
{
- return inetpeer_addr_cmp(a, b) == 0;
+ return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
}
struct tcpm_hash_bucket {
@@ -89,6 +93,7 @@ static struct tcpm_hash_bucket *tcp_metrics_hash __read_mostly;
static unsigned int tcp_metrics_hash_log __read_mostly;
static DEFINE_SPINLOCK(tcp_metrics_lock);
+static DEFINE_SEQLOCK(fastopen_seqlock);
static void tcpm_suck_dst(struct tcp_metrics_block *tm,
const struct dst_entry *dst,
@@ -97,7 +102,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
u32 msval;
u32 val;
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
val = 0;
if (dst_metric_locked(dst, RTAX_RTT))
@@ -110,30 +115,42 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
val |= 1 << TCP_METRIC_CWND;
if (dst_metric_locked(dst, RTAX_REORDERING))
val |= 1 << TCP_METRIC_REORDERING;
- tm->tcpm_lock = val;
+ /* Paired with READ_ONCE() in tcp_metric_locked() */
+ WRITE_ONCE(tm->tcpm_lock, val);
msval = dst_metric_raw(dst, RTAX_RTT);
- tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+ tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
msval = dst_metric_raw(dst, RTAX_RTTVAR);
- tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
- tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
- tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
- tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
+ tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
+ tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+ dst_metric_raw(dst, RTAX_SSTHRESH));
+ tcp_metric_set(tm, TCP_METRIC_CWND,
+ dst_metric_raw(dst, RTAX_CWND));
+ tcp_metric_set(tm, TCP_METRIC_REORDERING,
+ dst_metric_raw(dst, RTAX_REORDERING));
if (fastopen_clear) {
+ write_seqlock(&fastopen_seqlock);
tm->tcpm_fastopen.mss = 0;
tm->tcpm_fastopen.syn_loss = 0;
tm->tcpm_fastopen.try_exp = 0;
tm->tcpm_fastopen.cookie.exp = false;
tm->tcpm_fastopen.cookie.len = 0;
+ write_sequnlock(&fastopen_seqlock);
}
}
#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+static void tcpm_check_stamp(struct tcp_metrics_block *tm,
+ const struct dst_entry *dst)
{
- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+ unsigned long limit;
+
+ if (!tm)
+ return;
+ limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
+ if (unlikely(time_after(jiffies, limit)))
tcpm_suck_dst(tm, dst, false);
}
@@ -174,20 +191,23 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
oldest = deref_locked(tcp_metrics_hash[hash].chain);
for (tm = deref_locked(oldest->tcpm_next); tm;
tm = deref_locked(tm->tcpm_next)) {
- if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
+ if (time_before(READ_ONCE(tm->tcpm_stamp),
+ READ_ONCE(oldest->tcpm_stamp)))
oldest = tm;
}
tm = oldest;
} else {
- tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
+ tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
if (!tm)
goto out_unlock;
}
- write_pnet(&tm->tcpm_net, net);
+ /* Paired with the READ_ONCE() in tm_net() */
+ WRITE_ONCE(tm->tcpm_net, net);
+
tm->tcpm_saddr = *saddr;
tm->tcpm_daddr = *daddr;
- tcpm_suck_dst(tm, dst, true);
+ tcpm_suck_dst(tm, dst, reclaim);
if (likely(!reclaim)) {
tm->tcpm_next = tcp_metrics_hash[hash].chain;
@@ -434,7 +454,7 @@ void tcp_update_metrics(struct sock *sk)
tp->reordering);
}
}
- tm->tcpm_stamp = jiffies;
+ WRITE_ONCE(tm->tcpm_stamp, jiffies);
out_unlock:
rcu_read_unlock();
}
@@ -539,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
return ret;
}
-static DEFINE_SEQLOCK(fastopen_seqlock);
-
void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
struct tcp_fastopen_cookie *cookie)
{
@@ -647,7 +665,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
}
if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
- jiffies - tm->tcpm_stamp,
+ jiffies - READ_ONCE(tm->tcpm_stamp),
TCP_METRICS_ATTR_PAD) < 0)
goto nla_put_failure;
@@ -658,7 +676,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
if (!nest)
goto nla_put_failure;
for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
- u32 val = tm->tcpm_vals[i];
+ u32 val = tcp_metric_get(tm, i);
if (!val)
continue;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 04fc328727e6..c8f2aa003387 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -528,7 +528,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
newtp->lsndtime = tcp_jiffies32;
- newsk->sk_txhash = treq->txhash;
+ newsk->sk_txhash = READ_ONCE(treq->txhash);
newtp->total_retrans = req->num_retrans;
tcp_init_xmit_timers(newsk);
@@ -555,7 +555,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
newtp->max_window = newtp->snd_wnd;
if (newtp->rx_opt.tstamp_ok) {
- newtp->rx_opt.ts_recent = req->ts_recent;
+ newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
} else {
@@ -619,7 +619,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
if (tmp_opt.saw_tstamp) {
- tmp_opt.ts_recent = req->ts_recent;
+ tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
if (tmp_opt.rcv_tsecr)
tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
/* We do not store true stamp, but it is not required,
@@ -758,8 +758,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
/* In sequence, PAWS is OK. */
+ /* TODO: We probably should defer ts_recent change once
+ * we take ownership of @req.
+ */
if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
- req->ts_recent = tmp_opt.rcv_tsval;
+ WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
/* Truncate SYN, it is out of window starting
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2cb39b6dad02..51d8638d4b4c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -878,7 +878,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
if (likely(ireq->tstamp_ok)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
- opts->tsecr = req->ts_recent;
+ opts->tsecr = READ_ONCE(req->ts_recent);
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
if (likely(ireq->sack_ok)) {
@@ -3660,7 +3660,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
rcu_read_lock();
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
- skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
+ skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4);
/* bpf program will be interested in the tcp_flags */
TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
@@ -4210,7 +4210,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
/* Paired with WRITE_ONCE() in sock_setsockopt() */
if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
- tcp_rsk(req)->txhash = net_tx_rndhash();
+ WRITE_ONCE(tcp_rsk(req)->txhash, net_tx_rndhash());
res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
NULL);
if (!res) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 470f581eedd4..206418b6d7c4 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -591,7 +591,9 @@ out_reset_timer:
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
- icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+ icsk->icsk_rto = clamp(__tcp_set_rto(tp),
+ tcp_rto_min(sk),
+ TCP_RTO_MAX);
} else if (sk->sk_state != TCP_SYN_SENT ||
icsk->icsk_backoff >
READ_ONCE(net->ipv4.sysctl_tcp_syn_linear_timeouts)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 42a96b3547c9..abfa860367aa 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -114,6 +114,7 @@
#include <net/sock_reuseport.h>
#include <net/addrconf.h>
#include <net/udp_tunnel.h>
+#include <net/gro.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -555,10 +556,13 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
{
const struct iphdr *iph = ip_hdr(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), net->ipv4.udp_table, NULL);
+ iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 75aa4de5b731..0f46b3c2e4ac 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -274,13 +274,20 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
__sum16 check;
__be16 newlen;
- if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
- return __udp_gso_segment_list(gso_skb, features, is_ipv6);
-
mss = skb_shinfo(gso_skb)->gso_size;
if (gso_skb->len <= sizeof(*uh) + mss)
return ERR_PTR(-EINVAL);
+ if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) {
+ /* Packet is from an untrusted source, reset gso_segs. */
+ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh),
+ mss);
+ return NULL;
+ }
+
+ if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST)
+ return __udp_gso_segment_list(gso_skb, features, is_ipv6);
+
skb_pull(gso_skb, sizeof(*uh));
/* clear destructor to avoid skb_segment assigning it to tail */
@@ -388,8 +395,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
- !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
return __udp_gso_segment(skb, features, false);
mss = skb_shinfo(skb)->gso_size;
@@ -603,10 +609,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct iphdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet_get_iif_sdif(skb, &iif, &sdif);
return __udp4_lib_lookup(net, iph->saddr, sport,
- iph->daddr, dport, inet_iif(skb),
- inet_sdif(skb), net->ipv4.udp_table, NULL);
+ iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 658bfed1df8b..08d4b7132d4c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -152,7 +152,7 @@ config INET6_TUNNEL
default n
config IPV6_VTI
-tristate "Virtual (secure) IPv6: tunneling"
+ tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
select NET_IP_TUNNEL
select XFRM
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e5213e598a04..94cec2075eee 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2561,12 +2561,18 @@ static void manage_tempaddrs(struct inet6_dev *idev,
ipv6_ifa_notify(0, ift);
}
- if ((create || list_empty(&idev->tempaddr_list)) &&
- idev->cnf.use_tempaddr > 0) {
+ /* Also create a temporary address if it's enabled but no temporary
+ * address currently exists.
+ * However, we get called with valid_lft == 0, prefered_lft == 0, create == false
+ * as part of cleanup (ie. deleting the mngtmpaddr).
+ * We don't want that to result in creating a new temporary ip address.
+ */
+ if (list_empty(&idev->tempaddr_list) && (valid_lft || prefered_lft))
+ create = true;
+
+ if (create && idev->cnf.use_tempaddr > 0) {
/* When a new public address is created as described
* in [ADDRCONF], also create a new temporary address.
- * Also create a temporary address if it's enabled but
- * no temporary address currently exists.
*/
read_unlock_bh(&idev->lock);
ipv6_create_tempaddr(ifp, false);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index da80974ad23a..070d87abf7c0 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -955,7 +955,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
goto tx_err;
if (skb->len > dev->mtu + dev->hard_header_len) {
- pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto tx_err;
truncate = true;
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 10b222865d46..73c85d4e0e9c 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -568,12 +568,12 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
vti6_addr_conflict(t, ipv6_hdr(skb)))
goto tx_err;
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
break;
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
break;
default:
goto tx_err;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index cc3d5ad17257..67a3b8f6e72b 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1073,7 +1073,7 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
And all this only to mangle msg->im6_msgtype and
to set msg->im6_mbz to "mbz" :-)
*/
- skb_push(skb, -skb_network_offset(pkt));
+ __skb_pull(skb, skb_network_offset(pkt));
skb_push(skb, sizeof(*msg));
skb_reset_transport_header(skb);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 18634ebd20a4..a42be96ae209 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -197,7 +197,8 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
static inline int ndisc_is_useropt(const struct net_device *dev,
struct nd_opt_hdr *opt)
{
- return opt->nd_opt_type == ND_OPT_RDNSS ||
+ return opt->nd_opt_type == ND_OPT_PREFIX_INFO ||
+ opt->nd_opt_type == ND_OPT_RDNSS ||
opt->nd_opt_type == ND_OPT_DNSSL ||
opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL ||
opt->nd_opt_type == ND_OPT_PREF64 ||
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index f804c11e2146..c2c291827a2c 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init_sk(&ipc6, np);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_oif = oif;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ac1cef094c5f..49381f35b623 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -614,7 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
skb_reserve(skb, hlen);
skb->protocol = htons(ETH_P_IPV6);
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc->mark;
skb->tstamp = sockc->transmit_time;
@@ -774,12 +774,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
*/
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = fl6.flowi6_mark;
if (sin6) {
if (addr_len < SIN6_LEN_RFC2133)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 64e873f5895f..56a55585eb79 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2951,7 +2951,8 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
if (!oif && skb->dev)
oif = l3mdev_master_ifindex(skb->dev);
- ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
+ ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
+ sk->sk_uid);
dst = __sk_dst_get(sk);
if (!dst || !dst->obsolete ||
@@ -3172,8 +3173,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
- ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
- sk->sk_uid);
+ ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
+ READ_ONCE(sk->sk_mark), sk->sk_uid);
}
EXPORT_SYMBOL_GPL(ip6_sk_redirect);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 40dd92a2f480..6e86721e1cdb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -564,8 +564,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
- tclass, sk->sk_priority);
+ err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
+ opt, tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -939,7 +939,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
else
- mark = sk->sk_mark;
+ mark = READ_ONCE(sk->sk_mark);
skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
}
if (txhash) {
@@ -1126,10 +1126,11 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_rsk(req)->rcv_nxt,
req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
- req->ts_recent, sk->sk_bound_dev_if,
+ READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
- ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
- tcp_rsk(req)->txhash);
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0,
+ READ_ONCE(sk->sk_priority),
+ READ_ONCE(tcp_rsk(req)->txhash));
}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b7c972aa09a7..f787e6b8424c 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -51,6 +51,7 @@
#include <net/inet6_hashtables.h>
#include <net/busy_poll.h>
#include <net/sock_reuseport.h>
+#include <net/gro.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
@@ -300,10 +301,13 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
return __udp6_lib_lookup(net, &iph->saddr, sport,
- &iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), net->ipv4.udp_table, NULL);
+ &iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
/* Must be called under rcu_read_lock().
@@ -624,7 +628,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (type == NDISC_REDIRECT) {
if (tunnel) {
ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
- sk->sk_mark, sk->sk_uid);
+ READ_ONCE(sk->sk_mark), sk->sk_uid);
} else {
ip6_sk_redirect(skb, sk);
}
@@ -1356,7 +1360,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipcm6_init(&ipc6);
ipc6.gso_size = READ_ONCE(up->gso_size);
ipc6.sockc.tsflags = sk->sk_tsflags;
- ipc6.sockc.mark = sk->sk_mark;
+ ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
/* destination address check */
if (sin6) {
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index ad3b8726873e..6b95ba241ebe 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -43,8 +43,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto out;
- if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4 &&
- !skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST))
+ if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
return __udp_gso_segment(skb, features, true);
mss = skb_shinfo(skb)->gso_size;
@@ -119,10 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
{
const struct ipv6hdr *iph = skb_gro_network_header(skb);
struct net *net = dev_net(skb->dev);
+ int iif, sdif;
+
+ inet6_get_iif_sdif(skb, &iif, &sdif);
return __udp6_lib_lookup(net, &iph->saddr, sport,
- &iph->daddr, dport, inet6_iif(skb),
- inet6_sdif(skb), net->ipv4.udp_table, NULL);
+ &iph->daddr, dport, iif,
+ sdif, net->ipv4.udp_table, NULL);
}
INDIRECT_CALLABLE_SCOPE
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ede3c6a60353..b4ea4cf9fad4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1848,9 +1848,9 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms
if (ext_hdrs[SADB_X_EXT_FILTER - 1]) {
struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1];
- if ((xfilter->sadb_x_filter_splen >=
+ if ((xfilter->sadb_x_filter_splen >
(sizeof(xfrm_address_t) << 3)) ||
- (xfilter->sadb_x_filter_dplen >=
+ (xfilter->sadb_x_filter_dplen >
(sizeof(xfrm_address_t) << 3))) {
mutex_unlock(&pfk->dump_lock);
return -EINVAL;
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b1623f9c4f92..ff78217f0cb1 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -519,7 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* Get and verify the address */
memset(&fl6, 0, sizeof(fl6));
- fl6.flowi6_mark = sk->sk_mark;
+ fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
fl6.flowi6_uid = sk->sk_uid;
ipcm6_init(&ipc6);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 57c35c960b2c..9b06c380866b 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -402,7 +402,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen)
memcpy(laddr.mac, addr->sllc_mac, IFHWADDRLEN);
laddr.lsap = addr->sllc_sap;
rc = -EADDRINUSE; /* mac + sap clash. */
- ask = llc_lookup_established(sap, &daddr, &laddr);
+ ask = llc_lookup_established(sap, &daddr, &laddr, &init_net);
if (ask) {
sock_put(ask);
goto out_put;
diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c
index 912aa9bd5e29..d037009ee10f 100644
--- a/net/llc/llc_conn.c
+++ b/net/llc/llc_conn.c
@@ -453,11 +453,13 @@ static int llc_exec_conn_trans_actions(struct sock *sk,
static inline bool llc_estab_match(const struct llc_sap *sap,
const struct llc_addr *daddr,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
- return llc->laddr.lsap == laddr->lsap &&
+ return net_eq(sock_net(sk), net) &&
+ llc->laddr.lsap == laddr->lsap &&
llc->daddr.lsap == daddr->lsap &&
ether_addr_equal(llc->laddr.mac, laddr->mac) &&
ether_addr_equal(llc->daddr.mac, daddr->mac);
@@ -468,6 +470,7 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
* @sap: SAP
* @daddr: address of remote LLC (MAC + SAP)
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search connection list of the SAP and finds connection using the remote
* mac, remote sap, local mac, and local sap. Returns pointer for
@@ -476,7 +479,8 @@ static inline bool llc_estab_match(const struct llc_sap *sap,
*/
static struct sock *__llc_lookup_established(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -486,12 +490,12 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap,
rcu_read_lock();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_estab_match(sap, daddr, laddr, rc)) {
+ if (llc_estab_match(sap, daddr, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_estab_match(sap, daddr, laddr, rc))) {
+ !llc_estab_match(sap, daddr, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -513,29 +517,33 @@ found:
struct sock *llc_lookup_established(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *sk;
local_bh_disable();
- sk = __llc_lookup_established(sap, daddr, laddr);
+ sk = __llc_lookup_established(sap, daddr, laddr, net);
local_bh_enable();
return sk;
}
static inline bool llc_listener_match(const struct llc_sap *sap,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
- return sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
+ return net_eq(sock_net(sk), net) &&
+ sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN &&
llc->laddr.lsap == laddr->lsap &&
ether_addr_equal(llc->laddr.mac, laddr->mac);
}
static struct sock *__llc_lookup_listener(struct llc_sap *sap,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -545,12 +553,12 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap,
rcu_read_lock();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_listener_match(sap, laddr, rc)) {
+ if (llc_listener_match(sap, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_listener_match(sap, laddr, rc))) {
+ !llc_listener_match(sap, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -574,6 +582,7 @@ found:
* llc_lookup_listener - Finds listener for local MAC + SAP
* @sap: SAP
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search connection list of the SAP and finds connection listening on
* local mac, and local sap. Returns pointer for parent socket found,
@@ -581,24 +590,26 @@ found:
* Caller has to make sure local_bh is disabled.
*/
static struct sock *llc_lookup_listener(struct llc_sap *sap,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
+ struct sock *rc = __llc_lookup_listener(sap, laddr, net);
static struct llc_addr null_addr;
- struct sock *rc = __llc_lookup_listener(sap, laddr);
if (!rc)
- rc = __llc_lookup_listener(sap, &null_addr);
+ rc = __llc_lookup_listener(sap, &null_addr, net);
return rc;
}
static struct sock *__llc_lookup(struct llc_sap *sap,
struct llc_addr *daddr,
- struct llc_addr *laddr)
+ struct llc_addr *laddr,
+ const struct net *net)
{
- struct sock *sk = __llc_lookup_established(sap, daddr, laddr);
+ struct sock *sk = __llc_lookup_established(sap, daddr, laddr, net);
- return sk ? : llc_lookup_listener(sap, laddr);
+ return sk ? : llc_lookup_listener(sap, laddr, net);
}
/**
@@ -776,7 +787,7 @@ void llc_conn_handler(struct llc_sap *sap, struct sk_buff *skb)
llc_pdu_decode_da(skb, daddr.mac);
llc_pdu_decode_dsap(skb, &daddr.lsap);
- sk = __llc_lookup(sap, &saddr, &daddr);
+ sk = __llc_lookup(sap, &saddr, &daddr, dev_net(skb->dev));
if (!sk)
goto drop;
diff --git a/net/llc/llc_if.c b/net/llc/llc_if.c
index dde9bf08a593..58a5f419adc6 100644
--- a/net/llc/llc_if.c
+++ b/net/llc/llc_if.c
@@ -92,7 +92,7 @@ int llc_establish_connection(struct sock *sk, const u8 *lmac, u8 *dmac, u8 dsap)
daddr.lsap = dsap;
memcpy(daddr.mac, dmac, sizeof(daddr.mac));
memcpy(laddr.mac, lmac, sizeof(laddr.mac));
- existing = llc_lookup_established(llc->sap, &daddr, &laddr);
+ existing = llc_lookup_established(llc->sap, &daddr, &laddr, sock_net(sk));
if (existing) {
if (existing->sk_state == TCP_ESTABLISHED) {
sk = existing;
diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
index c309b72a5877..7cac441862e2 100644
--- a/net/llc/llc_input.c
+++ b/net/llc/llc_input.c
@@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
void (*sta_handler)(struct sk_buff *skb);
void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
- if (!net_eq(dev_net(dev), &init_net))
- goto drop;
-
/*
* When the interface is in promisc. mode, drop all the crap that it
* receives, do not try to analyse it.
diff --git a/net/llc/llc_sap.c b/net/llc/llc_sap.c
index 6805ce43a055..116c0e479183 100644
--- a/net/llc/llc_sap.c
+++ b/net/llc/llc_sap.c
@@ -294,25 +294,29 @@ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb,
static inline bool llc_dgram_match(const struct llc_sap *sap,
const struct llc_addr *laddr,
- const struct sock *sk)
+ const struct sock *sk,
+ const struct net *net)
{
struct llc_sock *llc = llc_sk(sk);
return sk->sk_type == SOCK_DGRAM &&
- llc->laddr.lsap == laddr->lsap &&
- ether_addr_equal(llc->laddr.mac, laddr->mac);
+ net_eq(sock_net(sk), net) &&
+ llc->laddr.lsap == laddr->lsap &&
+ ether_addr_equal(llc->laddr.mac, laddr->mac);
}
/**
* llc_lookup_dgram - Finds dgram socket for the local sap/mac
* @sap: SAP
* @laddr: address of local LLC (MAC + SAP)
+ * @net: netns to look up a socket in
*
* Search socket list of the SAP and finds connection using the local
* mac, and local sap. Returns pointer for socket found, %NULL otherwise.
*/
static struct sock *llc_lookup_dgram(struct llc_sap *sap,
- const struct llc_addr *laddr)
+ const struct llc_addr *laddr,
+ const struct net *net)
{
struct sock *rc;
struct hlist_nulls_node *node;
@@ -322,12 +326,12 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap,
rcu_read_lock_bh();
again:
sk_nulls_for_each_rcu(rc, node, laddr_hb) {
- if (llc_dgram_match(sap, laddr, rc)) {
+ if (llc_dgram_match(sap, laddr, rc, net)) {
/* Extra checks required by SLAB_TYPESAFE_BY_RCU */
if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt)))
goto again;
if (unlikely(llc_sk(rc)->sap != sap ||
- !llc_dgram_match(sap, laddr, rc))) {
+ !llc_dgram_match(sap, laddr, rc, net))) {
sock_put(rc);
continue;
}
@@ -429,7 +433,7 @@ void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb)
llc_sap_mcast(sap, &laddr, skb);
kfree_skb(skb);
} else {
- struct sock *sk = llc_lookup_dgram(sap, &laddr);
+ struct sock *sk = llc_lookup_dgram(sap, &laddr, dev_net(skb->dev));
if (sk) {
llc_sap_rcv(sap, skb, sk);
sock_put(sk);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 4f707d2a160f..0af2599c17e8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1083,7 +1083,8 @@ static inline bool ieee80211_rx_reorder_ready(struct tid_ampdu_rx *tid_agg_rx,
struct sk_buff *tail = skb_peek_tail(frames);
struct ieee80211_rx_status *status;
- if (tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
+ if (tid_agg_rx->reorder_buf_filtered &&
+ tid_agg_rx->reorder_buf_filtered & BIT_ULL(index))
return true;
if (!tail)
@@ -1124,7 +1125,8 @@ static void ieee80211_release_reorder_frame(struct ieee80211_sub_if_data *sdata,
}
no_frame:
- tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
+ if (tid_agg_rx->reorder_buf_filtered)
+ tid_agg_rx->reorder_buf_filtered &= ~BIT_ULL(index);
tid_agg_rx->head_seq_num = ieee80211_sn_inc(tid_agg_rx->head_seq_num);
}
@@ -4264,6 +4266,7 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
u16 ssn, u64 filtered,
u16 received_mpdus)
{
+ struct ieee80211_local *local;
struct sta_info *sta;
struct tid_ampdu_rx *tid_agg_rx;
struct sk_buff_head frames;
@@ -4281,6 +4284,11 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
sta = container_of(pubsta, struct sta_info, sta);
+ local = sta->sdata->local;
+ WARN_ONCE(local->hw.max_rx_aggregation_subframes > 64,
+ "RX BA marker can't support max_rx_aggregation_subframes %u > 64\n",
+ local->hw.max_rx_aggregation_subframes);
+
if (!ieee80211_rx_data_set_sta(&rx, sta, -1))
return;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3613489eb6e3..d80658547836 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2335,7 +2335,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
- if (flags & MPTCP_CF_FASTCLOSE) {
+ if ((flags & MPTCP_CF_FASTCLOSE) && !__mptcp_check_fallback(msk)) {
/* be sure to force the tcp_disconnect() path,
* to generate the egress reset
*/
@@ -3328,7 +3328,7 @@ static void mptcp_release_cb(struct sock *sk)
if (__test_and_clear_bit(MPTCP_CLEAN_UNA, &msk->cb_flags))
__mptcp_clean_una_wakeup(sk);
- if (unlikely(&msk->cb_flags)) {
+ if (unlikely(msk->cb_flags)) {
/* be sure to set the current sk state before tacking actions
* depending on sk_state, that is processing MPTCP_ERROR_REPORT
*/
@@ -3723,10 +3723,9 @@ static int mptcp_listen(struct socket *sock, int backlog)
if (!err) {
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
mptcp_copy_inaddrs(sk, ssock->sk);
+ mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
}
- mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
-
unlock:
release_sock(sk);
return err;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 37fbe22e2433..ba2a873a4d2e 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -325,7 +325,6 @@ struct mptcp_sock {
u32 subflow_id;
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
- struct mptcp_sock *dl_next;
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 63f7a09335c5..a3f1fe810cc9 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -103,7 +103,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
break;
case SO_MARK:
if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
- ssk->sk_mark = sk->sk_mark;
+ WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
sk_dst_reset(ssk);
}
break;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 9ee3b7abbaf6..94ae7dd01c65 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -1793,16 +1793,31 @@ static void subflow_state_change(struct sock *sk)
void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
- struct mptcp_sock *msk, *next, *head = NULL;
- struct request_sock *req;
- struct sock *sk;
+ struct request_sock *req, *head, *tail;
+ struct mptcp_subflow_context *subflow;
+ struct sock *sk, *ssk;
- /* build a list of all unaccepted mptcp sockets */
+ /* Due to lock dependencies no relevant lock can be acquired under rskq_lock.
+ * Splice the req list, so that accept() can not reach the pending ssk after
+ * the listener socket is released below.
+ */
spin_lock_bh(&queue->rskq_lock);
- for (req = queue->rskq_accept_head; req; req = req->dl_next) {
- struct mptcp_subflow_context *subflow;
- struct sock *ssk = req->sk;
+ head = queue->rskq_accept_head;
+ tail = queue->rskq_accept_tail;
+ queue->rskq_accept_head = NULL;
+ queue->rskq_accept_tail = NULL;
+ spin_unlock_bh(&queue->rskq_lock);
+
+ if (!head)
+ return;
+ /* can't acquire the msk socket lock under the subflow one,
+ * or will cause ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (req = head; req; req = req->dl_next) {
+ ssk = req->sk;
if (!sk_is_mptcp(ssk))
continue;
@@ -1810,32 +1825,10 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
if (!subflow || !subflow->conn)
continue;
- /* skip if already in list */
sk = subflow->conn;
- msk = mptcp_sk(sk);
- if (msk->dl_next || msk == head)
- continue;
-
sock_hold(sk);
- msk->dl_next = head;
- head = msk;
- }
- spin_unlock_bh(&queue->rskq_lock);
- if (!head)
- return;
-
- /* can't acquire the msk socket lock under the subflow one,
- * or will cause ABBA deadlock
- */
- release_sock(listener_ssk);
-
- for (msk = head; msk; msk = next) {
- sk = (struct sock *)msk;
lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
- next = msk->dl_next;
- msk->dl_next = NULL;
-
__mptcp_unaccepted_force_close(sk);
release_sock(sk);
@@ -1859,6 +1852,13 @@ void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_s
/* we are still under the listener msk socket lock */
lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+
+ /* restore the listener queue, to let the TCP code clean it up */
+ spin_lock_bh(&queue->rskq_lock);
+ WARN_ON_ONCE(queue->rskq_accept_head);
+ queue->rskq_accept_head = head;
+ queue->rskq_accept_tail = tail;
+ spin_unlock_bh(&queue->rskq_lock);
}
static int subflow_ulp_init(struct sock *sk)
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 62606fb44d02..4bb0d90eca1c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1876,6 +1876,7 @@ static int
proc_do_sync_threshold(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
+ struct netns_ipvs *ipvs = table->extra2;
int *valp = table->data;
int val[2];
int rc;
@@ -1885,6 +1886,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
.mode = table->mode,
};
+ mutex_lock(&ipvs->sync_mutex);
memcpy(val, valp, sizeof(val));
rc = proc_dointvec(&tmp, write, buffer, lenp, ppos);
if (write) {
@@ -1894,6 +1896,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
else
memcpy(valp, val, sizeof(val));
}
+ mutex_unlock(&ipvs->sync_mutex);
return rc;
}
@@ -4321,6 +4324,7 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs)
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx].extra2 = ipvs;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD;
tbl[idx++].data = &ipvs->sysctl_sync_refresh_period;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 91eacc9b0b98..b6bcc8f2f46b 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -49,8 +49,8 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
[SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
- [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
- [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = 3 SECS,
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = 3 SECS,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
};
@@ -105,7 +105,7 @@ static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init */ {sCL, sCL, sCW, sCE, sES, sCL, sCL, sSA, sCW},
/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 237f739da3ca..eb8b1167dced 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -31,7 +31,9 @@ static LIST_HEAD(nf_tables_expressions);
static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static LIST_HEAD(nf_tables_destroy_list);
+static LIST_HEAD(nf_tables_gc_list);
static DEFINE_SPINLOCK(nf_tables_destroy_list_lock);
+static DEFINE_SPINLOCK(nf_tables_gc_list_lock);
enum {
NFT_VALIDATE_SKIP = 0,
@@ -120,6 +122,9 @@ static void nft_validate_state_update(struct nft_table *table, u8 new_validate_s
static void nf_tables_trans_destroy_work(struct work_struct *w);
static DECLARE_WORK(trans_destroy_work, nf_tables_trans_destroy_work);
+static void nft_trans_gc_work(struct work_struct *work);
+static DECLARE_WORK(trans_gc_work, nft_trans_gc_work);
+
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
@@ -582,10 +587,6 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem);
-
static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
struct nft_set *set,
const struct nft_set_iter *iter,
@@ -1372,7 +1373,7 @@ static int nf_tables_newtable(struct sk_buff *skb, const struct nfnl_info *info,
if (table == NULL)
goto err_kzalloc;
- table->validate_state = NFT_VALIDATE_SKIP;
+ table->validate_state = nft_net->validate_state;
table->name = nla_strdup(attr, GFP_KERNEL_ACCOUNT);
if (table->name == NULL)
goto err_strdup;
@@ -3685,8 +3686,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
if (err < 0)
return err;
}
-
- cond_resched();
}
return 0;
@@ -3710,6 +3709,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
err = nft_chain_validate(&ctx, chain);
if (err < 0)
return err;
+
+ cond_resched();
}
return 0;
@@ -3811,8 +3812,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
- if (nft_chain_is_bound(chain))
- return -EOPNOTSUPP;
} else if (nla[NFTA_RULE_CHAIN_ID]) {
chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID],
@@ -3825,6 +3824,9 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
}
+ if (nft_chain_is_bound(chain))
+ return -EOPNOTSUPP;
+
if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
rule = __nft_rule_lookup(chain, handle);
@@ -4087,6 +4089,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
list_for_each_entry(chain, &table->chains, list) {
if (!nft_is_active_next(net, chain))
continue;
+ if (nft_chain_is_bound(chain))
+ continue;
ctx.chain = chain;
err = nft_delrule_by_chain(&ctx);
@@ -5052,6 +5056,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
INIT_LIST_HEAD(&set->bindings);
INIT_LIST_HEAD(&set->catchall_list);
+ refcount_set(&set->refs, 1);
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
@@ -5119,6 +5124,14 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
}
}
+static void nft_set_put(struct nft_set *set)
+{
+ if (refcount_dec_and_test(&set->refs)) {
+ kfree(set->name);
+ kvfree(set);
+ }
+}
+
static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
{
int i;
@@ -5131,8 +5144,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
- kfree(set->name);
- kvfree(set);
+ nft_set_put(set);
}
static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
@@ -5599,8 +5611,12 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
struct nft_set_elem *elem)
{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
struct nft_set_dump_args *args;
+ if (nft_set_elem_expired(ext))
+ return 0;
+
args = container_of(iter, struct nft_set_dump_args, iter);
return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
@@ -6271,7 +6287,8 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
if (nft_set_elem_active(ext, genmask) &&
- !nft_set_elem_expired(ext))
+ !nft_set_elem_expired(ext) &&
+ !nft_set_elem_is_dead(ext))
return ext;
}
@@ -6279,29 +6296,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net,
}
EXPORT_SYMBOL_GPL(nft_set_catchall_lookup);
-void *nft_set_catchall_gc(const struct nft_set *set)
-{
- struct nft_set_elem_catchall *catchall, *next;
- struct nft_set_ext *ext;
- void *elem = NULL;
-
- list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
- ext = nft_set_elem_ext(set, catchall->elem);
-
- if (!nft_set_elem_expired(ext) ||
- nft_set_elem_mark_busy(ext))
- continue;
-
- elem = catchall->elem;
- list_del_rcu(&catchall->list);
- kfree_rcu(catchall, rcu);
- break;
- }
-
- return elem;
-}
-EXPORT_SYMBOL_GPL(nft_set_catchall_gc);
-
static int nft_setelem_catchall_insert(const struct net *net,
struct nft_set *set,
const struct nft_set_elem *elem,
@@ -6363,7 +6357,6 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
if (nft_setelem_is_catchall(set, elem)) {
nft_set_elem_change_active(net, set, ext);
- nft_set_elem_clear_busy(ext);
} else {
set->ops->activate(net, set, elem);
}
@@ -6378,8 +6371,7 @@ static int nft_setelem_catchall_deactivate(const struct net *net,
list_for_each_entry(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_is_active(net, ext) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_is_active(net, ext))
continue;
kfree(elem->priv);
@@ -6774,7 +6766,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_elem_free;
}
- ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
+ ext->genmask = nft_genmask_cur(ctx->net);
err = nft_setelem_insert(ctx->net, set, &elem, &ext2, flags);
if (err) {
@@ -6926,9 +6918,9 @@ static void nft_setelem_data_activate(const struct net *net,
nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
-static void nft_setelem_data_deactivate(const struct net *net,
- const struct nft_set *set,
- struct nft_set_elem *elem)
+void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
@@ -7092,14 +7084,14 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_active(ext, genmask) ||
- nft_set_elem_mark_busy(ext))
+ if (!nft_set_elem_active(ext, genmask))
continue;
elem.priv = catchall->elem;
ret = __nft_set_catchall_flush(ctx, set, &elem);
if (ret < 0)
break;
+ nft_set_elem_change_active(ctx->net, set, ext);
}
return ret;
@@ -7167,29 +7159,6 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return err;
}
-void nft_set_gc_batch_release(struct rcu_head *rcu)
-{
- struct nft_set_gc_batch *gcb;
- unsigned int i;
-
- gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
- for (i = 0; i < gcb->head.cnt; i++)
- nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
- kfree(gcb);
-}
-
-struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
- gfp_t gfp)
-{
- struct nft_set_gc_batch *gcb;
-
- gcb = kzalloc(sizeof(*gcb), gfp);
- if (gcb == NULL)
- return gcb;
- gcb->head.set = set;
- return gcb;
-}
-
/*
* Stateful objects
*/
@@ -9082,9 +9051,8 @@ static int nf_tables_validate(struct net *net)
return -EAGAIN;
nft_validate_state_update(table, NFT_VALIDATE_SKIP);
+ break;
}
-
- break;
}
return 0;
@@ -9411,6 +9379,212 @@ void nft_chain_del(struct nft_chain *chain)
list_del_rcu(&chain->list);
}
+static void nft_trans_gc_setelem_remove(struct nft_ctx *ctx,
+ struct nft_trans_gc *trans)
+{
+ void **priv = trans->priv;
+ unsigned int i;
+
+ for (i = 0; i < trans->count; i++) {
+ struct nft_set_elem elem = {
+ .priv = priv[i],
+ };
+
+ nft_setelem_data_deactivate(ctx->net, trans->set, &elem);
+ nft_setelem_remove(ctx->net, trans->set, &elem);
+ }
+}
+
+void nft_trans_gc_destroy(struct nft_trans_gc *trans)
+{
+ nft_set_put(trans->set);
+ put_net(trans->net);
+ kfree(trans);
+}
+
+static void nft_trans_gc_trans_free(struct rcu_head *rcu)
+{
+ struct nft_set_elem elem = {};
+ struct nft_trans_gc *trans;
+ struct nft_ctx ctx = {};
+ unsigned int i;
+
+ trans = container_of(rcu, struct nft_trans_gc, rcu);
+ ctx.net = read_pnet(&trans->set->net);
+
+ for (i = 0; i < trans->count; i++) {
+ elem.priv = trans->priv[i];
+ if (!nft_setelem_is_catchall(trans->set, &elem))
+ atomic_dec(&trans->set->nelems);
+
+ nf_tables_set_elem_destroy(&ctx, trans->set, elem.priv);
+ }
+
+ nft_trans_gc_destroy(trans);
+}
+
+static bool nft_trans_gc_work_done(struct nft_trans_gc *trans)
+{
+ struct nftables_pernet *nft_net;
+ struct nft_ctx ctx = {};
+
+ nft_net = nft_pernet(trans->net);
+
+ mutex_lock(&nft_net->commit_mutex);
+
+ /* Check for race with transaction, otherwise this batch refers to
+ * stale objects that might not be there anymore. Skip transaction if
+ * set has been destroyed from control plane transaction in case gc
+ * worker loses race.
+ */
+ if (READ_ONCE(nft_net->gc_seq) != trans->seq || trans->set->dead) {
+ mutex_unlock(&nft_net->commit_mutex);
+ return false;
+ }
+
+ ctx.net = trans->net;
+ ctx.table = trans->set->table;
+
+ nft_trans_gc_setelem_remove(&ctx, trans);
+ mutex_unlock(&nft_net->commit_mutex);
+
+ return true;
+}
+
+static void nft_trans_gc_work(struct work_struct *work)
+{
+ struct nft_trans_gc *trans, *next;
+ LIST_HEAD(trans_gc_list);
+
+ spin_lock(&nf_tables_gc_list_lock);
+ list_splice_init(&nf_tables_gc_list, &trans_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ list_for_each_entry_safe(trans, next, &trans_gc_list, list) {
+ list_del(&trans->list);
+ if (!nft_trans_gc_work_done(trans)) {
+ nft_trans_gc_destroy(trans);
+ continue;
+ }
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+ }
+}
+
+struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ struct net *net = read_pnet(&set->net);
+ struct nft_trans_gc *trans;
+
+ trans = kzalloc(sizeof(*trans), gfp);
+ if (!trans)
+ return NULL;
+
+ trans->net = maybe_get_net(net);
+ if (!trans->net) {
+ kfree(trans);
+ return NULL;
+ }
+
+ refcount_inc(&set->refs);
+ trans->set = set;
+ trans->seq = gc_seq;
+
+ return trans;
+}
+
+void nft_trans_gc_elem_add(struct nft_trans_gc *trans, void *priv)
+{
+ trans->priv[trans->count++] = priv;
+}
+
+static void nft_trans_gc_queue_work(struct nft_trans_gc *trans)
+{
+ spin_lock(&nf_tables_gc_list_lock);
+ list_add_tail(&trans->list, &nf_tables_gc_list);
+ spin_unlock(&nf_tables_gc_list_lock);
+
+ schedule_work(&trans_gc_work);
+}
+
+static int nft_trans_gc_space(struct nft_trans_gc *trans)
+{
+ return NFT_TRANS_GC_BATCHCOUNT - trans->count;
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_async(struct nft_trans_gc *gc,
+ unsigned int gc_seq, gfp_t gfp)
+{
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ nft_trans_gc_queue_work(gc);
+
+ return nft_trans_gc_alloc(gc->set, gc_seq, gfp);
+}
+
+void nft_trans_gc_queue_async_done(struct nft_trans_gc *trans)
+{
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ nft_trans_gc_queue_work(trans);
+}
+
+struct nft_trans_gc *nft_trans_gc_queue_sync(struct nft_trans_gc *gc, gfp_t gfp)
+{
+ if (WARN_ON_ONCE(!lockdep_commit_lock_is_held(gc->net)))
+ return NULL;
+
+ if (nft_trans_gc_space(gc))
+ return gc;
+
+ call_rcu(&gc->rcu, nft_trans_gc_trans_free);
+
+ return nft_trans_gc_alloc(gc->set, 0, gfp);
+}
+
+void nft_trans_gc_queue_sync_done(struct nft_trans_gc *trans)
+{
+ WARN_ON_ONCE(!lockdep_commit_lock_is_held(trans->net));
+
+ if (trans->count == 0) {
+ nft_trans_gc_destroy(trans);
+ return;
+ }
+
+ call_rcu(&trans->rcu, nft_trans_gc_trans_free);
+}
+
+struct nft_trans_gc *nft_trans_gc_catchall(struct nft_trans_gc *gc,
+ unsigned int gc_seq)
+{
+ struct nft_set_elem_catchall *catchall;
+ const struct nft_set *set = gc->set;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+
+ if (!nft_set_elem_expired(ext))
+ continue;
+ if (nft_set_elem_is_dead(ext))
+ goto dead_elem;
+
+ nft_set_elem_dead(ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ return NULL;
+
+ nft_trans_gc_elem_add(gc, catchall->elem);
+ }
+
+ return gc;
+}
+
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -9569,15 +9743,31 @@ static void nft_set_commit_update(struct list_head *set_update_list)
}
}
+static unsigned int nft_gc_seq_begin(struct nftables_pernet *nft_net)
+{
+ unsigned int gc_seq;
+
+ /* Bump gc counter, it becomes odd, this is the busy mark. */
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+
+ return gc_seq;
+}
+
+static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq)
+{
+ WRITE_ONCE(nft_net->gc_seq, ++gc_seq);
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ unsigned int base_seq, gc_seq;
LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
- unsigned int base_seq;
LIST_HEAD(adl);
int err;
@@ -9608,8 +9798,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
/* 0. Validate ruleset, otherwise roll back for error reporting. */
- if (nf_tables_validate(net) < 0)
+ if (nf_tables_validate(net) < 0) {
+ nft_net->validate_state = NFT_VALIDATE_DO;
return -EAGAIN;
+ }
err = nft_flow_rule_offload_commit(net);
if (err < 0)
@@ -9654,6 +9846,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
WRITE_ONCE(nft_net->base_seq, base_seq);
+ gc_seq = nft_gc_seq_begin(nft_net);
+
/* step 3. Start new generation, rules_gen_X now in use. */
net->nft.gencursor = nft_gencursor_next(net);
@@ -9761,6 +9955,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
+ nft_trans_set(trans)->dead = 1;
list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
trans->msg_type, GFP_KERNEL);
@@ -9863,6 +10058,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
nf_tables_commit_release(net);
return 0;
@@ -10139,8 +10337,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb,
enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = nft_pernet(net);
- int ret = __nf_tables_abort(net, action);
+ unsigned int gc_seq;
+ int ret;
+ gc_seq = nft_gc_seq_begin(nft_net);
+ ret = __nf_tables_abort(net, action);
+ nft_gc_seq_end(nft_net, gc_seq);
mutex_unlock(&nft_net->commit_mutex);
return ret;
@@ -10517,6 +10719,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (!tb[NFTA_VERDICT_CODE])
return -EINVAL;
+
+ /* zero padding hole for memcmp */
+ memset(data, 0, sizeof(*data));
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
@@ -10799,6 +11004,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
ctx.family = table->family;
ctx.table = table;
list_for_each_entry(chain, &table->chains, list) {
+ if (nft_chain_is_bound(chain))
+ continue;
+
ctx.chain = chain;
list_for_each_entry_safe(rule, nr, &chain->rules, list) {
list_del(&rule->list);
@@ -10857,6 +11065,7 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
struct net *net = n->net;
unsigned int deleted;
bool restart = false;
+ unsigned int gc_seq;
if (event != NETLINK_URELEASE || n->protocol != NETLINK_NETFILTER)
return NOTIFY_DONE;
@@ -10864,8 +11073,11 @@ static int nft_rcv_nl_event(struct notifier_block *this, unsigned long event,
nft_net = nft_pernet(net);
deleted = 0;
mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
if (!list_empty(&nf_tables_destroy_list))
- rcu_barrier();
+ nf_tables_trans_destroy_flush_work();
again:
list_for_each_entry(table, &nft_net->tables, list) {
if (nft_table_has_owner(table) &&
@@ -10886,6 +11098,8 @@ again:
if (restart)
goto again;
}
+ nft_gc_seq_end(nft_net, gc_seq);
+
mutex_unlock(&nft_net->commit_mutex);
return NOTIFY_DONE;
@@ -10906,6 +11120,8 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
nft_net->base_seq = 1;
+ nft_net->gc_seq = 0;
+ nft_net->validate_state = NFT_VALIDATE_SKIP;
return 0;
}
@@ -10922,22 +11138,36 @@ static void __net_exit nf_tables_pre_exit_net(struct net *net)
static void __net_exit nf_tables_exit_net(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ unsigned int gc_seq;
mutex_lock(&nft_net->commit_mutex);
+
+ gc_seq = nft_gc_seq_begin(nft_net);
+
if (!list_empty(&nft_net->commit_list) ||
!list_empty(&nft_net->module_list))
__nf_tables_abort(net, NFNL_ABORT_NONE);
+
__nft_release_tables(net);
+
+ nft_gc_seq_end(nft_net, gc_seq);
+
mutex_unlock(&nft_net->commit_mutex);
WARN_ON_ONCE(!list_empty(&nft_net->tables));
WARN_ON_ONCE(!list_empty(&nft_net->module_list));
WARN_ON_ONCE(!list_empty(&nft_net->notify_list));
}
+static void nf_tables_exit_batch(struct list_head *net_exit_list)
+{
+ flush_work(&trans_gc_work);
+}
+
static struct pernet_operations nf_tables_net_ops = {
.init = nf_tables_init_net,
.pre_exit = nf_tables_pre_exit_net,
.exit = nf_tables_exit_net,
+ .exit_batch = nf_tables_exit_batch,
.id = &nf_tables_net_id,
.size = sizeof(struct nftables_pernet),
};
@@ -11009,6 +11239,7 @@ static void __exit nf_tables_module_exit(void)
nft_chain_filter_fini();
nft_chain_route_fini();
unregister_pernet_subsys(&nf_tables_net_ops);
+ cancel_work_sync(&trans_gc_work);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
rhltable_destroy(&nft_objname_ht);
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 4fb34d76dbea..5c5cc01c73c5 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -191,6 +191,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
if (IS_ERR(set))
return PTR_ERR(set);
+ if (set->flags & NFT_SET_OBJECT)
+ return -EOPNOTSUPP;
+
if (set->ops->update == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 407d7197f75b..fccb3cf7749c 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -125,15 +125,27 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
+static void nft_immediate_chain_deactivate(const struct nft_ctx *ctx,
+ struct nft_chain *chain,
+ enum nft_trans_phase phase)
+{
+ struct nft_ctx chain_ctx;
+ struct nft_rule *rule;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+}
+
static void nft_immediate_deactivate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
const struct nft_data *data = &priv->data;
- struct nft_ctx chain_ctx;
struct nft_chain *chain;
- struct nft_rule *rule;
if (priv->dreg == NFT_REG_VERDICT) {
switch (data->verdict.code) {
@@ -143,20 +155,17 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
if (!nft_chain_binding(chain))
break;
- chain_ctx = *ctx;
- chain_ctx.chain = chain;
-
- list_for_each_entry(rule, &chain->rules, list)
- nft_rule_expr_deactivate(&chain_ctx, rule, phase);
-
switch (phase) {
case NFT_TRANS_PREPARE_ERROR:
nf_tables_unbind_chain(ctx, chain);
- fallthrough;
+ nft_deactivate_next(ctx->net, chain);
+ break;
case NFT_TRANS_PREPARE:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_deactivate_next(ctx->net, chain);
break;
default:
+ nft_immediate_chain_deactivate(ctx, chain, phase);
nft_chain_del(chain);
chain->bound = false;
nft_use_dec(&chain->table->use);
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 0b73cb0e752f..524763659f25 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -59,6 +59,8 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
+ if (nft_set_elem_is_dead(&he->ext))
+ return 1;
if (nft_set_elem_expired(&he->ext))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
@@ -188,7 +190,6 @@ static void nft_rhash_activate(const struct net *net, const struct nft_set *set,
struct nft_rhash_elem *he = elem->priv;
nft_set_elem_change_active(net, set, &he->ext);
- nft_set_elem_clear_busy(&he->ext);
}
static bool nft_rhash_flush(const struct net *net,
@@ -196,12 +197,9 @@ static bool nft_rhash_flush(const struct net *net,
{
struct nft_rhash_elem *he = priv;
- if (!nft_set_elem_mark_busy(&he->ext) ||
- !nft_is_active(net, &he->ext)) {
- nft_set_elem_change_active(net, set, &he->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &he->ext);
+
+ return true;
}
static void *nft_rhash_deactivate(const struct net *net,
@@ -218,9 +216,8 @@ static void *nft_rhash_deactivate(const struct net *net,
rcu_read_lock();
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
- if (he != NULL &&
- !nft_rhash_flush(net, set, he))
- he = NULL;
+ if (he)
+ nft_set_elem_change_active(net, set, &he->ext);
rcu_read_unlock();
@@ -252,7 +249,9 @@ static bool nft_rhash_delete(const struct nft_set *set,
if (he == NULL)
return false;
- return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0;
+ nft_set_elem_dead(&he->ext);
+
+ return true;
}
static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
@@ -278,8 +277,6 @@ static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&he->ext))
- goto cont;
if (!nft_set_elem_active(&he->ext, iter->genmask))
goto cont;
@@ -314,25 +311,51 @@ static bool nft_rhash_expr_needs_gc_run(const struct nft_set *set,
static void nft_rhash_gc(struct work_struct *work)
{
+ struct nftables_pernet *nft_net;
struct nft_set *set;
struct nft_rhash_elem *he;
struct nft_rhash *priv;
- struct nft_set_gc_batch *gcb = NULL;
struct rhashtable_iter hti;
+ struct nft_trans_gc *gc;
+ struct net *net;
+ u32 gc_seq;
priv = container_of(work, struct nft_rhash, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
rhashtable_walk_enter(&priv->ht, &hti);
rhashtable_walk_start(&hti);
while ((he = rhashtable_walk_next(&hti))) {
if (IS_ERR(he)) {
- if (PTR_ERR(he) != -EAGAIN)
- break;
+ if (PTR_ERR(he) != -EAGAIN) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
continue;
}
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
+ if (nft_set_elem_is_dead(&he->ext))
+ goto dead_elem;
+
if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPRESSIONS) &&
nft_rhash_expr_needs_gc_run(set, &he->ext))
goto needs_gc_run;
@@ -340,26 +363,26 @@ static void nft_rhash_gc(struct work_struct *work)
if (!nft_set_elem_expired(&he->ext))
continue;
needs_gc_run:
- if (nft_set_elem_mark_busy(&he->ext))
- continue;
+ nft_set_elem_dead(&he->ext);
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb == NULL)
- break;
- rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, he);
+ nft_trans_gc_elem_add(gc, he);
}
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
+ /* catchall list iteration requires rcu read side lock. */
rhashtable_walk_stop(&hti);
rhashtable_walk_exit(&hti);
- he = nft_set_catchall_gc(set);
- if (he) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, he);
- }
- nft_set_gc_batch_complete(gcb);
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
@@ -394,7 +417,7 @@ static int nft_rhash_init(const struct nft_set *set,
return err;
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
- if (set->flags & NFT_SET_TIMEOUT)
+ if (set->flags & (NFT_SET_TIMEOUT | NFT_SET_EVAL))
nft_rhash_gc_init(set);
return 0;
@@ -422,7 +445,6 @@ static void nft_rhash_destroy(const struct nft_ctx *ctx,
};
cancel_delayed_work_sync(&priv->gc_work);
- rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
(void *)&rhash_ctx);
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index db526cb7a485..6af9c9ed4b5c 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -566,8 +566,9 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext) ||
- (genmask &&
+ if (nft_set_elem_expired(&f->mt[b].e->ext))
+ goto next_match;
+ if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
goto next_match;
@@ -602,7 +603,7 @@ static void *nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
return pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ nft_genmask_cur(net));
}
/**
@@ -901,12 +902,14 @@ static void pipapo_lt_bits_adjust(struct nft_pipapo_field *f)
static int pipapo_insert(struct nft_pipapo_field *f, const uint8_t *k,
int mask_bits)
{
- int rule = f->rules++, group, ret, bit_offset = 0;
+ int rule = f->rules, group, ret, bit_offset = 0;
- ret = pipapo_resize(f, f->rules - 1, f->rules);
+ ret = pipapo_resize(f, f->rules, f->rules + 1);
if (ret)
return ret;
+ f->rules++;
+
for (group = 0; group < f->groups; group++) {
int i, v;
u8 mask;
@@ -1051,7 +1054,9 @@ static int pipapo_expand(struct nft_pipapo_field *f,
step++;
if (step >= len) {
if (!masks) {
- pipapo_insert(f, base, 0);
+ err = pipapo_insert(f, base, 0);
+ if (err < 0)
+ return err;
masks = 1;
}
goto out;
@@ -1234,6 +1239,9 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
ret = pipapo_expand(f, start, end, f->groups * f->bb);
+ if (ret < 0)
+ return ret;
+
if (f->bsize > bsize_max)
bsize_max = f->bsize;
@@ -1528,16 +1536,34 @@ static void pipapo_drop(struct nft_pipapo_match *m,
}
}
+static void nft_pipapo_gc_deactivate(struct net *net, struct nft_set *set,
+ struct nft_pipapo_elem *e)
+
+{
+ struct nft_set_elem elem = {
+ .priv = e,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+}
+
/**
* pipapo_gc() - Drop expired entries from set, destroy start and end elements
- * @set: nftables API set representation
+ * @_set: nftables API set representation
* @m: Matching data
*/
-static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
+static void pipapo_gc(const struct nft_set *_set, struct nft_pipapo_match *m)
{
+ struct nft_set *set = (struct nft_set *) _set;
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
+ struct nft_trans_gc *gc;
+
+ gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
+ if (!gc)
+ return;
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
@@ -1561,13 +1587,20 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
f--;
i--;
e = f->mt[rulemap[i].to].e;
- if (nft_set_elem_expired(&e->ext) &&
- !nft_set_elem_mark_busy(&e->ext)) {
+
+ /* synchronous gc never fails, there is no need to set on
+ * NFT_SET_ELEM_DEAD_BIT.
+ */
+ if (nft_set_elem_expired(&e->ext)) {
priv->dirty = true;
- pipapo_drop(m, rulemap);
- rcu_barrier();
- nft_set_elem_destroy(set, e, true);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (!gc)
+ break;
+
+ nft_pipapo_gc_deactivate(net, set, e);
+ pipapo_drop(m, rulemap);
+ nft_trans_gc_elem_add(gc, e);
/* And check again current first rule, which is now the
* first we haven't checked.
@@ -1577,11 +1610,11 @@ static void pipapo_gc(const struct nft_set *set, struct nft_pipapo_match *m)
}
}
- e = nft_set_catchall_gc(set);
- if (e)
- nft_set_elem_destroy(set, e, true);
-
- priv->last_gc = jiffies;
+ gc = nft_trans_gc_catchall(gc, 0);
+ if (gc) {
+ nft_trans_gc_queue_sync_done(gc);
+ priv->last_gc = jiffies;
+ }
}
/**
@@ -1664,6 +1697,17 @@ static void nft_pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}
+static bool nft_pipapo_transaction_mutex_held(const struct nft_set *set)
+{
+#ifdef CONFIG_PROVE_LOCKING
+ const struct net *net = read_pnet(&set->net);
+
+ return lockdep_is_held(&nft_pernet(net)->commit_mutex);
+#else
+ return true;
+#endif
+}
+
static void nft_pipapo_abort(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
@@ -1672,7 +1716,7 @@ static void nft_pipapo_abort(const struct nft_set *set)
if (!priv->dirty)
return;
- m = rcu_dereference(priv->match);
+ m = rcu_dereference_protected(priv->match, nft_pipapo_transaction_mutex_held(set));
new_clone = pipapo_clone(m);
if (IS_ERR(new_clone))
@@ -1699,14 +1743,9 @@ static void nft_pipapo_activate(const struct net *net,
const struct nft_set *set,
const struct nft_set_elem *elem)
{
- struct nft_pipapo_elem *e;
-
- e = pipapo_get(net, set, (const u8 *)elem->key.val.data, 0);
- if (IS_ERR(e))
- return;
+ struct nft_pipapo_elem *e = elem->priv;
nft_set_elem_change_active(net, set, &e->ext);
- nft_set_elem_clear_busy(&e->ext);
}
/**
@@ -1918,10 +1957,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
data = (const u8 *)nft_set_ext_key(&e->ext);
- e = pipapo_get(net, set, data, 0);
- if (IS_ERR(e))
- return;
-
while ((rules_f0 = pipapo_rules_same_key(m->f, first_rule))) {
union nft_pipapo_map_bucket rulemap[NFT_PIPAPO_MAX_FIELDS];
const u8 *match_start, *match_end;
@@ -1929,7 +1964,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
int i, start, rules_fx;
match_start = data;
- match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+
+ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
+ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
+ else
+ match_end = data;
start = first_rule;
rules_fx = rules_f0;
@@ -2001,8 +2040,6 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
goto cont;
e = f->mt[r].e;
- if (nft_set_elem_expired(&e->ext))
- goto cont;
elem.priv = e;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 5c05c9b990fb..c6435e709231 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -46,6 +46,12 @@ static int nft_rbtree_cmp(const struct nft_set *set,
set->klen);
}
+static bool nft_rbtree_elem_expired(const struct nft_rbtree_elem *rbe)
+{
+ return nft_set_elem_expired(&rbe->ext) ||
+ nft_set_elem_is_dead(&rbe->ext);
+}
+
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext,
unsigned int seq)
@@ -80,7 +86,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
continue;
}
- if (nft_set_elem_expired(&rbe->ext))
+ if (nft_rbtree_elem_expired(rbe))
return false;
if (nft_rbtree_interval_end(rbe)) {
@@ -98,7 +104,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
nft_set_elem_active(&interval->ext, genmask) &&
- !nft_set_elem_expired(&interval->ext) &&
+ !nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval)) {
*ext = &interval->ext;
return true;
@@ -215,38 +221,70 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
+static void nft_rbtree_gc_remove(struct net *net, struct nft_set *set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set_elem elem = {
+ .priv = rbe,
+ };
+
+ nft_setelem_data_deactivate(net, set, &elem);
+ rb_erase(&rbe->node, &priv->root);
+}
+
static int nft_rbtree_gc_elem(const struct nft_set *__set,
struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe)
+ struct nft_rbtree_elem *rbe,
+ u8 genmask)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
- struct nft_rbtree_elem *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb;
+ struct net *net = read_pnet(&set->net);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_trans_gc *gc;
- gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
- if (!gcb)
+ gc = nft_trans_gc_alloc(set, 0, GFP_ATOMIC);
+ if (!gc)
return -ENOMEM;
- /* search for expired end interval coming before this element. */
+ /* search for end interval coming before this element.
+ * end intervals don't carry a timeout extension, they
+ * are coupled with the interval start element.
+ */
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
- if (nft_rbtree_interval_end(rbe_prev))
+ if (nft_rbtree_interval_end(rbe_prev) &&
+ nft_set_elem_active(&rbe_prev->ext, genmask))
break;
prev = rb_prev(prev);
}
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- atomic_dec(&set->nelems);
+ if (prev) {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ nft_rbtree_gc_remove(net, set, priv, rbe_prev);
+
+ /* There is always room in this trans gc for this element,
+ * memory allocation never actually happens, hence, the warning
+ * splat in such case. No need to set NFT_SET_ELEM_DEAD_BIT,
+ * this is synchronous gc which never fails.
+ */
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
+
+ nft_trans_gc_elem_add(gc, rbe_prev);
}
- rb_erase(&rbe->node, &priv->root);
- atomic_dec(&set->nelems);
+ nft_rbtree_gc_remove(net, set, priv, rbe);
+ gc = nft_trans_gc_queue_sync(gc, GFP_ATOMIC);
+ if (WARN_ON_ONCE(!gc))
+ return -ENOMEM;
- nft_set_gc_batch_add(gcb, rbe);
- nft_set_gc_batch_complete(gcb);
+ nft_trans_gc_elem_add(gc, rbe);
+
+ nft_trans_gc_queue_sync_done(gc);
return 0;
}
@@ -321,7 +359,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports. */
if (nft_set_elem_expired(&rbe->ext)) {
- err = nft_rbtree_gc_elem(set, priv, rbe);
+ err = nft_rbtree_gc_elem(set, priv, rbe, genmask);
if (err < 0)
return err;
@@ -474,7 +512,6 @@ static void nft_rbtree_activate(const struct net *net,
struct nft_rbtree_elem *rbe = elem->priv;
nft_set_elem_change_active(net, set, &rbe->ext);
- nft_set_elem_clear_busy(&rbe->ext);
}
static bool nft_rbtree_flush(const struct net *net,
@@ -482,12 +519,9 @@ static bool nft_rbtree_flush(const struct net *net,
{
struct nft_rbtree_elem *rbe = priv;
- if (!nft_set_elem_mark_busy(&rbe->ext) ||
- !nft_is_active(net, &rbe->ext)) {
- nft_set_elem_change_active(net, set, &rbe->ext);
- return true;
- }
- return false;
+ nft_set_elem_change_active(net, set, &rbe->ext);
+
+ return true;
}
static void *nft_rbtree_deactivate(const struct net *net,
@@ -544,8 +578,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
if (iter->count < iter->skip)
goto cont;
- if (nft_set_elem_expired(&rbe->ext))
- goto cont;
if (!nft_set_elem_active(&rbe->ext, iter->genmask))
goto cont;
@@ -564,26 +596,43 @@ cont:
static void nft_rbtree_gc(struct work_struct *work)
{
- struct nft_rbtree_elem *rbe, *rbe_end = NULL, *rbe_prev = NULL;
- struct nft_set_gc_batch *gcb = NULL;
+ struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct nftables_pernet *nft_net;
struct nft_rbtree *priv;
+ struct nft_trans_gc *gc;
struct rb_node *node;
struct nft_set *set;
+ unsigned int gc_seq;
struct net *net;
- u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
- genmask = nft_genmask_cur(net);
+ nft_net = nft_pernet(net);
+ gc_seq = READ_ONCE(nft_net->gc_seq);
+
+ if (nft_set_gc_is_pending(set))
+ goto done;
+
+ gc = nft_trans_gc_alloc(set, gc_seq, GFP_KERNEL);
+ if (!gc)
+ goto done;
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
+
+ /* Ruleset has been updated, try later. */
+ if (READ_ONCE(nft_net->gc_seq) != gc_seq) {
+ nft_trans_gc_destroy(gc);
+ gc = NULL;
+ goto try_later;
+ }
+
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (!nft_set_elem_active(&rbe->ext, genmask))
- continue;
+ if (nft_set_elem_is_dead(&rbe->ext))
+ goto dead_elem;
/* elements are reversed in the rbtree for historical reasons,
* from highest to lowest value, that is why end element is
@@ -596,46 +645,36 @@ static void nft_rbtree_gc(struct work_struct *work)
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext)) {
- rbe_end = NULL;
+ nft_set_elem_dead(&rbe->ext);
+
+ if (!rbe_end)
continue;
- }
- if (rbe_prev) {
- rb_erase(&rbe_prev->node, &priv->root);
- rbe_prev = NULL;
- }
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (!gcb)
- break;
+ nft_set_elem_dead(&rbe_end->ext);
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe);
- rbe_prev = rbe;
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
- if (rbe_end) {
- atomic_dec(&set->nelems);
- nft_set_gc_batch_add(gcb, rbe_end);
- rb_erase(&rbe_end->node, &priv->root);
- rbe_end = NULL;
- }
- node = rb_next(node);
- if (!node)
- break;
+ nft_trans_gc_elem_add(gc, rbe_end);
+ rbe_end = NULL;
+dead_elem:
+ gc = nft_trans_gc_queue_async(gc, gc_seq, GFP_ATOMIC);
+ if (!gc)
+ goto try_later;
+
+ nft_trans_gc_elem_add(gc, rbe);
}
- if (rbe_prev)
- rb_erase(&rbe_prev->node, &priv->root);
+
+ gc = nft_trans_gc_catchall(gc, gc_seq);
+
+try_later:
write_seqcount_end(&priv->count);
write_unlock_bh(&priv->lock);
- rbe = nft_set_catchall_gc(set);
- if (rbe) {
- gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
- if (gcb)
- nft_set_gc_batch_add(gcb, rbe);
- }
- nft_set_gc_batch_complete(gcb);
-
+ if (gc)
+ nft_trans_gc_queue_async_done(gc);
+done:
queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
nft_set_gc_interval(set));
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 84def74698b7..9ed85be79452 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
break;
case NFT_SOCKET_MARK:
if (sk_fullsock(sk)) {
- *dest = sk->sk_mark;
+ *dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
return;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 7013f55f05d1..76e01f292aaf 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);
if (sk != skb->sk)
sock_gen_put(sk);
@@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
- pskb->mark = sk->sk_mark;
+ pskb->mark = READ_ONCE(sk->sk_mark);
if (sk != skb->sk)
sock_gen_put(sk);
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a6d2a0b1aa21..3d7a91e64c88 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1829,7 +1829,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
parms.port_no = OVSP_LOCAL;
parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
parms.desired_ifindex = a[OVS_DP_ATTR_IFINDEX]
- ? nla_get_u32(a[OVS_DP_ATTR_IFINDEX]) : 0;
+ ? nla_get_s32(a[OVS_DP_ATTR_IFINDEX]) : 0;
/* So far only local changes have been made, now need the lock. */
ovs_lock();
@@ -2049,7 +2049,7 @@ static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
[OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
[OVS_DP_ATTR_MASKS_CACHE_SIZE] = NLA_POLICY_RANGE(NLA_U32, 0,
PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
- [OVS_DP_ATTR_IFINDEX] = {.type = NLA_U32 },
+ [OVS_DP_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
};
static const struct genl_small_ops dp_datapath_genl_ops[] = {
@@ -2302,7 +2302,7 @@ restart:
parms.port_no = port_no;
parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
parms.desired_ifindex = a[OVS_VPORT_ATTR_IFINDEX]
- ? nla_get_u32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
+ ? nla_get_s32(a[OVS_VPORT_ATTR_IFINDEX]) : 0;
vport = new_vport(&parms);
err = PTR_ERR(vport);
@@ -2539,7 +2539,7 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
- [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
+ [OVS_VPORT_ATTR_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 0),
[OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
[OVS_VPORT_ATTR_UPCALL_STATS] = { .type = NLA_NESTED },
};
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 85ff90a03b0c..a2935bd18ed9 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -401,18 +401,20 @@ static void __packet_set_status(struct packet_sock *po, void *frame, int status)
{
union tpacket_uhdr h;
+ /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
- h.h1->tp_status = status;
+ WRITE_ONCE(h.h1->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
break;
case TPACKET_V2:
- h.h2->tp_status = status;
+ WRITE_ONCE(h.h2->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
break;
case TPACKET_V3:
- h.h3->tp_status = status;
+ WRITE_ONCE(h.h3->tp_status, status);
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
break;
default:
@@ -429,17 +431,19 @@ static int __packet_get_status(const struct packet_sock *po, void *frame)
smp_rmb();
+ /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
+
h.raw = frame;
switch (po->tp_version) {
case TPACKET_V1:
flush_dcache_page(pgv_to_page(&h.h1->tp_status));
- return h.h1->tp_status;
+ return READ_ONCE(h.h1->tp_status);
case TPACKET_V2:
flush_dcache_page(pgv_to_page(&h.h2->tp_status));
- return h.h2->tp_status;
+ return READ_ONCE(h.h2->tp_status);
case TPACKET_V3:
flush_dcache_page(pgv_to_page(&h.h3->tp_status));
- return h.h3->tp_status;
+ return READ_ONCE(h.h3->tp_status);
default:
WARN(1, "TPACKET version not supported.\n");
BUG();
@@ -2050,8 +2054,8 @@ retry:
skb->protocol = proto;
skb->dev = dev;
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
+ skb->priority = READ_ONCE(sk->sk_priority);
+ skb->mark = READ_ONCE(sk->sk_mark);
skb->tstamp = sockc.transmit_time;
skb_setup_tx_timestamp(skb, sockc.tsflags);
@@ -2585,8 +2589,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb->protocol = proto;
skb->dev = dev;
- skb->priority = po->sk.sk_priority;
- skb->mark = po->sk.sk_mark;
+ skb->priority = READ_ONCE(po->sk.sk_priority);
+ skb->mark = READ_ONCE(po->sk.sk_mark);
skb->tstamp = sockc->transmit_time;
skb_setup_tx_timestamp(skb, sockc->tsflags);
skb_zcopy_set_nouarg(skb, ph.raw);
@@ -2988,7 +2992,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
goto out_unlock;
sockcm_init(&sockc, sk);
- sockc.mark = sk->sk_mark;
+ sockc.mark = READ_ONCE(sk->sk_mark);
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
if (unlikely(err))
@@ -3061,7 +3065,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->protocol = proto;
skb->dev = dev;
- skb->priority = sk->sk_priority;
+ skb->priority = READ_ONCE(sk->sk_priority);
skb->mark = sockc.mark;
skb->tstamp = sockc.transmit_time;
@@ -3601,7 +3605,7 @@ static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
if (dev) {
sll->sll_hatype = dev->type;
sll->sll_halen = dev->addr_len;
- memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
+ memcpy(sll->sll_addr_flex, dev->dev_addr, dev->addr_len);
} else {
sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
sll->sll_halen = 0;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 466c26df853a..382c7a71f81f 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -406,56 +406,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
return 0;
}
-static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_bpf_prog *prog, unsigned long base,
- struct nlattr **tb, struct nlattr *est, u32 flags,
- struct netlink_ext_ack *extack)
-{
- bool is_bpf, is_ebpf, have_exts = false;
- u32 gen_flags = 0;
- int ret;
-
- is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
- is_ebpf = tb[TCA_BPF_FD];
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
- return -EINVAL;
-
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
- extack);
- if (ret < 0)
- return ret;
-
- if (tb[TCA_BPF_FLAGS]) {
- u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
-
- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
- return -EINVAL;
-
- have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
- }
- if (tb[TCA_BPF_FLAGS_GEN]) {
- gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
- if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
- !tc_flags_valid(gen_flags))
- return -EINVAL;
- }
-
- prog->exts_integrated = have_exts;
- prog->gen_flags = gen_flags;
-
- ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
- cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
- if (ret < 0)
- return ret;
-
- if (tb[TCA_BPF_CLASSID]) {
- prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
- tcf_bind_filter(tp, &prog->res, base);
- }
-
- return 0;
-}
-
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -463,9 +413,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
+ bool is_bpf, is_ebpf, have_exts = false;
struct cls_bpf_prog *oldprog = *arg;
struct nlattr *tb[TCA_BPF_MAX + 1];
+ bool bound_to_filter = false;
struct cls_bpf_prog *prog;
+ u32 gen_flags = 0;
int ret;
if (tca[TCA_OPTIONS] == NULL)
@@ -504,11 +457,51 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
prog->handle = handle;
- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
- extack);
+ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+ is_ebpf = tb[TCA_BPF_FD];
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+
+ ret = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &prog->exts,
+ flags, extack);
+ if (ret < 0)
+ goto errout_idr;
+
+ if (tb[TCA_BPF_FLAGS]) {
+ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+
+ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+
+ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+ }
+ if (tb[TCA_BPF_FLAGS_GEN]) {
+ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+ !tc_flags_valid(gen_flags)) {
+ ret = -EINVAL;
+ goto errout_idr;
+ }
+ }
+
+ prog->exts_integrated = have_exts;
+ prog->gen_flags = gen_flags;
+
+ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
if (ret < 0)
goto errout_idr;
+ if (tb[TCA_BPF_CLASSID]) {
+ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+ tcf_bind_filter(tp, &prog->res, base);
+ bound_to_filter = true;
+ }
+
ret = cls_bpf_offload(tp, prog, oldprog, extack);
if (ret)
goto errout_parms;
@@ -530,6 +523,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout_parms:
+ if (bound_to_filter)
+ tcf_unbind_filter(tp, &prog->res);
cls_bpf_free_parms(prog);
errout_idr:
if (!oldprog)
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f2b0bc4142fe..9f0711da9c95 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -776,7 +776,8 @@ mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL] = { .type = NLA_U32 },
};
-static const struct nla_policy cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX] = {
+static const struct nla_policy
+cfm_opt_policy[TCA_FLOWER_KEY_CFM_OPT_MAX + 1] = {
[TCA_FLOWER_KEY_CFM_MD_LEVEL] = NLA_POLICY_MAX(NLA_U8,
FLOW_DIS_CFM_MDL_MAX),
[TCA_FLOWER_KEY_CFM_OPCODE] = { .type = NLA_U8 },
@@ -1709,7 +1710,7 @@ static int fl_set_key_cfm(struct nlattr **tb,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
{
- struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX];
+ struct nlattr *nla_cfm_opt[TCA_FLOWER_KEY_CFM_OPT_MAX + 1];
int err;
if (!tb[TCA_FLOWER_KEY_CFM])
@@ -2173,53 +2174,6 @@ static bool fl_needs_tc_skb_ext(const struct fl_flow_key *mask)
return mask->meta.l2_miss;
}
-static int fl_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_fl_filter *f, struct fl_flow_mask *mask,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est,
- struct fl_flow_tmplt *tmplt,
- u32 flags, u32 fl_flags,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- err = tcf_exts_validate_ex(net, tp, tb, est, &f->exts, flags,
- fl_flags, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_FLOWER_CLASSID]) {
- f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
- if (flags & TCA_ACT_FLAGS_NO_RTNL)
- rtnl_lock();
- tcf_bind_filter(tp, &f->res, base);
- if (flags & TCA_ACT_FLAGS_NO_RTNL)
- rtnl_unlock();
- }
-
- err = fl_set_key(net, tb, &f->key, &mask->key, extack);
- if (err)
- return err;
-
- fl_mask_update_range(mask);
- fl_set_masked_key(&f->mkey, &f->key, mask);
-
- if (!fl_mask_fits_tmplt(tmplt, mask)) {
- NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
- return -EINVAL;
- }
-
- /* Enable tc skb extension if filter matches on data extracted from
- * this extension.
- */
- if (fl_needs_tc_skb_ext(&mask->key)) {
- f->needs_tc_skb_ext = 1;
- tc_skb_ext_tc_enable();
- }
-
- return 0;
-}
-
static int fl_ht_insert_unique(struct cls_fl_filter *fnew,
struct cls_fl_filter *fold,
bool *in_ht)
@@ -2251,6 +2205,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
struct cls_fl_head *head = fl_head_dereference(tp);
bool rtnl_held = !(flags & TCA_ACT_FLAGS_NO_RTNL);
struct cls_fl_filter *fold = *arg;
+ bool bound_to_filter = false;
struct cls_fl_filter *fnew;
struct fl_flow_mask *mask;
struct nlattr **tb;
@@ -2335,15 +2290,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto errout_idr;
- err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
- tp->chain->tmplt_priv, flags, fnew->flags,
- extack);
- if (err)
+ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
+ &fnew->exts, flags, fnew->flags,
+ extack);
+ if (err < 0)
goto errout_idr;
+ if (tb[TCA_FLOWER_CLASSID]) {
+ fnew->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_lock();
+ tcf_bind_filter(tp, &fnew->res, base);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_unlock();
+ bound_to_filter = true;
+ }
+
+ err = fl_set_key(net, tb, &fnew->key, &mask->key, extack);
+ if (err)
+ goto unbind_filter;
+
+ fl_mask_update_range(mask);
+ fl_set_masked_key(&fnew->mkey, &fnew->key, mask);
+
+ if (!fl_mask_fits_tmplt(tp->chain->tmplt_priv, mask)) {
+ NL_SET_ERR_MSG_MOD(extack, "Mask does not fit the template");
+ err = -EINVAL;
+ goto unbind_filter;
+ }
+
+ /* Enable tc skb extension if filter matches on data extracted from
+ * this extension.
+ */
+ if (fl_needs_tc_skb_ext(&mask->key)) {
+ fnew->needs_tc_skb_ext = 1;
+ tc_skb_ext_tc_enable();
+ }
+
err = fl_check_assign_mask(head, fnew, fold, mask);
if (err)
- goto errout_idr;
+ goto unbind_filter;
err = fl_ht_insert_unique(fnew, fold, &in_ht);
if (err)
@@ -2434,6 +2420,16 @@ errout_hw:
fnew->mask->filter_ht_params);
errout_mask:
fl_mask_put(head, fnew->mask);
+
+unbind_filter:
+ if (bound_to_filter) {
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_lock();
+ tcf_unbind_filter(tp, &fnew->res);
+ if (flags & TCA_ACT_FLAGS_NO_RTNL)
+ rtnl_unlock();
+ }
+
errout_idr:
if (!fold)
idr_remove(&head->handle_idr, fnew->handle);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 8641f8059317..c49d6af0e048 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -267,7 +267,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
return -ENOBUFS;
fnew->id = f->id;
- fnew->res = f->res;
fnew->ifindex = f->ifindex;
fnew->tp = f->tp;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index fa3bbd187eb9..c4ed11df6254 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -159,26 +159,6 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
[TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
};
-static int mall_set_parms(struct net *net, struct tcf_proto *tp,
- struct cls_mall_head *head,
- unsigned long base, struct nlattr **tb,
- struct nlattr *est, u32 flags, u32 fl_flags,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags,
- fl_flags, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_MATCHALL_CLASSID]) {
- head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
- tcf_bind_filter(tp, &head->res, base);
- }
- return 0;
-}
-
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
@@ -187,6 +167,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
+ bool bound_to_filter = false;
struct cls_mall_head *new;
u32 userflags = 0;
int err;
@@ -226,11 +207,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
goto err_alloc_percpu;
}
- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE],
- flags, new->flags, extack);
- if (err)
+ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
+ &new->exts, flags, new->flags, extack);
+ if (err < 0)
goto err_set_parms;
+ if (tb[TCA_MATCHALL_CLASSID]) {
+ new->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+ tcf_bind_filter(tp, &new->res, base);
+ bound_to_filter = true;
+ }
+
if (!tc_skip_hw(new->flags)) {
err = mall_replace_hw_filter(tp, new, (unsigned long)new,
extack);
@@ -246,6 +233,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
return 0;
err_replace_hw_filter:
+ if (bound_to_filter)
+ tcf_unbind_filter(tp, &new->res);
err_set_parms:
free_percpu(new->pf);
err_alloc_percpu:
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d0c53724d3e8..1e20bbd687f1 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (fold) {
f->id = fold->id;
f->iif = fold->iif;
- f->res = fold->res;
f->handle = fold->handle;
f->tp = fold->tp;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index d15d50de7980..da4c179a4d41 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -712,8 +712,23 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
[TCA_U32_FLAGS] = { .type = NLA_U32 },
};
+static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
+ struct nlattr **tb)
+{
+ if (tb[TCA_U32_CLASSID])
+ tcf_unbind_filter(tp, &n->res);
+}
+
+static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
+ unsigned long base, struct nlattr **tb)
+{
+ if (tb[TCA_U32_CLASSID]) {
+ n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
+ tcf_bind_filter(tp, &n->res, base);
+ }
+}
+
static int u32_set_parms(struct net *net, struct tcf_proto *tp,
- unsigned long base,
struct tc_u_knode *n, struct nlattr **tb,
struct nlattr *est, u32 flags, u32 fl_flags,
struct netlink_ext_ack *extack)
@@ -760,10 +775,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
if (ht_old)
ht_old->refcnt--;
}
- if (tb[TCA_U32_CLASSID]) {
- n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
- tcf_bind_filter(tp, &n->res, base);
- }
if (ifindex >= 0)
n->ifindex = ifindex;
@@ -815,7 +826,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
new->ifindex = n->ifindex;
new->fshift = n->fshift;
- new->res = n->res;
new->flags = n->flags;
RCU_INIT_POINTER(new->ht_down, ht);
@@ -903,17 +913,27 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOMEM;
- err = u32_set_parms(net, tp, base, new, tb,
- tca[TCA_RATE], flags, new->flags,
- extack);
+ err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE],
+ flags, new->flags, extack);
if (err) {
__u32_destroy_key(new);
return err;
}
+ u32_bind_filter(tp, new, base, tb);
+
err = u32_replace_hw_knode(tp, new, flags, extack);
if (err) {
+ u32_unbind_filter(tp, new, tb);
+
+ if (tb[TCA_U32_LINK]) {
+ struct tc_u_hnode *ht_old;
+
+ ht_old = rtnl_dereference(n->ht_down);
+ if (ht_old)
+ ht_old->refcnt++;
+ }
__u32_destroy_key(new);
return err;
}
@@ -1003,18 +1023,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
+ /* At this point, we need to derive the new handle that will be used to
+ * uniquely map the identity of this table match entry. The
+ * identity of the entry that we need to construct is 32 bits made of:
+ * htid(12b):bucketid(8b):node/entryid(12b)
+ *
+ * At this point _we have the table(ht)_ in which we will insert this
+ * entry. We carry the table's id in variable "htid".
+ * Note that earlier code picked the ht selection either by a) the user
+ * providing the htid specified via TCA_U32_HASH attribute or b) when
+ * no such attribute is passed then the root ht, is default to at ID
+ * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
+ * If OTOH the user passed us the htid, they may also pass a bucketid of
+ * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is
+ * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be
+ * passed via the htid, so even if it was non-zero it will be ignored.
+ *
+ * We may also have a handle, if the user passed one. The handle also
+ * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
+ * Rule: the bucketid on the handle is ignored even if one was passed;
+ * rather the value on "htid" is always assumed to be the bucketid.
+ */
if (handle) {
+ /* Rule: The htid from handle and tableid from htid must match */
if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
return -EINVAL;
}
- handle = htid | TC_U32_NODE(handle);
- err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
- GFP_KERNEL);
- if (err)
- return err;
- } else
+ /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
+ * need to finalize the table entry identification with the last
+ * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for
+ * entries. Rule: nodeid of 0 is reserved only for tables(see
+ * earlier code which processes TC_U32_DIVISOR attribute).
+ * Rule: The nodeid can only be derived from the handle (and not
+ * htid).
+ * Rule: if the handle specified zero for the node id example
+ * 0x60000000, then pick a new nodeid from the pool of IDs
+ * this hash table has been allocating from.
+ * If OTOH it is specified (i.e for example the user passed a
+ * handle such as 0x60000123), then we use it generate our final
+ * handle which is used to uniquely identify the match entry.
+ */
+ if (!TC_U32_NODE(handle)) {
+ handle = gen_new_kid(ht, htid);
+ } else {
+ handle = htid | TC_U32_NODE(handle);
+ err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
+ handle, GFP_KERNEL);
+ if (err)
+ return err;
+ }
+ } else {
+ /* The user did not give us a handle; lets just generate one
+ * from the table's pool of nodeids.
+ */
handle = gen_new_kid(ht, htid);
+ }
if (tb[TCA_U32_SEL] == NULL) {
NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
@@ -1074,15 +1138,18 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
}
#endif
- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE],
+ err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE],
flags, n->flags, extack);
+
+ u32_bind_filter(tp, n, base, tb);
+
if (err == 0) {
struct tc_u_knode __rcu **ins;
struct tc_u_knode *pins;
err = u32_replace_hw_knode(tp, n, flags, extack);
if (err)
- goto errhw;
+ goto errunbind;
if (!tc_in_hw(n->flags))
n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
@@ -1100,7 +1167,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return 0;
}
-errhw:
+errunbind:
+ u32_unbind_filter(tp, n, tb);
+
#ifdef CONFIG_CLS_U32_MARK
free_percpu(n->pcpu_success);
#endif
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index af85a73c4c54..6fdba069f6bf 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -568,7 +568,7 @@ META_COLLECTOR(int_sk_rcvtimeo)
*err = -1;
return;
}
- dst->value = sk->sk_rcvtimeo / HZ;
+ dst->value = READ_ONCE(sk->sk_rcvtimeo) / HZ;
}
META_COLLECTOR(int_sk_sndtimeo)
@@ -579,7 +579,7 @@ META_COLLECTOR(int_sk_sndtimeo)
*err = -1;
return;
}
- dst->value = sk->sk_sndtimeo / HZ;
+ dst->value = READ_ONCE(sk->sk_sndtimeo) / HZ;
}
META_COLLECTOR(int_sk_sendmsg_off)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index aa6b1fe65151..e9eaf637220e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1547,10 +1547,28 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
return 0;
}
+static bool req_create_or_replace(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_REPLACE);
+}
+
+static bool req_create_exclusive(struct nlmsghdr *n)
+{
+ return (n->nlmsg_flags & NLM_F_CREATE &&
+ n->nlmsg_flags & NLM_F_EXCL);
+}
+
+static bool req_change(struct nlmsghdr *n)
+{
+ return (!(n->nlmsg_flags & NLM_F_CREATE) &&
+ !(n->nlmsg_flags & NLM_F_REPLACE) &&
+ !(n->nlmsg_flags & NLM_F_EXCL));
+}
+
/*
* Create/change qdisc.
*/
-
static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
@@ -1644,27 +1662,35 @@ replay:
*
* We know, that some child q is already
* attached to this parent and have choice:
- * either to change it or to create/graft new one.
+ * 1) change it or 2) create/graft new one.
+ * If the requested qdisc kind is different
+ * than the existing one, then we choose graft.
+ * If they are the same then this is "change"
+ * operation - just let it fallthrough..
*
* 1. We are allowed to create/graft only
- * if CREATE and REPLACE flags are set.
+ * if the request is explicitly stating
+ * "please create if it doesn't exist".
*
- * 2. If EXCL is set, requestor wanted to say,
- * that qdisc tcm_handle is not expected
+ * 2. If the request is to exclusive create
+ * then the qdisc tcm_handle is not expected
* to exist, so that we choose create/graft too.
*
* 3. The last case is when no flags are set.
+ * This will happen when for example tc
+ * utility issues a "change" command.
* Alas, it is sort of hole in API, we
* cannot decide what to do unambiguously.
- * For now we select create/graft, if
- * user gave KIND, which does not match existing.
+ * For now we select create/graft.
*/
- if ((n->nlmsg_flags & NLM_F_CREATE) &&
- (n->nlmsg_flags & NLM_F_REPLACE) &&
- ((n->nlmsg_flags & NLM_F_EXCL) ||
- (tca[TCA_KIND] &&
- nla_strcmp(tca[TCA_KIND], q->ops->id))))
- goto create_n_graft;
+ if (tca[TCA_KIND] &&
+ nla_strcmp(tca[TCA_KIND], q->ops->id)) {
+ if (req_create_or_replace(n) ||
+ req_create_exclusive(n))
+ goto create_n_graft;
+ else if (req_change(n))
+ goto create_n_graft2;
+ }
}
}
} else {
@@ -1698,6 +1724,7 @@ create_n_graft:
NL_SET_ERR_MSG(extack, "Qdisc not found. To create specify NLM_F_CREATE flag");
return -ENOENT;
}
+create_n_graft2:
if (clid == TC_H_INGRESS) {
if (dev_ingress_queue(dev)) {
q = qdisc_create(dev, dev_ingress_queue(dev),
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index ab69ff7577fc..793009f445c0 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -290,6 +290,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
"Attribute type expected to be TCA_MQPRIO_MIN_RATE64");
return -EINVAL;
}
+
+ if (nla_len(attr) != sizeof(u64)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute TCA_MQPRIO_MIN_RATE64 expected to have 8 bytes length");
+ return -EINVAL;
+ }
+
if (i >= qopt->num_tc)
break;
priv->min_rate[i] = nla_get_u64(attr);
@@ -312,6 +319,13 @@ static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
"Attribute type expected to be TCA_MQPRIO_MAX_RATE64");
return -EINVAL;
}
+
+ if (nla_len(attr) != sizeof(u64)) {
+ NL_SET_ERR_MSG_ATTR(extack, attr,
+ "Attribute TCA_MQPRIO_MAX_RATE64 expected to have 8 bytes length");
+ return -EINVAL;
+ }
+
if (i >= qopt->num_tc)
break;
priv->max_rate[i] = nla_get_u64(attr);
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 717ae51d94a0..8c9cfff7fd05 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -1015,6 +1015,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
TC_FP_PREEMPTIBLE),
};
+static struct netlink_range_validation_signed taprio_cycle_time_range = {
+ .min = 0,
+ .max = INT_MAX,
+};
+
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_PRIOMAP] = {
.len = sizeof(struct tc_mqprio_qopt)
@@ -1023,7 +1028,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
[TCA_TAPRIO_ATTR_SCHED_BASE_TIME] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY] = { .type = NLA_NESTED },
[TCA_TAPRIO_ATTR_SCHED_CLOCKID] = { .type = NLA_S32 },
- [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] = { .type = NLA_S64 },
+ [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME] =
+ NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
[TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
[TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 },
[TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 },
@@ -1159,6 +1165,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
return -EINVAL;
}
+ if (cycle < 0 || cycle > INT_MAX) {
+ NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
+ return -EINVAL;
+ }
+
new->cycle_time = cycle;
}
@@ -1347,7 +1358,7 @@ static void setup_txtime(struct taprio_sched *q,
struct sched_gate_list *sched, ktime_t base)
{
struct sched_entry *entry;
- u32 interval = 0;
+ u64 interval = 0;
list_for_each_entry(entry, &sched->entries, list) {
entry->next_txtime = ktime_add_ns(base, interval);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9388d98aebc0..76f1bce49a8e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -99,7 +99,7 @@ struct percpu_counter sctp_sockets_allocated;
static void sctp_enter_memory_pressure(struct sock *sk)
{
- sctp_memory_pressure = 1;
+ WRITE_ONCE(sctp_memory_pressure, 1);
}
@@ -9479,7 +9479,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->inet_rcv_saddr = inet->inet_rcv_saddr;
newinet->inet_dport = htons(asoc->peer.port);
newinet->pmtudisc = inet->pmtudisc;
- newinet->inet_id = get_random_u16();
+ atomic_set(&newinet->inet_id, get_random_u16());
newinet->uc_ttl = inet->uc_ttl;
newinet->mc_loop = 1;
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index a7f887d91d89..f5834af5fad5 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -378,8 +378,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
sk->sk_state = SMC_INIT;
sk->sk_destruct = smc_destruct;
sk->sk_protocol = protocol;
- WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem));
- WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem));
+ WRITE_ONCE(sk->sk_sndbuf, 2 * READ_ONCE(net->smc.sysctl_wmem));
+ WRITE_ONCE(sk->sk_rcvbuf, 2 * READ_ONCE(net->smc.sysctl_rmem));
smc = smc_sk(sk);
INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work);
INIT_WORK(&smc->connect_work, smc_connect_work);
@@ -436,24 +436,9 @@ out:
return rc;
}
-static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
- unsigned long mask)
-{
- /* options we don't get control via setsockopt for */
- nsk->sk_type = osk->sk_type;
- nsk->sk_sndbuf = osk->sk_sndbuf;
- nsk->sk_rcvbuf = osk->sk_rcvbuf;
- nsk->sk_sndtimeo = osk->sk_sndtimeo;
- nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
- nsk->sk_mark = osk->sk_mark;
- nsk->sk_priority = osk->sk_priority;
- nsk->sk_rcvlowat = osk->sk_rcvlowat;
- nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
- nsk->sk_err = osk->sk_err;
-
- nsk->sk_flags &= ~mask;
- nsk->sk_flags |= osk->sk_flags & mask;
-}
+/* copy only relevant settings and flags of SOL_SOCKET level from smc to
+ * clc socket (since smc is not called for these options from net/core)
+ */
#define SK_FLAGS_SMC_TO_CLC ((1UL << SOCK_URGINLINE) | \
(1UL << SOCK_KEEPOPEN) | \
@@ -470,9 +455,55 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
(1UL << SOCK_NOFCS) | \
(1UL << SOCK_FILTER_LOCKED) | \
(1UL << SOCK_TSTAMP_NEW))
-/* copy only relevant settings and flags of SOL_SOCKET level from smc to
- * clc socket (since smc is not called for these options from net/core)
- */
+
+/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */
+static void smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ struct net *nnet = sock_net(nsk);
+
+ nsk->sk_userlocks = osk->sk_userlocks;
+ if (osk->sk_userlocks & SOCK_SNDBUF_LOCK) {
+ nsk->sk_sndbuf = osk->sk_sndbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_sndbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_wmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_sndbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_wmem));
+ }
+ if (osk->sk_userlocks & SOCK_RCVBUF_LOCK) {
+ nsk->sk_rcvbuf = osk->sk_rcvbuf;
+ } else {
+ if (mask == SK_FLAGS_SMC_TO_CLC)
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ READ_ONCE(nnet->ipv4.sysctl_tcp_rmem[1]));
+ else
+ WRITE_ONCE(nsk->sk_rcvbuf,
+ 2 * READ_ONCE(nnet->smc.sysctl_rmem));
+ }
+}
+
+static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
+ unsigned long mask)
+{
+ /* options we don't get control via setsockopt for */
+ nsk->sk_type = osk->sk_type;
+ nsk->sk_sndtimeo = osk->sk_sndtimeo;
+ nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
+ nsk->sk_mark = READ_ONCE(osk->sk_mark);
+ nsk->sk_priority = osk->sk_priority;
+ nsk->sk_rcvlowat = osk->sk_rcvlowat;
+ nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
+ nsk->sk_err = osk->sk_err;
+
+ nsk->sk_flags &= ~mask;
+ nsk->sk_flags |= osk->sk_flags & mask;
+
+ smc_adjust_sock_bufsizes(nsk, osk, mask);
+}
+
static void smc_copy_sock_settings_to_clc(struct smc_sock *smc)
{
smc_copy_sock_settings(smc->clcsock->sk, &smc->sk, SK_FLAGS_SMC_TO_CLC);
@@ -2479,8 +2510,6 @@ static void smc_tcp_listen_work(struct work_struct *work)
sock_hold(lsk); /* sock_put in smc_listen_work */
INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
smc_copy_sock_settings_to_smc(new_smc);
- new_smc->sk.sk_sndbuf = lsmc->sk.sk_sndbuf;
- new_smc->sk.sk_rcvbuf = lsmc->sk.sk_rcvbuf;
sock_hold(&new_smc->sk); /* sock_put in passive closing */
if (!queue_work(smc_hs_wq, &new_smc->smc_listen_work))
sock_put(&new_smc->sk);
diff --git a/net/smc/smc.h b/net/smc/smc.h
index 2eeea4cdc718..1f2b912c43d1 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -161,7 +161,7 @@ struct smc_connection {
struct smc_buf_desc *sndbuf_desc; /* send buffer descriptor */
struct smc_buf_desc *rmb_desc; /* RMBE descriptor */
- int rmbe_size_short;/* compressed notation */
+ int rmbe_size_comp; /* compressed notation */
int rmbe_update_limit;
/* lower limit for consumer
* cursor update
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index b9b8b07aa702..c90d9e5dda54 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -1007,7 +1007,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->d0.gid =
conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
clc->d0.token = conn->rmb_desc->token;
- clc->d0.dmbe_size = conn->rmbe_size_short;
+ clc->d0.dmbe_size = conn->rmbe_size_comp;
clc->d0.dmbe_idx = 0;
memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE);
if (version == SMC_V1) {
@@ -1050,7 +1050,7 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu);
break;
}
- clc->r0.rmbe_size = conn->rmbe_size_short;
+ clc->r0.rmbe_size = conn->rmbe_size_comp;
clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ?
cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) :
cpu_to_be64((u64)sg_dma_address
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3f465faf2b68..6b78075404d7 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -2309,31 +2309,30 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_connection *conn = &smc->conn;
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
- int bufsize, bufsize_short;
+ int bufsize, bufsize_comp;
struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- int sk_buf_size;
if (is_rmb)
/* use socket recv buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_rcvbuf;
+ bufsize = smc->sk.sk_rcvbuf / 2;
else
/* use socket send buffer size (w/o overhead) as start value */
- sk_buf_size = smc->sk.sk_sndbuf;
+ bufsize = smc->sk.sk_sndbuf / 2;
- for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb);
- bufsize_short >= 0; bufsize_short--) {
+ for (bufsize_comp = smc_compress_bufsize(bufsize, is_smcd, is_rmb);
+ bufsize_comp >= 0; bufsize_comp--) {
if (is_rmb) {
lock = &lgr->rmbs_lock;
- buf_list = &lgr->rmbs[bufsize_short];
+ buf_list = &lgr->rmbs[bufsize_comp];
} else {
lock = &lgr->sndbufs_lock;
- buf_list = &lgr->sndbufs[bufsize_short];
+ buf_list = &lgr->sndbufs[bufsize_comp];
}
- bufsize = smc_uncompress_bufsize(bufsize_short);
+ bufsize = smc_uncompress_bufsize(bufsize_comp);
/* check for reusable slot in the link group */
- buf_desc = smc_buf_get_slot(bufsize_short, lock, buf_list);
+ buf_desc = smc_buf_get_slot(bufsize_comp, lock, buf_list);
if (buf_desc) {
buf_desc->is_dma_need_sync = 0;
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
@@ -2377,8 +2376,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
if (is_rmb) {
conn->rmb_desc = buf_desc;
- conn->rmbe_size_short = bufsize_short;
- smc->sk.sk_rcvbuf = bufsize;
+ conn->rmbe_size_comp = bufsize_comp;
+ smc->sk.sk_rcvbuf = bufsize * 2;
atomic_set(&conn->bytes_to_rcv, 0);
conn->rmbe_update_limit =
smc_rmb_wnd_update_limit(buf_desc->len);
@@ -2386,7 +2385,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */
} else {
conn->sndbuf_desc = buf_desc;
- smc->sk.sk_sndbuf = bufsize;
+ smc->sk.sk_sndbuf = bufsize * 2;
atomic_set(&conn->sndbuf_space, bufsize);
}
return 0;
diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c
index b6f79fabb9d3..0b2a957ca5f5 100644
--- a/net/smc/smc_sysctl.c
+++ b/net/smc/smc_sysctl.c
@@ -21,6 +21,10 @@
static int min_sndbuf = SMC_BUF_MIN_SIZE;
static int min_rcvbuf = SMC_BUF_MIN_SIZE;
+static int max_sndbuf = INT_MAX / 2;
+static int max_rcvbuf = INT_MAX / 2;
+static const int net_smc_wmem_init = (64 * 1024);
+static const int net_smc_rmem_init = (64 * 1024);
static struct ctl_table smc_table[] = {
{
@@ -53,6 +57,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_sndbuf,
+ .extra2 = &max_sndbuf,
},
{
.procname = "rmem",
@@ -61,6 +66,7 @@ static struct ctl_table smc_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &min_rcvbuf,
+ .extra2 = &max_rcvbuf,
},
{ }
};
@@ -88,8 +94,8 @@ int __net_init smc_sysctl_net_init(struct net *net)
net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE;
net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS;
net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME;
- WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]));
- WRITE_ONCE(net->smc.sysctl_rmem, READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]));
+ WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init);
+ WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init);
return 0;
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 0b6034fab9ab..f420d8457345 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -472,7 +472,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
return NULL;
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
switch (mode & S_IFMT) {
case S_IFDIR:
inode->i_fop = &simple_dir_operations;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e43f26382411..2eb8df44f894 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1244,6 +1244,9 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
if (ret != head->iov_len)
goto out;
+ if (xdr_buf_pagecount(xdr))
+ xdr->bvec[0].bv_offset = offset_in_page(xdr->page_base);
+
msg.msg_flags = MSG_SPLICE_PAGES;
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec,
xdr_buf_pagecount(xdr), xdr->page_len);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index b098fde373ab..28c0771c4e8c 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -935,9 +935,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
if (!rep->rr_rdmabuf)
goto out_free;
- if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
- goto out_free_regbuf;
-
rep->rr_cid.ci_completion_id =
atomic_inc_return(&r_xprt->rx_ep->re_completion_ids);
@@ -956,8 +953,6 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
spin_unlock(&buf->rb_lock);
return rep;
-out_free_regbuf:
- rpcrdma_regbuf_free(rep->rr_rdmabuf);
out_free:
kfree(rep);
out:
@@ -1363,6 +1358,10 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
rep = rpcrdma_rep_create(r_xprt, temp);
if (!rep)
break;
+ if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) {
+ rpcrdma_rep_put(buf, rep);
+ break;
+ }
rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id;
trace_xprtrdma_post_recv(rep);
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 577fa5af33ec..302fd749c424 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1960,7 +1960,8 @@ rcv:
skb_reset_network_header(*skb);
skb_pull(*skb, tipc_ehdr_size(ehdr));
- pskb_trim(*skb, (*skb)->len - aead->authsize);
+ if (pskb_trim(*skb, (*skb)->len - aead->authsize))
+ goto free_skb;
/* Validate TIPCv2 message */
if (unlikely(!tipc_msg_validate(skb))) {
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 5e000fde8067..a9c5b6594889 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -583,7 +583,7 @@ update:
n->capabilities, &n->bc_entry.inputq1,
&n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
pr_warn("Broadcast rcv link creation failed, no memory\n");
- kfree(n);
+ tipc_node_put(n);
n = NULL;
goto exit;
}
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index 2021fe557e50..529101eb20bd 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -52,6 +52,8 @@ static LIST_HEAD(tls_device_list);
static LIST_HEAD(tls_device_down_list);
static DEFINE_SPINLOCK(tls_device_lock);
+static struct page *dummy_page;
+
static void tls_device_free_ctx(struct tls_context *ctx)
{
if (ctx->tx_conf == TLS_HW) {
@@ -312,36 +314,33 @@ static int tls_push_record(struct sock *sk,
return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags);
}
-static int tls_device_record_close(struct sock *sk,
- struct tls_context *ctx,
- struct tls_record_info *record,
- struct page_frag *pfrag,
- unsigned char record_type)
+static void tls_device_record_close(struct sock *sk,
+ struct tls_context *ctx,
+ struct tls_record_info *record,
+ struct page_frag *pfrag,
+ unsigned char record_type)
{
struct tls_prot_info *prot = &ctx->prot_info;
- int ret;
+ struct page_frag dummy_tag_frag;
/* append tag
* device will fill in the tag, we just need to append a placeholder
* use socket memory to improve coalescing (re-using a single buffer
* increases frag count)
- * if we can't allocate memory now, steal some back from data
+ * if we can't allocate memory now use the dummy page
*/
- if (likely(skb_page_frag_refill(prot->tag_size, pfrag,
- sk->sk_allocation))) {
- ret = 0;
- tls_append_frag(record, pfrag, prot->tag_size);
- } else {
- ret = prot->tag_size;
- if (record->len <= prot->overhead_size)
- return -ENOMEM;
+ if (unlikely(pfrag->size - pfrag->offset < prot->tag_size) &&
+ !skb_page_frag_refill(prot->tag_size, pfrag, sk->sk_allocation)) {
+ dummy_tag_frag.page = dummy_page;
+ dummy_tag_frag.offset = 0;
+ pfrag = &dummy_tag_frag;
}
+ tls_append_frag(record, pfrag, prot->tag_size);
/* fill prepend */
tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]),
record->len - prot->overhead_size,
record_type);
- return ret;
}
static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx,
@@ -541,18 +540,8 @@ last_record:
if (done || record->len >= max_open_record_len ||
(record->num_frags >= MAX_SKB_FRAGS - 1)) {
- rc = tls_device_record_close(sk, tls_ctx, record,
- pfrag, record_type);
- if (rc) {
- if (rc > 0) {
- size += rc;
- } else {
- size = orig_size;
- destroy_record(record);
- ctx->open_record = NULL;
- break;
- }
- }
+ tls_device_record_close(sk, tls_ctx, record,
+ pfrag, record_type);
rc = tls_push_record(sk,
tls_ctx,
@@ -1450,14 +1439,26 @@ int __init tls_device_init(void)
{
int err;
- destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
- if (!destruct_wq)
+ dummy_page = alloc_page(GFP_KERNEL);
+ if (!dummy_page)
return -ENOMEM;
+ destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+ if (!destruct_wq) {
+ err = -ENOMEM;
+ goto err_free_dummy;
+ }
+
err = register_netdevice_notifier(&tls_dev_notifier);
if (err)
- destroy_workqueue(destruct_wq);
+ goto err_destroy_wq;
+ return 0;
+
+err_destroy_wq:
+ destroy_workqueue(destruct_wq);
+err_free_dummy:
+ put_page(dummy_page);
return err;
}
@@ -1466,4 +1467,5 @@ void __exit tls_device_cleanup(void)
unregister_netdevice_notifier(&tls_dev_notifier);
destroy_workqueue(destruct_wq);
clean_acked_data_flush();
+ put_page(dummy_page);
}
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index b6896126bb92..4a8ee2f6badb 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -139,9 +139,6 @@ int tls_push_sg(struct sock *sk,
ctx->splicing_pages = true;
while (1) {
- if (sg_is_last(sg))
- msg.msg_flags = flags;
-
/* is sending application-limited? */
tcp_rate_check_app_limited(sk);
p = sg_page(sg);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 123b35ddfd71..86930a8ed012 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -289,17 +289,29 @@ static int unix_validate_addr(struct sockaddr_un *sunaddr, int addr_len)
return 0;
}
-static void unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
+static int unix_mkname_bsd(struct sockaddr_un *sunaddr, int addr_len)
{
+ struct sockaddr_storage *addr = (struct sockaddr_storage *)sunaddr;
+ short offset = offsetof(struct sockaddr_storage, __data);
+
+ BUILD_BUG_ON(offset != offsetof(struct sockaddr_un, sun_path));
+
/* This may look like an off by one error but it is a bit more
* subtle. 108 is the longest valid AF_UNIX path for a binding.
* sun_path[108] doesn't as such exist. However in kernel space
* we are guaranteed that it is a valid memory location in our
* kernel address buffer because syscall functions always pass
* a pointer of struct sockaddr_storage which has a bigger buffer
- * than 108.
+ * than 108. Also, we must terminate sun_path for strlen() in
+ * getname_kernel().
+ */
+ addr->__data[addr_len - offset] = 0;
+
+ /* Don't pass sunaddr->sun_path to strlen(). Otherwise, 108 will
+ * cause panic if CONFIG_FORTIFY_SOURCE=y. Let __fortify_strlen()
+ * know the actual buffer.
*/
- ((char *)sunaddr)[addr_len] = 0;
+ return strlen(addr->__data) + offset + 1;
}
static void __unix_remove_socket(struct sock *sk)
@@ -778,7 +790,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
if (mutex_lock_interruptible(&u->iolock))
return -EINTR;
- sk->sk_peek_off = val;
+ WRITE_ONCE(sk->sk_peek_off, val);
mutex_unlock(&u->iolock);
return 0;
@@ -1208,10 +1220,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
struct path parent;
int err;
- unix_mkname_bsd(sunaddr, addr_len);
- addr_len = strlen(sunaddr->sun_path) +
- offsetof(struct sockaddr_un, sun_path) + 1;
-
+ addr_len = unix_mkname_bsd(sunaddr, addr_len);
addr = unix_create_addr(sunaddr, addr_len);
if (!addr)
return -ENOMEM;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0da2e6a2a7ea..8bcf8e293308 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5430,8 +5430,11 @@ nl80211_parse_mbssid_elems(struct wiphy *wiphy, struct nlattr *attrs)
if (!wiphy->mbssid_max_interfaces)
return ERR_PTR(-EINVAL);
- nla_for_each_nested(nl_elems, attrs, rem_elems)
+ nla_for_each_nested(nl_elems, attrs, rem_elems) {
+ if (num_elems >= 255)
+ return ERR_PTR(-EINVAL);
num_elems++;
+ }
elems = kzalloc(struct_size(elems, elem, num_elems), GFP_KERNEL);
if (!elems)
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 8bf00caf5d29..0cf1ce7b6934 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -657,7 +657,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
if (ret)
- return ret;
+ return 0;
for_each_element_id(elem, WLAN_EID_REDUCED_NEIGHBOR_REPORT,
ies->data, ies->len) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 31dca4ecb2c5..10ea85c03147 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -505,7 +505,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
skb->dev = dev;
skb->priority = xs->sk.sk_priority;
- skb->mark = xs->sk.sk_mark;
+ skb->mark = READ_ONCE(xs->sk.sk_mark);
skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
skb->destructor = xsk_destruct_skb;
@@ -994,6 +994,7 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
err = xp_alloc_tx_descs(xs->pool, xs);
if (err) {
xp_put_pool(xs->pool);
+ xs->pool = NULL;
sockfd_put(sock);
goto out_unlock;
}
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index 8cbf45a8bcdc..655fe4ff8621 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -108,7 +108,7 @@ static const struct nla_policy compat_policy[XFRMA_MAX+1] = {
[XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
[XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) },
[XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) },
- [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) },
+ [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) },
[XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) },
[XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) },
[XFRMA_REPLAY_THRESH] = { .type = NLA_U32 },
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 815b38080401..d5ee96789d4b 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -180,6 +180,8 @@ static int xfrm4_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
int optlen = 0;
int err = -EINVAL;
+ skb->protocol = htons(ETH_P_IP);
+
if (unlikely(XFRM_MODE_SKB_CB(skb)->protocol == IPPROTO_BEETPH)) {
struct ip_beet_phdr *ph;
int phlen;
@@ -232,6 +234,8 @@ static int xfrm4_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
+ skb->protocol = htons(ETH_P_IP);
+
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
goto out;
@@ -267,6 +271,8 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
{
int err = -EINVAL;
+ skb->protocol = htons(ETH_P_IPV6);
+
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
@@ -296,6 +302,8 @@ static int xfrm6_remove_beet_encap(struct xfrm_state *x, struct sk_buff *skb)
int size = sizeof(struct ipv6hdr);
int err;
+ skb->protocol = htons(ETH_P_IPV6);
+
err = skb_cow_head(skb, size + skb->mac_len);
if (err)
goto out;
@@ -346,6 +354,7 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
return xfrm6_remove_tunnel_encap(x, skb);
break;
}
+ return -EINVAL;
}
WARN_ON_ONCE(1);
@@ -366,19 +375,6 @@ static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
return -EAFNOSUPPORT;
}
- switch (XFRM_MODE_SKB_CB(skb)->protocol) {
- case IPPROTO_IPIP:
- case IPPROTO_BEETPH:
- skb->protocol = htons(ETH_P_IP);
- break;
- case IPPROTO_IPV6:
- skb->protocol = htons(ETH_P_IPV6);
- break;
- default:
- WARN_ON_ONCE(1);
- break;
- }
-
return xfrm_inner_mode_encap_remove(x, skb);
}
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index a3319965470a..b86474084690 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -537,8 +537,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IPV6):
- xfrm_decode_session(skb, &fl, AF_INET6);
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET6);
if (!dst) {
fl.u.ip6.flowi6_oif = dev->ifindex;
fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
@@ -552,8 +552,8 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
}
break;
case htons(ETH_P_IP):
- xfrm_decode_session(skb, &fl, AF_INET);
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+ xfrm_decode_session(skb, &fl, AF_INET);
if (!dst) {
struct rtable *rt;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e7617c9959c3..d6b405782b63 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2250,7 +2250,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
match = xfrm_selector_match(&pol->selector, fl, family);
if (match) {
- if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
+ if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v ||
pol->if_id != if_id) {
pol = NULL;
goto out;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 49e63eea841d..bda5327bf34d 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1324,12 +1324,8 @@ found:
struct xfrm_dev_offload *xso = &x->xso;
if (xso->type == XFRM_DEV_OFFLOAD_PACKET) {
- xso->dev->xfrmdev_ops->xdo_dev_state_delete(x);
- xso->dir = 0;
- netdev_put(xso->dev, &xso->dev_tracker);
- xso->dev = NULL;
- xso->real_dev = NULL;
- xso->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
+ xfrm_dev_state_delete(x);
+ xfrm_dev_state_free(x);
}
#endif
x->km.state = XFRM_STATE_DEAD;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index c34a2a06ca94..ad01997c3aa9 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -628,7 +628,7 @@ static void xfrm_update_ae_params(struct xfrm_state *x, struct nlattr **attrs,
struct nlattr *rt = attrs[XFRMA_REPLAY_THRESH];
struct nlattr *mt = attrs[XFRMA_MTIMER_THRESH];
- if (re) {
+ if (re && x->replay_esn && x->preplay_esn) {
struct xfrm_replay_state_esn *replay_esn;
replay_esn = nla_data(re);
memcpy(x->replay_esn, replay_esn,
@@ -1267,6 +1267,15 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
sizeof(*filter), GFP_KERNEL);
if (filter == NULL)
return -ENOMEM;
+
+ /* see addr_match(), (prefix length >> 5) << 2
+ * will be used to compare xfrm_address_t
+ */
+ if (filter->splen > (sizeof(xfrm_address_t) << 3) ||
+ filter->dplen > (sizeof(xfrm_address_t) << 3)) {
+ kfree(filter);
+ return -EINVAL;
+ }
}
if (attrs[XFRMA_PROTO])
@@ -2336,6 +2345,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh,
NETLINK_CB(skb).portid);
}
} else {
+ xfrm_dev_policy_delete(xp);
xfrm_audit_policy_delete(xp, err ? 0 : 1, true);
if (err != 0)
@@ -3015,7 +3025,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_ALG_COMP] = { .len = sizeof(struct xfrm_algo) },
[XFRMA_ENCAP] = { .len = sizeof(struct xfrm_encap_tmpl) },
[XFRMA_TMPL] = { .len = sizeof(struct xfrm_user_tmpl) },
- [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_sec_ctx) },
+ [XFRMA_SEC_CTX] = { .len = sizeof(struct xfrm_user_sec_ctx) },
[XFRMA_LTIME_VAL] = { .len = sizeof(struct xfrm_lifetime_cur) },
[XFRMA_REPLAY_VAL] = { .len = sizeof(struct xfrm_replay_state) },
[XFRMA_REPLAY_THRESH] = { .type = NLA_U32 },
@@ -3035,6 +3045,7 @@ const struct nla_policy xfrma_policy[XFRMA_MAX+1] = {
[XFRMA_SET_MARK] = { .type = NLA_U32 },
[XFRMA_SET_MARK_MASK] = { .type = NLA_U32 },
[XFRMA_IF_ID] = { .type = NLA_U32 },
+ [XFRMA_MTIMER_THRESH] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(xfrma_policy);
diff --git a/rust/Makefile b/rust/Makefile
index 7c9d9f11aec5..4124bfa01798 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -257,7 +257,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \
-fno-partial-inlining -fplugin-arg-arm_ssp_per_task_plugin-% \
-fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \
-fzero-call-used-regs=% -fno-stack-clash-protection \
- -fno-inline-functions-called-once \
+ -fno-inline-functions-called-once -fsanitize=bounds-strict \
--param=% --param asan-%
# Derived from `scripts/Makefile.clang`.
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 3e601ce2548d..058954961bfc 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -13,5 +13,6 @@
#include <linux/sched.h>
/* `bindgen` gets confused at certain things. */
+const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO;
diff --git a/rust/kernel/allocator.rs b/rust/kernel/allocator.rs
index 397a3dd57a9b..9363b527be66 100644
--- a/rust/kernel/allocator.rs
+++ b/rust/kernel/allocator.rs
@@ -9,6 +9,36 @@ use crate::bindings;
struct KernelAllocator;
+/// Calls `krealloc` with a proper size to alloc a new object aligned to `new_layout`'s alignment.
+///
+/// # Safety
+///
+/// - `ptr` can be either null or a pointer which has been allocated by this allocator.
+/// - `new_layout` must have a non-zero size.
+unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gfp_t) -> *mut u8 {
+ // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
+ let layout = new_layout.pad_to_align();
+
+ let mut size = layout.size();
+
+ if layout.align() > bindings::BINDINGS_ARCH_SLAB_MINALIGN {
+ // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
+ // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
+ // more information).
+ //
+ // Note that `layout.size()` (after padding) is guaranteed to be a multiple of
+ // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
+ size = size.next_power_of_two();
+ }
+
+ // SAFETY:
+ // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
+ // function safety requirement.
+ // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
+ // according to the function safety requirement) or a result from `next_power_of_two()`.
+ unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 }
+}
+
unsafe impl GlobalAlloc for KernelAllocator {
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
// `krealloc()` is used instead of `kmalloc()` because the latter is
@@ -30,10 +60,20 @@ static ALLOCATOR: KernelAllocator = KernelAllocator;
// to extract the object file that has them from the archive. For the moment,
// let's generate them ourselves instead.
//
+// Note: Although these are *safe* functions, they are called by the compiler
+// with parameters that obey the same `GlobalAlloc` function safety
+// requirements: size and align should form a valid layout, and size is
+// greater than 0.
+//
// Note that `#[no_mangle]` implies exported too, nowadays.
#[no_mangle]
-fn __rust_alloc(size: usize, _align: usize) -> *mut u8 {
- unsafe { bindings::krealloc(core::ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 }
+fn __rust_alloc(size: usize, align: usize) -> *mut u8 {
+ // SAFETY: See assumption above.
+ let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+
+ // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater
+ // than 0.
+ unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) }
}
#[no_mangle]
@@ -42,23 +82,27 @@ fn __rust_dealloc(ptr: *mut u8, _size: usize, _align: usize) {
}
#[no_mangle]
-fn __rust_realloc(ptr: *mut u8, _old_size: usize, _align: usize, new_size: usize) -> *mut u8 {
- unsafe {
- bindings::krealloc(
- ptr as *const core::ffi::c_void,
- new_size,
- bindings::GFP_KERNEL,
- ) as *mut u8
- }
+fn __rust_realloc(ptr: *mut u8, _old_size: usize, align: usize, new_size: usize) -> *mut u8 {
+ // SAFETY: See assumption above.
+ let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, align) };
+
+ // SAFETY: Per assumption above, `ptr` is allocated by `__rust_*` before, and the size of
+ // `new_layout` is greater than 0.
+ unsafe { krealloc_aligned(ptr, new_layout, bindings::GFP_KERNEL) }
}
#[no_mangle]
-fn __rust_alloc_zeroed(size: usize, _align: usize) -> *mut u8 {
+fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8 {
+ // SAFETY: See assumption above.
+ let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
+
+ // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater
+ // than 0.
unsafe {
- bindings::krealloc(
- core::ptr::null(),
- size,
+ krealloc_aligned(
+ ptr::null_mut(),
+ layout,
bindings::GFP_KERNEL | bindings::__GFP_ZERO,
- ) as *mut u8
+ )
}
}
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
index a89843cacaad..172f563976a9 100644
--- a/rust/kernel/sync/arc.rs
+++ b/rust/kernel/sync/arc.rs
@@ -243,8 +243,7 @@ impl<T: 'static> ForeignOwnable for Arc<T> {
let inner = NonNull::new(ptr as *mut ArcInner<T>).unwrap();
// SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive
- // for the lifetime of the returned value. Additionally, the safety requirements of
- // `ForeignOwnable::borrow_mut` ensure that no new mutable references are created.
+ // for the lifetime of the returned value.
unsafe { ArcBorrow::new(inner) }
}
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index 1e5380b16ed5..d479f8da8f38 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -35,34 +35,16 @@ pub trait ForeignOwnable: Sized {
///
/// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
/// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
- /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow_mut`]
- /// for this object must have been dropped.
unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> Self::Borrowed<'a>;
- /// Mutably borrows a foreign-owned object.
- ///
- /// # Safety
- ///
- /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
- /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
- /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and
- /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped.
- unsafe fn borrow_mut(ptr: *const core::ffi::c_void) -> ScopeGuard<Self, fn(Self)> {
- // SAFETY: The safety requirements ensure that `ptr` came from a previous call to
- // `into_foreign`.
- ScopeGuard::new_with_data(unsafe { Self::from_foreign(ptr) }, |d| {
- d.into_foreign();
- })
- }
-
/// Converts a foreign-owned object back to a Rust-owned one.
///
/// # Safety
///
/// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
/// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
- /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and
- /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped.
+ /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] for
+ /// this object must have been dropped.
unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self;
}
diff --git a/rust/macros/vtable.rs b/rust/macros/vtable.rs
index 34d5e7fb5768..ee06044fcd4f 100644
--- a/rust/macros/vtable.rs
+++ b/rust/macros/vtable.rs
@@ -74,6 +74,7 @@ pub(crate) fn vtable(_attr: TokenStream, ts: TokenStream) -> TokenStream {
const {gen_const_name}: bool = false;",
)
.unwrap();
+ consts.insert(gen_const_name);
}
} else {
const_items = "const USE_VTABLE_ATTR: () = ();".to_owned();
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index e5ed08098ff3..e2a6a69352df 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -105,7 +105,7 @@ asm (
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:"
-" bti c\n"
+" hint 34\n" // bti c
" sub sp, sp, #16\n"
" stp x9, x30, [sp]\n"
" bl my_direct_func1\n"
@@ -117,7 +117,7 @@ asm (
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:"
-" bti c\n"
+" hint 34\n" // bti c
" sub sp, sp, #16\n"
" stp x9, x30, [sp]\n"
" bl my_direct_func2\n"
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index 292cff2b3f5d..2e349834d63c 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -112,7 +112,7 @@ asm (
" .type my_tramp1, @function\n"
" .globl my_tramp1\n"
" my_tramp1:"
-" bti c\n"
+" hint 34\n" // bti c
" sub sp, sp, #32\n"
" stp x9, x30, [sp]\n"
" str x0, [sp, #16]\n"
@@ -127,7 +127,7 @@ asm (
" .type my_tramp2, @function\n"
" .globl my_tramp2\n"
" my_tramp2:"
-" bti c\n"
+" hint 34\n" // bti c
" sub sp, sp, #32\n"
" stp x9, x30, [sp]\n"
" str x0, [sp, #16]\n"
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index b4391e08c913..9243dbfe4d0c 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -75,7 +75,7 @@ asm (
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:"
-" bti c\n"
+" hint 34\n" // bti c
" sub sp, sp, #32\n"
" stp x9, x30, [sp]\n"
" str x0, [sp, #16]\n"
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index e9804c5307c0..e39c3563ae4e 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -81,7 +81,7 @@ asm (
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:"
-" bti c\n"
+" hint 34\n" // bti c
" sub sp, sp, #48\n"
" stp x9, x30, [sp]\n"
" stp x0, x1, [sp, #16]\n"
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index 20f4a7caa810..32c477da1e9a 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -72,7 +72,7 @@ asm (
" .type my_tramp, @function\n"
" .globl my_tramp\n"
" my_tramp:"
-" bti c\n"
+" hint 34\n" // bti c
" sub sp, sp, #32\n"
" stp x9, x30, [sp]\n"
" str x0, [sp, #16]\n"
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 6413342a03f4..82e3fb19fdaf 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -264,6 +264,9 @@ $(obj)/%.lst: $(src)/%.c FORCE
rust_allowed_features := new_uninit
+# `--out-dir` is required to avoid temporaries being created by `rustc` in the
+# current working directory, which may be not accessible in the out-of-tree
+# modules case.
rust_common_cmd = \
RUST_MODFILE=$(modfile) $(RUSTC_OR_CLIPPY) $(rust_flags) \
-Zallow-features=$(rust_allowed_features) \
@@ -272,7 +275,7 @@ rust_common_cmd = \
--extern alloc --extern kernel \
--crate-type rlib -L $(objtree)/rust/ \
--crate-name $(basename $(notdir $@)) \
- --emit=dep-info=$(depfile)
+ --out-dir $(dir $@) --emit=dep-info=$(depfile)
# `--emit=obj`, `--emit=asm` and `--emit=llvm-ir` imply a single codegen unit
# will be used. We explicitly request `-Ccodegen-units=1` in any case, and
diff --git a/scripts/Makefile.host b/scripts/Makefile.host
index 7aea9005e497..8f7f842b54f9 100644
--- a/scripts/Makefile.host
+++ b/scripts/Makefile.host
@@ -86,7 +86,11 @@ hostc_flags = -Wp,-MMD,$(depfile) \
hostcxx_flags = -Wp,-MMD,$(depfile) \
$(KBUILD_HOSTCXXFLAGS) $(HOST_EXTRACXXFLAGS) \
$(HOSTCXXFLAGS_$(target-stem).o)
-hostrust_flags = --emit=dep-info=$(depfile) \
+
+# `--out-dir` is required to avoid temporaries being created by `rustc` in the
+# current working directory, which may be not accessible in the out-of-tree
+# modules case.
+hostrust_flags = --out-dir $(dir $@) --emit=dep-info=$(depfile) \
$(KBUILD_HOSTRUSTFLAGS) $(HOST_EXTRARUSTFLAGS) \
$(HOSTRUSTFLAGS_$(target-stem))
diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py
index 15ba56527acd..a84cc5737c2c 100755
--- a/scripts/clang-tools/gen_compile_commands.py
+++ b/scripts/clang-tools/gen_compile_commands.py
@@ -19,7 +19,7 @@ _DEFAULT_OUTPUT = 'compile_commands.json'
_DEFAULT_LOG_LEVEL = 'WARNING'
_FILENAME_PATTERN = r'^\..*\.cmd$'
-_LINE_PATTERN = r'^savedcmd_[^ ]*\.o := (.* )([^ ]*\.c) *(;|$)'
+_LINE_PATTERN = r'^savedcmd_[^ ]*\.o := (.* )([^ ]*\.[cS]) *(;|$)'
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# The tools/ directory adopts a different build system, and produces .cmd
# files in a different format. Do not support it.
diff --git a/scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci b/scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci
deleted file mode 100644
index 7c312310547c..000000000000
--- a/scripts/coccinelle/api/debugfs/debugfs_simple_attr.cocci
+++ /dev/null
@@ -1,68 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/// Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE
-/// for debugfs files.
-///
-//# Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file()
-//# imposes some significant overhead as compared to
-//# DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe().
-//
-// Copyright (C): 2016 Nicolai Stange
-// Options: --no-includes
-//
-
-virtual context
-virtual patch
-virtual org
-virtual report
-
-@dsa@
-declarer name DEFINE_SIMPLE_ATTRIBUTE;
-identifier dsa_fops;
-expression dsa_get, dsa_set, dsa_fmt;
-position p;
-@@
-DEFINE_SIMPLE_ATTRIBUTE@p(dsa_fops, dsa_get, dsa_set, dsa_fmt);
-
-@dcf@
-expression name, mode, parent, data;
-identifier dsa.dsa_fops;
-@@
-debugfs_create_file(name, mode, parent, data, &dsa_fops)
-
-
-@context_dsa depends on context && dcf@
-declarer name DEFINE_DEBUGFS_ATTRIBUTE;
-identifier dsa.dsa_fops;
-expression dsa.dsa_get, dsa.dsa_set, dsa.dsa_fmt;
-@@
-* DEFINE_SIMPLE_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
-
-
-@patch_dcf depends on patch expression@
-expression name, mode, parent, data;
-identifier dsa.dsa_fops;
-@@
-- debugfs_create_file(name, mode, parent, data, &dsa_fops)
-+ debugfs_create_file_unsafe(name, mode, parent, data, &dsa_fops)
-
-@patch_dsa depends on patch_dcf && patch@
-identifier dsa.dsa_fops;
-expression dsa.dsa_get, dsa.dsa_set, dsa.dsa_fmt;
-@@
-- DEFINE_SIMPLE_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
-+ DEFINE_DEBUGFS_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
-
-
-@script:python depends on org && dcf@
-fops << dsa.dsa_fops;
-p << dsa.p;
-@@
-msg="%s should be defined with DEFINE_DEBUGFS_ATTRIBUTE" % (fops)
-coccilib.org.print_todo(p[0], msg)
-
-@script:python depends on report && dcf@
-fops << dsa.dsa_fops;
-p << dsa.p;
-@@
-msg="WARNING: %s should be defined with DEFINE_DEBUGFS_ATTRIBUTE" % (fops)
-coccilib.report.print_report(p[0], msg)
diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 16c87938b316..653b92f6d4c8 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -129,6 +129,7 @@ static struct sym_entry *read_symbol(FILE *in, char **buf, size_t *buf_len)
ssize_t readlen;
struct sym_entry *sym;
+ errno = 0;
readlen = getline(buf, buf_len, in);
if (readlen < 0) {
if (errno) {
diff --git a/scripts/kconfig/gconf.c b/scripts/kconfig/gconf.c
index 17adabfd6e6b..9709aca3a30f 100644
--- a/scripts/kconfig/gconf.c
+++ b/scripts/kconfig/gconf.c
@@ -636,7 +636,7 @@ void on_introduction1_activate(GtkMenuItem * menuitem, gpointer user_data)
{
GtkWidget *dialog;
const gchar *intro_text =
- "Welcome to gkc, the GTK+ graphical configuration tool\n"
+ "Welcome to gconfig, the GTK+ graphical configuration tool.\n"
"For each option, a blank box indicates the feature is disabled, a\n"
"check indicates it is enabled, and a dot indicates that it is to\n"
"be compiled as a module. Clicking on the box will cycle through the three states.\n"
@@ -647,10 +647,7 @@ void on_introduction1_activate(GtkMenuItem * menuitem, gpointer user_data)
"Although there is no cross reference yet to help you figure out\n"
"what other options must be enabled to support the option you\n"
"are interested in, you can still view the help of a grayed-out\n"
- "option.\n"
- "\n"
- "Toggling Show Debug Info under the Options menu will show \n"
- "the dependencies, which you can then match by examining other options.";
+ "option.";
dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd),
GTK_DIALOG_DESTROY_WITH_PARENT,
@@ -667,7 +664,7 @@ void on_about1_activate(GtkMenuItem * menuitem, gpointer user_data)
{
GtkWidget *dialog;
const gchar *about_text =
- "gkc is copyright (c) 2002 Romain Lievin <roms@lpg.ticalc.org>.\n"
+ "gconfig is copyright (c) 2002 Romain Lievin <roms@lpg.ticalc.org>.\n"
"Based on the source code from Roman Zippel.\n";
dialog = gtk_message_dialog_new(GTK_WINDOW(main_wnd),
@@ -685,7 +682,7 @@ void on_license1_activate(GtkMenuItem * menuitem, gpointer user_data)
{
GtkWidget *dialog;
const gchar *license_text =
- "gkc is released under the terms of the GNU GPL v2.\n"
+ "gconfig is released under the terms of the GNU GPL v2.\n"
"For more information, please see the source code or\n"
"visit http://www.fsf.org/licenses/licenses.html\n";
diff --git a/scripts/spelling.txt b/scripts/spelling.txt
index fc7ba95e86a0..855c4863124b 100644
--- a/scripts/spelling.txt
+++ b/scripts/spelling.txt
@@ -1541,7 +1541,6 @@ temeprature||temperature
temorary||temporary
temproarily||temporarily
temperture||temperature
-thead||thread
theads||threads
therfore||therefore
thier||their
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index db7a51acf9db..bd6a910f6528 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -226,7 +226,7 @@ static int __aafs_setup_d_inode(struct inode *dir, struct dentry *dentry,
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_private = data;
if (S_ISDIR(mode)) {
inode->i_op = iops ? iops : &simple_dir_inode_operations;
@@ -1554,8 +1554,11 @@ void __aafs_profile_migrate_dents(struct aa_profile *old,
for (i = 0; i < AAFS_PROF_SIZEOF; i++) {
new->dents[i] = old->dents[i];
- if (new->dents[i])
- new->dents[i]->d_inode->i_mtime = current_time(new->dents[i]->d_inode);
+ if (new->dents[i]) {
+ struct inode *inode = d_inode(new->dents[i]);
+
+ inode->i_mtime = inode_set_ctime_current(inode);
+ }
old->dents[i] = NULL;
}
}
@@ -2540,7 +2543,7 @@ static int aa_mk_null_file(struct dentry *parent)
inode->i_ino = get_next_ino();
inode->i_mode = S_IFCHR | S_IRUGO | S_IWUGO;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO,
MKDEV(MEM_MAJOR, 3));
d_instantiate(dentry, inode);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 694fb7a09962..8b8846073e14 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -86,10 +86,13 @@ void __aa_loaddata_update(struct aa_loaddata *data, long revision)
data->revision = revision;
if ((data->dents[AAFS_LOADDATA_REVISION])) {
- d_inode(data->dents[AAFS_LOADDATA_DIR])->i_mtime =
- current_time(d_inode(data->dents[AAFS_LOADDATA_DIR]));
- d_inode(data->dents[AAFS_LOADDATA_REVISION])->i_mtime =
- current_time(d_inode(data->dents[AAFS_LOADDATA_REVISION]));
+ struct inode *inode;
+
+ inode = d_inode(data->dents[AAFS_LOADDATA_DIR]);
+ inode->i_mtime = inode_set_ctime_current(inode);
+
+ inode = d_inode(data->dents[AAFS_LOADDATA_REVISION]);
+ inode->i_mtime = inode_set_ctime_current(inode);
}
}
diff --git a/security/inode.c b/security/inode.c
index 6c326939750d..3aa75fffa8c9 100644
--- a/security/inode.c
+++ b/security/inode.c
@@ -145,7 +145,7 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode,
inode->i_ino = get_next_ino();
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+ inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode);
inode->i_private = data;
if (S_ISDIR(mode)) {
inode->i_op = &simple_dir_inode_operations;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index d54f73c558f7..19be69fa4d05 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -980,14 +980,19 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group)
ret = -EACCES;
down_write(&key->sem);
- if (!capable(CAP_SYS_ADMIN)) {
+ {
+ bool is_privileged_op = false;
+
/* only the sysadmin can chown a key to some other UID */
if (user != (uid_t) -1 && !uid_eq(key->uid, uid))
- goto error_put;
+ is_privileged_op = true;
/* only the sysadmin can set the key's GID to a group other
* than one of those that the current process subscribes to */
if (group != (gid_t) -1 && !gid_eq(gid, key->gid) && !in_group_p(gid))
+ is_privileged_op = true;
+
+ if (is_privileged_op && !capable(CAP_SYS_ADMIN))
goto error_put;
}
@@ -1088,7 +1093,7 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm)
down_write(&key->sem);
/* if we're not the sysadmin, we can only change a key that we own */
- if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid())) {
+ if (uid_eq(key->uid, current_fsuid()) || capable(CAP_SYS_ADMIN)) {
key->perm = perm;
notify_key(key, NOTIFY_KEY_SETATTR, 0);
ret = 0;
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 07a0ef2baacd..a7673ad86d18 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -401,17 +401,21 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags);
if (dest_keyring) {
- ret = __key_link_lock(dest_keyring, &ctx->index_key);
+ ret = __key_link_lock(dest_keyring, &key->index_key);
if (ret < 0)
goto link_lock_failed;
- ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit);
- if (ret < 0)
- goto link_prealloc_failed;
}
- /* attach the key to the destination keyring under lock, but we do need
+ /*
+ * Attach the key to the destination keyring under lock, but we do need
* to do another check just in case someone beat us to it whilst we
- * waited for locks */
+ * waited for locks.
+ *
+ * The caller might specify a comparison function which looks for keys
+ * that do not exactly match but are still equivalent from the caller's
+ * perspective. The __key_link_begin() operation must be done only after
+ * an actual key is determined.
+ */
mutex_lock(&key_construction_mutex);
rcu_read_lock();
@@ -420,12 +424,16 @@ static int construct_alloc_key(struct keyring_search_context *ctx,
if (!IS_ERR(key_ref))
goto key_already_present;
- if (dest_keyring)
+ if (dest_keyring) {
+ ret = __key_link_begin(dest_keyring, &key->index_key, &edit);
+ if (ret < 0)
+ goto link_alloc_failed;
__key_link(dest_keyring, key, &edit);
+ }
mutex_unlock(&key_construction_mutex);
if (dest_keyring)
- __key_link_end(dest_keyring, &ctx->index_key, edit);
+ __key_link_end(dest_keyring, &key->index_key, edit);
mutex_unlock(&user->cons_lock);
*_key = key;
kleave(" = 0 [%d]", key_serial(key));
@@ -438,10 +446,13 @@ key_already_present:
mutex_unlock(&key_construction_mutex);
key = key_ref_to_ptr(key_ref);
if (dest_keyring) {
+ ret = __key_link_begin(dest_keyring, &key->index_key, &edit);
+ if (ret < 0)
+ goto link_alloc_failed_unlocked;
ret = __key_link_check_live_key(dest_keyring, key);
if (ret == 0)
__key_link(dest_keyring, key, &edit);
- __key_link_end(dest_keyring, &ctx->index_key, edit);
+ __key_link_end(dest_keyring, &key->index_key, edit);
if (ret < 0)
goto link_check_failed;
}
@@ -456,8 +467,10 @@ link_check_failed:
kleave(" = %d [linkcheck]", ret);
return ret;
-link_prealloc_failed:
- __key_link_end(dest_keyring, &ctx->index_key, edit);
+link_alloc_failed:
+ mutex_unlock(&key_construction_mutex);
+link_alloc_failed_unlocked:
+ __key_link_end(dest_keyring, &key->index_key, edit);
link_lock_failed:
mutex_unlock(&user->cons_lock);
key_put(key);
diff --git a/security/keys/sysctl.c b/security/keys/sysctl.c
index b72b82bb20c6..b348e1679d5d 100644
--- a/security/keys/sysctl.c
+++ b/security/keys/sysctl.c
@@ -9,7 +9,7 @@
#include <linux/sysctl.h>
#include "internal.h"
-struct ctl_table key_sysctls[] = {
+static struct ctl_table key_sysctls[] = {
{
.procname = "maxkeys",
.data = &key_quota_maxkeys,
diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
index 2b2c8eb258d5..bc700f85f80b 100644
--- a/security/keys/trusted-keys/trusted_tpm2.c
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -186,7 +186,7 @@ int tpm2_key_priv(void *context, size_t hdrlen,
}
/**
- * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
+ * tpm2_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
*
* @buf: an allocated tpm_buf instance
* @session_handle: session handle
diff --git a/security/security.c b/security/security.c
index b720424ca37d..549104a447e3 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1139,6 +1139,20 @@ void security_bprm_committed_creds(struct linux_binprm *bprm)
}
/**
+ * security_fs_context_submount() - Initialise fc->security
+ * @fc: new filesystem context
+ * @reference: dentry reference for submount/remount
+ *
+ * Fill out the ->security field for a new fs_context.
+ *
+ * Return: Returns 0 on success or negative error code on failure.
+ */
+int security_fs_context_submount(struct fs_context *fc, struct super_block *reference)
+{
+ return call_int_hook(fs_context_submount, 0, fc, reference);
+}
+
+/**
* security_fs_context_dup() - Duplicate a fs_context LSM blob
* @fc: destination filesystem context
* @src_fc: source filesystem context
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index d06e350fedee..afd663744041 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2745,6 +2745,27 @@ static int selinux_umount(struct vfsmount *mnt, int flags)
FILESYSTEM__UNMOUNT, NULL);
}
+static int selinux_fs_context_submount(struct fs_context *fc,
+ struct super_block *reference)
+{
+ const struct superblock_security_struct *sbsec;
+ struct selinux_mnt_opts *opts;
+
+ opts = kzalloc(sizeof(*opts), GFP_KERNEL);
+ if (!opts)
+ return -ENOMEM;
+
+ sbsec = selinux_superblock(reference);
+ if (sbsec->flags & FSCONTEXT_MNT)
+ opts->fscontext_sid = sbsec->sid;
+ if (sbsec->flags & CONTEXT_MNT)
+ opts->context_sid = sbsec->mntpoint_sid;
+ if (sbsec->flags & DEFCONTEXT_MNT)
+ opts->defcontext_sid = sbsec->def_sid;
+ fc->security = opts;
+ return 0;
+}
+
static int selinux_fs_context_dup(struct fs_context *fc,
struct fs_context *src_fc)
{
@@ -7182,6 +7203,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
/*
* PUT "CLONING" (ACCESSING + ALLOCATING) HOOKS HERE
*/
+ LSM_HOOK_INIT(fs_context_submount, selinux_fs_context_submount),
LSM_HOOK_INIT(fs_context_dup, selinux_fs_context_dup),
LSM_HOOK_INIT(fs_context_parse_param, selinux_fs_context_parse_param),
LSM_HOOK_INIT(sb_eat_lsm_opts, selinux_sb_eat_lsm_opts),
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index bad1f6b685fd..9dafb6ff110d 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -1197,7 +1197,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode)
if (ret) {
ret->i_mode = mode;
- ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret);
+ ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret);
}
return ret;
}
diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c
index 31b08b34c722..dc904865af58 100644
--- a/security/selinux/ss/policydb.c
+++ b/security/selinux/ss/policydb.c
@@ -2005,6 +2005,7 @@ static int filename_trans_read_helper(struct policydb *p, void *fp)
if (!datum)
goto out;
+ datum->next = NULL;
*dst = datum;
/* ebitmap_read() will at least init the bitmap */
@@ -2017,7 +2018,6 @@ static int filename_trans_read_helper(struct policydb *p, void *fp)
goto out;
datum->otype = le32_to_cpu(buf[0]);
- datum->next = NULL;
dst = &datum->next;
}
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index 6e270cf3fd30..a8201cf22f20 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -615,6 +615,56 @@ out_opt_err:
}
/**
+ * smack_fs_context_submount - Initialise security data for a filesystem context
+ * @fc: The filesystem context.
+ * @reference: reference superblock
+ *
+ * Returns 0 on success or -ENOMEM on error.
+ */
+static int smack_fs_context_submount(struct fs_context *fc,
+ struct super_block *reference)
+{
+ struct superblock_smack *sbsp;
+ struct smack_mnt_opts *ctx;
+ struct inode_smack *isp;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ fc->security = ctx;
+
+ sbsp = smack_superblock(reference);
+ isp = smack_inode(reference->s_root->d_inode);
+
+ if (sbsp->smk_default) {
+ ctx->fsdefault = kstrdup(sbsp->smk_default->smk_known, GFP_KERNEL);
+ if (!ctx->fsdefault)
+ return -ENOMEM;
+ }
+
+ if (sbsp->smk_floor) {
+ ctx->fsfloor = kstrdup(sbsp->smk_floor->smk_known, GFP_KERNEL);
+ if (!ctx->fsfloor)
+ return -ENOMEM;
+ }
+
+ if (sbsp->smk_hat) {
+ ctx->fshat = kstrdup(sbsp->smk_hat->smk_known, GFP_KERNEL);
+ if (!ctx->fshat)
+ return -ENOMEM;
+ }
+
+ if (isp->smk_flags & SMK_INODE_TRANSMUTE) {
+ if (sbsp->smk_root) {
+ ctx->fstransmute = kstrdup(sbsp->smk_root->smk_known, GFP_KERNEL);
+ if (!ctx->fstransmute)
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+/**
* smack_fs_context_dup - Duplicate the security data on fs_context duplication
* @fc: The new filesystem context.
* @src_fc: The source filesystem context being duplicated.
@@ -4876,6 +4926,7 @@ static struct security_hook_list smack_hooks[] __ro_after_init = {
LSM_HOOK_INIT(ptrace_traceme, smack_ptrace_traceme),
LSM_HOOK_INIT(syslog, smack_syslog),
+ LSM_HOOK_INIT(fs_context_submount, smack_fs_context_submount),
LSM_HOOK_INIT(fs_context_dup, smack_fs_context_dup),
LSM_HOOK_INIT(fs_context_parse_param, smack_fs_context_parse_param),
diff --git a/sound/core/seq/seq_ports.c b/sound/core/seq/seq_ports.c
index 9b80f8275026..f3f14ff0f80f 100644
--- a/sound/core/seq/seq_ports.c
+++ b/sound/core/seq/seq_ports.c
@@ -149,6 +149,7 @@ int snd_seq_create_port(struct snd_seq_client *client, int port,
write_lock_irq(&client->ports_lock);
list_for_each_entry(p, &client->ports_list_head, list) {
if (p->addr.port == port) {
+ kfree(new_port);
num = -EBUSY;
goto unlock;
}
diff --git a/sound/core/seq/seq_ump_client.c b/sound/core/seq/seq_ump_client.c
index fe21c801af74..f26a1812dfa7 100644
--- a/sound/core/seq/seq_ump_client.c
+++ b/sound/core/seq/seq_ump_client.c
@@ -298,8 +298,7 @@ static void update_group_attrs(struct seq_ump_client *client)
}
list_for_each_entry(fb, &client->ump->block_list, list) {
- if (fb->info.first_group < 0 ||
- fb->info.first_group + fb->info.num_groups > SNDRV_UMP_MAX_GROUPS)
+ if (fb->info.first_group + fb->info.num_groups > SNDRV_UMP_MAX_GROUPS)
break;
group = &client->groups[fb->info.first_group];
for (i = 0; i < fb->info.num_groups; i++, group++) {
diff --git a/sound/drivers/pcmtest.c b/sound/drivers/pcmtest.c
index 2ae912a64d16..291e7fe47893 100644
--- a/sound/drivers/pcmtest.c
+++ b/sound/drivers/pcmtest.c
@@ -110,8 +110,6 @@ struct pcmtst_buf_iter {
struct timer_list timer_instance;
};
-static struct pcmtst *pcmtst;
-
static struct snd_pcm_hardware snd_pcmtst_hw = {
.info = (SNDRV_PCM_INFO_INTERLEAVED |
SNDRV_PCM_INFO_BLOCK_TRANSFER |
@@ -552,6 +550,7 @@ _err_free_chip:
static int pcmtst_probe(struct platform_device *pdev)
{
struct snd_card *card;
+ struct pcmtst *pcmtst;
int err;
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
@@ -573,13 +572,16 @@ static int pcmtst_probe(struct platform_device *pdev)
if (err < 0)
return err;
+ platform_set_drvdata(pdev, pcmtst);
+
return 0;
}
-static int pdev_remove(struct platform_device *dev)
+static void pdev_remove(struct platform_device *pdev)
{
+ struct pcmtst *pcmtst = platform_get_drvdata(pdev);
+
snd_pcmtst_free(pcmtst);
- return 0;
}
static struct platform_device pcmtst_pdev = {
@@ -589,7 +591,7 @@ static struct platform_device pcmtst_pdev = {
static struct platform_driver pcmtst_pdrv = {
.probe = pcmtst_probe,
- .remove = pdev_remove,
+ .remove_new = pdev_remove,
.driver = {
.name = "pcmtest",
},
diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c
index b288874e401e..36b411d1a960 100644
--- a/sound/pci/hda/patch_cs8409-tables.c
+++ b/sound/pci/hda/patch_cs8409-tables.c
@@ -550,6 +550,10 @@ const struct snd_pci_quirk cs8409_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0C50, "Dolphin", CS8409_DOLPHIN),
SND_PCI_QUIRK(0x1028, 0x0C51, "Dolphin", CS8409_DOLPHIN),
SND_PCI_QUIRK(0x1028, 0x0C52, "Dolphin", CS8409_DOLPHIN),
+ SND_PCI_QUIRK(0x1028, 0x0C73, "Dolphin", CS8409_DOLPHIN),
+ SND_PCI_QUIRK(0x1028, 0x0C75, "Dolphin", CS8409_DOLPHIN),
+ SND_PCI_QUIRK(0x1028, 0x0C7D, "Dolphin", CS8409_DOLPHIN),
+ SND_PCI_QUIRK(0x1028, 0x0C7F, "Dolphin", CS8409_DOLPHIN),
{} /* terminator */
};
diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index e2f8b608de82..dc7b7a407638 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c
@@ -122,6 +122,7 @@ struct alc_spec {
unsigned int ultra_low_power:1;
unsigned int has_hs_key:1;
unsigned int no_internal_mic_pin:1;
+ unsigned int en_3kpull_low:1;
/* for PLL fix */
hda_nid_t pll_nid;
@@ -3622,6 +3623,7 @@ static void alc256_shutup(struct hda_codec *codec)
if (!hp_pin)
hp_pin = 0x21;
+ alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
hp_pin_sense = snd_hda_jack_detect(codec, hp_pin);
if (hp_pin_sense)
@@ -3638,8 +3640,7 @@ static void alc256_shutup(struct hda_codec *codec)
/* If disable 3k pulldown control for alc257, the Mic detection will not work correctly
* when booting with headset plugged. So skip setting it for the codec alc257
*/
- if (codec->core.vendor_id != 0x10ec0236 &&
- codec->core.vendor_id != 0x10ec0257)
+ if (spec->en_3kpull_low)
alc_update_coef_idx(codec, 0x46, 0, 3 << 12);
if (!spec->no_shutup_pins)
@@ -4623,6 +4624,21 @@ static void alc236_fixup_hp_mute_led_coefbit(struct hda_codec *codec,
}
}
+static void alc236_fixup_hp_mute_led_coefbit2(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+ spec->mute_led_polarity = 0;
+ spec->mute_led_coef.idx = 0x07;
+ spec->mute_led_coef.mask = 1;
+ spec->mute_led_coef.on = 1;
+ spec->mute_led_coef.off = 0;
+ snd_hda_gen_add_mute_led_cdev(codec, coef_mute_led_set);
+ }
+}
+
/* turn on/off mic-mute LED per capture hook by coef bit */
static int coef_micmute_led_set(struct led_classdev *led_cdev,
enum led_brightness brightness)
@@ -7143,6 +7159,7 @@ enum {
ALC285_FIXUP_HP_GPIO_LED,
ALC285_FIXUP_HP_MUTE_LED,
ALC285_FIXUP_HP_SPECTRE_X360_MUTE_LED,
+ ALC236_FIXUP_HP_MUTE_LED_COEFBIT2,
ALC236_FIXUP_HP_GPIO_LED,
ALC236_FIXUP_HP_MUTE_LED,
ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF,
@@ -7213,6 +7230,7 @@ enum {
ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN,
ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS,
ALC236_FIXUP_DELL_DUAL_CODECS,
+ ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI,
};
/* A special fixup for Lenovo C940 and Yoga Duet 7;
@@ -8632,6 +8650,10 @@ static const struct hda_fixup alc269_fixups[] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc285_fixup_hp_spectre_x360_mute_led,
},
+ [ALC236_FIXUP_HP_MUTE_LED_COEFBIT2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc236_fixup_hp_mute_led_coefbit2,
+ },
[ALC236_FIXUP_HP_GPIO_LED] = {
.type = HDA_FIXUP_FUNC,
.v.func = alc236_fixup_hp_gpio_led,
@@ -9145,8 +9167,6 @@ static const struct hda_fixup alc269_fixups[] = {
[ALC287_FIXUP_CS35L41_I2C_2] = {
.type = HDA_FIXUP_FUNC,
.v.func = cs35l41_fixup_i2c_two,
- .chained = true,
- .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
},
[ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = {
.type = HDA_FIXUP_FUNC,
@@ -9283,6 +9303,12 @@ static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
},
+ [ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l41_fixup_i2c_two,
+ .chained = true,
+ .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -9393,6 +9419,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0c1c, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1d, "Dell Precision 3440", ALC236_FIXUP_DELL_DUAL_CODECS),
SND_PCI_QUIRK(0x1028, 0x0c1e, "Dell Precision 3540", ALC236_FIXUP_DELL_DUAL_CODECS),
+ SND_PCI_QUIRK(0x1028, 0x0cbd, "Dell Oasis 13 CS MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1028, 0x0cbe, "Dell Oasis 13 2-IN-1 MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1028, 0x0cbf, "Dell Oasis 13 Low Weight MTU-L", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1028, 0x0cc1, "Dell Oasis 14 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1028, 0x0cc2, "Dell Oasis 14 2-in-1 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1028, 0x0cc3, "Dell Oasis 14 Low Weight MTL-U", ALC245_FIXUP_CS35L41_SPI_2),
+ SND_PCI_QUIRK(0x1028, 0x0cc4, "Dell Oasis 16 MTL-H/U", ALC245_FIXUP_CS35L41_SPI_2),
SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x164b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1586, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC2),
@@ -9507,6 +9540,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x880d, "HP EliteBook 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8811, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
SND_PCI_QUIRK(0x103c, 0x8812, "HP Spectre x360 15-eb1xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
+ SND_PCI_QUIRK(0x103c, 0x881d, "HP 250 G8 Notebook PC", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8847, "HP EliteBook x360 830 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x884b, "HP EliteBook 840 Aero G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
@@ -9516,6 +9550,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x886d, "HP ZBook Fury 17.3 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8870, "HP ZBook Fury 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8873, "HP ZBook Studio 15.6 Inch G8 Mobile Workstation PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x887a, "HP Laptop 15s-eq2xxx", ALC236_FIXUP_HP_MUTE_LED_COEFBIT2),
SND_PCI_QUIRK(0x103c, 0x888d, "HP ZBook Power 15.6 inch G8 Mobile Workstation PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8895, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_SPEAKERS_MICMUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8896, "HP EliteBook 855 G8 Notebook PC", ALC285_FIXUP_HP_MUTE_LED),
@@ -9581,7 +9616,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x8b96, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8b97, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED),
- SND_PCI_QUIRK(0x103c, 0x8c26, "HP HP EliteBook 800G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c46, "HP EliteBook 830 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c47, "HP EliteBook 840 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c48, "HP EliteBook 860 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c49, "HP Elite x360 830 2-in-1 G11", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c70, "HP EliteBook 835 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c71, "HP EliteBook 845 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8c72, "HP EliteBook 865 G11", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -9636,6 +9677,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1043, 0x1c9f, "ASUS G614JI", ALC285_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1043, 0x1caf, "ASUS G634JYR/JZR", ALC285_FIXUP_ASUS_SPI_REAR_SPEAKERS),
SND_PCI_QUIRK(0x1043, 0x1ccd, "ASUS X555UB", ALC256_FIXUP_ASUS_MIC),
+ SND_PCI_QUIRK(0x1043, 0x1d1f, "ASUS ROG Strix G17 2023 (G713PV)", ALC287_FIXUP_CS35L41_I2C_2),
SND_PCI_QUIRK(0x1043, 0x1d42, "ASUS Zephyrus G14 2022", ALC289_FIXUP_ASUS_GA401),
SND_PCI_QUIRK(0x1043, 0x1d4e, "ASUS TM420", ALC256_FIXUP_ASUS_HPE),
SND_PCI_QUIRK(0x1043, 0x1e02, "ASUS UX3402", ALC245_FIXUP_CS35L41_SPI_2),
@@ -9727,6 +9769,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x1558, 0x5157, "Clevo W517GU1", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x51a1, "Clevo NS50MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x51b1, "Clevo NS50AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x51b3, "Clevo NS70AU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x5630, "Clevo NP50RNJS", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70a1, "Clevo NB70T[HJK]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70b3, "Clevo NK70SB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -9810,14 +9853,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK),
SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
- SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
- SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
+ SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
@@ -10600,6 +10643,7 @@ static int patch_alc269(struct hda_codec *codec)
spec = codec->spec;
spec->gen.shared_mic_vref_pin = 0x18;
codec->power_save_node = 0;
+ spec->en_3kpull_low = true;
#ifdef CONFIG_PM
codec->patch_ops.suspend = alc269_suspend;
@@ -10682,12 +10726,16 @@ static int patch_alc269(struct hda_codec *codec)
spec->shutup = alc256_shutup;
spec->init_hook = alc256_init;
spec->gen.mixer_nid = 0; /* ALC256 does not have any loopback mixer path */
+ if (codec->core.vendor_id == 0x10ec0236 &&
+ codec->bus->pci->vendor != PCI_VENDOR_ID_AMD)
+ spec->en_3kpull_low = false;
break;
case 0x10ec0257:
spec->codec_variant = ALC269_TYPE_ALC257;
spec->shutup = alc256_shutup;
spec->init_hook = alc256_init;
spec->gen.mixer_nid = 0;
+ spec->en_3kpull_low = false;
break;
case 0x10ec0215:
case 0x10ec0245:
diff --git a/sound/pci/ymfpci/ymfpci.c b/sound/pci/ymfpci/ymfpci.c
index b033bd290940..48444dda44de 100644
--- a/sound/pci/ymfpci/ymfpci.c
+++ b/sound/pci/ymfpci/ymfpci.c
@@ -152,8 +152,8 @@ static inline int snd_ymfpci_create_gameport(struct snd_ymfpci *chip, int dev, i
void snd_ymfpci_free_gameport(struct snd_ymfpci *chip) { }
#endif /* SUPPORT_JOYSTICK */
-static int snd_card_ymfpci_probe(struct pci_dev *pci,
- const struct pci_device_id *pci_id)
+static int __snd_card_ymfpci_probe(struct pci_dev *pci,
+ const struct pci_device_id *pci_id)
{
static int dev;
struct snd_card *card;
@@ -348,6 +348,12 @@ static int snd_card_ymfpci_probe(struct pci_dev *pci,
return 0;
}
+static int snd_card_ymfpci_probe(struct pci_dev *pci,
+ const struct pci_device_id *pci_id)
+{
+ return snd_card_free_on_error(&pci->dev, __snd_card_ymfpci_probe(pci, pci_id));
+}
+
static struct pci_driver ymfpci_driver = {
.name = KBUILD_MODNAME,
.id_table = snd_ymfpci_ids,
diff --git a/sound/soc/amd/acp/amd.h b/sound/soc/amd/acp/amd.h
index 5f2119f42271..12a176a50fd6 100644
--- a/sound/soc/amd/acp/amd.h
+++ b/sound/soc/amd/acp/amd.h
@@ -173,7 +173,7 @@ int snd_amd_acp_find_config(struct pci_dev *pci);
static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int direction)
{
- u64 byte_count, low = 0, high = 0;
+ u64 byte_count = 0, low = 0, high = 0;
if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
switch (dai_id) {
@@ -191,7 +191,7 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int
break;
default:
dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
- return -EINVAL;
+ goto POINTER_RETURN_BYTES;
}
} else {
switch (dai_id) {
@@ -213,12 +213,13 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int
break;
default:
dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
- return -EINVAL;
+ goto POINTER_RETURN_BYTES;
}
}
/* Get 64 bit value from two 32 bit registers */
byte_count = (high << 32) | low;
+POINTER_RETURN_BYTES:
return byte_count;
}
diff --git a/sound/soc/amd/ps/acp63.h b/sound/soc/amd/ps/acp63.h
index e96e6dc9d90f..8b853b8d0219 100644
--- a/sound/soc/amd/ps/acp63.h
+++ b/sound/soc/amd/ps/acp63.h
@@ -116,8 +116,28 @@
#define ACP63_SDW0_DMA_MAX_STREAMS 6
#define ACP63_SDW1_DMA_MAX_STREAMS 2
#define ACP_P1_AUDIO_TX_THRESHOLD 6
+
+/*
+ * Below entries describes SDW0 instance DMA stream id and DMA irq bit mapping
+ * in ACP_EXTENAL_INTR_CNTL register.
+ * Stream id IRQ Bit
+ * 0 (SDW0_AUDIO0_TX) 28
+ * 1 (SDW0_AUDIO1_TX) 26
+ * 2 (SDW0_AUDIO2_TX) 24
+ * 3 (SDW0_AUDIO0_RX) 27
+ * 4 (SDW0_AUDIO1_RX) 25
+ * 5 (SDW0_AUDIO2_RX) 23
+ */
#define SDW0_DMA_TX_IRQ_MASK(i) (ACP_AUDIO0_TX_THRESHOLD - (2 * (i)))
-#define SDW0_DMA_RX_IRQ_MASK(i) (ACP_AUDIO0_RX_THRESHOLD - (2 * (i)))
+#define SDW0_DMA_RX_IRQ_MASK(i) (ACP_AUDIO0_RX_THRESHOLD - (2 * ((i) - 3)))
+
+/*
+ * Below entries describes SDW1 instance DMA stream id and DMA irq bit mapping
+ * in ACP_EXTENAL_INTR_CNTL1 register.
+ * Stream id IRQ Bit
+ * 0 (SDW1_AUDIO1_TX) 6
+ * 1 (SDW1_AUDIO1_RX) 5
+ */
#define SDW1_DMA_IRQ_MASK(i) (ACP_P1_AUDIO_TX_THRESHOLD - (i))
#define ACP_DELAY_US 5
diff --git a/sound/soc/amd/ps/pci-ps.c b/sound/soc/amd/ps/pci-ps.c
index 5b46dc8573f8..4af3c3665387 100644
--- a/sound/soc/amd/ps/pci-ps.c
+++ b/sound/soc/amd/ps/pci-ps.c
@@ -257,7 +257,7 @@ static int sdw_amd_scan_controller(struct device *dev)
&sdw_manager_bitmap, 1);
if (ret) {
- dev_err(dev, "Failed to read mipi-sdw-manager-list: %d\n", ret);
+ dev_dbg(dev, "Failed to read mipi-sdw-manager-list: %d\n", ret);
return -EINVAL;
}
count = hweight32(sdw_manager_bitmap);
@@ -641,7 +641,7 @@ static int snd_acp63_probe(struct pci_dev *pci,
ret = get_acp63_device_config(val, pci, adata);
/* ACP PCI driver probe should be continued even PDM or SoundWire Devices are not found */
if (ret) {
- dev_err(&pci->dev, "get acp device config failed:%d\n", ret);
+ dev_dbg(&pci->dev, "get acp device config failed:%d\n", ret);
goto skip_pdev_creation;
}
ret = create_acp63_platform_devs(pci, adata, addr);
diff --git a/sound/soc/amd/ps/ps-sdw-dma.c b/sound/soc/amd/ps/ps-sdw-dma.c
index ade130a8062a..324c80fca672 100644
--- a/sound/soc/amd/ps/ps-sdw-dma.c
+++ b/sound/soc/amd/ps/ps-sdw-dma.c
@@ -30,7 +30,7 @@ static struct sdw_dma_ring_buf_reg sdw0_dma_ring_buf_reg[ACP63_SDW0_DMA_MAX_STRE
ACP_AUDIO2_TX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO2_TX_LINEARPOSITIONCNTR_HIGH},
{ACP_AUDIO0_RX_DMA_SIZE, ACP_AUDIO0_RX_FIFOADDR, ACP_AUDIO0_RX_FIFOSIZE,
ACP_AUDIO0_RX_RINGBUFSIZE, ACP_AUDIO0_RX_RINGBUFADDR, ACP_AUDIO0_RX_INTR_WATERMARK_SIZE,
- ACP_AUDIO0_TX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO0_TX_LINEARPOSITIONCNTR_HIGH},
+ ACP_AUDIO0_RX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO0_RX_LINEARPOSITIONCNTR_HIGH},
{ACP_AUDIO1_RX_DMA_SIZE, ACP_AUDIO1_RX_FIFOADDR, ACP_AUDIO1_RX_FIFOSIZE,
ACP_AUDIO1_RX_RINGBUFSIZE, ACP_AUDIO1_RX_RINGBUFADDR, ACP_AUDIO1_RX_INTR_WATERMARK_SIZE,
ACP_AUDIO1_RX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO1_RX_LINEARPOSITIONCNTR_HIGH},
@@ -39,6 +39,11 @@ static struct sdw_dma_ring_buf_reg sdw0_dma_ring_buf_reg[ACP63_SDW0_DMA_MAX_STRE
ACP_AUDIO2_RX_LINEARPOSITIONCNTR_LOW, ACP_AUDIO2_RX_LINEARPOSITIONCNTR_HIGH}
};
+/*
+ * SDW1 instance supports one TX stream and one RX stream.
+ * For TX/RX streams DMA registers programming for SDW1 instance, it uses ACP_P1_AUDIO1 register
+ * set as per hardware register documentation
+ */
static struct sdw_dma_ring_buf_reg sdw1_dma_ring_buf_reg[ACP63_SDW1_DMA_MAX_STREAMS] = {
{ACP_P1_AUDIO1_TX_DMA_SIZE, ACP_P1_AUDIO1_TX_FIFOADDR, ACP_P1_AUDIO1_TX_FIFOSIZE,
ACP_P1_AUDIO1_TX_RINGBUFSIZE, ACP_P1_AUDIO1_TX_RINGBUFADDR,
@@ -59,6 +64,12 @@ static u32 sdw0_dma_enable_reg[ACP63_SDW0_DMA_MAX_STREAMS] = {
ACP_SW0_AUDIO2_RX_EN,
};
+/*
+ * SDW1 instance supports one TX stream and one RX stream.
+ * For TX/RX streams DMA enable register programming for SDW1 instance,
+ * it uses ACP_SW1_AUDIO1_TX_EN and ACP_SW1_AUDIO1_RX_EN registers
+ * as per hardware register documentation.
+ */
static u32 sdw1_dma_enable_reg[ACP63_SDW1_DMA_MAX_STREAMS] = {
ACP_SW1_AUDIO1_TX_EN,
ACP_SW1_AUDIO1_RX_EN,
@@ -307,12 +318,13 @@ static u64 acp63_sdw_get_byte_count(struct acp_sdw_dma_stream *stream, void __io
pos_high_reg = sdw1_dma_ring_buf_reg[stream->stream_id].pos_high_reg;
break;
default:
- return -EINVAL;
+ goto POINTER_RETURN_BYTES;
}
if (pos_low_reg) {
byte_count.bcount.high = readl(acp_base + pos_high_reg);
byte_count.bcount.low = readl(acp_base + pos_low_reg);
}
+POINTER_RETURN_BYTES:
return byte_count.bytescount;
}
diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c
index a2fe3bd4f9a1..b304b3562c82 100644
--- a/sound/soc/amd/yc/acp6x-mach.c
+++ b/sound/soc/amd/yc/acp6x-mach.c
@@ -217,7 +217,7 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
.driver_data = &acp6x_card,
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"),
- DMI_MATCH(DMI_PRODUCT_NAME, "82"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "82V2"),
}
},
{
@@ -251,6 +251,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = {
{
.driver_data = &acp6x_card,
.matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "M6500RC"),
+ }
+ },
+ {
+ .driver_data = &acp6x_card,
+ .matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Alienware"),
DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m17 R5 AMD"),
}
diff --git a/sound/soc/atmel/atmel-i2s.c b/sound/soc/atmel/atmel-i2s.c
index 49930baf5e4d..69a88dc65165 100644
--- a/sound/soc/atmel/atmel-i2s.c
+++ b/sound/soc/atmel/atmel-i2s.c
@@ -163,11 +163,14 @@ struct atmel_i2s_gck_param {
#define I2S_MCK_12M288 12288000UL
#define I2S_MCK_11M2896 11289600UL
+#define I2S_MCK_6M144 6144000UL
/* mck = (32 * (imckfs+1) / (imckdiv+1)) * fs */
static const struct atmel_i2s_gck_param gck_params[] = {
+ /* mck = 6.144Mhz */
+ { 8000, I2S_MCK_6M144, 1, 47}, /* mck = 768 fs */
+
/* mck = 12.288MHz */
- { 8000, I2S_MCK_12M288, 0, 47}, /* mck = 1536 fs */
{ 16000, I2S_MCK_12M288, 1, 47}, /* mck = 768 fs */
{ 24000, I2S_MCK_12M288, 3, 63}, /* mck = 512 fs */
{ 32000, I2S_MCK_12M288, 3, 47}, /* mck = 384 fs */
diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 2a62dbd5339e..c2de4ee72183 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig
@@ -715,6 +715,7 @@ config SND_SOC_CS35L41_I2C
config SND_SOC_CS35L45
tristate
+ select REGMAP_IRQ
config SND_SOC_CS35L45_SPI
tristate "Cirrus Logic CS35L45 CODEC (SPI)"
@@ -1942,6 +1943,7 @@ config SND_SOC_WCD934X
tristate "WCD9340/WCD9341 Codec"
depends on COMMON_CLK
depends on SLIMBUS
+ select REGMAP_IRQ
select REGMAP_SLIMBUS
select SND_SOC_WCD_MBHC
depends on MFD_WCD934X || COMPILE_TEST
diff --git a/sound/soc/codecs/cs35l41.c b/sound/soc/codecs/cs35l41.c
index 6ac501f008ec..8a879b6f4829 100644
--- a/sound/soc/codecs/cs35l41.c
+++ b/sound/soc/codecs/cs35l41.c
@@ -168,7 +168,7 @@ static int cs35l41_get_fs_mon_config_index(int freq)
static const DECLARE_TLV_DB_RANGE(dig_vol_tlv,
0, 0, TLV_DB_SCALE_ITEM(TLV_DB_GAIN_MUTE, 0, 1),
1, 913, TLV_DB_MINMAX_ITEM(-10200, 1200));
-static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1);
+static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 50, 100, 0);
static const struct snd_kcontrol_new dre_ctrl =
SOC_DAPM_SINGLE("Switch", CS35L41_PWR_CTRL3, 20, 1, 0);
diff --git a/sound/soc/codecs/cs35l56-i2c.c b/sound/soc/codecs/cs35l56-i2c.c
index ed2a41943d97..40666e6698ba 100644
--- a/sound/soc/codecs/cs35l56-i2c.c
+++ b/sound/soc/codecs/cs35l56-i2c.c
@@ -62,10 +62,19 @@ static const struct i2c_device_id cs35l56_id_i2c[] = {
};
MODULE_DEVICE_TABLE(i2c, cs35l56_id_i2c);
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
+ { "CSC355C", 0 },
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
+#endif
+
static struct i2c_driver cs35l56_i2c_driver = {
.driver = {
.name = "cs35l56",
.pm = &cs35l56_pm_ops_i2c_spi,
+ .acpi_match_table = ACPI_PTR(cs35l56_asoc_acpi_match),
},
.id_table = cs35l56_id_i2c,
.probe = cs35l56_i2c_probe,
diff --git a/sound/soc/codecs/cs35l56-spi.c b/sound/soc/codecs/cs35l56-spi.c
index 996aab10500e..302f9c47407a 100644
--- a/sound/soc/codecs/cs35l56-spi.c
+++ b/sound/soc/codecs/cs35l56-spi.c
@@ -59,10 +59,19 @@ static const struct spi_device_id cs35l56_id_spi[] = {
};
MODULE_DEVICE_TABLE(spi, cs35l56_id_spi);
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id cs35l56_asoc_acpi_match[] = {
+ { "CSC355C", 0 },
+ {},
+};
+MODULE_DEVICE_TABLE(acpi, cs35l56_asoc_acpi_match);
+#endif
+
static struct spi_driver cs35l56_spi_driver = {
.driver = {
.name = "cs35l56",
.pm = &cs35l56_pm_ops_i2c_spi,
+ .acpi_match_table = ACPI_PTR(cs35l56_asoc_acpi_match),
},
.id_table = cs35l56_id_spi,
.probe = cs35l56_spi_probe,
diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c
index c03f9d3c9a13..fd06b9f9d496 100644
--- a/sound/soc/codecs/cs35l56.c
+++ b/sound/soc/codecs/cs35l56.c
@@ -5,7 +5,6 @@
// Copyright (C) 2023 Cirrus Logic, Inc. and
// Cirrus Logic International Semiconductor Ltd.
-#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
@@ -1354,26 +1353,22 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56)
return 0;
}
-static int cs35l56_acpi_get_name(struct cs35l56_private *cs35l56)
+static int cs35l56_get_firmware_uid(struct cs35l56_private *cs35l56)
{
- acpi_handle handle = ACPI_HANDLE(cs35l56->dev);
- const char *sub;
+ struct device *dev = cs35l56->dev;
+ const char *prop;
+ int ret;
- /* If there is no ACPI_HANDLE, there is no ACPI for this system, return 0 */
- if (!handle)
+ ret = device_property_read_string(dev, "cirrus,firmware-uid", &prop);
+ /* If bad sw node property, return 0 and fallback to legacy firmware path */
+ if (ret < 0)
return 0;
- sub = acpi_get_subsystem_id(handle);
- if (IS_ERR(sub)) {
- /* If bad ACPI, return 0 and fallback to legacy firmware path, otherwise fail */
- if (PTR_ERR(sub) == -ENODATA)
- return 0;
- else
- return PTR_ERR(sub);
- }
+ cs35l56->dsp.system_name = devm_kstrdup(dev, prop, GFP_KERNEL);
+ if (cs35l56->dsp.system_name == NULL)
+ return -ENOMEM;
- cs35l56->dsp.system_name = sub;
- dev_dbg(cs35l56->dev, "Subsystem ID: %s\n", cs35l56->dsp.system_name);
+ dev_dbg(dev, "Firmware UID: %s\n", cs35l56->dsp.system_name);
return 0;
}
@@ -1417,7 +1412,7 @@ int cs35l56_common_probe(struct cs35l56_private *cs35l56)
gpiod_set_value_cansleep(cs35l56->reset_gpio, 1);
}
- ret = cs35l56_acpi_get_name(cs35l56);
+ ret = cs35l56_get_firmware_uid(cs35l56);
if (ret != 0)
goto err;
@@ -1604,8 +1599,6 @@ void cs35l56_remove(struct cs35l56_private *cs35l56)
regcache_cache_only(cs35l56->regmap, true);
- kfree(cs35l56->dsp.system_name);
-
gpiod_set_value_cansleep(cs35l56->reset_gpio, 0);
regulator_bulk_disable(ARRAY_SIZE(cs35l56->supplies), cs35l56->supplies);
}
diff --git a/sound/soc/codecs/cs42l51-i2c.c b/sound/soc/codecs/cs42l51-i2c.c
index b2106ff6a7cb..e7db7bcd0296 100644
--- a/sound/soc/codecs/cs42l51-i2c.c
+++ b/sound/soc/codecs/cs42l51-i2c.c
@@ -19,6 +19,12 @@ static struct i2c_device_id cs42l51_i2c_id[] = {
};
MODULE_DEVICE_TABLE(i2c, cs42l51_i2c_id);
+const struct of_device_id cs42l51_of_match[] = {
+ { .compatible = "cirrus,cs42l51", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, cs42l51_of_match);
+
static int cs42l51_i2c_probe(struct i2c_client *i2c)
{
struct regmap_config config;
diff --git a/sound/soc/codecs/cs42l51.c b/sound/soc/codecs/cs42l51.c
index a67cd3ee84e0..a7079ae0ca09 100644
--- a/sound/soc/codecs/cs42l51.c
+++ b/sound/soc/codecs/cs42l51.c
@@ -823,13 +823,6 @@ int __maybe_unused cs42l51_resume(struct device *dev)
}
EXPORT_SYMBOL_GPL(cs42l51_resume);
-const struct of_device_id cs42l51_of_match[] = {
- { .compatible = "cirrus,cs42l51", },
- { }
-};
-MODULE_DEVICE_TABLE(of, cs42l51_of_match);
-EXPORT_SYMBOL_GPL(cs42l51_of_match);
-
MODULE_AUTHOR("Arnaud Patard <arnaud.patard@rtp-net.org>");
MODULE_DESCRIPTION("Cirrus Logic CS42L51 ALSA SoC Codec Driver");
MODULE_LICENSE("GPL");
diff --git a/sound/soc/codecs/cs42l51.h b/sound/soc/codecs/cs42l51.h
index a79343e8a54e..125703ede113 100644
--- a/sound/soc/codecs/cs42l51.h
+++ b/sound/soc/codecs/cs42l51.h
@@ -16,7 +16,6 @@ int cs42l51_probe(struct device *dev, struct regmap *regmap);
void cs42l51_remove(struct device *dev);
int __maybe_unused cs42l51_suspend(struct device *dev);
int __maybe_unused cs42l51_resume(struct device *dev);
-extern const struct of_device_id cs42l51_of_match[];
#define CS42L51_CHIP_ID 0x1B
#define CS42L51_CHIP_REV_A 0x00
diff --git a/sound/soc/codecs/da7219-aad.c b/sound/soc/codecs/da7219-aad.c
index c65256bd526d..581b334a6631 100644
--- a/sound/soc/codecs/da7219-aad.c
+++ b/sound/soc/codecs/da7219-aad.c
@@ -361,11 +361,15 @@ static irqreturn_t da7219_aad_irq_thread(int irq, void *data)
struct da7219_priv *da7219 = snd_soc_component_get_drvdata(component);
u8 events[DA7219_AAD_IRQ_REG_MAX];
u8 statusa;
- int i, report = 0, mask = 0;
+ int i, ret, report = 0, mask = 0;
/* Read current IRQ events */
- regmap_bulk_read(da7219->regmap, DA7219_ACCDET_IRQ_EVENT_A,
- events, DA7219_AAD_IRQ_REG_MAX);
+ ret = regmap_bulk_read(da7219->regmap, DA7219_ACCDET_IRQ_EVENT_A,
+ events, DA7219_AAD_IRQ_REG_MAX);
+ if (ret) {
+ dev_warn_ratelimited(component->dev, "Failed to read IRQ events: %d\n", ret);
+ return IRQ_NONE;
+ }
if (!events[DA7219_AAD_IRQ_REG_A] && !events[DA7219_AAD_IRQ_REG_B])
return IRQ_NONE;
@@ -944,6 +948,8 @@ void da7219_aad_suspend(struct snd_soc_component *component)
}
}
}
+
+ synchronize_irq(da7219_aad->irq);
}
void da7219_aad_resume(struct snd_soc_component *component)
diff --git a/sound/soc/codecs/es8316.c b/sound/soc/codecs/es8316.c
index 34cf60769b62..65e497b455d3 100644
--- a/sound/soc/codecs/es8316.c
+++ b/sound/soc/codecs/es8316.c
@@ -153,7 +153,7 @@ static const char * const es8316_dmic_txt[] = {
"dmic data at high level",
"dmic data at low level",
};
-static const unsigned int es8316_dmic_values[] = { 0, 1, 2 };
+static const unsigned int es8316_dmic_values[] = { 0, 2, 3 };
static const struct soc_enum es8316_dmic_src_enum =
SOC_VALUE_ENUM_SINGLE(ES8316_ADC_DMIC, 0, 3,
ARRAY_SIZE(es8316_dmic_txt),
diff --git a/sound/soc/codecs/max98363.c b/sound/soc/codecs/max98363.c
index b5c69bba0e48..2dfaf4fcfbd3 100644
--- a/sound/soc/codecs/max98363.c
+++ b/sound/soc/codecs/max98363.c
@@ -185,10 +185,10 @@ static int max98363_io_init(struct sdw_slave *slave)
pm_runtime_get_noresume(dev);
ret = regmap_read(max98363->regmap, MAX98363_R21FF_REV_ID, &reg);
- if (!ret) {
+ if (!ret)
dev_info(dev, "Revision ID: %X\n", reg);
- return ret;
- }
+ else
+ goto out;
if (max98363->first_hw_init) {
regcache_cache_bypass(max98363->regmap, false);
@@ -198,10 +198,11 @@ static int max98363_io_init(struct sdw_slave *slave)
max98363->first_hw_init = true;
max98363->hw_init = true;
+out:
pm_runtime_mark_last_busy(dev);
pm_runtime_put_autosuspend(dev);
- return 0;
+ return ret;
}
#define MAX98363_RATES SNDRV_PCM_RATE_8000_192000
diff --git a/sound/soc/codecs/nau8821.c b/sound/soc/codecs/nau8821.c
index 96d75882b33a..ca6beb2d2649 100644
--- a/sound/soc/codecs/nau8821.c
+++ b/sound/soc/codecs/nau8821.c
@@ -10,6 +10,7 @@
#include <linux/acpi.h>
#include <linux/clk.h>
#include <linux/delay.h>
+#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/i2c.h>
#include <linux/module.h>
@@ -25,6 +26,13 @@
#include <sound/tlv.h>
#include "nau8821.h"
+#define NAU8821_JD_ACTIVE_HIGH BIT(0)
+
+static int nau8821_quirk;
+static int quirk_override = -1;
+module_param_named(quirk, quirk_override, uint, 0444);
+MODULE_PARM_DESC(quirk, "Board-specific quirk override");
+
#define NAU_FREF_MAX 13500000
#define NAU_FVCO_MAX 100000000
#define NAU_FVCO_MIN 90000000
@@ -1792,6 +1800,33 @@ static int nau8821_setup_irq(struct nau8821 *nau8821)
return 0;
}
+/* Please keep this list alphabetically sorted */
+static const struct dmi_system_id nau8821_quirk_table[] = {
+ {
+ /* Positivo CW14Q01P-V2 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Positivo Tecnologia SA"),
+ DMI_MATCH(DMI_BOARD_NAME, "CW14Q01P-V2"),
+ },
+ .driver_data = (void *)(NAU8821_JD_ACTIVE_HIGH),
+ },
+ {}
+};
+
+static void nau8821_check_quirks(void)
+{
+ const struct dmi_system_id *dmi_id;
+
+ if (quirk_override != -1) {
+ nau8821_quirk = quirk_override;
+ return;
+ }
+
+ dmi_id = dmi_first_match(nau8821_quirk_table);
+ if (dmi_id)
+ nau8821_quirk = (unsigned long)dmi_id->driver_data;
+}
+
static int nau8821_i2c_probe(struct i2c_client *i2c)
{
struct device *dev = &i2c->dev;
@@ -1812,6 +1847,12 @@ static int nau8821_i2c_probe(struct i2c_client *i2c)
nau8821->dev = dev;
nau8821->irq = i2c->irq;
+
+ nau8821_check_quirks();
+
+ if (nau8821_quirk & NAU8821_JD_ACTIVE_HIGH)
+ nau8821->jkdet_polarity = 0;
+
nau8821_print_device_properties(nau8821);
nau8821_reset_chip(nau8821->regmap);
diff --git a/sound/soc/codecs/rt1308-sdw.c b/sound/soc/codecs/rt1308-sdw.c
index f43520ca3187..e566c8ddd3e9 100644
--- a/sound/soc/codecs/rt1308-sdw.c
+++ b/sound/soc/codecs/rt1308-sdw.c
@@ -52,6 +52,7 @@ static bool rt1308_volatile_register(struct device *dev, unsigned int reg)
case 0x300a:
case 0xc000:
case 0xc710:
+ case 0xcf01:
case 0xc860 ... 0xc863:
case 0xc870 ... 0xc873:
return true;
@@ -213,7 +214,7 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
{
struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(dev);
int ret = 0;
- unsigned int tmp;
+ unsigned int tmp, hibernation_flag;
if (rt1308->hw_init)
return 0;
@@ -242,6 +243,10 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
pm_runtime_get_noresume(&slave->dev);
+ regmap_read(rt1308->regmap, 0xcf01, &hibernation_flag);
+ if ((hibernation_flag != 0x00) && rt1308->first_hw_init)
+ goto _preset_ready_;
+
/* sw reset */
regmap_write(rt1308->regmap, RT1308_SDW_RESET, 0);
@@ -282,6 +287,12 @@ static int rt1308_io_init(struct device *dev, struct sdw_slave *slave)
regmap_write(rt1308->regmap, 0xc100, 0xd7);
regmap_write(rt1308->regmap, 0xc101, 0xd7);
+ /* apply BQ params */
+ rt1308_apply_bq_params(rt1308);
+
+ regmap_write(rt1308->regmap, 0xcf01, 0x01);
+
+_preset_ready_:
if (rt1308->first_hw_init) {
regcache_cache_bypass(rt1308->regmap, false);
regcache_mark_dirty(rt1308->regmap);
diff --git a/sound/soc/codecs/rt5640.c b/sound/soc/codecs/rt5640.c
index 0ed4fa261abf..eceed8209787 100644
--- a/sound/soc/codecs/rt5640.c
+++ b/sound/soc/codecs/rt5640.c
@@ -53,7 +53,6 @@ static const struct reg_sequence init_list[] = {
{RT5640_PR_BASE + 0x3d, 0x3600},
{RT5640_PR_BASE + 0x12, 0x0aa8},
{RT5640_PR_BASE + 0x14, 0x0aaa},
- {RT5640_PR_BASE + 0x20, 0x6110},
{RT5640_PR_BASE + 0x21, 0xe0e0},
{RT5640_PR_BASE + 0x23, 0x1804},
};
@@ -2567,9 +2566,10 @@ static void rt5640_enable_jack_detect(struct snd_soc_component *component,
if (jack_data && jack_data->use_platform_clock)
rt5640->use_platform_clock = jack_data->use_platform_clock;
- ret = request_irq(rt5640->irq, rt5640_irq,
- IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
- "rt5640", rt5640);
+ ret = devm_request_threaded_irq(component->dev, rt5640->irq,
+ NULL, rt5640_irq,
+ IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ "rt5640", rt5640);
if (ret) {
dev_warn(component->dev, "Failed to reguest IRQ %d: %d\n", rt5640->irq, ret);
rt5640_disable_jack_detect(component);
@@ -2622,8 +2622,9 @@ static void rt5640_enable_hda_jack_detect(
rt5640->jack = jack;
- ret = request_irq(rt5640->irq, rt5640_irq,
- IRQF_TRIGGER_RISING | IRQF_ONESHOT, "rt5640", rt5640);
+ ret = devm_request_threaded_irq(component->dev, rt5640->irq,
+ NULL, rt5640_irq, IRQF_TRIGGER_RISING | IRQF_ONESHOT,
+ "rt5640", rt5640);
if (ret) {
dev_warn(component->dev, "Failed to reguest IRQ %d: %d\n", rt5640->irq, ret);
rt5640->irq = -ENXIO;
diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index acc7fb1581b2..a506d940a2ea 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c
@@ -3950,7 +3950,11 @@ static int rt5645_i2c_probe(struct i2c_client *i2c)
* read and power On.
*/
msleep(TIME_TO_POWER_MS);
- regmap_read(regmap, RT5645_VENDOR_ID2, &val);
+ ret = regmap_read(regmap, RT5645_VENDOR_ID2, &val);
+ if (ret < 0) {
+ dev_err(&i2c->dev, "Failed to read: 0x%02X\n, ret = %d", RT5645_VENDOR_ID2, ret);
+ goto err_enable;
+ }
switch (val) {
case RT5645_DEVICE_ID:
diff --git a/sound/soc/codecs/rt5665.c b/sound/soc/codecs/rt5665.c
index 83c367af91da..525713c33d71 100644
--- a/sound/soc/codecs/rt5665.c
+++ b/sound/soc/codecs/rt5665.c
@@ -4472,6 +4472,8 @@ static void rt5665_remove(struct snd_soc_component *component)
struct rt5665_priv *rt5665 = snd_soc_component_get_drvdata(component);
regmap_write(rt5665->regmap, RT5665_RESET, 0);
+
+ regulator_bulk_disable(ARRAY_SIZE(rt5665->supplies), rt5665->supplies);
}
#ifdef CONFIG_PM
diff --git a/sound/soc/codecs/rt5682-sdw.c b/sound/soc/codecs/rt5682-sdw.c
index 67404f45389f..4968a8c0064d 100644
--- a/sound/soc/codecs/rt5682-sdw.c
+++ b/sound/soc/codecs/rt5682-sdw.c
@@ -750,8 +750,15 @@ static int __maybe_unused rt5682_dev_resume(struct device *dev)
if (!rt5682->first_hw_init)
return 0;
- if (!slave->unattach_request)
+ if (!slave->unattach_request) {
+ if (rt5682->disable_irq == true) {
+ mutex_lock(&rt5682->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
+ rt5682->disable_irq = false;
+ mutex_unlock(&rt5682->disable_irq_lock);
+ }
goto regmap_sync;
+ }
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT5682_PROBE_TIMEOUT));
diff --git a/sound/soc/codecs/rt711-sdca-sdw.c b/sound/soc/codecs/rt711-sdca-sdw.c
index 119e1f9605d7..23f23f714b39 100644
--- a/sound/soc/codecs/rt711-sdca-sdw.c
+++ b/sound/soc/codecs/rt711-sdca-sdw.c
@@ -438,8 +438,16 @@ static int __maybe_unused rt711_sdca_dev_resume(struct device *dev)
if (!rt711->first_hw_init)
return 0;
- if (!slave->unattach_request)
+ if (!slave->unattach_request) {
+ if (rt711->disable_irq == true) {
+ mutex_lock(&rt711->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+ rt711->disable_irq = false;
+ mutex_unlock(&rt711->disable_irq_lock);
+ }
goto regmap_sync;
+ }
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT711_PROBE_TIMEOUT));
diff --git a/sound/soc/codecs/rt711-sdw.c b/sound/soc/codecs/rt711-sdw.c
index 87dafcb4545d..33dced388f9e 100644
--- a/sound/soc/codecs/rt711-sdw.c
+++ b/sound/soc/codecs/rt711-sdw.c
@@ -538,8 +538,15 @@ static int __maybe_unused rt711_dev_resume(struct device *dev)
if (!rt711->first_hw_init)
return 0;
- if (!slave->unattach_request)
+ if (!slave->unattach_request) {
+ if (rt711->disable_irq == true) {
+ mutex_lock(&rt711->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_INTMASK1, SDW_SCP_INT1_IMPL_DEF);
+ rt711->disable_irq = false;
+ mutex_unlock(&rt711->disable_irq_lock);
+ }
goto regmap_sync;
+ }
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT711_PROBE_TIMEOUT));
diff --git a/sound/soc/codecs/rt712-sdca-sdw.c b/sound/soc/codecs/rt712-sdca-sdw.c
index ad06267b0ea0..6bc50396a0f6 100644
--- a/sound/soc/codecs/rt712-sdca-sdw.c
+++ b/sound/soc/codecs/rt712-sdca-sdw.c
@@ -438,8 +438,16 @@ static int __maybe_unused rt712_sdca_dev_resume(struct device *dev)
if (!rt712->first_hw_init)
return 0;
- if (!slave->unattach_request)
+ if (!slave->unattach_request) {
+ if (rt712->disable_irq == true) {
+ mutex_lock(&rt712->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_0);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+ rt712->disable_irq = false;
+ mutex_unlock(&rt712->disable_irq_lock);
+ }
goto regmap_sync;
+ }
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT712_PROBE_TIMEOUT));
diff --git a/sound/soc/codecs/rt722-sdca-sdw.c b/sound/soc/codecs/rt722-sdca-sdw.c
index cc57e4e27805..e9103ffb3f50 100644
--- a/sound/soc/codecs/rt722-sdca-sdw.c
+++ b/sound/soc/codecs/rt722-sdca-sdw.c
@@ -463,8 +463,16 @@ static int __maybe_unused rt722_sdca_dev_resume(struct device *dev)
if (!rt722->first_hw_init)
return 0;
- if (!slave->unattach_request)
+ if (!slave->unattach_request) {
+ if (rt722->disable_irq == true) {
+ mutex_lock(&rt722->disable_irq_lock);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK1, SDW_SCP_SDCA_INTMASK_SDCA_6);
+ sdw_write_no_pm(slave, SDW_SCP_SDCA_INTMASK2, SDW_SCP_SDCA_INTMASK_SDCA_8);
+ rt722->disable_irq = false;
+ mutex_unlock(&rt722->disable_irq_lock);
+ }
goto regmap_sync;
+ }
time = wait_for_completion_timeout(&slave->initialization_complete,
msecs_to_jiffies(RT722_PROBE_TIMEOUT));
diff --git a/sound/soc/codecs/tas2781-comlib.c b/sound/soc/codecs/tas2781-comlib.c
index a88c6c28a394..ffb26e4a7e2f 100644
--- a/sound/soc/codecs/tas2781-comlib.c
+++ b/sound/soc/codecs/tas2781-comlib.c
@@ -57,16 +57,17 @@ static int tasdevice_change_chn_book(struct tasdevice_priv *tas_priv,
if (client->addr != tasdev->dev_addr) {
client->addr = tasdev->dev_addr;
- if (tasdev->cur_book == book) {
- ret = regmap_write(map,
- TASDEVICE_PAGE_SELECT, 0);
- if (ret < 0) {
- dev_err(tas_priv->dev, "%s, E=%d\n",
- __func__, ret);
- goto out;
- }
+ /* All tas2781s share the same regmap, clear the page
+ * inside regmap once switching to another tas2781.
+ * Register 0 at any pages and any books inside tas2781
+ * is the same one for page-switching.
+ */
+ ret = regmap_write(map, TASDEVICE_PAGE_SELECT, 0);
+ if (ret < 0) {
+ dev_err(tas_priv->dev, "%s, E=%d\n",
+ __func__, ret);
+ goto out;
}
- goto out;
}
if (tasdev->cur_book != book) {
diff --git a/sound/soc/codecs/wcd-mbhc-v2.c b/sound/soc/codecs/wcd-mbhc-v2.c
index 1911750f7445..5da1934527f3 100644
--- a/sound/soc/codecs/wcd-mbhc-v2.c
+++ b/sound/soc/codecs/wcd-mbhc-v2.c
@@ -1454,7 +1454,7 @@ struct wcd_mbhc *wcd_mbhc_init(struct snd_soc_component *component,
return ERR_PTR(-EINVAL);
}
- mbhc = devm_kzalloc(dev, sizeof(*mbhc), GFP_KERNEL);
+ mbhc = kzalloc(sizeof(*mbhc), GFP_KERNEL);
if (!mbhc)
return ERR_PTR(-ENOMEM);
@@ -1474,61 +1474,76 @@ struct wcd_mbhc *wcd_mbhc_init(struct snd_soc_component *component,
INIT_WORK(&mbhc->correct_plug_swch, wcd_correct_swch_plug);
- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_sw_intr, NULL,
+ ret = request_threaded_irq(mbhc->intr_ids->mbhc_sw_intr, NULL,
wcd_mbhc_mech_plug_detect_irq,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"mbhc sw intr", mbhc);
if (ret)
- goto err;
+ goto err_free_mbhc;
- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_btn_press_intr, NULL,
+ ret = request_threaded_irq(mbhc->intr_ids->mbhc_btn_press_intr, NULL,
wcd_mbhc_btn_press_handler,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"Button Press detect", mbhc);
if (ret)
- goto err;
+ goto err_free_sw_intr;
- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_btn_release_intr, NULL,
+ ret = request_threaded_irq(mbhc->intr_ids->mbhc_btn_release_intr, NULL,
wcd_mbhc_btn_release_handler,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"Button Release detect", mbhc);
if (ret)
- goto err;
+ goto err_free_btn_press_intr;
- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_hs_ins_intr, NULL,
+ ret = request_threaded_irq(mbhc->intr_ids->mbhc_hs_ins_intr, NULL,
wcd_mbhc_adc_hs_ins_irq,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"Elect Insert", mbhc);
if (ret)
- goto err;
+ goto err_free_btn_release_intr;
disable_irq_nosync(mbhc->intr_ids->mbhc_hs_ins_intr);
- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->mbhc_hs_rem_intr, NULL,
+ ret = request_threaded_irq(mbhc->intr_ids->mbhc_hs_rem_intr, NULL,
wcd_mbhc_adc_hs_rem_irq,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"Elect Remove", mbhc);
if (ret)
- goto err;
+ goto err_free_hs_ins_intr;
disable_irq_nosync(mbhc->intr_ids->mbhc_hs_rem_intr);
- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->hph_left_ocp, NULL,
+ ret = request_threaded_irq(mbhc->intr_ids->hph_left_ocp, NULL,
wcd_mbhc_hphl_ocp_irq,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"HPH_L OCP detect", mbhc);
if (ret)
- goto err;
+ goto err_free_hs_rem_intr;
- ret = devm_request_threaded_irq(dev, mbhc->intr_ids->hph_right_ocp, NULL,
+ ret = request_threaded_irq(mbhc->intr_ids->hph_right_ocp, NULL,
wcd_mbhc_hphr_ocp_irq,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"HPH_R OCP detect", mbhc);
if (ret)
- goto err;
+ goto err_free_hph_left_ocp;
return mbhc;
-err:
+
+err_free_hph_left_ocp:
+ free_irq(mbhc->intr_ids->hph_left_ocp, mbhc);
+err_free_hs_rem_intr:
+ free_irq(mbhc->intr_ids->mbhc_hs_rem_intr, mbhc);
+err_free_hs_ins_intr:
+ free_irq(mbhc->intr_ids->mbhc_hs_ins_intr, mbhc);
+err_free_btn_release_intr:
+ free_irq(mbhc->intr_ids->mbhc_btn_release_intr, mbhc);
+err_free_btn_press_intr:
+ free_irq(mbhc->intr_ids->mbhc_btn_press_intr, mbhc);
+err_free_sw_intr:
+ free_irq(mbhc->intr_ids->mbhc_sw_intr, mbhc);
+err_free_mbhc:
+ kfree(mbhc);
+
dev_err(dev, "Failed to request mbhc interrupts %d\n", ret);
return ERR_PTR(ret);
@@ -1537,9 +1552,19 @@ EXPORT_SYMBOL(wcd_mbhc_init);
void wcd_mbhc_deinit(struct wcd_mbhc *mbhc)
{
+ free_irq(mbhc->intr_ids->hph_right_ocp, mbhc);
+ free_irq(mbhc->intr_ids->hph_left_ocp, mbhc);
+ free_irq(mbhc->intr_ids->mbhc_hs_rem_intr, mbhc);
+ free_irq(mbhc->intr_ids->mbhc_hs_ins_intr, mbhc);
+ free_irq(mbhc->intr_ids->mbhc_btn_release_intr, mbhc);
+ free_irq(mbhc->intr_ids->mbhc_btn_press_intr, mbhc);
+ free_irq(mbhc->intr_ids->mbhc_sw_intr, mbhc);
+
mutex_lock(&mbhc->lock);
wcd_cancel_hs_detect_plug(mbhc, &mbhc->correct_plug_swch);
mutex_unlock(&mbhc->lock);
+
+ kfree(mbhc);
}
EXPORT_SYMBOL(wcd_mbhc_deinit);
diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c
index c0d1fa36d841..1b6e376f3833 100644
--- a/sound/soc/codecs/wcd934x.c
+++ b/sound/soc/codecs/wcd934x.c
@@ -2642,7 +2642,7 @@ static int wcd934x_mbhc_micb_ctrl_threshold_mic(struct snd_soc_component *compon
return rc;
}
-static inline void wcd934x_mbhc_get_result_params(struct wcd934x_codec *wcd934x,
+static void wcd934x_mbhc_get_result_params(struct wcd934x_codec *wcd934x,
s16 *d1_a, u16 noff,
int32_t *zdet)
{
@@ -2683,7 +2683,7 @@ static inline void wcd934x_mbhc_get_result_params(struct wcd934x_codec *wcd934x,
else if (x1 < minCode_param[noff])
*zdet = WCD934X_ZDET_FLOATING_IMPEDANCE;
- dev_info(wcd934x->dev, "%s: d1=%d, c1=%d, x1=0x%x, z_val=%d(milliOhm)\n",
+ dev_dbg(wcd934x->dev, "%s: d1=%d, c1=%d, x1=0x%x, z_val=%di (milliohm)\n",
__func__, d1, c1, x1, *zdet);
ramp_down:
i = 0;
@@ -2740,8 +2740,8 @@ z_right:
*zr = zdet;
}
-static inline void wcd934x_wcd_mbhc_qfuse_cal(struct snd_soc_component *component,
- int32_t *z_val, int flag_l_r)
+static void wcd934x_wcd_mbhc_qfuse_cal(struct snd_soc_component *component,
+ int32_t *z_val, int flag_l_r)
{
s16 q1;
int q1_cal;
@@ -3044,6 +3044,17 @@ static int wcd934x_mbhc_init(struct snd_soc_component *component)
return 0;
}
+
+static void wcd934x_mbhc_deinit(struct snd_soc_component *component)
+{
+ struct wcd934x_codec *wcd = snd_soc_component_get_drvdata(component);
+
+ if (!wcd->mbhc)
+ return;
+
+ wcd_mbhc_deinit(wcd->mbhc);
+}
+
static int wcd934x_comp_probe(struct snd_soc_component *component)
{
struct wcd934x_codec *wcd = dev_get_drvdata(component->dev);
@@ -3077,6 +3088,7 @@ static void wcd934x_comp_remove(struct snd_soc_component *comp)
{
struct wcd934x_codec *wcd = dev_get_drvdata(comp->dev);
+ wcd934x_mbhc_deinit(comp);
wcd_clsh_ctrl_free(wcd->clsh_ctrl);
}
diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
index e7d6a02cdec0..a3c680661377 100644
--- a/sound/soc/codecs/wcd938x.c
+++ b/sound/soc/codecs/wcd938x.c
@@ -210,7 +210,7 @@ struct wcd938x_priv {
};
static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
-static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(line_gain, 600, -3000);
+static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000);
struct wcd938x_mbhc_zdet_param {
@@ -2124,10 +2124,11 @@ static int wcd938x_mbhc_micb_ctrl_threshold_mic(struct snd_soc_component *compon
return wcd938x_mbhc_micb_adjust_voltage(component, micb_mv, MIC_BIAS_2);
}
-static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x,
+static void wcd938x_mbhc_get_result_params(struct snd_soc_component *component,
s16 *d1_a, u16 noff,
int32_t *zdet)
{
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
int i;
int val, val1;
s16 c1;
@@ -2154,8 +2155,8 @@ static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x,
usleep_range(5000, 5050);
if (!c1 || !x1) {
- pr_err("%s: Impedance detect ramp error, c1=%d, x1=0x%x\n",
- __func__, c1, x1);
+ dev_err(component->dev, "Impedance detect ramp error, c1=%d, x1=0x%x\n",
+ c1, x1);
goto ramp_down;
}
d1 = d1_a[c1];
@@ -2165,7 +2166,7 @@ static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x,
else if (x1 < minCode_param[noff])
*zdet = WCD938X_ZDET_FLOATING_IMPEDANCE;
- pr_err("%s: d1=%d, c1=%d, x1=0x%x, z_val=%d(milliOhm)\n",
+ dev_dbg(component->dev, "%s: d1=%d, c1=%d, x1=0x%x, z_val=%d (milliohm)\n",
__func__, d1, c1, x1, *zdet);
ramp_down:
i = 0;
@@ -2210,7 +2211,7 @@ static void wcd938x_mbhc_zdet_ramp(struct snd_soc_component *component,
WCD938X_ANA_MBHC_ZDET, 0x80, 0x80);
dev_dbg(component->dev, "%s: ramp for HPH_L, noff = %d\n",
__func__, zdet_param->noff);
- wcd938x_mbhc_get_result_params(wcd938x, d1_a, zdet_param->noff, &zdet);
+ wcd938x_mbhc_get_result_params(component, d1_a, zdet_param->noff, &zdet);
regmap_update_bits(wcd938x->regmap,
WCD938X_ANA_MBHC_ZDET, 0x80, 0x00);
@@ -2224,15 +2225,15 @@ z_right:
WCD938X_ANA_MBHC_ZDET, 0x40, 0x40);
dev_dbg(component->dev, "%s: ramp for HPH_R, noff = %d\n",
__func__, zdet_param->noff);
- wcd938x_mbhc_get_result_params(wcd938x, d1_a, zdet_param->noff, &zdet);
+ wcd938x_mbhc_get_result_params(component, d1_a, zdet_param->noff, &zdet);
regmap_update_bits(wcd938x->regmap,
WCD938X_ANA_MBHC_ZDET, 0x40, 0x00);
*zr = zdet;
}
-static inline void wcd938x_wcd_mbhc_qfuse_cal(struct snd_soc_component *component,
- int32_t *z_val, int flag_l_r)
+static void wcd938x_wcd_mbhc_qfuse_cal(struct snd_soc_component *component,
+ int32_t *z_val, int flag_l_r)
{
s16 q1;
int q1_cal;
@@ -2625,6 +2626,8 @@ static int wcd938x_mbhc_init(struct snd_soc_component *component)
WCD938X_IRQ_HPHR_OCP_INT);
wcd938x->wcd_mbhc = wcd_mbhc_init(component, &mbhc_cb, intr_ids, wcd_mbhc_fields, true);
+ if (IS_ERR(wcd938x->wcd_mbhc))
+ return PTR_ERR(wcd938x->wcd_mbhc);
snd_soc_add_component_controls(component, impedance_detect_controls,
ARRAY_SIZE(impedance_detect_controls));
@@ -2633,6 +2636,14 @@ static int wcd938x_mbhc_init(struct snd_soc_component *component)
return 0;
}
+
+static void wcd938x_mbhc_deinit(struct snd_soc_component *component)
+{
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+
+ wcd_mbhc_deinit(wcd938x->wcd_mbhc);
+}
+
/* END MBHC */
static const struct snd_kcontrol_new wcd938x_snd_controls[] = {
@@ -2652,8 +2663,8 @@ static const struct snd_kcontrol_new wcd938x_snd_controls[] = {
wcd938x_get_swr_port, wcd938x_set_swr_port),
SOC_SINGLE_EXT("DSD_R Switch", WCD938X_DSD_R, 0, 1, 0,
wcd938x_get_swr_port, wcd938x_set_swr_port),
- SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 0, line_gain),
- SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 0, line_gain),
+ SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 1, line_gain),
+ SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 1, line_gain),
WCD938X_EAR_PA_GAIN_TLV("EAR_PA Volume", WCD938X_ANA_EAR_COMPANDER_CTL,
2, 0x10, 0, ear_pa_gain),
SOC_SINGLE_EXT("ADC1 Switch", WCD938X_ADC1, 1, 1, 0,
@@ -3080,16 +3091,33 @@ static int wcd938x_irq_init(struct wcd938x_priv *wcd, struct device *dev)
static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
{
struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+ struct sdw_slave *tx_sdw_dev = wcd938x->tx_sdw_dev;
struct device *dev = component->dev;
+ unsigned long time_left;
int ret, i;
+ time_left = wait_for_completion_timeout(&tx_sdw_dev->initialization_complete,
+ msecs_to_jiffies(2000));
+ if (!time_left) {
+ dev_err(dev, "soundwire device init timeout\n");
+ return -ETIMEDOUT;
+ }
+
snd_soc_component_init_regmap(component, wcd938x->regmap);
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret < 0)
+ return ret;
+
wcd938x->variant = snd_soc_component_read_field(component,
WCD938X_DIGITAL_EFUSE_REG_0,
WCD938X_ID_MASK);
wcd938x->clsh_info = wcd_clsh_ctrl_alloc(component, WCD938X);
+ if (IS_ERR(wcd938x->clsh_info)) {
+ pm_runtime_put(dev);
+ return PTR_ERR(wcd938x->clsh_info);
+ }
wcd938x_io_init(wcd938x);
/* Set all interrupts as edge triggered */
@@ -3098,6 +3126,8 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
(WCD938X_DIGITAL_INTR_LEVEL_0 + i), 0);
}
+ pm_runtime_put(dev);
+
wcd938x->hphr_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
WCD938X_IRQ_HPHR_PDM_WD_INT);
wcd938x->hphl_pdm_wd_int = regmap_irq_get_virq(wcd938x->irq_chip,
@@ -3109,20 +3139,26 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
ret = request_threaded_irq(wcd938x->hphr_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"HPHR PDM WD INT", wcd938x);
- if (ret)
+ if (ret) {
dev_err(dev, "Failed to request HPHR WD interrupt (%d)\n", ret);
+ goto err_free_clsh_ctrl;
+ }
ret = request_threaded_irq(wcd938x->hphl_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"HPHL PDM WD INT", wcd938x);
- if (ret)
+ if (ret) {
dev_err(dev, "Failed to request HPHL WD interrupt (%d)\n", ret);
+ goto err_free_hphr_pdm_wd_int;
+ }
ret = request_threaded_irq(wcd938x->aux_pdm_wd_int, NULL, wcd938x_wd_handle_irq,
IRQF_ONESHOT | IRQF_TRIGGER_RISING,
"AUX PDM WD INT", wcd938x);
- if (ret)
+ if (ret) {
dev_err(dev, "Failed to request Aux WD interrupt (%d)\n", ret);
+ goto err_free_hphl_pdm_wd_int;
+ }
/* Disable watchdog interrupt for HPH and AUX */
disable_irq_nosync(wcd938x->hphr_pdm_wd_int);
@@ -3137,7 +3173,7 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
dev_err(component->dev,
"%s: Failed to add snd ctrls for variant: %d\n",
__func__, wcd938x->variant);
- goto err;
+ goto err_free_aux_pdm_wd_int;
}
break;
case WCD9385:
@@ -3147,7 +3183,7 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
dev_err(component->dev,
"%s: Failed to add snd ctrls for variant: %d\n",
__func__, wcd938x->variant);
- goto err;
+ goto err_free_aux_pdm_wd_int;
}
break;
default:
@@ -3155,12 +3191,38 @@ static int wcd938x_soc_codec_probe(struct snd_soc_component *component)
}
ret = wcd938x_mbhc_init(component);
- if (ret)
+ if (ret) {
dev_err(component->dev, "mbhc initialization failed\n");
-err:
+ goto err_free_aux_pdm_wd_int;
+ }
+
+ return 0;
+
+err_free_aux_pdm_wd_int:
+ free_irq(wcd938x->aux_pdm_wd_int, wcd938x);
+err_free_hphl_pdm_wd_int:
+ free_irq(wcd938x->hphl_pdm_wd_int, wcd938x);
+err_free_hphr_pdm_wd_int:
+ free_irq(wcd938x->hphr_pdm_wd_int, wcd938x);
+err_free_clsh_ctrl:
+ wcd_clsh_ctrl_free(wcd938x->clsh_info);
+
return ret;
}
+static void wcd938x_soc_codec_remove(struct snd_soc_component *component)
+{
+ struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+
+ wcd938x_mbhc_deinit(component);
+
+ free_irq(wcd938x->aux_pdm_wd_int, wcd938x);
+ free_irq(wcd938x->hphl_pdm_wd_int, wcd938x);
+ free_irq(wcd938x->hphr_pdm_wd_int, wcd938x);
+
+ wcd_clsh_ctrl_free(wcd938x->clsh_info);
+}
+
static int wcd938x_codec_set_jack(struct snd_soc_component *comp,
struct snd_soc_jack *jack, void *data)
{
@@ -3177,6 +3239,7 @@ static int wcd938x_codec_set_jack(struct snd_soc_component *comp,
static const struct snd_soc_component_driver soc_codec_dev_wcd938x = {
.name = "wcd938x_codec",
.probe = wcd938x_soc_codec_probe,
+ .remove = wcd938x_soc_codec_remove,
.controls = wcd938x_snd_controls,
.num_controls = ARRAY_SIZE(wcd938x_snd_controls),
.dapm_widgets = wcd938x_dapm_widgets,
diff --git a/sound/soc/codecs/wm8904.c b/sound/soc/codecs/wm8904.c
index 068e610b1b4c..f2baee7c332e 100644
--- a/sound/soc/codecs/wm8904.c
+++ b/sound/soc/codecs/wm8904.c
@@ -2308,6 +2308,9 @@ static int wm8904_i2c_probe(struct i2c_client *i2c)
regmap_update_bits(wm8904->regmap, WM8904_BIAS_CONTROL_0,
WM8904_POBCTRL, 0);
+ /* Fill the cache for the ADC test register */
+ regmap_read(wm8904->regmap, WM8904_ADC_TEST_0, &val);
+
/* Can leave the device powered off until we need it */
regcache_cache_only(wm8904->regmap, true);
regulator_bulk_disable(ARRAY_SIZE(wm8904->supplies), wm8904->supplies);
diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c
index 3f08082a55be..9d01225dedd9 100644
--- a/sound/soc/fsl/fsl_micfil.c
+++ b/sound/soc/fsl/fsl_micfil.c
@@ -1,4 +1,4 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
// Copyright 2018 NXP
#include <linux/bitfield.h>
@@ -1254,4 +1254,4 @@ module_platform_driver(fsl_micfil_driver);
MODULE_AUTHOR("Cosmin-Gabriel Samoila <cosmin.samoila@nxp.com>");
MODULE_DESCRIPTION("NXP PDM Microphone Interface (MICFIL) driver");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/sound/soc/fsl/fsl_micfil.h b/sound/soc/fsl/fsl_micfil.h
index 9237a1c4cb8f..fee9fe3d9119 100644
--- a/sound/soc/fsl/fsl_micfil.h
+++ b/sound/soc/fsl/fsl_micfil.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
/*
* PDM Microphone Interface for the NXP i.MX SoC
* Copyright 2018 NXP
diff --git a/sound/soc/fsl/fsl_sai.c b/sound/soc/fsl/fsl_sai.c
index 5e09f634c61b..f7676d30c82f 100644
--- a/sound/soc/fsl/fsl_sai.c
+++ b/sound/soc/fsl/fsl_sai.c
@@ -507,12 +507,6 @@ static int fsl_sai_set_bclk(struct snd_soc_dai *dai, bool tx, u32 freq)
savediv / 2 - 1);
}
- if (sai->soc_data->max_register >= FSL_SAI_MCTL) {
- /* SAI is in master mode at this point, so enable MCLK */
- regmap_update_bits(sai->regmap, FSL_SAI_MCTL,
- FSL_SAI_MCTL_MCLK_EN, FSL_SAI_MCTL_MCLK_EN);
- }
-
return 0;
}
@@ -719,7 +713,7 @@ static void fsl_sai_config_disable(struct fsl_sai *sai, int dir)
u32 xcsr, count = 100;
regmap_update_bits(sai->regmap, FSL_SAI_xCSR(tx, ofs),
- FSL_SAI_CSR_TERE, 0);
+ FSL_SAI_CSR_TERE | FSL_SAI_CSR_BCE, 0);
/* TERE will remain set till the end of current frame */
do {
diff --git a/sound/soc/fsl/fsl_sai.h b/sound/soc/fsl/fsl_sai.h
index 8254c3547b87..550df87b6a06 100644
--- a/sound/soc/fsl/fsl_sai.h
+++ b/sound/soc/fsl/fsl_sai.h
@@ -91,6 +91,7 @@
/* SAI Transmit/Receive Control Register */
#define FSL_SAI_CSR_TERE BIT(31)
#define FSL_SAI_CSR_SE BIT(30)
+#define FSL_SAI_CSR_BCE BIT(28)
#define FSL_SAI_CSR_FR BIT(25)
#define FSL_SAI_CSR_SR BIT(24)
#define FSL_SAI_CSR_xF_SHIFT 16
diff --git a/sound/soc/fsl/fsl_spdif.c b/sound/soc/fsl/fsl_spdif.c
index 015c3708aa04..3fd26f2cdd60 100644
--- a/sound/soc/fsl/fsl_spdif.c
+++ b/sound/soc/fsl/fsl_spdif.c
@@ -751,6 +751,8 @@ static int fsl_spdif_trigger(struct snd_pcm_substream *substream,
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
regmap_update_bits(regmap, REG_SPDIF_SCR, dmaen, 0);
regmap_update_bits(regmap, REG_SPDIF_SIE, intr, 0);
+ regmap_write(regmap, REG_SPDIF_STL, 0x0);
+ regmap_write(regmap, REG_SPDIF_STR, 0x0);
break;
default:
return -EINVAL;
diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c
index dbee8c98ff01..0201029899ca 100644
--- a/sound/soc/intel/boards/sof_sdw.c
+++ b/sound/soc/intel/boards/sof_sdw.c
@@ -476,7 +476,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = {
DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"),
DMI_MATCH(DMI_PRODUCT_NAME, "Lunar Lake Client Platform"),
},
- .driver_data = (void *)(RT711_JD2_100K),
+ .driver_data = (void *)(RT711_JD2),
},
{}
};
diff --git a/sound/soc/intel/boards/sof_sdw_cs42l42.c b/sound/soc/intel/boards/sof_sdw_cs42l42.c
index c4a16e4c9f69..ad130d913415 100644
--- a/sound/soc/intel/boards/sof_sdw_cs42l42.c
+++ b/sound/soc/intel/boards/sof_sdw_cs42l42.c
@@ -99,9 +99,9 @@ static int cs42l42_rtd_init(struct snd_soc_pcm_runtime *rtd)
jack = &ctx->sdw_headset;
snd_jack_set_key(jack->jack, SND_JACK_BTN_0, KEY_PLAYPAUSE);
- snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOICECOMMAND);
- snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEUP);
- snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOLUMEDOWN);
+ snd_jack_set_key(jack->jack, SND_JACK_BTN_1, KEY_VOLUMEUP);
+ snd_jack_set_key(jack->jack, SND_JACK_BTN_2, KEY_VOLUMEDOWN);
+ snd_jack_set_key(jack->jack, SND_JACK_BTN_3, KEY_VOICECOMMAND);
ret = snd_soc_component_set_jack(component, jack, NULL);
diff --git a/sound/soc/meson/axg-tdm-formatter.c b/sound/soc/meson/axg-tdm-formatter.c
index 9883dc777f63..63333a2b0a9c 100644
--- a/sound/soc/meson/axg-tdm-formatter.c
+++ b/sound/soc/meson/axg-tdm-formatter.c
@@ -30,27 +30,32 @@ int axg_tdm_formatter_set_channel_masks(struct regmap *map,
struct axg_tdm_stream *ts,
unsigned int offset)
{
- unsigned int val, ch = ts->channels;
- unsigned long mask;
- int i, j;
+ unsigned int ch = ts->channels;
+ u32 val[AXG_TDM_NUM_LANES];
+ int i, j, k;
+
+ /*
+ * We need to mimick the slot distribution used by the HW to keep the
+ * channel placement consistent regardless of the number of channel
+ * in the stream. This is why the odd algorithm below is used.
+ */
+ memset(val, 0, sizeof(*val) * AXG_TDM_NUM_LANES);
/*
* Distribute the channels of the stream over the available slots
- * of each TDM lane
+ * of each TDM lane. We need to go over the 32 slots ...
*/
- for (i = 0; i < AXG_TDM_NUM_LANES; i++) {
- val = 0;
- mask = ts->mask[i];
-
- for (j = find_first_bit(&mask, 32);
- (j < 32) && ch;
- j = find_next_bit(&mask, 32, j + 1)) {
- val |= 1 << j;
- ch -= 1;
+ for (i = 0; (i < 32) && ch; i += 2) {
+ /* ... of all the lanes ... */
+ for (j = 0; j < AXG_TDM_NUM_LANES; j++) {
+ /* ... then distribute the channels in pairs */
+ for (k = 0; k < 2; k++) {
+ if ((BIT(i + k) & ts->mask[j]) && ch) {
+ val[j] |= BIT(i + k);
+ ch -= 1;
+ }
+ }
}
-
- regmap_write(map, offset, val);
- offset += regmap_get_reg_stride(map);
}
/*
@@ -63,6 +68,11 @@ int axg_tdm_formatter_set_channel_masks(struct regmap *map,
return -EINVAL;
}
+ for (i = 0; i < AXG_TDM_NUM_LANES; i++) {
+ regmap_write(map, offset, val[i]);
+ offset += regmap_get_reg_stride(map);
+ }
+
return 0;
}
EXPORT_SYMBOL_GPL(axg_tdm_formatter_set_channel_masks);
diff --git a/sound/soc/qcom/qdsp6/q6afe-dai.c b/sound/soc/qcom/qdsp6/q6afe-dai.c
index 31e0bad71e95..dbff55a97162 100644
--- a/sound/soc/qcom/qdsp6/q6afe-dai.c
+++ b/sound/soc/qcom/qdsp6/q6afe-dai.c
@@ -476,7 +476,7 @@ static int q6afe_mi2s_set_sysclk(struct snd_soc_dai *dai,
static const struct snd_soc_dapm_route q6afe_dapm_routes[] = {
{"HDMI Playback", NULL, "HDMI_RX"},
- {"Display Port Playback", NULL, "DISPLAY_PORT_RX"},
+ {"DISPLAY_PORT_RX_0 Playback", NULL, "DISPLAY_PORT_RX"},
{"Slimbus Playback", NULL, "SLIMBUS_0_RX"},
{"Slimbus1 Playback", NULL, "SLIMBUS_1_RX"},
{"Slimbus2 Playback", NULL, "SLIMBUS_2_RX"},
diff --git a/sound/soc/qcom/qdsp6/q6apm-dai.c b/sound/soc/qcom/qdsp6/q6apm-dai.c
index 5eb0b864c740..c90db6daabbd 100644
--- a/sound/soc/qcom/qdsp6/q6apm-dai.c
+++ b/sound/soc/qcom/qdsp6/q6apm-dai.c
@@ -840,6 +840,7 @@ static const struct snd_soc_component_driver q6apm_fe_dai_component = {
.pointer = q6apm_dai_pointer,
.trigger = q6apm_dai_trigger,
.compress_ops = &q6apm_dai_compress_ops,
+ .use_dai_pcm_id = true,
};
static int q6apm_dai_probe(struct platform_device *pdev)
diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c
index 7bfac9492ab5..5d44d07acd69 100644
--- a/sound/soc/qcom/qdsp6/q6apm.c
+++ b/sound/soc/qcom/qdsp6/q6apm.c
@@ -511,6 +511,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op)
switch (hdr->opcode) {
case DATA_CMD_RSP_WR_SH_MEM_EP_DATA_BUFFER_DONE_V2:
+ if (!graph->ar_graph)
+ break;
client_event = APM_CLIENT_EVENT_DATA_WRITE_DONE;
mutex_lock(&graph->lock);
token = hdr->token & APM_WRITE_TOKEN_MASK;
@@ -544,6 +546,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op)
wake_up(&graph->cmd_wait);
break;
case DATA_CMD_RSP_RD_SH_MEM_EP_DATA_BUFFER_V2:
+ if (!graph->ar_graph)
+ break;
client_event = APM_CLIENT_EVENT_DATA_READ_DONE;
mutex_lock(&graph->lock);
rd_done = data->payload;
@@ -649,8 +653,9 @@ int q6apm_graph_close(struct q6apm_graph *graph)
{
struct audioreach_graph *ar_graph = graph->ar_graph;
- gpr_free_port(graph->port);
+ graph->ar_graph = NULL;
kref_put(&ar_graph->refcount, q6apm_put_audioreach_graph);
+ gpr_free_port(graph->port);
kfree(graph);
return 0;
diff --git a/sound/soc/qcom/qdsp6/topology.c b/sound/soc/qcom/qdsp6/topology.c
index cccc59b570b9..130b22a34fb3 100644
--- a/sound/soc/qcom/qdsp6/topology.c
+++ b/sound/soc/qcom/qdsp6/topology.c
@@ -1277,8 +1277,8 @@ int audioreach_tplg_init(struct snd_soc_component *component)
ret = snd_soc_tplg_component_load(component, &audioreach_tplg_ops, fw);
if (ret < 0) {
- dev_err(dev, "tplg component load failed%d\n", ret);
- ret = -EINVAL;
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "tplg component load failed: %d\n", ret);
}
release_firmware(fw);
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index 11bc5250ffd0..1a0bde23f5e6 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -1988,8 +1988,10 @@ static int snd_soc_bind_card(struct snd_soc_card *card)
/* probe all components used by DAI links on this card */
ret = soc_probe_link_components(card);
if (ret < 0) {
- dev_err(card->dev,
- "ASoC: failed to instantiate card %d\n", ret);
+ if (ret != -EPROBE_DEFER) {
+ dev_err(card->dev,
+ "ASoC: failed to instantiate card %d\n", ret);
+ }
goto probe_end;
}
diff --git a/sound/soc/soc-pcm.c b/sound/soc/soc-pcm.c
index 8896227e4fb7..3aa6b988cb4b 100644
--- a/sound/soc/soc-pcm.c
+++ b/sound/soc/soc-pcm.c
@@ -38,6 +38,7 @@ static inline int _soc_pcm_ret(struct snd_soc_pcm_runtime *rtd,
switch (ret) {
case -EPROBE_DEFER:
case -ENOTSUPP:
+ case -EINVAL:
break;
default:
dev_err(rtd->dev,
@@ -2466,8 +2467,11 @@ static int dpcm_fe_dai_prepare(struct snd_pcm_substream *substream)
/* there is no point preparing this FE if there are no BEs */
if (list_empty(&fe->dpcm[stream].be_clients)) {
- dev_err(fe->dev, "ASoC: no backend DAIs enabled for %s\n",
- fe->dai_link->name);
+ /* dev_err_once() for visibility, dev_dbg() for debugging UCM profiles */
+ dev_err_once(fe->dev, "ASoC: no backend DAIs enabled for %s, possibly missing ALSA mixer-based routing or UCM profile\n",
+ fe->dai_link->name);
+ dev_dbg(fe->dev, "ASoC: no backend DAIs enabled for %s\n",
+ fe->dai_link->name);
ret = -EINVAL;
goto out;
}
diff --git a/sound/soc/soc-topology.c b/sound/soc/soc-topology.c
index 8add361e87c6..ad08d4f75a7b 100644
--- a/sound/soc/soc-topology.c
+++ b/sound/soc/soc-topology.c
@@ -1732,7 +1732,8 @@ static int soc_tplg_fe_link_create(struct soc_tplg *tplg,
ret = snd_soc_add_pcm_runtimes(tplg->comp->card, link, 1);
if (ret < 0) {
- dev_err(tplg->dev, "ASoC: adding FE link failed\n");
+ if (ret != -EPROBE_DEFER)
+ dev_err(tplg->dev, "ASoC: adding FE link failed\n");
goto err;
}
@@ -2492,8 +2493,11 @@ static int soc_tplg_process_headers(struct soc_tplg *tplg)
/* load the header object */
ret = soc_tplg_load_header(tplg, hdr);
if (ret < 0) {
- dev_err(tplg->dev,
- "ASoC: topology: could not load header: %d\n", ret);
+ if (ret != -EPROBE_DEFER) {
+ dev_err(tplg->dev,
+ "ASoC: topology: could not load header: %d\n",
+ ret);
+ }
return ret;
}
diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c
index 2ae76bcd3590..afb505461ea1 100644
--- a/sound/soc/sof/amd/acp.c
+++ b/sound/soc/sof/amd/acp.c
@@ -217,6 +217,7 @@ int configure_and_run_sha_dma(struct acp_dev_data *adata, void *image_addr,
unsigned int image_length)
{
struct snd_sof_dev *sdev = adata->dev;
+ const struct sof_amd_acp_desc *desc = get_chip_info(sdev->pdata);
unsigned int tx_count, fw_qualifier, val;
int ret;
@@ -251,9 +252,12 @@ int configure_and_run_sha_dma(struct acp_dev_data *adata, void *image_addr,
return ret;
}
- ret = psp_send_cmd(adata, MBOX_ACP_SHA_DMA_COMMAND);
- if (ret)
- return ret;
+ /* psp_send_cmd only required for renoir platform (rev - 3) */
+ if (desc->rev == 3) {
+ ret = psp_send_cmd(adata, MBOX_ACP_SHA_DMA_COMMAND);
+ if (ret)
+ return ret;
+ }
ret = snd_sof_dsp_read_poll_timeout(sdev, ACP_DSP_BAR, ACP_SHA_DSP_FW_QUALIFIER,
fw_qualifier, fw_qualifier & DSP_FW_RUN_ENABLE,
diff --git a/sound/soc/sof/intel/hda-dai-ops.c b/sound/soc/sof/intel/hda-dai-ops.c
index f3513796c189..f33051eac1c0 100644
--- a/sound/soc/sof/intel/hda-dai-ops.c
+++ b/sound/soc/sof/intel/hda-dai-ops.c
@@ -372,6 +372,7 @@ static const struct hda_dai_widget_dma_ops hda_ipc4_chain_dma_ops = {
static int hda_ipc3_post_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *cpu_dai,
struct snd_pcm_substream *substream, int cmd)
{
+ struct hdac_ext_stream *hext_stream = hda_get_hext_stream(sdev, cpu_dai, substream);
struct snd_soc_dapm_widget *w = snd_soc_dai_get_widget(cpu_dai, substream->stream);
switch (cmd) {
@@ -379,9 +380,17 @@ static int hda_ipc3_post_trigger(struct snd_sof_dev *sdev, struct snd_soc_dai *c
case SNDRV_PCM_TRIGGER_STOP:
{
struct snd_sof_dai_config_data data = { 0 };
+ int ret;
data.dai_data = DMA_CHAN_INVALID;
- return hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_HW_FREE, &data);
+ ret = hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_HW_FREE, &data);
+ if (ret < 0)
+ return ret;
+
+ if (cmd == SNDRV_PCM_TRIGGER_STOP)
+ return hda_link_dma_cleanup(substream, hext_stream, cpu_dai);
+
+ break;
}
case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
return hda_dai_config(w, SOF_DAI_CONFIG_FLAGS_PAUSE, NULL);
diff --git a/sound/soc/sof/intel/hda-dai.c b/sound/soc/sof/intel/hda-dai.c
index 3297dea493aa..863865f3d77e 100644
--- a/sound/soc/sof/intel/hda-dai.c
+++ b/sound/soc/sof/intel/hda-dai.c
@@ -107,9 +107,8 @@ hda_dai_get_ops(struct snd_pcm_substream *substream, struct snd_soc_dai *cpu_dai
return sdai->platform_private;
}
-static int hda_link_dma_cleanup(struct snd_pcm_substream *substream,
- struct hdac_ext_stream *hext_stream,
- struct snd_soc_dai *cpu_dai)
+int hda_link_dma_cleanup(struct snd_pcm_substream *substream, struct hdac_ext_stream *hext_stream,
+ struct snd_soc_dai *cpu_dai)
{
const struct hda_dai_widget_dma_ops *ops = hda_dai_get_ops(substream, cpu_dai);
struct sof_intel_hda_stream *hda_stream;
diff --git a/sound/soc/sof/intel/hda.h b/sound/soc/sof/intel/hda.h
index 3f7c6fb05e5d..5b9e4ebcc18b 100644
--- a/sound/soc/sof/intel/hda.h
+++ b/sound/soc/sof/intel/hda.h
@@ -963,5 +963,7 @@ const struct hda_dai_widget_dma_ops *
hda_select_dai_widget_ops(struct snd_sof_dev *sdev, struct snd_sof_widget *swidget);
int hda_dai_config(struct snd_soc_dapm_widget *w, unsigned int flags,
struct snd_sof_dai_config_data *data);
+int hda_link_dma_cleanup(struct snd_pcm_substream *substream, struct hdac_ext_stream *hext_stream,
+ struct snd_soc_dai *cpu_dai);
#endif
diff --git a/sound/soc/sof/ipc3-dtrace.c b/sound/soc/sof/ipc3-dtrace.c
index 1d3bca2d28dd..35da85a45a9a 100644
--- a/sound/soc/sof/ipc3-dtrace.c
+++ b/sound/soc/sof/ipc3-dtrace.c
@@ -186,7 +186,6 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
struct snd_sof_dfsentry *dfse = file->private_data;
struct sof_ipc_trace_filter_elem *elems = NULL;
struct snd_sof_dev *sdev = dfse->sdev;
- loff_t pos = 0;
int num_elems;
char *string;
int ret;
@@ -201,11 +200,11 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
if (!string)
return -ENOMEM;
- /* assert null termination */
- string[count] = 0;
- ret = simple_write_to_buffer(string, count, &pos, from, count);
- if (ret < 0)
+ if (copy_from_user(string, from, count)) {
+ ret = -EFAULT;
goto error;
+ }
+ string[count] = '\0';
ret = trace_filter_parse(sdev, string, &num_elems, &elems);
if (ret < 0)
diff --git a/sound/soc/sof/ipc3.c b/sound/soc/sof/ipc3.c
index 2c5aac31e8b0..580960ff273d 100644
--- a/sound/soc/sof/ipc3.c
+++ b/sound/soc/sof/ipc3.c
@@ -1001,7 +1001,7 @@ void sof_ipc3_do_rx_work(struct snd_sof_dev *sdev, struct sof_ipc_cmd_hdr *hdr,
ipc3_log_header(sdev->dev, "ipc rx", hdr->cmd);
- if (hdr->size < sizeof(hdr) || hdr->size > SOF_IPC_MSG_MAX_SIZE) {
+ if (hdr->size < sizeof(*hdr) || hdr->size > SOF_IPC_MSG_MAX_SIZE) {
dev_err(sdev->dev, "The received message size is invalid: %u\n",
hdr->size);
return;
diff --git a/sound/soc/sof/ipc4-pcm.c b/sound/soc/sof/ipc4-pcm.c
index 0c905bd0fab4..027416eb2f50 100644
--- a/sound/soc/sof/ipc4-pcm.c
+++ b/sound/soc/sof/ipc4-pcm.c
@@ -708,6 +708,9 @@ static int sof_ipc4_pcm_hw_params(struct snd_soc_component *component,
struct snd_sof_pcm *spcm;
spcm = snd_sof_find_spcm_dai(component, rtd);
+ if (!spcm)
+ return -EINVAL;
+
time_info = spcm->stream[substream->stream].private;
/* delay calculation is not supported by current fw_reg ABI */
if (!time_info)
diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
index a4e1a70b607d..11361e1cd688 100644
--- a/sound/soc/sof/ipc4-topology.c
+++ b/sound/soc/sof/ipc4-topology.c
@@ -1731,6 +1731,9 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
*ipc_config_size = ipc_size;
+ /* update pipeline memory usage */
+ sof_ipc4_update_resource_usage(sdev, swidget, &copier_data->base_config);
+
/* copy IPC data */
memcpy(*ipc_config_data, (void *)copier_data, sizeof(*copier_data));
if (gtw_cfg_config_length)
@@ -1743,9 +1746,6 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
gtw_cfg_config_length,
&ipc4_copier->dma_config_tlv, dma_config_tlv_size);
- /* update pipeline memory usage */
- sof_ipc4_update_resource_usage(sdev, swidget, &copier_data->base_config);
-
return 0;
}
diff --git a/sound/soc/tegra/tegra210_adx.c b/sound/soc/tegra/tegra210_adx.c
index bd0b10c70c4c..7d003f0c8d0f 100644
--- a/sound/soc/tegra/tegra210_adx.c
+++ b/sound/soc/tegra/tegra210_adx.c
@@ -2,7 +2,7 @@
//
// tegra210_adx.c - Tegra210 ADX driver
//
-// Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2021-2023 NVIDIA CORPORATION. All rights reserved.
#include <linux/clk.h>
#include <linux/device.h>
@@ -175,10 +175,20 @@ static int tegra210_adx_get_byte_map(struct snd_kcontrol *kcontrol,
mc = (struct soc_mixer_control *)kcontrol->private_value;
enabled = adx->byte_mask[mc->reg / 32] & (1 << (mc->reg % 32));
+ /*
+ * TODO: Simplify this logic to just return from bytes_map[]
+ *
+ * Presently below is required since bytes_map[] is
+ * tightly packed and cannot store the control value of 256.
+ * Byte mask state is used to know if 256 needs to be returned.
+ * Note that for control value of 256, the put() call stores 0
+ * in the bytes_map[] and disables the corresponding bit in
+ * byte_mask[].
+ */
if (enabled)
ucontrol->value.integer.value[0] = bytes_map[mc->reg];
else
- ucontrol->value.integer.value[0] = 0;
+ ucontrol->value.integer.value[0] = 256;
return 0;
}
@@ -192,19 +202,19 @@ static int tegra210_adx_put_byte_map(struct snd_kcontrol *kcontrol,
int value = ucontrol->value.integer.value[0];
struct soc_mixer_control *mc =
(struct soc_mixer_control *)kcontrol->private_value;
+ unsigned int mask_val = adx->byte_mask[mc->reg / 32];
- if (value == bytes_map[mc->reg])
+ if (value >= 0 && value <= 255)
+ mask_val |= (1 << (mc->reg % 32));
+ else
+ mask_val &= ~(1 << (mc->reg % 32));
+
+ if (mask_val == adx->byte_mask[mc->reg / 32])
return 0;
- if (value >= 0 && value <= 255) {
- /* update byte map and enable slot */
- bytes_map[mc->reg] = value;
- adx->byte_mask[mc->reg / 32] |= (1 << (mc->reg % 32));
- } else {
- /* reset byte map and disable slot */
- bytes_map[mc->reg] = 0;
- adx->byte_mask[mc->reg / 32] &= ~(1 << (mc->reg % 32));
- }
+ /* Update byte map and slot */
+ bytes_map[mc->reg] = value % 256;
+ adx->byte_mask[mc->reg / 32] = mask_val;
return 1;
}
diff --git a/sound/soc/tegra/tegra210_amx.c b/sound/soc/tegra/tegra210_amx.c
index 782a141b65c0..179876949b30 100644
--- a/sound/soc/tegra/tegra210_amx.c
+++ b/sound/soc/tegra/tegra210_amx.c
@@ -2,7 +2,7 @@
//
// tegra210_amx.c - Tegra210 AMX driver
//
-// Copyright (c) 2021 NVIDIA CORPORATION. All rights reserved.
+// Copyright (c) 2021-2023 NVIDIA CORPORATION. All rights reserved.
#include <linux/clk.h>
#include <linux/device.h>
@@ -203,10 +203,20 @@ static int tegra210_amx_get_byte_map(struct snd_kcontrol *kcontrol,
else
enabled = amx->byte_mask[0] & (1 << reg);
+ /*
+ * TODO: Simplify this logic to just return from bytes_map[]
+ *
+ * Presently below is required since bytes_map[] is
+ * tightly packed and cannot store the control value of 256.
+ * Byte mask state is used to know if 256 needs to be returned.
+ * Note that for control value of 256, the put() call stores 0
+ * in the bytes_map[] and disables the corresponding bit in
+ * byte_mask[].
+ */
if (enabled)
ucontrol->value.integer.value[0] = bytes_map[reg];
else
- ucontrol->value.integer.value[0] = 0;
+ ucontrol->value.integer.value[0] = 256;
return 0;
}
@@ -221,25 +231,19 @@ static int tegra210_amx_put_byte_map(struct snd_kcontrol *kcontrol,
unsigned char *bytes_map = (unsigned char *)&amx->map;
int reg = mc->reg;
int value = ucontrol->value.integer.value[0];
+ unsigned int mask_val = amx->byte_mask[reg / 32];
- if (value == bytes_map[reg])
+ if (value >= 0 && value <= 255)
+ mask_val |= (1 << (reg % 32));
+ else
+ mask_val &= ~(1 << (reg % 32));
+
+ if (mask_val == amx->byte_mask[reg / 32])
return 0;
- if (value >= 0 && value <= 255) {
- /* Update byte map and enable slot */
- bytes_map[reg] = value;
- if (reg > 31)
- amx->byte_mask[1] |= (1 << (reg - 32));
- else
- amx->byte_mask[0] |= (1 << reg);
- } else {
- /* Reset byte map and disable slot */
- bytes_map[reg] = 0;
- if (reg > 31)
- amx->byte_mask[1] &= ~(1 << (reg - 32));
- else
- amx->byte_mask[0] &= ~(1 << reg);
- }
+ /* Update byte map and slot */
+ bytes_map[reg] = value % 256;
+ amx->byte_mask[reg / 32] = mask_val;
return 1;
}
diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index f4bd1e8ae4b6..23260aa1919d 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c
@@ -374,6 +374,15 @@ static const struct usbmix_name_map corsair_virtuoso_map[] = {
{ 0 }
};
+/* Microsoft USB Link headset */
+/* a guess work: raw playback volume values are from 2 to 129 */
+static const struct usbmix_dB_map ms_usb_link_dB = { -3225, 0, true };
+static const struct usbmix_name_map ms_usb_link_map[] = {
+ { 9, NULL, .dB = &ms_usb_link_dB },
+ { 10, NULL }, /* Headset Capture volume; seems non-working, disabled */
+ { 0 } /* terminator */
+};
+
/* ASUS ROG Zenith II with Realtek ALC1220-VB */
static const struct usbmix_name_map asus_zenith_ii_map[] = {
{ 19, NULL, 12 }, /* FU, Input Gain Pad - broken response, disabled */
@@ -668,6 +677,11 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
.id = USB_ID(0x1395, 0x0025),
.map = sennheiser_pc8_map,
},
+ {
+ /* Microsoft USB Link headset */
+ .id = USB_ID(0x045e, 0x083c),
+ .map = ms_usb_link_map,
+ },
{ 0 } /* terminator */
};
diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h
index efb4a3311cc5..5d72dc8441cb 100644
--- a/sound/usb/quirks-table.h
+++ b/sound/usb/quirks-table.h
@@ -4507,6 +4507,35 @@ YAMAHA_DEVICE(0x7010, "UB99"),
}
}
},
+{
+ /* Advanced modes of the Mythware XA001AU.
+ * For the standard mode, Mythware XA001AU has ID ffad:a001
+ */
+ USB_DEVICE_VENDOR_SPEC(0xffad, 0xa001),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .vendor_name = "Mythware",
+ .product_name = "XA001AU",
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = (const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 0,
+ .type = QUIRK_IGNORE_INTERFACE,
+ },
+ {
+ .ifnum = 1,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE,
+ },
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE,
+ },
+ {
+ .ifnum = -1
+ }
+ }
+ }
+},
#undef USB_DEVICE_VENDOR_SPEC
#undef USB_AUDIO_DEVICE
diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 30bcb80b1cb8..598659d761cc 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c
@@ -1876,8 +1876,10 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
/* XMOS based USB DACs */
switch (chip->usb_id) {
- case USB_ID(0x1511, 0x0037): /* AURALiC VEGA */
- case USB_ID(0x21ed, 0xd75a): /* Accuphase DAC-60 option card */
+ case USB_ID(0x139f, 0x5504): /* Nagra DAC */
+ case USB_ID(0x20b1, 0x3089): /* Mola-Mola DAC */
+ case USB_ID(0x2522, 0x0007): /* LH Labs Geek Out 1V5 */
+ case USB_ID(0x2522, 0x0009): /* LH Labs Geek Pulse X Inifinity 2V0 */
case USB_ID(0x2522, 0x0012): /* LH Labs VI DAC Infinity */
case USB_ID(0x2772, 0x0230): /* Pro-Ject Pre Box S2 Digital */
if (fp->altsetting == 2)
@@ -1887,14 +1889,18 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
case USB_ID(0x0d8c, 0x0316): /* Hegel HD12 DSD */
case USB_ID(0x10cb, 0x0103): /* The Bit Opus #3; with fp->dsd_raw */
case USB_ID(0x16d0, 0x06b2): /* NuPrime DAC-10 */
- case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */
+ case USB_ID(0x16d0, 0x06b4): /* NuPrime Audio HD-AVP/AVA */
case USB_ID(0x16d0, 0x0733): /* Furutech ADL Stratos */
+ case USB_ID(0x16d0, 0x09d8): /* NuPrime IDA-8 */
case USB_ID(0x16d0, 0x09db): /* NuPrime Audio DAC-9 */
+ case USB_ID(0x16d0, 0x09dd): /* Encore mDSD */
case USB_ID(0x1db5, 0x0003): /* Bryston BDA3 */
+ case USB_ID(0x20a0, 0x4143): /* WaveIO USB Audio 2.0 */
case USB_ID(0x22e1, 0xca01): /* HDTA Serenade DSD */
case USB_ID(0x249c, 0x9326): /* M2Tech Young MkIII */
case USB_ID(0x2616, 0x0106): /* PS Audio NuWave DAC */
case USB_ID(0x2622, 0x0041): /* Audiolab M-DAC+ */
+ case USB_ID(0x278b, 0x5100): /* Rotel RC-1590 */
case USB_ID(0x27f7, 0x3002): /* W4S DAC-2v2SE */
case USB_ID(0x29a2, 0x0086): /* Mutec MC3+ USB */
case USB_ID(0x6b42, 0x0042): /* MSB Technology */
@@ -1904,9 +1910,6 @@ u64 snd_usb_interface_dsd_format_quirks(struct snd_usb_audio *chip,
/* Amanero Combo384 USB based DACs with native DSD support */
case USB_ID(0x16d0, 0x071a): /* Amanero - Combo384 */
- case USB_ID(0x2ab6, 0x0004): /* T+A DAC8DSD-V2.0, MP1000E-V2.0, MP2000R-V2.0, MP2500R-V2.0, MP3100HV-V2.0 */
- case USB_ID(0x2ab6, 0x0005): /* T+A USB HD Audio 1 */
- case USB_ID(0x2ab6, 0x0006): /* T+A USB HD Audio 2 */
if (fp->altsetting == 2) {
switch (le16_to_cpu(chip->dev->descriptor.bcdDevice)) {
case 0x199:
@@ -2013,6 +2016,9 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x041e, 0x4080, /* Creative Live Cam VF0610 */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x045e, 0x083c, /* MS USB Link headset */
+ QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_CTL_MSG_DELAY |
+ QUIRK_FLAG_DISABLE_AUTOSUSPEND),
DEVICE_FLG(0x046d, 0x084c, /* Logitech ConferenceCam Connect */
QUIRK_FLAG_GET_SAMPLE_RATE | QUIRK_FLAG_CTL_MSG_DELAY_1M),
DEVICE_FLG(0x046d, 0x0991, /* Logitech QuickCam Pro */
@@ -2048,6 +2054,9 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_IFACE_DELAY),
DEVICE_FLG(0x0644, 0x805f, /* TEAC Model 12 */
QUIRK_FLAG_FORCE_IFACE_RESET),
+ DEVICE_FLG(0x0644, 0x806b, /* TEAC UD-701 */
+ QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY |
+ QUIRK_FLAG_IFACE_DELAY),
DEVICE_FLG(0x06f8, 0xb000, /* Hercules DJ Console (Windows Edition) */
QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x06f8, 0xd002, /* Hercules DJ Console (Macintosh Edition) */
@@ -2086,6 +2095,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY),
DEVICE_FLG(0x154e, 0x3006, /* Marantz SA-14S1 */
QUIRK_FLAG_ITF_USB_DSD_DAC | QUIRK_FLAG_CTL_MSG_DELAY),
+ DEVICE_FLG(0x154e, 0x300b, /* Marantz SA-KI RUBY / SA-12 */
+ QUIRK_FLAG_DSD_RAW),
DEVICE_FLG(0x154e, 0x500e, /* Denon DN-X1600 */
QUIRK_FLAG_IGNORE_CLOCK_SOURCE),
DEVICE_FLG(0x1686, 0x00dd, /* Zoom R16/24 */
@@ -2130,6 +2141,10 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x21b4, 0x0081, /* AudioQuest DragonFly */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x21b4, 0x0230, /* Ayre QB-9 Twenty */
+ QUIRK_FLAG_DSD_RAW),
+ DEVICE_FLG(0x21b4, 0x0232, /* Ayre QX-5 Twenty */
+ QUIRK_FLAG_DSD_RAW),
DEVICE_FLG(0x2522, 0x0007, /* LH Labs Geek Out HD Audio 1V5 */
QUIRK_FLAG_SET_IFACE_FIRST),
DEVICE_FLG(0x2708, 0x0002, /* Audient iD14 */
@@ -2172,12 +2187,18 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_VALIDATE_RATES),
VENDOR_FLG(0x1235, /* Focusrite Novation */
QUIRK_FLAG_VALIDATE_RATES),
+ VENDOR_FLG(0x1511, /* AURALiC */
+ QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x152a, /* Thesycon devices */
QUIRK_FLAG_DSD_RAW),
+ VENDOR_FLG(0x18d1, /* iBasso devices */
+ QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x1de7, /* Phoenix Audio */
QUIRK_FLAG_GET_SAMPLE_RATE),
VENDOR_FLG(0x20b1, /* XMOS based devices */
QUIRK_FLAG_DSD_RAW),
+ VENDOR_FLG(0x21ed, /* Accuphase Laboratory */
+ QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x22d9, /* Oppo */
QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x23ba, /* Playback Design */
@@ -2193,10 +2214,14 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x2ab6, /* T+A devices */
QUIRK_FLAG_DSD_RAW),
+ VENDOR_FLG(0x2d87, /* Cayin device */
+ QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x3336, /* HEM devices */
QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x3353, /* Khadas devices */
QUIRK_FLAG_DSD_RAW),
+ VENDOR_FLG(0x35f4, /* MSB Technology */
+ QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0x3842, /* EVGA */
QUIRK_FLAG_DSD_RAW),
VENDOR_FLG(0xc502, /* HiBy devices */
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 683ca3af4084..5f6f84837a49 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -126,6 +126,10 @@
#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define APPLE_CPU_PART_M2_BLIZZARD 0x032
#define APPLE_CPU_PART_M2_AVALANCHE 0x033
+#define APPLE_CPU_PART_M2_BLIZZARD_PRO 0x034
+#define APPLE_CPU_PART_M2_AVALANCHE_PRO 0x035
+#define APPLE_CPU_PART_M2_BLIZZARD_MAX 0x038
+#define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039
#define AMPERE_CPU_PART_AMPERE1 0xAC3
@@ -181,6 +185,10 @@
#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
#define MIDR_APPLE_M2_BLIZZARD MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD)
#define MIDR_APPLE_M2_AVALANCHE MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE)
+#define MIDR_APPLE_M2_BLIZZARD_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_PRO)
+#define MIDR_APPLE_M2_AVALANCHE_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_PRO)
+#define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX)
+#define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX)
#define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
diff --git a/tools/arch/arm64/include/uapi/asm/bitsperlong.h b/tools/arch/arm64/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..485d60bee26c
--- /dev/null
+++ b/tools/arch/arm64/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef __ASM_BITSPERLONG_H
+#define __ASM_BITSPERLONG_H
+
+#define __BITS_PER_LONG 64
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* __ASM_BITSPERLONG_H */
diff --git a/tools/arch/riscv/include/uapi/asm/bitsperlong.h b/tools/arch/riscv/include/uapi/asm/bitsperlong.h
new file mode 100644
index 000000000000..0b9b58b57ff6
--- /dev/null
+++ b/tools/arch/riscv/include/uapi/asm/bitsperlong.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2015 Regents of the University of California
+ */
+
+#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H
+#define _UAPI_ASM_RISCV_BITSPERLONG_H
+
+#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8)
+
+#include <asm-generic/bitsperlong.h>
+
+#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index cb8ca46213be..1f6d904c6481 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -14,7 +14,7 @@
* Defines x86 CPU feature bits
*/
#define NCAPINTS 21 /* N 32-bit words worth of info */
-#define NBUGINTS 1 /* N 32-bit bug flags */
+#define NBUGINTS 2 /* N 32-bit bug flags */
/*
* Note: If the comment begins with a quoted string, that string is used
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 3aedae61af4f..a00a53e15ab7 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -545,6 +545,7 @@
#define MSR_AMD64_DE_CFG 0xc0011029
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT 1
#define MSR_AMD64_DE_CFG_LFENCE_SERIALIZE BIT_ULL(MSR_AMD64_DE_CFG_LFENCE_SERIALIZE_BIT)
+#define MSR_AMD64_DE_CFG_ZEN2_FP_BACKUP_FIX_BIT 9
#define MSR_AMD64_BU_CFG2 0xc001102a
#define MSR_AMD64_IBSFETCHCTL 0xc0011030
diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h
index bc48a4dabe5d..4798f9d18fe8 100644
--- a/tools/arch/x86/include/uapi/asm/unistd_32.h
+++ b/tools/arch/x86/include/uapi/asm/unistd_32.h
@@ -26,3 +26,6 @@
#ifndef __NR_setns
#define __NR_setns 346
#endif
+#ifdef __NR_seccomp
+#define __NR_seccomp 354
+#endif
diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h
index f70d2cada256..d0f2043d7132 100644
--- a/tools/arch/x86/include/uapi/asm/unistd_64.h
+++ b/tools/arch/x86/include/uapi/asm/unistd_64.h
@@ -26,3 +26,6 @@
#ifndef __NR_getcpu
#define __NR_getcpu 309
#endif
+#ifndef __NR_seccomp
+#define __NR_seccomp 317
+#endif
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 0f0aa9b7d7b5..2cd6dbbee088 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -208,7 +208,7 @@ $(OUTPUT)test-libtraceevent.bin:
$(BUILD) -ltraceevent
$(OUTPUT)test-libtracefs.bin:
- $(BUILD) -ltracefs
+ $(BUILD) $(shell $(PKG_CONFIG) --cflags libtraceevent 2>/dev/null) -ltracefs
$(OUTPUT)test-libcrypto.bin:
$(BUILD) -lcrypto
diff --git a/tools/counter/Makefile b/tools/counter/Makefile
index a0f4cab71fe5..b2c2946f44c9 100644
--- a/tools/counter/Makefile
+++ b/tools/counter/Makefile
@@ -40,7 +40,8 @@ $(OUTPUT)counter_example: $(COUNTER_EXAMPLE)
clean:
rm -f $(ALL_PROGRAMS)
rm -rf $(OUTPUT)include/linux/counter.h
- rmdir -p $(OUTPUT)include/linux
+ rm -df $(OUTPUT)include/linux
+ rm -df $(OUTPUT)include
find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
install: $(ALL_PROGRAMS)
diff --git a/tools/hv/vmbus_testing b/tools/hv/vmbus_testing
index e7212903dd1d..4467979d8f69 100755
--- a/tools/hv/vmbus_testing
+++ b/tools/hv/vmbus_testing
@@ -164,7 +164,7 @@ def recursive_file_lookup(path, file_map):
def get_all_devices_test_status(file_map):
for device in file_map:
- if (get_test_state(locate_state(device, file_map)) is 1):
+ if (get_test_state(locate_state(device, file_map)) == 1):
print("Testing = ON for: {}"
.format(device.split("/")[5]))
else:
@@ -203,7 +203,7 @@ def write_test_files(path, value):
def set_test_state(state_path, state_value, quiet):
write_test_files(state_path, state_value)
- if (get_test_state(state_path) is 1):
+ if (get_test_state(state_path) == 1):
if (not quiet):
print("Testing = ON for device: {}"
.format(state_path.split("/")[5]))
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index dd7d8e10f16d..fd6c1cb585db 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -817,8 +817,11 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv)
#define __NR_set_mempolicy_home_node 450
__SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
+#define __NR_cachestat 451
+__SYSCALL(__NR_cachestat, sys_cachestat)
+
#undef __NR_syscalls
-#define __NR_syscalls 451
+#define __NR_syscalls 452
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index dba7c5a5b25e..7000e5910a1d 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -280,7 +280,16 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_SEMA(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_SEMA)
-#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
+/*
+ * Top 4 bits of every non-engine counter are GT id.
+ */
+#define __I915_PMU_GT_SHIFT (60)
+
+#define ___I915_PMU_OTHER(gt, x) \
+ (((__u64)__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x)) | \
+ ((__u64)(gt) << __I915_PMU_GT_SHIFT))
+
+#define __I915_PMU_OTHER(x) ___I915_PMU_OTHER(0, x)
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
#define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1)
@@ -290,6 +299,12 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
+#define __I915_PMU_ACTUAL_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 0)
+#define __I915_PMU_REQUESTED_FREQUENCY(gt) ___I915_PMU_OTHER(gt, 1)
+#define __I915_PMU_INTERRUPTS(gt) ___I915_PMU_OTHER(gt, 2)
+#define __I915_PMU_RC6_RESIDENCY(gt) ___I915_PMU_OTHER(gt, 3)
+#define __I915_PMU_SOFTWARE_GT_AWAKE_TIME(gt) ___I915_PMU_OTHER(gt, 4)
+
/* Each region is a minimum of 16k, and there are at most 255 of them.
*/
#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use
@@ -659,7 +674,8 @@ typedef struct drm_i915_irq_wait {
* If the IOCTL is successful, the returned parameter will be set to one of the
* following values:
* * 0 if HuC firmware load is not complete,
- * * 1 if HuC firmware is authenticated and running.
+ * * 1 if HuC firmware is loaded and fully authenticated,
+ * * 2 if HuC firmware is loaded and authenticated for clear media only
*/
#define I915_PARAM_HUC_STATUS 42
@@ -771,6 +787,25 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_OA_TIMESTAMP_FREQUENCY 57
+/*
+ * Query the status of PXP support in i915.
+ *
+ * The query can fail in the following scenarios with the listed error codes:
+ * -ENODEV = PXP support is not available on the GPU device or in the
+ * kernel due to missing component drivers or kernel configs.
+ *
+ * If the IOCTL is successful, the returned parameter will be set to one of
+ * the following values:
+ * 1 = PXP feature is supported and is ready for use.
+ * 2 = PXP feature is supported but should be ready soon (pending
+ * initialization of non-i915 system dependencies).
+ *
+ * NOTE: When param is supported (positive return values), user space should
+ * still refer to the GEM PXP context-creation UAPI header specs to be
+ * aware of possible failure due to system state machine at the time.
+ */
+#define I915_PARAM_PXP_STATUS 58
+
/* Must be kept compact -- no holes and well documented */
/**
@@ -2096,6 +2131,21 @@ struct drm_i915_gem_context_param {
*
* -ENODEV: feature not available
* -EPERM: trying to mark a recoverable or not bannable context as protected
+ * -ENXIO: A dependency such as a component driver or firmware is not yet
+ * loaded so user space may need to attempt again. Depending on the
+ * device, this error may be reported if protected context creation is
+ * attempted very early after kernel start because the internal timeout
+ * waiting for such dependencies is not guaranteed to be larger than
+ * required (numbers differ depending on system and kernel config):
+ * - ADL/RPL: dependencies may take up to 3 seconds from kernel start
+ * while context creation internal timeout is 250 milisecs
+ * - MTL: dependencies may take up to 8 seconds from kernel start
+ * while context creation internal timeout is 250 milisecs
+ * NOTE: such dependencies happen once, so a subsequent call to create a
+ * protected context after a prior successful call will not experience
+ * such timeouts and will not return -ENXIO (unless the driver is reloaded,
+ * or, depending on the device, resumes from a suspended state).
+ * -EIO: The firmware did not succeed in creating the protected context.
*/
#define I915_CONTEXT_PARAM_PROTECTED_CONTENT 0xd
/* Must be kept compact -- no holes and well documented */
@@ -3630,9 +3680,13 @@ struct drm_i915_gem_create_ext {
*
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content.
+ *
+ * For I915_GEM_CREATE_EXT_SET_PAT usage see
+ * struct drm_i915_gem_create_ext_set_pat.
*/
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
+#define I915_GEM_CREATE_EXT_SET_PAT 2
__u64 extensions;
};
@@ -3747,6 +3801,43 @@ struct drm_i915_gem_create_ext_protected_content {
__u32 flags;
};
+/**
+ * struct drm_i915_gem_create_ext_set_pat - The
+ * I915_GEM_CREATE_EXT_SET_PAT extension.
+ *
+ * If this extension is provided, the specified caching policy (PAT index) is
+ * applied to the buffer object.
+ *
+ * Below is an example on how to create an object with specific caching policy:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_i915_gem_create_ext_set_pat set_pat_ext = {
+ * .base = { .name = I915_GEM_CREATE_EXT_SET_PAT },
+ * .pat_index = 0,
+ * };
+ * struct drm_i915_gem_create_ext create_ext = {
+ * .size = PAGE_SIZE,
+ * .extensions = (uintptr_t)&set_pat_ext,
+ * };
+ *
+ * int err = ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create_ext);
+ * if (err) ...
+ */
+struct drm_i915_gem_create_ext_set_pat {
+ /** @base: Extension link. See struct i915_user_extension. */
+ struct i915_user_extension base;
+ /**
+ * @pat_index: PAT index to be set
+ * PAT index is a bit field in Page Table Entry to control caching
+ * behaviors for GPU accesses. The definition of PAT index is
+ * platform dependent and can be found in hardware specifications,
+ */
+ __u32 pat_index;
+ /** @rsvd: reserved for future use */
+ __u32 rsvd;
+};
+
/* ID of the protected content session managed by i915 when PXP is active */
#define I915_PROTECTED_CONTENT_DEFAULT_SESSION 0xf
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index e8c07da58c9f..6c80f96049bd 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -112,4 +112,9 @@
#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
+#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to
+ compare object identity and may not
+ be usable to open_by_handle_at(2) */
+
#endif /* _UAPI_LINUX_FCNTL_H */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 737318b1c1d9..f089ab290978 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1190,6 +1190,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 225
#define KVM_CAP_PMU_EVENT_MASKED_EVENTS 226
#define KVM_CAP_COUNTER_OFFSET 227
+#define KVM_CAP_ARM_EAGER_SPLIT_CHUNK_SIZE 228
+#define KVM_CAP_ARM_SUPPORTED_BLOCK_SIZES 229
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1442,6 +1444,8 @@ enum kvm_device_type {
#define KVM_DEV_TYPE_XIVE KVM_DEV_TYPE_XIVE
KVM_DEV_TYPE_ARM_PV_TIME,
#define KVM_DEV_TYPE_ARM_PV_TIME KVM_DEV_TYPE_ARM_PV_TIME
+ KVM_DEV_TYPE_RISCV_AIA,
+#define KVM_DEV_TYPE_RISCV_AIA KVM_DEV_TYPE_RISCV_AIA
KVM_DEV_TYPE_MAX,
};
@@ -1613,7 +1617,7 @@ struct kvm_s390_ucas_mapping {
#define KVM_GET_DEBUGREGS _IOR(KVMIO, 0xa1, struct kvm_debugregs)
#define KVM_SET_DEBUGREGS _IOW(KVMIO, 0xa2, struct kvm_debugregs)
/*
- * vcpu version available with KVM_ENABLE_CAP
+ * vcpu version available with KVM_CAP_ENABLE_CAP
* vm version available with KVM_CAP_ENABLE_CAP_VM
*/
#define KVM_ENABLE_CAP _IOW(KVMIO, 0xa3, struct kvm_enable_cap)
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index f55bc680b5b0..a246e11988d5 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -4,6 +4,7 @@
#include <asm/mman.h>
#include <asm-generic/hugetlb_encode.h>
+#include <linux/types.h>
#define MREMAP_MAYMOVE 1
#define MREMAP_FIXED 2
@@ -41,4 +42,17 @@
#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB
#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB
+struct cachestat_range {
+ __u64 off;
+ __u64 len;
+};
+
+struct cachestat {
+ __u64 nr_cache;
+ __u64 nr_dirty;
+ __u64 nr_writeback;
+ __u64 nr_evicted;
+ __u64 nr_recently_evicted;
+};
+
#endif /* _UAPI_LINUX_MMAN_H */
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index 4d93967f8aea..8eb0d7b758d2 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -74,7 +74,8 @@
#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */
#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */
-#define MOVE_MOUNT__MASK 0x00000177
+#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */
+#define MOVE_MOUNT__MASK 0x00000377
/*
* fsopen() flags.
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index f23d9a16507f..3c36aeade991 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -294,4 +294,15 @@ struct prctl_mm_map {
#define PR_SET_MEMORY_MERGE 67
#define PR_GET_MEMORY_MERGE 68
+
+#define PR_RISCV_V_SET_CONTROL 69
+#define PR_RISCV_V_GET_CONTROL 70
+# define PR_RISCV_V_VSTATE_CTRL_DEFAULT 0
+# define PR_RISCV_V_VSTATE_CTRL_OFF 1
+# define PR_RISCV_V_VSTATE_CTRL_ON 2
+# define PR_RISCV_V_VSTATE_CTRL_INHERIT (1 << 4)
+# define PR_RISCV_V_VSTATE_CTRL_CUR_MASK 0x3
+# define PR_RISCV_V_VSTATE_CTRL_NEXT_MASK 0xc
+# define PR_RISCV_V_VSTATE_CTRL_MASK 0x1f
+
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 92e1b700b51c..f5c48b61ab62 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -45,6 +45,25 @@
#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64)
/* Specify an eventfd file descriptor to signal on log write. */
#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int)
+/* By default, a device gets one vhost_worker that its virtqueues share. This
+ * command allows the owner of the device to create an additional vhost_worker
+ * for the device. It can later be bound to 1 or more of its virtqueues using
+ * the VHOST_ATTACH_VRING_WORKER command.
+ *
+ * This must be called after VHOST_SET_OWNER and the caller must be the owner
+ * of the device. The new thread will inherit caller's cgroups and namespaces,
+ * and will share the caller's memory space. The new thread will also be
+ * counted against the caller's RLIMIT_NPROC value.
+ *
+ * The worker's ID used in other commands will be returned in
+ * vhost_worker_state.
+ */
+#define VHOST_NEW_WORKER _IOR(VHOST_VIRTIO, 0x8, struct vhost_worker_state)
+/* Free a worker created with VHOST_NEW_WORKER if it's not attached to any
+ * virtqueue. If userspace is not able to call this for workers its created,
+ * the kernel will free all the device's workers when the device is closed.
+ */
+#define VHOST_FREE_WORKER _IOW(VHOST_VIRTIO, 0x9, struct vhost_worker_state)
/* Ring setup. */
/* Set number of descriptors in ring. This parameter can not
@@ -70,6 +89,18 @@
#define VHOST_VRING_BIG_ENDIAN 1
#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Attach a vhost_worker created with VHOST_NEW_WORKER to one of the device's
+ * virtqueues.
+ *
+ * This will replace the virtqueue's existing worker. If the replaced worker
+ * is no longer attached to any virtqueues, it can be freed with
+ * VHOST_FREE_WORKER.
+ */
+#define VHOST_ATTACH_VRING_WORKER _IOW(VHOST_VIRTIO, 0x15, \
+ struct vhost_vring_worker)
+/* Return the vring worker's ID */
+#define VHOST_GET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x16, \
+ struct vhost_vring_worker)
/* The following ioctls use eventfd file descriptors to signal and poll
* for events. */
diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h
index 0aa955aa8246..f9939da41122 100644
--- a/tools/include/uapi/sound/asound.h
+++ b/tools/include/uapi/sound/asound.h
@@ -274,6 +274,7 @@ typedef int __bitwise snd_pcm_subformat_t;
#define SNDRV_PCM_INFO_DOUBLE 0x00000004 /* Double buffering needed for PCM start/stop */
#define SNDRV_PCM_INFO_BATCH 0x00000010 /* double buffering */
#define SNDRV_PCM_INFO_SYNC_APPLPTR 0x00000020 /* need the explicit sync of appl_ptr update */
+#define SNDRV_PCM_INFO_PERFECT_DRAIN 0x00000040 /* silencing at the end of stream is not required */
#define SNDRV_PCM_INFO_INTERLEAVED 0x00000100 /* channels are interleaved */
#define SNDRV_PCM_INFO_NONINTERLEAVED 0x00000200 /* channels are not interleaved */
#define SNDRV_PCM_INFO_COMPLEX 0x00000400 /* complex frame organization (mmap only) */
@@ -383,6 +384,9 @@ typedef int snd_pcm_hw_param_t;
#define SNDRV_PCM_HW_PARAMS_NORESAMPLE (1<<0) /* avoid rate resampling */
#define SNDRV_PCM_HW_PARAMS_EXPORT_BUFFER (1<<1) /* export buffer */
#define SNDRV_PCM_HW_PARAMS_NO_PERIOD_WAKEUP (1<<2) /* disable period wakeups */
+#define SNDRV_PCM_HW_PARAMS_NO_DRAIN_SILENCE (1<<3) /* suppress drain with the filling
+ * of the silence samples
+ */
struct snd_interval {
unsigned int min, max;
@@ -708,7 +712,7 @@ enum {
* Raw MIDI section - /dev/snd/midi??
*/
-#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 2)
+#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4)
enum {
SNDRV_RAWMIDI_STREAM_OUTPUT = 0,
@@ -719,6 +723,7 @@ enum {
#define SNDRV_RAWMIDI_INFO_OUTPUT 0x00000001
#define SNDRV_RAWMIDI_INFO_INPUT 0x00000002
#define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004
+#define SNDRV_RAWMIDI_INFO_UMP 0x00000008
struct snd_rawmidi_info {
unsigned int device; /* RO/WR (control): device number */
@@ -779,6 +784,72 @@ struct snd_rawmidi_status {
};
#endif
+/* UMP EP info flags */
+#define SNDRV_UMP_EP_INFO_STATIC_BLOCKS 0x01
+
+/* UMP EP Protocol / JRTS capability bits */
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI_MASK 0x0300
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI1 0x0100 /* MIDI 1.0 */
+#define SNDRV_UMP_EP_INFO_PROTO_MIDI2 0x0200 /* MIDI 2.0 */
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_MASK 0x0003
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_TX 0x0001 /* JRTS Transmit */
+#define SNDRV_UMP_EP_INFO_PROTO_JRTS_RX 0x0002 /* JRTS Receive */
+
+/* UMP Endpoint information */
+struct snd_ump_endpoint_info {
+ int card; /* card number */
+ int device; /* device number */
+ unsigned int flags; /* additional info */
+ unsigned int protocol_caps; /* protocol capabilities */
+ unsigned int protocol; /* current protocol */
+ unsigned int num_blocks; /* # of function blocks */
+ unsigned short version; /* UMP major/minor version */
+ unsigned short family_id; /* MIDI device family ID */
+ unsigned short model_id; /* MIDI family model ID */
+ unsigned int manufacturer_id; /* MIDI manufacturer ID */
+ unsigned char sw_revision[4]; /* software revision */
+ unsigned short padding;
+ unsigned char name[128]; /* endpoint name string */
+ unsigned char product_id[128]; /* unique product id string */
+ unsigned char reserved[32];
+} __packed;
+
+/* UMP direction */
+#define SNDRV_UMP_DIR_INPUT 0x01
+#define SNDRV_UMP_DIR_OUTPUT 0x02
+#define SNDRV_UMP_DIR_BIDIRECTION 0x03
+
+/* UMP block info flags */
+#define SNDRV_UMP_BLOCK_IS_MIDI1 (1U << 0) /* MIDI 1.0 port w/o restrict */
+#define SNDRV_UMP_BLOCK_IS_LOWSPEED (1U << 1) /* 31.25Kbps B/W MIDI1 port */
+
+/* UMP block user-interface hint */
+#define SNDRV_UMP_BLOCK_UI_HINT_UNKNOWN 0x00
+#define SNDRV_UMP_BLOCK_UI_HINT_RECEIVER 0x01
+#define SNDRV_UMP_BLOCK_UI_HINT_SENDER 0x02
+#define SNDRV_UMP_BLOCK_UI_HINT_BOTH 0x03
+
+/* UMP groups and blocks */
+#define SNDRV_UMP_MAX_GROUPS 16
+#define SNDRV_UMP_MAX_BLOCKS 32
+
+/* UMP Block information */
+struct snd_ump_block_info {
+ int card; /* card number */
+ int device; /* device number */
+ unsigned char block_id; /* block ID (R/W) */
+ unsigned char direction; /* UMP direction */
+ unsigned char active; /* Activeness */
+ unsigned char first_group; /* first group ID */
+ unsigned char num_groups; /* number of groups */
+ unsigned char midi_ci_version; /* MIDI-CI support version */
+ unsigned char sysex8_streams; /* max number of sysex8 streams */
+ unsigned char ui_hint; /* user interface hint */
+ unsigned int flags; /* various info flags */
+ unsigned char name[128]; /* block name string */
+ unsigned char reserved[32];
+} __packed;
+
#define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int)
#define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info)
#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int)
@@ -786,6 +857,9 @@ struct snd_rawmidi_status {
#define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status)
#define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int)
#define SNDRV_RAWMIDI_IOCTL_DRAIN _IOW('W', 0x31, int)
+/* Additional ioctls for UMP rawmidi devices */
+#define SNDRV_UMP_IOCTL_ENDPOINT_INFO _IOR('W', 0x40, struct snd_ump_endpoint_info)
+#define SNDRV_UMP_IOCTL_BLOCK_INFO _IOR('W', 0x41, struct snd_ump_block_info)
/*
* Timer section - /dev/snd/timer
@@ -961,7 +1035,7 @@ struct snd_timer_tread {
* *
****************************************************************************/
-#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8)
+#define SNDRV_CTL_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 9)
struct snd_ctl_card_info {
int card; /* card number */
@@ -1122,6 +1196,9 @@ struct snd_ctl_tlv {
#define SNDRV_CTL_IOCTL_RAWMIDI_NEXT_DEVICE _IOWR('U', 0x40, int)
#define SNDRV_CTL_IOCTL_RAWMIDI_INFO _IOWR('U', 0x41, struct snd_rawmidi_info)
#define SNDRV_CTL_IOCTL_RAWMIDI_PREFER_SUBDEVICE _IOW('U', 0x42, int)
+#define SNDRV_CTL_IOCTL_UMP_NEXT_DEVICE _IOWR('U', 0x43, int)
+#define SNDRV_CTL_IOCTL_UMP_ENDPOINT_INFO _IOWR('U', 0x44, struct snd_ump_endpoint_info)
+#define SNDRV_CTL_IOCTL_UMP_BLOCK_INFO _IOWR('U', 0x45, struct snd_ump_block_info)
#define SNDRV_CTL_IOCTL_POWER _IOWR('U', 0xd0, int)
#define SNDRV_CTL_IOCTL_POWER_STATE _IOR('U', 0xd1, int)
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 67a8d6b740ea..adfbae27dc36 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -68,8 +68,13 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
if (cmp < 0) {
- zfree(&cmds->names[cj]);
- cmds->names[cj++] = cmds->names[ci++];
+ if (ci == cj) {
+ ci++;
+ cj++;
+ } else {
+ zfree(&cmds->names[cj]);
+ cmds->names[cj++] = cmds->names[ci++];
+ }
} else if (cmp == 0) {
ci++;
ei++;
@@ -77,10 +82,11 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
ei++;
}
}
-
- while (ci < cmds->cnt) {
- zfree(&cmds->names[cj]);
- cmds->names[cj++] = cmds->names[ci++];
+ if (ci != cj) {
+ while (ci < cmds->cnt) {
+ zfree(&cmds->names[cj]);
+ cmds->names[cj++] = cmds->names[ci++];
+ }
}
for (ci = cj; ci < cmds->cnt; ci++)
zfree(&cmds->names[ci]);
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index 1b3a36fbb1c3..3ca28d4bcb18 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -417,11 +417,10 @@ class YnlFamily(SpecFamily):
pad = b'\x00' * ((4 - len(attr_payload) % 4) % 4)
return struct.pack('HH', len(attr_payload) + 4, nl_type) + attr_payload + pad
- def _decode_enum(self, rsp, attr_spec):
- raw = rsp[attr_spec['name']]
+ def _decode_enum(self, raw, attr_spec):
enum = self.consts[attr_spec['enum']]
- i = attr_spec.get('value-start', 0)
if 'enum-as-flags' in attr_spec and attr_spec['enum-as-flags']:
+ i = 0
value = set()
while raw:
if raw & 1:
@@ -429,8 +428,8 @@ class YnlFamily(SpecFamily):
raw >>= 1
i += 1
else:
- value = enum.entries_by_val[raw - i].name
- rsp[attr_spec['name']] = value
+ value = enum.entries_by_val[raw].name
+ return value
def _decode_binary(self, attr, attr_spec):
if attr_spec.struct_name:
@@ -438,7 +437,7 @@ class YnlFamily(SpecFamily):
decoded = attr.as_struct(members)
for m in members:
if m.enum:
- self._decode_enum(decoded, m)
+ decoded[m.name] = self._decode_enum(decoded[m.name], m)
elif attr_spec.sub_type:
decoded = attr.as_c_array(attr_spec.sub_type)
else:
@@ -466,6 +465,9 @@ class YnlFamily(SpecFamily):
else:
raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
+ if 'enum' in attr_spec:
+ decoded = self._decode_enum(decoded, attr_spec)
+
if not attr_spec.is_multi:
rsp[attr_spec['name']] = decoded
elif attr_spec.name in rsp:
@@ -473,8 +475,6 @@ class YnlFamily(SpecFamily):
else:
rsp[attr_spec.name] = [decoded]
- if 'enum' in attr_spec:
- self._decode_enum(rsp, attr_spec)
return rsp
def _decode_extack_path(self, attrs, attr_set, offset, target):
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 2e1caabecb18..c0f25d00181e 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -826,3 +826,9 @@ bool arch_is_rethunk(struct symbol *sym)
{
return !strcmp(sym->name, "__x86_return_thunk");
}
+
+bool arch_is_embedded_insn(struct symbol *sym)
+{
+ return !strcmp(sym->name, "retbleed_return_thunk") ||
+ !strcmp(sym->name, "srso_safe_ret");
+}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8936a05f0e5a..1384090530db 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -389,7 +389,7 @@ static int decode_instructions(struct objtool_file *file)
if (!strcmp(sec->name, ".noinstr.text") ||
!strcmp(sec->name, ".entry.text") ||
!strcmp(sec->name, ".cpuidle.text") ||
- !strncmp(sec->name, ".text.__x86.", 12))
+ !strncmp(sec->name, ".text..__x86.", 13))
sec->noinstr = true;
/*
@@ -455,7 +455,7 @@ static int decode_instructions(struct objtool_file *file)
return -1;
}
- if (func->return_thunk || func->alias != func)
+ if (func->embedded_insn || func->alias != func)
continue;
if (!find_insn(file, sec, func->offset)) {
@@ -1288,16 +1288,33 @@ static int add_ignore_alternatives(struct objtool_file *file)
return 0;
}
+/*
+ * Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol
+ * will be added to the .retpoline_sites section.
+ */
__weak bool arch_is_retpoline(struct symbol *sym)
{
return false;
}
+/*
+ * Symbols that replace INSN_RETURN, every (tail) call to such a symbol
+ * will be added to the .return_sites section.
+ */
__weak bool arch_is_rethunk(struct symbol *sym)
{
return false;
}
+/*
+ * Symbols that are embedded inside other instructions, because sometimes crazy
+ * code exists. These are mostly ignored for validation purposes.
+ */
+__weak bool arch_is_embedded_insn(struct symbol *sym)
+{
+ return false;
+}
+
static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
{
struct reloc *reloc;
@@ -1576,14 +1593,14 @@ static int add_jump_destinations(struct objtool_file *file)
struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
/*
- * This is a special case for zen_untrain_ret().
+ * This is a special case for retbleed_untrain_ret().
* It jumps to __x86_return_thunk(), but objtool
* can't find the thunk's starting RET
* instruction, because the RET is also in the
* middle of another instruction. Objtool only
* knows about the outer instruction.
*/
- if (sym && sym->return_thunk) {
+ if (sym && sym->embedded_insn) {
add_return_call(file, insn, false);
continue;
}
@@ -2502,6 +2519,9 @@ static int classify_symbols(struct objtool_file *file)
if (arch_is_rethunk(func))
func->return_thunk = true;
+ if (arch_is_embedded_insn(func))
+ func->embedded_insn = true;
+
if (arch_ftrace_match(func->name))
func->fentry = true;
@@ -2630,12 +2650,17 @@ static int decode_sections(struct objtool_file *file)
return 0;
}
-static bool is_fentry_call(struct instruction *insn)
+static bool is_special_call(struct instruction *insn)
{
- if (insn->type == INSN_CALL &&
- insn_call_dest(insn) &&
- insn_call_dest(insn)->fentry)
- return true;
+ if (insn->type == INSN_CALL) {
+ struct symbol *dest = insn_call_dest(insn);
+
+ if (!dest)
+ return false;
+
+ if (dest->fentry || dest->embedded_insn)
+ return true;
+ }
return false;
}
@@ -3636,7 +3661,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (ret)
return ret;
- if (opts.stackval && func && !is_fentry_call(insn) &&
+ if (opts.stackval && func && !is_special_call(insn) &&
!has_valid_stack_frame(&state)) {
WARN_INSN(insn, "call without frame pointer save/setup");
return 1;
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 2b6d2ce4f9a5..0b303eba660e 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -90,6 +90,7 @@ int arch_decode_hint_reg(u8 sp_reg, int *base);
bool arch_is_retpoline(struct symbol *sym);
bool arch_is_rethunk(struct symbol *sym);
+bool arch_is_embedded_insn(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index c532d70864dc..9f71e988eca4 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -66,6 +66,7 @@ struct symbol {
u8 fentry : 1;
u8 profiling_func : 1;
u8 warned : 1;
+ u8 embedded_insn : 1;
struct list_head pv_target;
struct reloc *relocs;
};
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 0609c19caabd..c5db0de49868 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -155,9 +155,9 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
ifdef CSINCLUDES
LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
endif
-OPENCSDLIBS := -lopencsd_c_api
+OPENCSDLIBS := -lopencsd_c_api -lopencsd
ifeq ($(findstring -static,${LDFLAGS}),-static)
- OPENCSDLIBS += -lopencsd -lstdc++
+ OPENCSDLIBS += -lstdc++
endif
ifdef CSLIBS
LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index 561de0cb6b95..512a8f13c4de 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -54,10 +54,11 @@ double perf_pmu__cpu_slots_per_cycle(void)
perf_pmu__pathname_scnprintf(path, sizeof(path),
pmu->name, "caps/slots");
/*
- * The value of slots is not greater than 32 bits, but sysfs__read_int
- * can't read value with 0x prefix, so use sysfs__read_ull instead.
+ * The value of slots is not greater than 32 bits, but
+ * filename__read_int can't read value with 0x prefix,
+ * so use filename__read_ull instead.
*/
- sysfs__read_ull(path, &slots);
+ filename__read_ull(path, &slots);
}
return slots ? (double)slots : NAN;
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 3f1886ad9d80..cfda2511badf 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -365,3 +365,4 @@
448 n64 process_mrelease sys_process_mrelease
449 n64 futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 n64 cachestat sys_cachestat
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index a0be127475b1..8c0b08b7a80e 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -537,3 +537,4 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
index b7223feec770..5f3edb3004d8 100644
--- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c
+++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c
@@ -250,6 +250,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
if (!chain || chain->nr < 3)
return skip_slot;
+ addr_location__init(&al);
ip = chain->ips[1];
thread__find_symbol(thread, PERF_RECORD_MISC_USER, ip, &al);
@@ -259,6 +260,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
if (!dso) {
pr_debug("%" PRIx64 " dso is NULL\n", ip);
+ addr_location__exit(&al);
return skip_slot;
}
@@ -279,5 +281,7 @@ int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain)
*/
skip_slot = 3;
}
+
+ addr_location__exit(&al);
return skip_slot;
}
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index b68f47541169..a6935af2235c 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -453,3 +453,4 @@
448 common process_mrelease sys_process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat sys_cachestat
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index c84d12608cd2..227538b0ce80 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -372,6 +372,7 @@
448 common process_mrelease sys_process_mrelease
449 common futex_waitv sys_futex_waitv
450 common set_mempolicy_home_node sys_set_mempolicy_home_node
+451 common cachestat sys_cachestat
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 0f158dc8139b..07bbc449329e 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,5 +1,6 @@
perf-y += sched-messaging.o
perf-y += sched-pipe.o
+perf-y += sched-seccomp-notify.o
perf-y += syscall.o
perf-y += mem-functions.o
perf-y += futex-hash.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 0d2b65976212..a0625c77bea3 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -21,6 +21,7 @@ extern struct timeval bench__start, bench__end, bench__runtime;
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
+int bench_sched_seccomp_notify(int argc, const char **argv);
int bench_syscall_basic(int argc, const char **argv);
int bench_syscall_getpgid(int argc, const char **argv);
int bench_syscall_fork(int argc, const char **argv);
diff --git a/tools/perf/bench/sched-seccomp-notify.c b/tools/perf/bench/sched-seccomp-notify.c
new file mode 100644
index 000000000000..b04ebcde4036
--- /dev/null
+++ b/tools/perf/bench/sched-seccomp-notify.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <subcmd/parse-options.h>
+#include "bench.h"
+
+#include <uapi/linux/filter.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <linux/unistd.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/time64.h>
+#include <linux/seccomp.h>
+#include <sys/prctl.h>
+
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/wait.h>
+#include <string.h>
+#include <errno.h>
+#include <err.h>
+#include <inttypes.h>
+
+#define LOOPS_DEFAULT 1000000UL
+static uint64_t loops = LOOPS_DEFAULT;
+static bool sync_mode;
+
+static const struct option options[] = {
+ OPT_U64('l', "loop", &loops, "Specify number of loops"),
+ OPT_BOOLEAN('s', "sync-mode", &sync_mode,
+ "Enable the synchronious mode for seccomp notifications"),
+ OPT_END()
+};
+
+static const char * const bench_seccomp_usage[] = {
+ "perf bench sched secccomp-notify <options>",
+ NULL
+};
+
+static int seccomp(unsigned int op, unsigned int flags, void *args)
+{
+ return syscall(__NR_seccomp, op, flags, args);
+}
+
+static int user_notif_syscall(int nr, unsigned int flags)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ return seccomp(SECCOMP_SET_MODE_FILTER, flags, &prog);
+}
+
+#define USER_NOTIF_MAGIC INT_MAX
+static void user_notification_sync_loop(int listener)
+{
+ struct seccomp_notif_resp resp;
+ struct seccomp_notif req;
+ uint64_t nr;
+
+ for (nr = 0; nr < loops; nr++) {
+ memset(&req, 0, sizeof(req));
+ if (ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req))
+ err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_RECV failed");
+
+ if (req.data.nr != __NR_gettid)
+ errx(EXIT_FAILURE, "unexpected syscall: %d", req.data.nr);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ resp.flags = 0;
+ if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp))
+ err(EXIT_FAILURE, "SECCOMP_IOCTL_NOTIF_SEND failed");
+ }
+}
+
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+#endif
+int bench_sched_seccomp_notify(int argc, const char **argv)
+{
+ struct timeval start, stop, diff;
+ unsigned long long result_usec = 0;
+ int status, listener;
+ pid_t pid;
+ long ret;
+
+ argc = parse_options(argc, argv, options, bench_seccomp_usage, 0);
+
+ gettimeofday(&start, NULL);
+
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ listener = user_notif_syscall(__NR_gettid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0)
+ err(EXIT_FAILURE, "can't create a notification descriptor");
+
+ pid = fork();
+ if (pid < 0)
+ err(EXIT_FAILURE, "fork");
+ if (pid == 0) {
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0))
+ err(EXIT_FAILURE, "can't set the parent death signal");
+ while (1) {
+ ret = syscall(__NR_gettid);
+ if (ret == USER_NOTIF_MAGIC)
+ continue;
+ break;
+ }
+ _exit(1);
+ }
+
+ if (sync_mode) {
+ if (ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+ SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0))
+ err(EXIT_FAILURE,
+ "can't set SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP");
+ }
+ user_notification_sync_loop(listener);
+
+ kill(pid, SIGKILL);
+ if (waitpid(pid, &status, 0) != pid)
+ err(EXIT_FAILURE, "waitpid(%d) failed", pid);
+ if (!WIFSIGNALED(status) || WTERMSIG(status) != SIGKILL)
+ errx(EXIT_FAILURE, "unexpected exit code: %d", status);
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Executed %" PRIu64 " system calls\n\n",
+ loops);
+
+ result_usec = diff.tv_sec * USEC_PER_SEC;
+ result_usec += diff.tv_usec;
+
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ (unsigned long) diff.tv_sec,
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+
+ printf(" %14lf usecs/op\n",
+ (double)result_usec / (double)loops);
+ printf(" %14d ops/sec\n",
+ (int)((double)loops /
+ ((double)result_usec / (double)USEC_PER_SEC)));
+ break;
+
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n",
+ (unsigned long) diff.tv_sec,
+ (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
+ break;
+
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index db435b791a09..5033e8bab276 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -47,6 +47,7 @@ static struct bench numa_benchmarks[] = {
static struct bench sched_benchmarks[] = {
{ "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging },
{ "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe },
+ { "seccomp-notify", "Benchmark for seccomp user notify", bench_sched_seccomp_notify},
{ "all", "Run all scheduler benchmarks", NULL },
{ NULL, NULL, NULL }
};
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
index bf5083c1c260..4d28177325a0 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/recommended.json
@@ -169,8 +169,9 @@
},
{
"MetricName": "nps1_die_to_dram",
- "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
"MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricGroup": "data_fabric",
"PerPkg": "1",
"ScaleUnit": "6.1e-5MiB"
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
index a71694a043ba..60e19456d4c8 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/recommended.json
@@ -169,8 +169,9 @@
},
{
"MetricName": "nps1_die_to_dram",
- "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
"MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricGroup": "data_fabric",
"PerPkg": "1",
"ScaleUnit": "6.1e-5MiB"
diff --git a/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json
index 988cf68ae825..3e9e1781812e 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen3/recommended.json
@@ -205,10 +205,11 @@
},
{
"MetricName": "nps1_die_to_dram",
- "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die) (may need --metric-no-group)",
+ "BriefDescription": "Approximate: Combined DRAM B/bytes of all channels on a NPS1 node (die)",
"MetricExpr": "dram_channel_data_controller_0 + dram_channel_data_controller_1 + dram_channel_data_controller_2 + dram_channel_data_controller_3 + dram_channel_data_controller_4 + dram_channel_data_controller_5 + dram_channel_data_controller_6 + dram_channel_data_controller_7",
"MetricGroup": "data_fabric",
"PerPkg": "1",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"ScaleUnit": "6.1e-5MiB"
}
]
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index b2f82847e4c3..658fb9599d95 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1631,6 +1631,16 @@ static bool test__pmu_cpu_valid(void)
return !!perf_pmus__find("cpu");
}
+static bool test__pmu_cpu_event_valid(void)
+{
+ struct perf_pmu *pmu = perf_pmus__find("cpu");
+
+ if (!pmu)
+ return false;
+
+ return perf_pmu__has_format(pmu, "event");
+}
+
static bool test__intel_pt_valid(void)
{
return !!perf_pmus__find("intel_pt");
@@ -2179,7 +2189,7 @@ static const struct evlist_test test__events_pmu[] = {
},
{
.name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
- .valid = test__pmu_cpu_valid,
+ .valid = test__pmu_cpu_event_valid,
.check = test__checkevent_complex_name,
/* 3 */
},
diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
new file mode 100755
index 000000000000..319f36ebb9a4
--- /dev/null
+++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# test perf probe of function from different CU
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+# skip if there's no gcc
+if ! [ -x "$(command -v gcc)" ]; then
+ echo "failed: no gcc compiler"
+ exit 2
+fi
+
+temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)
+
+cleanup()
+{
+ trap - EXIT TERM INT
+ if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then
+ echo "--- Cleaning up ---"
+ perf probe -x ${temp_dir}/testfile -d foo || true
+ rm -f "${temp_dir}/"*
+ rmdir "${temp_dir}"
+ fi
+}
+
+trap_cleanup()
+{
+ cleanup
+ exit 1
+}
+
+trap trap_cleanup EXIT TERM INT
+
+cat > ${temp_dir}/testfile-foo.h << EOF
+struct t
+{
+ int *p;
+ int c;
+};
+
+extern int foo (int i, struct t *t);
+EOF
+
+cat > ${temp_dir}/testfile-foo.c << EOF
+#include "testfile-foo.h"
+
+int
+foo (int i, struct t *t)
+{
+ int j, res = 0;
+ for (j = 0; j < i && j < t->c; j++)
+ res += t->p[j];
+
+ return res;
+}
+EOF
+
+cat > ${temp_dir}/testfile-main.c << EOF
+#include "testfile-foo.h"
+
+static struct t g;
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ int j[argc];
+ g.c = argc;
+ g.p = j;
+ for (i = 0; i < argc; i++)
+ j[i] = (int) argv[i][0];
+ return foo (3, &g);
+}
+EOF
+
+gcc -g -Og -flto -c ${temp_dir}/testfile-foo.c -o ${temp_dir}/testfile-foo.o
+gcc -g -Og -c ${temp_dir}/testfile-main.c -o ${temp_dir}/testfile-main.o
+gcc -g -Og -o ${temp_dir}/testfile ${temp_dir}/testfile-foo.o ${temp_dir}/testfile-main.o
+
+perf probe -x ${temp_dir}/testfile --funcs foo
+perf probe -x ${temp_dir}/testfile foo
+
+cleanup
diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c
index 25f075fa9125..968dddde6dda 100644
--- a/tools/perf/tests/task-exit.c
+++ b/tools/perf/tests/task-exit.c
@@ -58,9 +58,9 @@ static int test__task_exit(struct test_suite *test __maybe_unused, int subtest _
signal(SIGCHLD, sig_handler);
- evlist = evlist__new_default();
+ evlist = evlist__new_dummy();
if (evlist == NULL) {
- pr_debug("evlist__new_default\n");
+ pr_debug("evlist__new_dummy\n");
return -1;
}
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 3bef212a24d7..39b74d83c7c4 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -177,6 +177,7 @@ static inline size_t msg_data_left(struct msghdr *msg)
#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
#define SCM_SECURITY 0x03 /* rw: security label */
+#define SCM_PIDFD 0x04 /* ro: pidfd (int) */
struct ucred {
__u32 pid;
@@ -326,6 +327,7 @@ struct ucred {
*/
#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
+#define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */
#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file
descriptor received through
@@ -336,6 +338,9 @@ struct ucred {
#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */
#endif
+/* Flags to be cleared on entry by sendmsg and sendmmsg syscalls */
+#define MSG_INTERNAL_SENDMSG_FLAGS \
+ (MSG_SPLICE_PAGES | MSG_SENDPAGE_NOPOLICY | MSG_SENDPAGE_DECRYPTED)
/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
#define SOL_IP 0
diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh
index 32e552faf37a..ce5e632d1448 100755
--- a/tools/perf/trace/beauty/move_mount_flags.sh
+++ b/tools/perf/trace/beauty/move_mount_flags.sh
@@ -10,7 +10,7 @@ fi
linux_mount=${linux_header_dir}/mount.h
printf "static const char *move_mount_flags[] = {\n"
-regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
+regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
grep -E $regex ${linux_mount} | \
sed -r "s/$regex/\2 \1/g" | \
xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c
index aa9934020232..ed3ff969b546 100644
--- a/tools/perf/trace/beauty/msg_flags.c
+++ b/tools/perf/trace/beauty/msg_flags.c
@@ -8,6 +8,12 @@
#ifndef MSG_WAITFORONE
#define MSG_WAITFORONE 0x10000
#endif
+#ifndef MSG_BATCH
+#define MSG_BATCH 0x40000
+#endif
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY 0x4000000
+#endif
#ifndef MSG_SPLICE_PAGES
#define MSG_SPLICE_PAGES 0x8000000
#endif
@@ -50,6 +56,8 @@ static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size,
P_MSG_FLAG(NOSIGNAL);
P_MSG_FLAG(MORE);
P_MSG_FLAG(WAITFORONE);
+ P_MSG_FLAG(BATCH);
+ P_MSG_FLAG(ZEROCOPY);
P_MSG_FLAG(SPLICE_PAGES);
P_MSG_FLAG(FASTOPEN);
P_MSG_FLAG(CMSG_CLOEXEC);
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 45e018c0ebf5..2941d88f2199 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -478,8 +478,10 @@ static const char *die_get_file_name(Dwarf_Die *dw_die, int idx)
{
Dwarf_Die cu_die;
Dwarf_Files *files;
+ Dwarf_Attribute attr_mem;
- if (idx < 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) ||
+ if (idx < 0 || !dwarf_attr_integrate(dw_die, DW_AT_decl_file, &attr_mem) ||
+ !dwarf_cu_die(attr_mem.cu, &cu_die, NULL, NULL, NULL, NULL, NULL, NULL) ||
dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
return NULL;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 4e62843d51b7..f4cb41ee23cd 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -45,7 +45,6 @@
static void __machine__remove_thread(struct machine *machine, struct thread_rb_node *nd,
struct thread *th, bool lock);
-static int append_inlines(struct callchain_cursor *cursor, struct map_symbol *ms, u64 ip);
static struct dso *machine__kernel_dso(struct machine *machine)
{
@@ -2385,10 +2384,6 @@ static int add_callchain_ip(struct thread *thread,
ms.maps = maps__get(al.maps);
ms.map = map__get(al.map);
ms.sym = al.sym;
-
- if (!branch && append_inlines(cursor, &ms, ip) == 0)
- goto out;
-
srcline = callchain_srcline(&ms, al.addr);
err = callchain_cursor_append(cursor, ip, &ms,
branch, flags, nr_loop_iter,
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5dcfbf316bf6..c9ec0cafb69d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1216,6 +1216,14 @@ static int config_term_pmu(struct perf_event_attr *attr,
if (term->type_term == PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE) {
const struct perf_pmu *pmu = perf_pmus__find_by_type(attr->type);
+ if (!pmu) {
+ char *err_str;
+
+ if (asprintf(&err_str, "Failed to find PMU for type %d", attr->type) >= 0)
+ parse_events_error__handle(err, term->err_term,
+ err_str, /*help=*/NULL);
+ return -EINVAL;
+ }
if (perf_pmu__supports_legacy_cache(pmu)) {
attr->type = PERF_TYPE_HW_CACHE;
return parse_events__decode_legacy_cache(term->config, pmu->type,
@@ -2092,16 +2100,16 @@ __weak int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs)
return lhs->core.idx - rhs->core.idx;
}
-static int evlist__cmp(void *state, const struct list_head *l, const struct list_head *r)
+static int evlist__cmp(void *_fg_idx, const struct list_head *l, const struct list_head *r)
{
const struct perf_evsel *lhs_core = container_of(l, struct perf_evsel, node);
const struct evsel *lhs = container_of(lhs_core, struct evsel, core);
const struct perf_evsel *rhs_core = container_of(r, struct perf_evsel, node);
const struct evsel *rhs = container_of(rhs_core, struct evsel, core);
- int *leader_idx = state;
- int lhs_leader_idx = *leader_idx, rhs_leader_idx = *leader_idx, ret;
+ int *force_grouped_idx = _fg_idx;
+ int lhs_sort_idx, rhs_sort_idx, ret;
const char *lhs_pmu_name, *rhs_pmu_name;
- bool lhs_has_group = false, rhs_has_group = false;
+ bool lhs_has_group, rhs_has_group;
/*
* First sort by grouping/leader. Read the leader idx only if the evsel
@@ -2113,15 +2121,25 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
*/
if (lhs_core->leader != lhs_core || lhs_core->nr_members > 1) {
lhs_has_group = true;
- lhs_leader_idx = lhs_core->leader->idx;
+ lhs_sort_idx = lhs_core->leader->idx;
+ } else {
+ lhs_has_group = false;
+ lhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(lhs)
+ ? *force_grouped_idx
+ : lhs_core->idx;
}
if (rhs_core->leader != rhs_core || rhs_core->nr_members > 1) {
rhs_has_group = true;
- rhs_leader_idx = rhs_core->leader->idx;
+ rhs_sort_idx = rhs_core->leader->idx;
+ } else {
+ rhs_has_group = false;
+ rhs_sort_idx = *force_grouped_idx != -1 && arch_evsel__must_be_in_group(rhs)
+ ? *force_grouped_idx
+ : rhs_core->idx;
}
- if (lhs_leader_idx != rhs_leader_idx)
- return lhs_leader_idx - rhs_leader_idx;
+ if (lhs_sort_idx != rhs_sort_idx)
+ return lhs_sort_idx - rhs_sort_idx;
/* Group by PMU if there is a group. Groups can't span PMUs. */
if (lhs_has_group && rhs_has_group) {
@@ -2138,10 +2156,10 @@ static int evlist__cmp(void *state, const struct list_head *l, const struct list
static int parse_events__sort_events_and_fix_groups(struct list_head *list)
{
- int idx = 0, unsorted_idx = -1;
+ int idx = 0, force_grouped_idx = -1;
struct evsel *pos, *cur_leader = NULL;
struct perf_evsel *cur_leaders_grp = NULL;
- bool idx_changed = false;
+ bool idx_changed = false, cur_leader_force_grouped = false;
int orig_num_leaders = 0, num_leaders = 0;
int ret;
@@ -2166,12 +2184,14 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
*/
pos->core.idx = idx++;
- if (unsorted_idx == -1 && pos == pos_leader && pos->core.nr_members < 2)
- unsorted_idx = pos->core.idx;
+ /* Remember an index to sort all forced grouped events together to. */
+ if (force_grouped_idx == -1 && pos == pos_leader && pos->core.nr_members < 2 &&
+ arch_evsel__must_be_in_group(pos))
+ force_grouped_idx = pos->core.idx;
}
/* Sort events. */
- list_sort(&unsorted_idx, list, evlist__cmp);
+ list_sort(&force_grouped_idx, list, evlist__cmp);
/*
* Recompute groups, splitting for PMUs and adding groups for events
@@ -2181,8 +2201,9 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
list_for_each_entry(pos, list, core.node) {
const struct evsel *pos_leader = evsel__leader(pos);
const char *pos_pmu_name = pos->group_pmu_name;
- const char *cur_leader_pmu_name, *pos_leader_pmu_name;
- bool force_grouped = arch_evsel__must_be_in_group(pos);
+ const char *cur_leader_pmu_name;
+ bool pos_force_grouped = force_grouped_idx != -1 &&
+ arch_evsel__must_be_in_group(pos);
/* Reset index and nr_members. */
if (pos->core.idx != idx)
@@ -2198,7 +2219,8 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
cur_leader = pos;
cur_leader_pmu_name = cur_leader->group_pmu_name;
- if ((cur_leaders_grp != pos->core.leader && !force_grouped) ||
+ if ((cur_leaders_grp != pos->core.leader &&
+ (!pos_force_grouped || !cur_leader_force_grouped)) ||
strcmp(cur_leader_pmu_name, pos_pmu_name)) {
/* Event is for a different group/PMU than last. */
cur_leader = pos;
@@ -2208,14 +2230,14 @@ static int parse_events__sort_events_and_fix_groups(struct list_head *list)
* group.
*/
cur_leaders_grp = pos->core.leader;
- }
- pos_leader_pmu_name = pos_leader->group_pmu_name;
- if (strcmp(pos_leader_pmu_name, pos_pmu_name) || force_grouped) {
/*
- * Event's PMU differs from its leader's. Groups can't
- * span PMUs, so update leader from the group/PMU
- * tracker.
+ * Avoid forcing events into groups with events that
+ * don't need to be in the group.
*/
+ cur_leader_force_grouped = pos_force_grouped;
+ }
+ if (pos_leader != cur_leader) {
+ /* The leader changed so update it. */
evsel__set_leader(pos, cur_leader);
}
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 7f984a7f16ca..28380e7aa8d0 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1440,6 +1440,17 @@ void perf_pmu__del_formats(struct list_head *formats)
}
}
+bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name)
+{
+ struct perf_pmu_format *format;
+
+ list_for_each_entry(format, &pmu->format, list) {
+ if (!strcmp(format->name, name))
+ return true;
+ }
+ return false;
+}
+
bool is_pmu_core(const char *name)
{
return !strcmp(name, "cpu") || !strcmp(name, "cpum_cf") || is_sysfs_pmu_core(name);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 203b92860e3c..6b414cecbad2 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -234,6 +234,7 @@ int perf_pmu__new_format(struct list_head *list, char *name,
void perf_pmu__set_format(unsigned long *bits, long from, long to);
int perf_pmu__format_parse(int dirfd, struct list_head *head);
void perf_pmu__del_formats(struct list_head *formats);
+bool perf_pmu__has_format(const struct perf_pmu *pmu, const char *name);
bool is_pmu_core(const char *name);
bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu);
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
index 3cd9de42139e..c58ba9fb6a36 100644
--- a/tools/perf/util/pmus.c
+++ b/tools/perf/util/pmus.c
@@ -152,16 +152,14 @@ static void pmu_read_sysfs(bool core_only)
}
closedir(dir);
- if (core_only) {
- if (!list_empty(&core_pmus))
- read_sysfs_core_pmus = true;
- else {
- if (perf_pmu__create_placeholder_core_pmu(&core_pmus))
- read_sysfs_core_pmus = true;
- }
- } else {
+ if (list_empty(&core_pmus)) {
+ if (!perf_pmu__create_placeholder_core_pmu(&core_pmus))
+ pr_err("Failure to set up any core PMUs\n");
+ }
+ if (!list_empty(&core_pmus)) {
read_sysfs_core_pmus = true;
- read_sysfs_all_pmus = true;
+ if (!core_only)
+ read_sysfs_all_pmus = true;
}
}
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 7329b3340f88..d45d5dcb0e2b 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -931,6 +931,11 @@ static bool should_skip_zero_counter(struct perf_stat_config *config,
*/
if (config->aggr_mode == AGGR_THREAD && config->system_wide)
return true;
+
+ /* Tool events have the software PMU but are only gathered on 1. */
+ if (evsel__is_tool(counter))
+ return true;
+
/*
* Skip value 0 when it's an uncore event and the given aggr id
* does not belong to the PMU cpumask.
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 374d142e7390..c6a0a27b12c2 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -1038,9 +1038,7 @@ static int thread_stack__trace_end(struct thread_stack *ts,
static bool is_x86_retpoline(const char *name)
{
- const char *p = strstr(name, "__x86_indirect_thunk_");
-
- return p == name || !strcmp(name, "__indirect_thunk_start");
+ return strstr(name, "__x86_indirect_thunk_") == name;
}
/*
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 0e78d8e19895..fb6ab9cef84f 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -999,10 +999,6 @@ static void mock_companion(struct acpi_device *adev, struct device *dev)
#define SZ_64G (SZ_32G * 2)
#endif
-#ifndef SZ_512G
-#define SZ_512G (SZ_64G * 8)
-#endif
-
static __init int cxl_rch_init(void)
{
int rc, i;
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 03539d86cdf0..75ea2081a317 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -206,9 +206,9 @@ static noinline void __init check_new_node(struct maple_tree *mt)
e = i - 1;
} else {
if (i >= 4)
- e = i - 4;
- else if (i == 3)
- e = i - 2;
+ e = i - 3;
+ else if (i >= 1)
+ e = i - 1;
else
e = 0;
}
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
index a61c7bcbc72d..63f468bf8245 100644
--- a/tools/testing/radix-tree/regression1.c
+++ b/tools/testing/radix-tree/regression1.c
@@ -177,7 +177,7 @@ void regression1_test(void)
nr_threads = 2;
pthread_barrier_init(&worker_barrier, NULL, nr_threads);
- threads = malloc(nr_threads * sizeof(pthread_t *));
+ threads = malloc(nr_threads * sizeof(*threads));
for (i = 0; i < nr_threads; i++) {
arg = i;
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 666b56f22a41..8dca8acdb671 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -18,6 +18,7 @@ TARGETS += drivers/net/bonding
TARGETS += drivers/net/team
TARGETS += efivarfs
TARGETS += exec
+TARGETS += fchmodat2
TARGETS += filesystems
TARGETS += filesystems/binderfs
TARGETS += filesystems/epoll
diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore
index 2b0d11797f25..12dc3fcd3456 100644
--- a/tools/testing/selftests/alsa/.gitignore
+++ b/tools/testing/selftests/alsa/.gitignore
@@ -1,2 +1,3 @@
mixer-test
pcm-test
+test-pcmtest-driver
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
index 71931b240a83..357adc722cba 100644
--- a/tools/testing/selftests/alsa/test-pcmtest-driver.c
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -47,10 +47,8 @@ static int read_patterns(void)
sprintf(pf, "/sys/kernel/debug/pcmtest/fill_pattern%d", i);
fp = fopen(pf, "r");
- if (!fp) {
- fclose(fpl);
+ if (!fp)
return -1;
- }
fread(patterns[i].buf, 1, patterns[i].len, fp);
fclose(fp);
}
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 9460cbe81bcc..ace8b67fb22d 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -42,7 +42,7 @@ run_tests: all
done
# Avoid any output on non arm64 on emit_tests
-emit_tests: all
+emit_tests:
@for DIR in $(ARM64_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index b4f6f3a50ae5..5674a9d0cacf 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -869,6 +869,77 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void redir_partial(int family, int sotype, int sock_map, int parser_map)
+{
+ int s, c0, c1, p0, p1;
+ int err, n, key, value;
+ char buf[] = "abc";
+
+ key = 0;
+ value = sizeof(buf) - 1;
+ err = xbpf_map_update_elem(parser_map, &key, &value, 0);
+ if (err)
+ return;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ goto clean_parser_map;
+
+ err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+ if (err)
+ goto close_srv;
+
+ err = add_to_sockmap(sock_map, p0, p1);
+ if (err)
+ goto close;
+
+ n = xsend(c1, buf, sizeof(buf), 0);
+ if (n < sizeof(buf))
+ FAIL("incomplete write");
+
+ n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
+ if (n != sizeof(buf) - 1)
+ FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
+
+close:
+ xclose(c0);
+ xclose(p0);
+ xclose(c1);
+ xclose(p1);
+close_srv:
+ xclose(s);
+
+clean_parser_map:
+ key = 0;
+ value = 0;
+ xbpf_map_update_elem(parser_map, &key, &value, 0);
+}
+
+static void test_skb_redir_partial(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
+ int parser_map = bpf_map__fd(skel->maps.parser_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_partial(family, sotype, sock_map, parser_map);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
static void test_reuseport_select_listening(int family, int sotype,
int sock_map, int verd_map,
int reuseport_prog)
@@ -1243,6 +1314,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
} tests[] = {
TEST(test_skb_redir_to_connected),
TEST(test_skb_redir_to_listening),
+ TEST(test_skb_redir_partial),
TEST(test_msg_redir_to_connected),
TEST(test_msg_redir_to_listening),
};
@@ -1432,7 +1504,7 @@ static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
if (n < 1)
goto out;
- n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT);
+ n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
if (n < 0)
FAIL("%s: recv() err, errno=%d", log_prefix, errno);
if (n == 0)
diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c
index 477ba950bb43..3517c0e01206 100644
--- a/tools/testing/selftests/bpf/progs/async_stack_depth.c
+++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c
@@ -22,9 +22,16 @@ static int timer_cb(void *map, int *key, struct bpf_timer *timer)
return buf[69];
}
+__attribute__((noinline))
+static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ volatile char buf[300] = {};
+ return buf[255] + timer_cb(NULL, NULL, NULL);
+}
+
SEC("tc")
-__failure __msg("combined stack size of 2 calls")
-int prog(struct __sk_buff *ctx)
+__failure __msg("combined stack size of 2 calls is 576. Too large")
+int pseudo_call_check(struct __sk_buff *ctx)
{
struct hmap_elem *elem;
volatile char buf[256] = {};
@@ -37,4 +44,18 @@ int prog(struct __sk_buff *ctx)
return bpf_timer_set_callback(&elem->timer, timer_cb) + buf[0];
}
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is 608. Too large")
+int async_call_root_check(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+ volatile char buf[256] = {};
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+
+ return bpf_timer_set_callback(&elem->timer, bad_timer_cb) + buf[0];
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index 325c9f193432..464d35bd57c7 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -28,12 +28,26 @@ struct {
__type(value, unsigned int);
} verdict_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} parser_map SEC(".maps");
+
bool test_sockmap = false; /* toggled by user-space */
bool test_ingress = false; /* toggled by user-space */
SEC("sk_skb/stream_parser")
int prog_stream_parser(struct __sk_buff *skb)
{
+ int *value;
+ __u32 key = 0;
+
+ value = bpf_map_lookup_elem(&parser_map, &key);
+ if (value && *value)
+ return *value;
+
return skb->len;
}
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index 54d09b820ed4..c037553cfd92 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -4,10 +4,12 @@
#include <stdio.h>
#include <stdbool.h>
#include <linux/kernel.h>
+#include <linux/magic.h>
#include <linux/mman.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <sys/syscall.h>
+#include <sys/vfs.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
@@ -15,6 +17,8 @@
#include "../kselftest.h"
+#define NR_TESTS 9
+
static const char * const dev_files[] = {
"/dev/zero", "/dev/null", "/dev/urandom",
"/proc/version", "/proc"
@@ -91,19 +95,33 @@ out:
}
/*
+ * fsync() is implemented via noop_fsync() on tmpfs. This makes the fsync()
+ * test fail below, so we need to check for test file living on a tmpfs.
+ */
+static bool is_on_tmpfs(int fd)
+{
+ struct statfs statfs_buf;
+
+ if (fstatfs(fd, &statfs_buf))
+ return false;
+
+ return statfs_buf.f_type == TMPFS_MAGIC;
+}
+
+/*
* Open/create the file at filename, (optionally) write random data to it
* (exactly num_pages), then test the cachestat syscall on this file.
*
* If test_fsync == true, fsync the file, then check the number of dirty
* pages.
*/
-bool test_cachestat(const char *filename, bool write_random, bool create,
- bool test_fsync, unsigned long num_pages, int open_flags,
- mode_t open_mode)
+static int test_cachestat(const char *filename, bool write_random, bool create,
+ bool test_fsync, unsigned long num_pages,
+ int open_flags, mode_t open_mode)
{
size_t PS = sysconf(_SC_PAGESIZE);
int filesize = num_pages * PS;
- bool ret = true;
+ int ret = KSFT_PASS;
long syscall_ret;
struct cachestat cs;
struct cachestat_range cs_range = { 0, filesize };
@@ -112,7 +130,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
if (fd == -1) {
ksft_print_msg("Unable to create/open file.\n");
- ret = false;
+ ret = KSFT_FAIL;
goto out;
} else {
ksft_print_msg("Create/open %s\n", filename);
@@ -121,7 +139,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
if (write_random) {
if (!write_exactly(fd, filesize)) {
ksft_print_msg("Unable to access urandom.\n");
- ret = false;
+ ret = KSFT_FAIL;
goto out1;
}
}
@@ -132,7 +150,7 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
if (syscall_ret) {
ksft_print_msg("Cachestat returned non-zero.\n");
- ret = false;
+ ret = KSFT_FAIL;
goto out1;
} else {
@@ -142,15 +160,17 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
if (cs.nr_cache + cs.nr_evicted != num_pages) {
ksft_print_msg(
"Total number of cached and evicted pages is off.\n");
- ret = false;
+ ret = KSFT_FAIL;
}
}
}
if (test_fsync) {
- if (fsync(fd)) {
+ if (is_on_tmpfs(fd)) {
+ ret = KSFT_SKIP;
+ } else if (fsync(fd)) {
ksft_print_msg("fsync fails.\n");
- ret = false;
+ ret = KSFT_FAIL;
} else {
syscall_ret = syscall(cachestat_nr, fd, &cs_range, &cs, 0);
@@ -161,13 +181,13 @@ bool test_cachestat(const char *filename, bool write_random, bool create,
print_cachestat(&cs);
if (cs.nr_dirty) {
- ret = false;
+ ret = KSFT_FAIL;
ksft_print_msg(
"Number of dirty should be zero after fsync.\n");
}
} else {
ksft_print_msg("Cachestat (after fsync) returned non-zero.\n");
- ret = false;
+ ret = KSFT_FAIL;
goto out1;
}
}
@@ -236,13 +256,29 @@ out:
int main(void)
{
- int ret = 0;
+ int ret;
+
+ ksft_print_header();
+
+ ret = syscall(__NR_cachestat, -1, NULL, NULL, 0);
+ if (ret == -1 && errno == ENOSYS)
+ ksft_exit_skip("cachestat syscall not available\n");
+
+ ksft_set_plan(NR_TESTS);
+
+ if (ret == -1 && errno == EBADF) {
+ ksft_test_result_pass("bad file descriptor recognized\n");
+ ret = 0;
+ } else {
+ ksft_test_result_fail("bad file descriptor ignored\n");
+ ret = 1;
+ }
for (int i = 0; i < 5; i++) {
const char *dev_filename = dev_files[i];
if (test_cachestat(dev_filename, false, false, false,
- 4, O_RDONLY, 0400))
+ 4, O_RDONLY, 0400) == KSFT_PASS)
ksft_test_result_pass("cachestat works with %s\n", dev_filename);
else {
ksft_test_result_fail("cachestat fails with %s\n", dev_filename);
@@ -251,13 +287,27 @@ int main(void)
}
if (test_cachestat("tmpfilecachestat", true, true,
- true, 4, O_CREAT | O_RDWR, 0400 | 0600))
+ false, 4, O_CREAT | O_RDWR, 0600) == KSFT_PASS)
ksft_test_result_pass("cachestat works with a normal file\n");
else {
ksft_test_result_fail("cachestat fails with normal file\n");
ret = 1;
}
+ switch (test_cachestat("tmpfilecachestat", true, true,
+ true, 4, O_CREAT | O_RDWR, 0600)) {
+ case KSFT_FAIL:
+ ksft_test_result_fail("cachestat fsync fails with normal file\n");
+ ret = KSFT_FAIL;
+ break;
+ case KSFT_PASS:
+ ksft_test_result_pass("cachestat fsync works with a normal file\n");
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("tmpfilecachestat is on tmpfs\n");
+ break;
+ }
+
if (test_cachestat_shmem())
ksft_test_result_pass("cachestat works with a shmem file\n");
else {
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index 258ddc565deb..ed2e50bb1e76 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -70,12 +70,16 @@ static int test_kmem_basic(const char *root)
goto cleanup;
cg_write(cg, "memory.high", "1M");
+
+ /* wait for RCU freeing */
+ sleep(1);
+
slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
- if (slab1 <= 0)
+ if (slab1 < 0)
goto cleanup;
current = cg_read_long(cg, "memory.current");
- if (current <= 0)
+ if (current < 0)
goto cleanup;
if (slab1 < slab0 / 2 && current < slab0 / 2)
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 03f92d7aeb19..8a72bb7de70f 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -9,10 +9,12 @@ TEST_PROGS := \
mode-1-recovery-updelay.sh \
mode-2-recovery-updelay.sh \
bond_options.sh \
- bond-eth-type-change.sh
+ bond-eth-type-change.sh \
+ bond_macvlan.sh
TEST_FILES := \
lag_lib.sh \
+ bond_topo_2d1c.sh \
bond_topo_3d1c.sh \
net_forwarding_lib.sh
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
index 47ab90596acb..6358df5752f9 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -57,8 +57,8 @@ ip link add name veth2-bond type veth peer name veth2-end
# add ports
ip link set fbond master fab-br0
-ip link set veth1-bond down master fbond
-ip link set veth2-bond down master fbond
+ip link set veth1-bond master fbond
+ip link set veth2-bond master fbond
# bring up
ip link set veth1-end up
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
new file mode 100755
index 000000000000..b609fb6231f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan over balance-alb
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+m1_ns="m1-$(mktemp -u XXXXXX)"
+m2_ns="m1-$(mktemp -u XXXXXX)"
+m1_ip4="192.0.2.11"
+m1_ip6="2001:db8::11"
+m2_ip4="192.0.2.12"
+m2_ip6="2001:db8::12"
+
+cleanup()
+{
+ ip -n ${m1_ns} link del macv0
+ ip netns del ${m1_ns}
+ ip -n ${m2_ns} link del macv0
+ ip netns del ${m2_ns}
+
+ client_destroy
+ server_destroy
+ gateway_destroy
+}
+
+check_connection()
+{
+ local ns=${1}
+ local target=${2}
+ local message=${3:-"macvlan_over_bond"}
+ RET=0
+
+
+ ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+ check_err $? "ping failed"
+ log_test "$mode: $message"
+}
+
+macvlan_over_bond()
+{
+ local param="$1"
+ RET=0
+
+ # setup new bond mode
+ bond_reset "${param}"
+
+ ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+ ip -n ${s_ns} link set macv0 netns ${m1_ns}
+ ip -n ${m1_ns} link set dev macv0 up
+ ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0
+ ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0
+
+ ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+ ip -n ${s_ns} link set macv0 netns ${m2_ns}
+ ip -n ${m2_ns} link set dev macv0 up
+ ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0
+ ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0
+
+ sleep 2
+
+ check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+ check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+ check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1"
+ check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1"
+ check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2"
+ check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2"
+ check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2"
+ check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2"
+
+
+ sleep 5
+
+ check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+ check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+ check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client"
+ check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client"
+ check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client"
+ check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client"
+ check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2"
+ check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2"
+
+ ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${m1_ns}
+ip netns add ${m2_ns}
+
+modes="active-backup balance-tlb balance-alb"
+
+for mode in $modes; do
+ macvlan_over_bond "mode $mode"
+done
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
index 607ba5c38977..c54d1697f439 100755
--- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -9,10 +9,7 @@ ALL_TESTS="
num_grat_arp
"
-REQUIRE_MZ=no
-NUM_NETIFS=0
lib_dir=$(dirname "$0")
-source ${lib_dir}/net_forwarding_lib.sh
source ${lib_dir}/bond_topo_3d1c.sh
skip_prio()
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
new file mode 100644
index 000000000000..a509ef949dcf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+# +-------------------------+
+# | bond0 | Server
+# | + | 192.0.2.1/24
+# | eth0 | eth1 | 2001:db8::1/24
+# | +---+---+ |
+# | | | |
+# +-------------------------+
+# | |
+# +-------------------------+
+# | | | |
+# | +---+-------+---+ | Gateway
+# | | br0 | | 192.0.2.254/24
+# | +-------+-------+ | 2001:db8::254/24
+# | | |
+# +-------------------------+
+# |
+# +-------------------------+
+# | | | Client
+# | + | 192.0.2.10/24
+# | eth0 | 2001:db8::10/24
+# +-------------------------+
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source ${lib_dir}/net_forwarding_lib.sh
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+
+gateway_create()
+{
+ ip netns add ${g_ns}
+ ip -n ${g_ns} link add br0 type bridge
+ ip -n ${g_ns} link set br0 up
+ ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+ ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+ ip -n ${g_ns} link del br0
+ ip netns del ${g_ns}
+}
+
+server_create()
+{
+ ip netns add ${s_ns}
+ ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+ for i in $(seq 0 1); do
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+
+ tc -n ${g_ns} qdisc add dev s${i} clsact
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+ sleep 2
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+ # Count the eth link number in real-time as this function
+ # maybe called from other topologies.
+ local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+ local param="$1"
+ link_num=$((link_num -1))
+
+ ip -n ${s_ns} link set bond0 down
+ ip -n ${s_ns} link del bond0
+
+ ip -n ${s_ns} link add bond0 type bond $param
+ for i in $(seq 0 ${link_num}); do
+ ip -n ${s_ns} link set eth$i master bond0
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+ sleep 2
+}
+
+server_destroy()
+{
+ # Count the eth link number in real-time as this function
+ # maybe called from other topologies.
+ local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+ link_num=$((link_num -1))
+ for i in $(seq 0 ${link_num}); do
+ ip -n ${s_ns} link del eth${i}
+ done
+ ip netns del ${s_ns}
+}
+
+client_create()
+{
+ ip netns add ${c_ns}
+ ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+ ip -n ${g_ns} link set c0 up
+ ip -n ${g_ns} link set c0 master br0
+
+ ip -n ${c_ns} link set eth0 up
+ ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+ ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+ ip -n ${c_ns} link del eth0
+ ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+ gateway_create
+ server_create
+ client_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ client_destroy
+ server_destroy
+ gateway_destroy
+}
+
+bond_check_connection()
+{
+ local msg=${1:-"check connection"}
+
+ sleep 2
+ ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping failed"
+ ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping6 failed"
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
index 69ab99a56043..3a1333d9a85b 100644
--- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -25,121 +25,19 @@
# | eth0 | 2001:db8::10/24
# +-------------------------------------+
-s_ns="s-$(mktemp -u XXXXXX)"
-c_ns="c-$(mktemp -u XXXXXX)"
-g_ns="g-$(mktemp -u XXXXXX)"
-s_ip4="192.0.2.1"
-c_ip4="192.0.2.10"
-g_ip4="192.0.2.254"
-s_ip6="2001:db8::1"
-c_ip6="2001:db8::10"
-g_ip6="2001:db8::254"
-
-gateway_create()
-{
- ip netns add ${g_ns}
- ip -n ${g_ns} link add br0 type bridge
- ip -n ${g_ns} link set br0 up
- ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
- ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
-}
-
-gateway_destroy()
-{
- ip -n ${g_ns} link del br0
- ip netns del ${g_ns}
-}
-
-server_create()
-{
- ip netns add ${s_ns}
- ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
-
- for i in $(seq 0 2); do
- ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
-
- ip -n ${g_ns} link set s${i} up
- ip -n ${g_ns} link set s${i} master br0
- ip -n ${s_ns} link set eth${i} master bond0
-
- tc -n ${g_ns} qdisc add dev s${i} clsact
- done
-
- ip -n ${s_ns} link set bond0 up
- ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
- ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
- sleep 2
-}
-
-# Reset bond with new mode and options
-bond_reset()
-{
- local param="$1"
-
- ip -n ${s_ns} link set bond0 down
- ip -n ${s_ns} link del bond0
-
- ip -n ${s_ns} link add bond0 type bond $param
- for i in $(seq 0 2); do
- ip -n ${s_ns} link set eth$i master bond0
- done
-
- ip -n ${s_ns} link set bond0 up
- ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
- ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
- sleep 2
-}
-
-server_destroy()
-{
- for i in $(seq 0 2); do
- ip -n ${s_ns} link del eth${i}
- done
- ip netns del ${s_ns}
-}
-
-client_create()
-{
- ip netns add ${c_ns}
- ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
-
- ip -n ${g_ns} link set c0 up
- ip -n ${g_ns} link set c0 master br0
-
- ip -n ${c_ns} link set eth0 up
- ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
- ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
-}
-
-client_destroy()
-{
- ip -n ${c_ns} link del eth0
- ip netns del ${c_ns}
-}
+source bond_topo_2d1c.sh
setup_prepare()
{
gateway_create
server_create
client_create
-}
-
-cleanup()
-{
- pre_cleanup
-
- client_destroy
- server_destroy
- gateway_destroy
-}
-
-bond_check_connection()
-{
- local msg=${1:-"check connection"}
- sleep 2
- ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
- check_err $? "${msg}: ping failed"
- ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
- check_err $? "${msg}: ping6 failed"
+ # Add the extra device as we use 3 down links for bond0
+ local i=2
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+ tc -n ${g_ns} qdisc add dev s${i} clsact
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 7d9e73a43a49..0c47faff9274 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -98,12 +98,12 @@ sb_occ_etc_check()
port_pool_test()
{
- local exp_max_occ=288
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
-t ip -q
devlink sb occupancy snapshot $DEVLINK_DEV
@@ -126,12 +126,12 @@ port_pool_test()
port_tc_ip_test()
{
- local exp_max_occ=288
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
-t ip -q
devlink sb occupancy snapshot $DEVLINK_DEV
@@ -154,16 +154,12 @@ port_tc_ip_test()
port_tc_arp_test()
{
- local exp_max_occ=96
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
- if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
- exp_max_occ=144
- fi
-
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+ $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q
devlink sb occupancy snapshot $DEVLINK_DEV
diff --git a/tools/testing/selftests/fchmodat2/.gitignore b/tools/testing/selftests/fchmodat2/.gitignore
new file mode 100644
index 000000000000..82a4846cbc4b
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/*_test
diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
new file mode 100644
index 000000000000..20839f8e43f2
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES)
+TEST_GEN_PROGS := fchmodat2_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
new file mode 100644
index 000000000000..e0319417124d
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags)
+{
+ int ret = syscall(__NR_fchmodat2, dfd, filename, mode, flags);
+
+ return ret >= 0 ? ret : -errno;
+}
+
+int setup_testdir(void)
+{
+ int dfd, ret;
+ char dirname[] = "/tmp/ksft-fchmodat2.XXXXXX";
+
+ /* Make the top-level directory. */
+ if (!mkdtemp(dirname))
+ ksft_exit_fail_msg("%s: failed to create tmpdir\n", __func__);
+
+ dfd = open(dirname, O_PATH | O_DIRECTORY);
+ if (dfd < 0)
+ ksft_exit_fail_msg("%s: failed to open tmpdir\n", __func__);
+
+ ret = openat(dfd, "regfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: failed to create file in tmpdir\n",
+ __func__);
+ close(ret);
+
+ ret = symlinkat("regfile", dfd, "symlink");
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: failed to create symlink in tmpdir\n",
+ __func__);
+
+ return dfd;
+}
+
+int expect_mode(int dfd, const char *filename, mode_t expect_mode)
+{
+ struct stat st;
+ int ret = fstatat(dfd, filename, &st, AT_SYMLINK_NOFOLLOW);
+
+ if (ret)
+ ksft_exit_fail_msg("%s: %s: fstatat failed\n",
+ __func__, filename);
+
+ return (st.st_mode == expect_mode);
+}
+
+void test_regfile(void)
+{
+ int dfd, ret;
+
+ dfd = setup_testdir();
+
+ ret = sys_fchmodat2(dfd, "regfile", 0640, 0);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+ __func__);
+
+ ret = sys_fchmodat2(dfd, "regfile", 0600, AT_SYMLINK_NOFOLLOW);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(AT_SYMLINK_NOFOLLOW) failed\n",
+ __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100600))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ ksft_test_result_pass("fchmodat2(regfile)\n");
+}
+
+void test_symlink(void)
+{
+ int dfd, ret;
+
+ dfd = setup_testdir();
+
+ ret = sys_fchmodat2(dfd, "symlink", 0640, 0);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+ __func__);
+
+ if (!expect_mode(dfd, "symlink", 0120777))
+ ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2\n",
+ __func__);
+
+ ret = sys_fchmodat2(dfd, "symlink", 0600, AT_SYMLINK_NOFOLLOW);
+
+ /*
+ * On certain filesystems (xfs or btrfs), chmod operation fails. So we
+ * first check the symlink target but if the operation fails we mark the
+ * test as skipped.
+ *
+ * https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html
+ */
+ if (ret == 0 && !expect_mode(dfd, "symlink", 0120600))
+ ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ if (ret != 0)
+ ksft_test_result_skip("fchmodat2(symlink)\n");
+ else
+ ksft_test_result_pass("fchmodat2(symlink)\n");
+}
+
+#define NUM_TESTS 2
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ test_regfile();
+ test_symlink();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/filelock/Makefile b/tools/testing/selftests/filelock/Makefile
new file mode 100644
index 000000000000..478e82f8b464
--- /dev/null
+++ b/tools/testing/selftests/filelock/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := ofdlocks
+
+include ../lib.mk
diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c
new file mode 100644
index 000000000000..a55b79810ab2
--- /dev/null
+++ b/tools/testing/selftests/filelock/ofdlocks.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include "../kselftest.h"
+
+static int lock_set(int fd, struct flock *fl)
+{
+ int ret;
+
+ fl->l_pid = 0; // needed for OFD locks
+ fl->l_whence = SEEK_SET;
+ ret = fcntl(fd, F_OFD_SETLK, fl);
+ if (ret)
+ perror("fcntl()");
+ return ret;
+}
+
+static int lock_get(int fd, struct flock *fl)
+{
+ int ret;
+
+ fl->l_pid = 0; // needed for OFD locks
+ fl->l_whence = SEEK_SET;
+ ret = fcntl(fd, F_OFD_GETLK, fl);
+ if (ret)
+ perror("fcntl()");
+ return ret;
+}
+
+int main(void)
+{
+ int rc;
+ struct flock fl, fl2;
+ int fd = open("/tmp/aa", O_RDWR | O_CREAT | O_EXCL, 0600);
+ int fd2 = open("/tmp/aa", O_RDONLY);
+
+ unlink("/tmp/aa");
+ assert(fd != -1);
+ assert(fd2 != -1);
+ ksft_print_msg("[INFO] opened fds %i %i\n", fd, fd2);
+
+ /* Set some read lock */
+ fl.l_type = F_RDLCK;
+ fl.l_start = 5;
+ fl.l_len = 3;
+ rc = lock_set(fd, &fl);
+ if (rc == 0) {
+ ksft_print_msg
+ ("[SUCCESS] set OFD read lock on first fd\n");
+ } else {
+ ksft_print_msg("[FAIL] to set OFD read lock on first fd\n");
+ return -1;
+ }
+ /* Make sure read locks do not conflict on different fds. */
+ fl.l_type = F_RDLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd2, &fl);
+ if (rc != 0)
+ return -1;
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg("[FAIL] read locks conflicted\n");
+ return -1;
+ }
+ /* Make sure read/write locks do conflict on different fds. */
+ fl.l_type = F_WRLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd2, &fl);
+ if (rc != 0)
+ return -1;
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[SUCCESS] read and write locks conflicted\n");
+ } else {
+ ksft_print_msg
+ ("[SUCCESS] read and write locks not conflicted\n");
+ return -1;
+ }
+ /* Get info about the lock on first fd. */
+ fl.l_type = F_UNLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd, &fl);
+ if (rc != 0) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+ return -1;
+ }
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[SUCCESS] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+ fl.l_type, fl.l_pid, fl.l_len);
+ } else {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK did not return lock info\n");
+ return -1;
+ }
+ /* Try the same but by locking everything by len==0. */
+ fl2.l_type = F_UNLCK;
+ fl2.l_start = 0;
+ fl2.l_len = 0;
+ rc = lock_get(fd, &fl2);
+ if (rc != 0) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+ return -1;
+ }
+ if (memcmp(&fl, &fl2, sizeof(fl))) {
+ ksft_print_msg
+ ("[FAIL] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+ fl.l_type, fl.l_pid, fl.l_len);
+ return -1;
+ }
+ ksft_print_msg("[SUCCESS] F_UNLCK with len==0 returned the same\n");
+ /* Get info about the lock on second fd - no locks on it. */
+ fl.l_type = F_UNLCK;
+ fl.l_start = 0;
+ fl.l_len = 0;
+ lock_get(fd2, &fl);
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK return lock info from another fd\n");
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
new file mode 100644
index 000000000000..63b76cf2a360
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Snapshot and tracing_cpumask
+# requires: trace_marker tracing_cpumask snapshot
+# flags: instance
+
+# This testcase is constrived to reproduce a problem that the cpu buffers
+# become unavailable which is due to 'record_disabled' of array_buffer and
+# max_buffer being messed up.
+
+# Store origin cpumask
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+# Stop tracing all cpu
+echo 0 > tracing_cpumask
+
+# Take a snapshot of the main buffer
+echo 1 > snapshot
+
+# Restore origin cpumask, note that there should be some cpus being traced
+echo ${ORIG_CPUMASK} > tracing_cpumask
+
+# Set tracing on
+echo 1 > tracing_on
+
+# Write a log into buffer
+echo "test input 1" > trace_marker
+
+# Ensure the log writed so that cpu buffers are still available
+grep -q "test input 1" trace
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
index b89de1771655..f34b14ef9781 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
@@ -13,7 +13,7 @@ if grep -qF "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README ; the
FPROBES=yes
fi
-if [ -z "$KPROBES" -a "$FPROBES" ] ; then
+if [ -z "$KPROBES" -a -z "$FPROBES" ] ; then
exit_unsupported
fi
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 07732a157ccd..eb1ff597bcca 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -362,8 +362,10 @@ static inline void read_stats_header(int stats_fd, struct kvm_stats_header *head
{
ssize_t ret;
- ret = read(stats_fd, header, sizeof(*header));
- TEST_ASSERT(ret == sizeof(*header), "Read stats header");
+ ret = pread(stats_fd, header, sizeof(*header), 0);
+ TEST_ASSERT(ret == sizeof(*header),
+ "Failed to read '%lu' header bytes, ret = '%ld'",
+ sizeof(*header), ret);
}
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index a7001e29dc06..698c1cfa3111 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -43,8 +43,10 @@ static void stats_test(int stats_fd)
id = malloc(header.name_size);
TEST_ASSERT(id, "Allocate memory for id string");
- ret = read(stats_fd, id, header.name_size);
- TEST_ASSERT(ret == header.name_size, "Read id string");
+ ret = pread(stats_fd, id, header.name_size, sizeof(header));
+ TEST_ASSERT(ret == header.name_size,
+ "Expected header size '%u', read '%lu' bytes",
+ header.name_size, ret);
/* Check id string, that should start with "kvm" */
TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size,
@@ -165,23 +167,7 @@ static void stats_test(int stats_fd)
free(stats_data);
free(stats_desc);
free(id);
-}
-
-
-static void vm_stats_test(struct kvm_vm *vm)
-{
- int stats_fd = vm_get_stats_fd(vm);
-
- stats_test(stats_fd);
- close(stats_fd);
- TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
-}
-
-static void vcpu_stats_test(struct kvm_vcpu *vcpu)
-{
- int stats_fd = vcpu_get_stats_fd(vcpu);
- stats_test(stats_fd);
close(stats_fd);
TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
}
@@ -199,6 +185,7 @@ static void vcpu_stats_test(struct kvm_vcpu *vcpu)
int main(int argc, char *argv[])
{
+ int vm_stats_fds, *vcpu_stats_fds;
int i, j;
struct kvm_vcpu **vcpus;
struct kvm_vm **vms;
@@ -231,23 +218,58 @@ int main(int argc, char *argv[])
vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu);
TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers");
+ /*
+ * Not per-VM as the array is populated, used, and invalidated within a
+ * single for-loop iteration.
+ */
+ vcpu_stats_fds = calloc(max_vm, sizeof(*vcpu_stats_fds));
+ TEST_ASSERT(vcpu_stats_fds, "Allocate memory for VM stats fds");
+
for (i = 0; i < max_vm; ++i) {
vms[i] = vm_create_barebones();
for (j = 0; j < max_vcpu; ++j)
vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j);
}
- /* Check stats read for every VM and VCPU */
+ /*
+ * Check stats read for every VM and vCPU, with a variety of flavors.
+ * Note, stats_test() closes the passed in stats fd.
+ */
for (i = 0; i < max_vm; ++i) {
- vm_stats_test(vms[i]);
+ /*
+ * Verify that creating multiple userspace references to a
+ * single stats file works and doesn't cause explosions.
+ */
+ vm_stats_fds = vm_get_stats_fd(vms[i]);
+ stats_test(dup(vm_stats_fds));
+
+ /* Verify userspace can instantiate multiple stats files. */
+ stats_test(vm_get_stats_fd(vms[i]));
+
+ for (j = 0; j < max_vcpu; ++j) {
+ vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
+ stats_test(dup(vcpu_stats_fds[j]));
+ stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
+ }
+
+ /*
+ * Close the VM fd and redo the stats tests. KVM should gift a
+ * reference (to the VM) to each stats fd, i.e. stats should
+ * still be accessible even after userspace has put its last
+ * _direct_ reference to the VM.
+ */
+ kvm_vm_free(vms[i]);
+
+ stats_test(vm_stats_fds);
for (j = 0; j < max_vcpu; ++j)
- vcpu_stats_test(vcpus[i * max_vcpu + j]);
+ stats_test(vcpu_stats_fds[j]);
+
ksft_test_result_pass("vm%i\n", i);
}
- for (i = 0; i < max_vm; ++i)
- kvm_vm_free(vms[i]);
free(vms);
+ free(vcpus);
+ free(vcpu_stats_fds);
ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index a284fcef6ed7..3610981d9162 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,26 +22,25 @@
#include "kvm_util.h"
#include "processor.h"
-static void test_cr4_feature_bit(struct kvm_vcpu *vcpu, struct kvm_sregs *orig,
- uint64_t feature_bit)
-{
- struct kvm_sregs sregs;
- int rc;
-
- /* Skip the sub-test, the feature is supported. */
- if (orig->cr4 & feature_bit)
- return;
-
- memcpy(&sregs, orig, sizeof(sregs));
- sregs.cr4 |= feature_bit;
-
- rc = _vcpu_sregs_set(vcpu, &sregs);
- TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);
-
- /* Sanity check that KVM didn't change anything. */
- vcpu_sregs_get(vcpu, &sregs);
- TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
-}
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \
+do { \
+ struct kvm_sregs new; \
+ int rc; \
+ \
+ /* Skip the sub-test, the feature/bit is supported. */ \
+ if (orig.cr & bit) \
+ break; \
+ \
+ memcpy(&new, &orig, sizeof(sregs)); \
+ new.cr |= bit; \
+ \
+ rc = _vcpu_sregs_set(vcpu, &new); \
+ TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \
+ \
+ /* Sanity check that KVM didn't change anything. */ \
+ vcpu_sregs_get(vcpu, &new); \
+ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
+} while (0)
static uint64_t calc_supported_cr4_feature_bits(void)
{
@@ -80,7 +79,7 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t cr4;
- int rc;
+ int rc, i;
/*
* Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
@@ -92,6 +91,7 @@ int main(int argc, char *argv[])
vcpu_sregs_get(vcpu, &sregs);
+ sregs.cr0 = 0;
sregs.cr4 |= calc_supported_cr4_feature_bits();
cr4 = sregs.cr4;
@@ -103,16 +103,24 @@ int main(int argc, char *argv[])
sregs.cr4, cr4);
/* Verify all unsupported features are rejected by KVM. */
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_UMIP);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_LA57);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_VMXE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMXE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_FSGSBASE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PCIDE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_OSXSAVE);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMEP);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMAP);
- test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PKE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
+ for (i = 32; i < 64; i++)
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+ /* NW without CD is illegal, as is PG without PE. */
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+
kvm_vm_free(vm);
/* Create a "real" VM and verify APIC_BASE can be set. */
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
index 4c88238fc8f0..e949a43a6145 100644
--- a/tools/testing/selftests/mincore/mincore_selftest.c
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -150,8 +150,8 @@ TEST(check_huge_pages)
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
if (addr == MAP_FAILED) {
- if (errno == ENOMEM)
- SKIP(return, "No huge pages available.");
+ if (errno == ENOMEM || errno == EINVAL)
+ SKIP(return, "No huge pages available or CONFIG_HUGETLB_PAGE disabled.");
else
TH_LOG("mmap error: %s", strerror(errno));
}
diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index a5cb4b09a46c..a5cb4b09a46c 100644..100755
--- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
diff --git a/tools/testing/selftests/mm/check_config.sh b/tools/testing/selftests/mm/check_config.sh
index 3954f4746161..3954f4746161 100644..100755
--- a/tools/testing/selftests/mm/check_config.sh
+++ b/tools/testing/selftests/mm/check_config.sh
diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 4adaad1b822f..20294553a5dd 100644
--- a/tools/testing/selftests/mm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -57,9 +57,14 @@ enum {
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
/* Just the flags we need, copied from mm.h: */
+
+#ifndef FOLL_WRITE
#define FOLL_WRITE 0x01 /* check pte is writable */
-#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */
+#endif
+#ifndef FOLL_LONGTERM
+#define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */
+#endif
FIXTURE(hmm)
{
int fd;
diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index bf2d2a684edf..bf2d2a684edf 100644..100755
--- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
index 435acebdc325..380b691d3eb9 100644
--- a/tools/testing/selftests/mm/ksm_tests.c
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -831,6 +831,7 @@ int main(int argc, char *argv[])
printf("Size must be greater than 0\n");
return KSFT_FAIL;
}
+ break;
case 't':
{
int tmp = atoi(optarg);
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
index 6d71d972997b..301abb99e027 100644
--- a/tools/testing/selftests/mm/mkdirty.c
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -321,8 +321,8 @@ close_uffd:
munmap:
munmap(dst, pagesize);
free(src);
-#endif /* __NR_userfaultfd */
}
+#endif /* __NR_userfaultfd */
int main(void)
{
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 3f26f6e15b2a..3f26f6e15b2a 100644..100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
diff --git a/tools/testing/selftests/mm/test_hmm.sh b/tools/testing/selftests/mm/test_hmm.sh
index 46e19b5d648d..46e19b5d648d 100644..100755
--- a/tools/testing/selftests/mm/test_hmm.sh
+++ b/tools/testing/selftests/mm/test_hmm.sh
diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh
index d73b846736f1..d73b846736f1 100644..100755
--- a/tools/testing/selftests/mm/test_vmalloc.sh
+++ b/tools/testing/selftests/mm/test_vmalloc.sh
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index 45cae7cab27e..45cae7cab27e 100644..100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh
index 70a02301f4c2..70a02301f4c2 100644..100755
--- a/tools/testing/selftests/mm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 501854a89cc0..2f9d378edec3 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -15,6 +15,7 @@ ip_local_port_range
ipsec
ipv6_flowlabel
ipv6_flowlabel_mgr
+log.txt
msg_zerocopy
nettest
psock_fanout
@@ -45,6 +46,7 @@ test_unix_oob
timestamping
tls
toeplitz
+tools
tun
txring_overwrite
txtimestamp
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 0f5e88c8f4ff..df8d90b51867 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -1981,6 +1981,11 @@ basic()
run_cmd "$IP link set dev lo up"
+ # Dump should not loop endlessly when maximum nexthop ID is configured.
+ run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
+ run_cmd "timeout 5 $IP nexthop"
+ log_test $? 0 "Maximum nexthop ID dump"
+
#
# groups
#
@@ -2201,6 +2206,11 @@ basic_res()
run_cmd "$IP nexthop bucket list fdb"
log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+ # Dump should not loop endlessly when maximum nexthop ID is configured.
+ run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
+ run_cmd "timeout 5 $IP nexthop bucket"
+ log_test $? 0 "Maximum nexthop ID dump"
+
#
# resilient nexthop buckets get requests
#
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index ae3f9462a2b6..d0c6c499d5da 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -617,7 +617,7 @@ __cfg_test_port_ip_sg()
grep -q "permanent"
check_err $? "Entry not added as \"permanent\" when should"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_err $? "\"permanent\" entry has a pending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -626,7 +626,7 @@ __cfg_test_port_ip_sg()
grep -q "temp"
check_err $? "Entry not added as \"temp\" when should"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_fail $? "\"temp\" entry has an unpending group timer"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -659,7 +659,7 @@ __cfg_test_port_ip_sg()
grep -q "permanent"
check_err $? "Entry not marked as \"permanent\" after replace"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_err $? "Entry has a pending group timer after replace"
bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
@@ -667,7 +667,7 @@ __cfg_test_port_ip_sg()
grep -q "temp"
check_err $? "Entry not marked as \"temp\" after replace"
bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \
- grep -q "0.00"
+ grep -q " 0.00"
check_fail $? "Entry has an unpending group timer after replace"
bridge mdb del dev br0 port $swp1 $grp_key vid 10
@@ -850,6 +850,7 @@ cfg_test()
__fwd_test_host_ip()
{
local grp=$1; shift
+ local dmac=$1; shift
local src=$1; shift
local mode=$1; shift
local name
@@ -872,27 +873,27 @@ __fwd_test_host_ip()
# Packet should only be flooded to multicast router ports when there is
# no matching MDB entry. The bridge is not configured as a multicast
# router port.
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 0
check_err $? "Packet locally received after flood"
# Install a regular port group entry and expect the packet to not be
# locally received.
bridge mdb add dev br0 port $swp2 grp $grp temp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 0
check_err $? "Packet locally received after installing a regular entry"
# Add a host entry and expect the packet to be locally received.
bridge mdb add dev br0 port br0 grp $grp temp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 1
check_err $? "Packet not locally received after adding a host entry"
# Remove the host entry and expect the packet to not be locally
# received.
bridge mdb del dev br0 port br0 grp $grp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
tc_check_packets "dev br0 ingress" 1 1
check_err $? "Packet locally received after removing a host entry"
@@ -905,8 +906,8 @@ __fwd_test_host_ip()
fwd_test_host_ip()
{
- __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4"
- __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6"
+ __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4"
+ __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6"
}
fwd_test_host_l2()
@@ -966,6 +967,7 @@ fwd_test_host()
__fwd_test_port_ip()
{
local grp=$1; shift
+ local dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mode=$1; shift
@@ -999,43 +1001,43 @@ __fwd_test_port_ip()
vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
src_ip $invalid_src action drop
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 0
check_err $? "Packet from valid source received on H2 before adding entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 0
check_err $? "Packet from invalid source received on H2 before adding entry"
bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
filter_mode $filter_mode source_list $src_list
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 1
check_err $? "Packet from valid source not received on H2 after adding entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 0
check_err $? "Packet from invalid source received on H2 after adding entry"
bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \
filter_mode exclude
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 2
check_err $? "Packet from valid source not received on H2 after allowing all sources"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 1
check_err $? "Packet from invalid source not received on H2 after allowing all sources"
bridge mdb del dev br0 port $swp2 grp $grp vid 10
- $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 1 2
check_err $? "Packet from valid source received on H2 after deleting entry"
- $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
tc_check_packets "dev $h2 ingress" 2 1
check_err $? "Packet from invalid source received on H2 after deleting entry"
@@ -1047,11 +1049,11 @@ __fwd_test_port_ip()
fwd_test_port_ip()
{
- __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude"
- __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+ __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude"
+ __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
"exclude"
- __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include"
- __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+ __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include"
+ __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
"include"
}
@@ -1127,7 +1129,7 @@ ctrl_igmpv3_is_in_test()
filter_mode include source_list 192.0.2.1
# IS_IN ( 192.0.2.2 )
- $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2
@@ -1140,7 +1142,7 @@ ctrl_igmpv3_is_in_test()
filter_mode include source_list 192.0.2.1
# IS_IN ( 192.0.2.2 )
- $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
-t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \
@@ -1167,7 +1169,7 @@ ctrl_mldv2_is_in_test()
# IS_IN ( 2001:db8:1::2 )
local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
- $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+ $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \
@@ -1181,7 +1183,7 @@ ctrl_mldv2_is_in_test()
filter_mode include source_list 2001:db8:1::1
# IS_IN ( 2001:db8:1::2 )
- $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \
+ $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
-t ip hop=1,next=0,p="$p" -q
bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \
@@ -1206,6 +1208,11 @@ ctrl_test()
ctrl_mldv2_is_in_test
}
+if ! bridge mdb help 2>&1 | grep -q "replace"; then
+ echo "SKIP: iproute2 too old, missing bridge mdb replace support"
+ exit $ksft_skip
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
index ae255b662ba3..3da9d93ab36f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -252,7 +252,8 @@ ctl4_entries_add()
local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
local peer=$(locus_dev_peer $locus)
local GRP=239.1.1.${grp}
- $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \
+ local dmac=01:00:5e:01:01:$(printf "%02x" $grp)
+ $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \
-t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
sleep 1
@@ -272,7 +273,8 @@ ctl4_entries_del()
local peer=$(locus_dev_peer $locus)
local GRP=239.1.1.${grp}
- $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+ local dmac=01:00:5e:00:00:02
+ $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \
-t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
sleep 1
! bridge mdb show dev br0 | grep -q $GRP
@@ -289,8 +291,10 @@ ctl6_entries_add()
local peer=$(locus_dev_peer $locus)
local SIP=fe80::1
local GRP=ff0e::${grp}
+ local dmac=33:33:00:00:00:$(printf "%02x" $grp)
local p=$(mldv2_is_in_get $SIP $GRP $IPs)
- $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+ $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+ -t ip hop=1,next=0,p="$p" -q
sleep 1
local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
@@ -310,8 +314,10 @@ ctl6_entries_del()
local peer=$(locus_dev_peer $locus)
local SIP=fe80::1
local GRP=ff0e::${grp}
+ local dmac=33:33:00:00:00:$(printf "%02x" $grp)
local p=$(mldv1_done_get $SIP $GRP)
- $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q
+ $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+ -t ip hop=1,next=0,p="$p" -q
sleep 1
! bridge mdb show dev br0 | grep -q $GRP
}
@@ -1328,6 +1334,11 @@ test_8021qvs()
switch_destroy
}
+if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
+ echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
+ exit $ksft_skip
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index dbb9fcf759e0..aa2eafb7b243 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -286,6 +286,8 @@ different_speeds_autoneg_on()
ethtool -s $h1 autoneg on
}
+skip_on_veth
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
index 072faa77f53b..17f89c3b7c02 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -108,6 +108,8 @@ no_cable()
ip link set dev $swp3 down
}
+skip_on_veth
+
setup_prepare
tests_run
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
index c580ad623848..39e736f30322 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
@@ -258,11 +258,6 @@ h2_destroy()
setup_prepare()
{
- check_ethtool_mm_support
- check_tc_fp_support
- require_command lldptool
- bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
-
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
@@ -278,6 +273,19 @@ cleanup()
h1_destroy
}
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+ ethtool --show-mm $netif 2>&1 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: $netif does not support MAC Merge"
+ exit $ksft_skip
+ fi
+done
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
index eb9ec4a68f84..7594bbb49029 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
@@ -99,6 +99,8 @@ test_stats_rx()
test_stats g2a rx
}
+skip_on_veth
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
index 9f5b3e2e5e95..49fa94b53a1c 100755
--- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -14,6 +14,8 @@ ALL_TESTS="
NUM_NETIFS=4
source lib.sh
+require_command $TROUTE6
+
h1_create()
{
simple_if_init $h1 2001:1:1::2/64
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 9ddb68dd6a08..f69015bf2dea 100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -30,6 +30,7 @@ REQUIRE_MZ=${REQUIRE_MZ:=yes}
REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
+TROUTE6=${TROUTE6:=traceroute6}
relative_path="${BASH_SOURCE%/*}"
if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -163,6 +164,17 @@ check_port_mab_support()
fi
}
+skip_on_veth()
+{
+ local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
+ jq -r '.[].linkinfo.info_kind')
+
+ if [[ $kind == veth ]]; then
+ echo "SKIP: Test cannot be run with veth pairs"
+ exit $ksft_skip
+ fi
+}
+
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
exit $ksft_skip
@@ -225,6 +237,11 @@ create_netif_veth()
for ((i = 1; i <= NUM_NETIFS; ++i)); do
local j=$((i+1))
+ if [ -z ${NETIFS[p$i]} ]; then
+ echo "SKIP: Cannot create interface. Name not specified"
+ exit $ksft_skip
+ fi
+
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
ip link add ${NETIFS[p$i]} type veth \
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
index aff88f78e339..5ea9d63915f7 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -72,7 +72,8 @@ test_span_gre_ttl()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ mirror_install $swp1 ingress $tundev \
+ "prot ip flower $tcflags ip_prot icmp"
tc filter add dev $h3 ingress pref 77 prot $prot \
flower skip_hw ip_ttl 50 action pass
diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index a96cff8e7219..b0f5e55d2d0b 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -9,6 +9,8 @@ NUM_NETIFS=4
source tc_common.sh
source lib.sh
+require_command ncat
+
tcflags="skip_hw"
h1_create()
@@ -220,9 +222,9 @@ mirred_egress_to_ingress_tcp_test()
ip_proto icmp \
action drop
- ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
+ ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
local rpid=$!
- ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
+ ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
wait -n $rpid
cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
check_err $? "server output check failed"
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 683711f41aa9..b1daad19b01e 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -52,8 +52,8 @@ match_dst_mac_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched on a wrong filter"
- tc_check_packets "dev $h2 ingress" 102 1
- check_err $? "Did not match on correct filter"
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_fail $? "Did not match on correct filter"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -78,8 +78,8 @@ match_src_mac_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched on a wrong filter"
- tc_check_packets "dev $h2 ingress" 102 1
- check_err $? "Did not match on correct filter"
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_fail $? "Did not match on correct filter"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
index e22c2d28b6eb..20a7cb7222b8 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -127,6 +127,7 @@ test_l2_miss_multicast_common()
local proto=$1; shift
local sip=$1; shift
local dip=$1; shift
+ local dmac=$1; shift
local mode=$1; shift
local name=$1; shift
@@ -142,7 +143,7 @@ test_l2_miss_multicast_common()
action pass
# Before adding MDB entry.
- $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
tc_check_packets "dev $swp2 egress" 101 1
check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
@@ -153,7 +154,7 @@ test_l2_miss_multicast_common()
# Adding MDB entry.
bridge mdb replace dev br1 port $swp2 grp $dip permanent
- $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
tc_check_packets "dev $swp2 egress" 101 1
check_err $? "Unregistered multicast filter was hit after adding MDB entry"
@@ -164,7 +165,7 @@ test_l2_miss_multicast_common()
# Deleting MDB entry.
bridge mdb del dev br1 port $swp2 grp $dip
- $MZ $mode $h1 -t ip -A $sip -B $dip -c 1 -p 100 -q
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
tc_check_packets "dev $swp2 egress" 101 2
check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
@@ -183,10 +184,11 @@ test_l2_miss_multicast_ipv4()
local proto="ipv4"
local sip=192.0.2.1
local dip=239.1.1.1
+ local dmac=01:00:5e:01:01:01
local mode="-4"
local name="IPv4"
- test_l2_miss_multicast_common $proto $sip $dip $mode $name
+ test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
}
test_l2_miss_multicast_ipv6()
@@ -194,10 +196,11 @@ test_l2_miss_multicast_ipv6()
local proto="ipv6"
local sip=2001:db8:1::1
local dip=ff0e::1
+ local dmac=33:33:00:00:00:01
local mode="-6"
local name="IPv6"
- test_l2_miss_multicast_common $proto $sip $dip $mode $name
+ test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
}
test_l2_miss_multicast()
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
index 5ac184d51809..5a5dd9034819 100755
--- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -104,11 +104,14 @@ tunnel_key_nofrag_test()
local i
tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \
- flower ip_flags nofrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags nofrag action drop
tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \
- flower ip_flags firstfrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags firstfrag action drop
tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \
- flower ip_flags nofirstfrag action drop
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags nofirstfrag action drop
# test 'nofrag' set
tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index e6c9d5451c5b..d01b73a8ed0f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -162,9 +162,7 @@ check_tools()
elif ! iptables -V &> /dev/null; then
echo "SKIP: Could not run all tests without iptables tool"
exit $ksft_skip
- fi
-
- if ! ip6tables -V &> /dev/null; then
+ elif ! ip6tables -V &> /dev/null; then
echo "SKIP: Could not run all tests without ip6tables tool"
exit $ksft_skip
fi
@@ -707,6 +705,7 @@ pm_nl_del_endpoint()
local addr=$3
if [ $ip_mptcp -eq 1 ]; then
+ [ $id -ne 0 ] && addr=''
ip -n $ns mptcp endpoint delete id $id $addr
else
ip netns exec $ns ./pm_nl_ctl del $id $addr
@@ -797,10 +796,11 @@ pm_nl_check_endpoint()
fi
if [ $ip_mptcp -eq 1 ]; then
+ # get line and trim trailing whitespace
line=$(ip -n $ns mptcp endpoint show $id)
+ line="${line% }"
# the dump order is: address id flags port dev
- expected_line="$addr"
- [ -n "$addr" ] && expected_line="$expected_line $addr"
+ [ -n "$addr" ] && expected_line="$addr"
expected_line="$expected_line $id"
[ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
[ -n "$dev" ] && expected_line="$expected_line $dev"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index dfe3d287f01d..f838dd370f6a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -361,6 +361,7 @@ err_buf=
tcpdump_pids=
nettest_pids=
socat_pids=
+tmpoutfile=
err() {
err_buf="${err_buf}${1}
@@ -951,6 +952,7 @@ cleanup() {
ip link del veth_A-R1 2>/dev/null
ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
+ rm -f "$tmpoutfile"
}
mtu() {
@@ -1328,6 +1330,39 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+
+ tmpoutfile=$(mktemp)
+
+ # Flush Exceptions, retry with TCP
+ run_cmd ${ns_a} ip route flush cached ${dst}
+ run_cmd ${ns_b} ip route flush cached ${dst}
+ run_cmd ${ns_c} ip route flush cached ${dst}
+
+ for target in "${ns_a}" "${ns_c}" ; do
+ if [ ${family} -eq 4 ]; then
+ TCPDST=TCP:${dst}:50000
+ else
+ TCPDST="TCP:[${dst}]:50000"
+ fi
+ ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+
+ sleep 1
+
+ dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+
+ size=$(du -sb $tmpoutfile)
+ size=${size%%/tmp/*}
+
+ [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1
+ done
+
+ rm -f "$tmpoutfile"
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface"
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "tcp exceeding link layer MTU on locally bridged ${type} interface"
}
test_pmtu_ipv4_br_vxlan4_exception() {
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index 0e04f9fef986..a14818164102 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -159,7 +159,7 @@ void create_clients(struct __test_metadata *_metadata,
/* Make sure SYN will be processed on the i-th CPU
* and finally distributed to the i-th listener.
*/
- sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
+ ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
ASSERT_EQ(ret, 0);
for (j = 0; j < CLIENT_PER_SERVER; j++) {
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 486334981e60..970df9e55131 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -577,7 +577,6 @@ int run_syscall(int min, int max)
CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); break;
CASE_TEST(chdir_dot); EXPECT_SYSZR(1, chdir(".")); break;
CASE_TEST(chdir_blah); EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break;
- CASE_TEST(chmod_net); EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break;
CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break;
CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break;
CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break;
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
index 9dd629cc86aa..f4b3d5c9af5b 100644
--- a/tools/testing/selftests/riscv/Makefile
+++ b/tools/testing/selftests/riscv/Makefile
@@ -43,7 +43,7 @@ run_tests: all
done
# Avoid any output on non riscv on emit_tests
-emit_tests: all
+emit_tests:
@for DIR in $(RISCV_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
index 5cbc392944a6..2c0d2b1126c1 100644
--- a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include <sys/prctl.h>
-
#define THIS_PROGRAM "./vstate_exec_nolibc"
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index b357ba24af06..7a957c7d459a 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -4,8 +4,10 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
CLANG_FLAGS += -no-integrated-as
endif
+top_srcdir = ../../../..
+
CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \
- $(CLANG_FLAGS)
+ $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
LDLIBS += -lpthread -ldl
# Own dependencies because we only want to build against 1st prerequisite, but
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 4e4aa006004c..96e812bdf8a4 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -31,12 +31,22 @@
#include <sys/auxv.h>
#include <linux/auxvec.h>
+#include <linux/compiler.h>
+
#include "../kselftest.h"
#include "rseq.h"
-static const ptrdiff_t *libc_rseq_offset_p;
-static const unsigned int *libc_rseq_size_p;
-static const unsigned int *libc_rseq_flags_p;
+/*
+ * Define weak versions to play nice with binaries that are statically linked
+ * against a libc that doesn't support registering its own rseq.
+ */
+__weak ptrdiff_t __rseq_offset;
+__weak unsigned int __rseq_size;
+__weak unsigned int __rseq_flags;
+
+static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
+static const unsigned int *libc_rseq_size_p = &__rseq_size;
+static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
/* Offset from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;
@@ -155,9 +165,17 @@ unsigned int get_rseq_feature_size(void)
static __attribute__((constructor))
void rseq_init(void)
{
- libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
- libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
- libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ /*
+ * If the libc's registered rseq size isn't already valid, it may be
+ * because the binary is dynamically linked and not necessarily due to
+ * libc not having registered a restartable sequence. Try to find the
+ * symbols if that's the case.
+ */
+ if (!*libc_rseq_size_p) {
+ libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+ libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+ libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ }
if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
*libc_rseq_size_p != 0) {
/* rseq registration owned by glibc */
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 43ec36b179dc..38f651469968 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -2184,6 +2184,9 @@ FIXTURE_TEARDOWN(TRACE_syscall)
TEST(negative_ENOSYS)
{
+#if defined(__arm__)
+ SKIP(return, "arm32 does not support calling syscall -1");
+#endif
/*
* There should be no difference between an "internal" skip
* and userspace asking for syscall "-1".
@@ -3072,7 +3075,8 @@ TEST(syscall_restart)
timeout.tv_sec = 1;
errno = 0;
EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
- TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+ TH_LOG("Call to nanosleep() failed (errno %d: %s)",
+ errno, strerror(errno));
}
/* Read final sync from parent. */
@@ -3908,6 +3912,9 @@ TEST(user_notification_filter_empty)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@@ -3962,6 +3969,9 @@ TEST(user_notification_filter_empty_threaded)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@@ -4255,6 +4265,61 @@ TEST(user_notification_addfd_rlimit)
close(memfd);
}
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+#endif
+
+TEST(user_notification_sync)
+{
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ int status, listener;
+ pid_t pid;
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ /* Try to set invalid flags. */
+ EXPECT_SYSCALL_RETURN(-EINVAL,
+ ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+ SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
+ _exit(1);
+ }
+ _exit(0);
+ }
+
+ req.pid = 0;
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ ASSERT_EQ(req.data.nr, __NR_getppid);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ resp.flags = 0;
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_EQ(status, 0);
+}
+
+
/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
FIXTURE(O_SUSPEND_SECCOMP) {
pid_t pid;
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 6e73b09c20c8..71706197ba0f 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -5,6 +5,8 @@ CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_LABELS=y
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_FLOW_TABLE=m
CONFIG_NF_NAT=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
diff --git a/tools/testing/selftests/tc-testing/settings b/tools/testing/selftests/tc-testing/settings
new file mode 100644
index 000000000000..e2206265f67c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/settings
@@ -0,0 +1 @@
+timeout=900
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index a44455372646..08d4861c2e78 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -131,5 +131,30 @@
"teardown": [
"echo \"1\" > /sys/bus/netdevsim/del_device"
]
+ },
+ {
+ "id": "3e1e",
+ "name": "Add taprio Qdisc with an invalid cycle-time",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI cycle-time 4294967296 || /bin/true",
+ "$IP link set dev $ETH up",
+ "$IP addr add 10.10.10.10/24 dev $ETH"
+ ],
+ "cmdUnderTest": "/bin/true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
}
]
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index 5beceeed0d11..6eba203f9da7 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -129,8 +129,7 @@ int main(int argc, char **argv)
printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
/* Avg the two actual freq samples adjtimex gave us */
- ppm = (tx1.freq + tx2.freq) * 1000 / 2;
- ppm = (long long)tx1.freq * 1000;
+ ppm = (long long)(tx1.freq + tx2.freq) * 1000 / 2;
ppm = shift_right(ppm, 16);
printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index 43a254f0e14d..21a98ba565ab 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -8,5 +8,5 @@ vsock_perf: vsock_perf.o
CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
.PHONY: all test clean
clean:
- ${RM} *.o *.d vsock_test vsock_diag_test
+ ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf
-include *.d
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dfbaafbe3a00..5bbb5612b207 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -4035,8 +4035,17 @@ static ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer,
sizeof(vcpu->stat), user_buffer, size, offset);
}
+static int kvm_vcpu_stats_release(struct inode *inode, struct file *file)
+{
+ struct kvm_vcpu *vcpu = file->private_data;
+
+ kvm_put_kvm(vcpu->kvm);
+ return 0;
+}
+
static const struct file_operations kvm_vcpu_stats_fops = {
.read = kvm_vcpu_stats_read,
+ .release = kvm_vcpu_stats_release,
.llseek = noop_llseek,
};
@@ -4057,6 +4066,9 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
put_unused_fd(fd);
return PTR_ERR(file);
}
+
+ kvm_get_kvm(vcpu->kvm);
+
file->f_mode |= FMODE_PREAD;
fd_install(fd, file);
@@ -4701,8 +4713,17 @@ static ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer,
sizeof(kvm->stat), user_buffer, size, offset);
}
+static int kvm_vm_stats_release(struct inode *inode, struct file *file)
+{
+ struct kvm *kvm = file->private_data;
+
+ kvm_put_kvm(kvm);
+ return 0;
+}
+
static const struct file_operations kvm_vm_stats_fops = {
.read = kvm_vm_stats_read,
+ .release = kvm_vm_stats_release,
.llseek = noop_llseek,
};
@@ -4721,6 +4742,9 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
put_unused_fd(fd);
return PTR_ERR(file);
}
+
+ kvm_get_kvm(kvm);
+
file->f_mode |= FMODE_PREAD;
fd_install(fd, file);